Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| SHA1 Hash: | ee694a32e45ed1e27be7ad9a619de9ab94a8a172 |
|---|---|
| Date: | 2012-11-17 10:51:01 |
| User: | dan |
| Comment: | Avoid writing pages to disk out of sequential order (one problem still remains). |
Tags And Properties
- branch=trunk inherited from [84d5dea8fd]
- sym-trunk inherited from [84d5dea8fd]
Changes
Changes to lsm-test/lsmtest_main.c
493 lsm_db *pLsm; 493 lsm_db *pLsm; 494 pLsm = tdb_lsm(pDb); 494 pLsm = tdb_lsm(pDb); 495 if( pLsm ){ 495 if( pLsm ){ 496 tdb_lsm_config_str(pDb, "mmap=1 autowork=1 automerge=4 worker_automerge=4"); 496 tdb_lsm_config_str(pDb, "mmap=1 autowork=1 automerge=4 worker_automerge=4"); 497 } 497 } 498 return pLsm; 498 return pLsm; 499 } 499 } > 500 > 501 typedef struct WriteHookEvent WriteHookEvent; > 502 struct WriteHookEvent { > 503 i64 iOff; > 504 int nData; > 505 int nUs; > 506 }; > 507 WriteHookEvent prev = {0, 0, 0}; > 508 > 509 static void flushPrev(FILE *pOut){ > 510 if( prev.nData ){ > 511 fprintf(pOut, "w %s %lld %d %d\n", "d", prev.iOff, prev.nData, prev.nUs); > 512 prev.nData = 0; > 513 } > 514 } > 515 > 516 static void do_speed_write_hook2( > 517 void *pCtx, > 518 int bLog, > 519 i64 iOff, > 520 int nData, > 521 int nUs > 522 ){ > 523 FILE *pOut = (FILE *)pCtx; > 524 if( bLog ) return; > 525 > 526 if( prev.nData && nData && iOff==prev.iOff+prev.nData ){ > 527 prev.nData += nData; > 528 prev.nUs += nUs; > 529 }else{ > 530 flushPrev(pOut); > 531 if( nData==0 ){ > 532 fprintf(pOut, "s %s 0 0 %d\n", (bLog ? "l" : "d"), nUs); > 533 }else{ > 534 prev.iOff = iOff; > 535 prev.nData = nData; > 536 prev.nUs = nUs; > 537 } > 538 } > 539 } 500 540 501 #define ST_REPEAT 0 541 #define ST_REPEAT 0 502 #define ST_WRITE 1 542 #define ST_WRITE 1 503 #define ST_PAUSE 2 543 #define ST_PAUSE 2 504 #define ST_FETCH 3 544 #define ST_FETCH 3 505 #define ST_SCAN 4 545 #define ST_SCAN 4 506 #define ST_NSCAN 5 546 #define ST_NSCAN 5 ................................................................................................................................................................................ 556 int nContent = 0; 596 int nContent = 0; 557 597 558 TestDb *pDb; 598 TestDb *pDb; 559 Datasource *pData; 599 Datasource *pData; 560 DatasourceDefn defn = { TEST_DATASOURCE_RANDOM, 0, 0, 0, 0 }; 600 DatasourceDefn defn = { TEST_DATASOURCE_RANDOM, 0, 0, 0, 0 }; 561 char *zSystem = ""; 601 char *zSystem = ""; 562 int bLsm = 1; 602 int bLsm = 1; > 603 FILE *pLog = 0; 563 604 564 #ifdef NDEBUG 605 #ifdef NDEBUG 565 /* If NDEBUG is defined, disable the dynamic memory related checks in 606 /* If NDEBUG is defined, disable the dynamic memory related checks in 566 ** lsmtest_mem.c. They slow things down. */ 607 ** lsmtest_mem.c. They slow things down. */ 567 testMallocUninstall(tdb_lsm_env()); 608 testMallocUninstall(tdb_lsm_env()); 568 #endif 609 #endif 569 610 ................................................................................................................................................................................ 625 pDb = testOpen(zSystem, !bReadonly, &rc); 666 pDb = testOpen(zSystem, !bReadonly, &rc); 626 } 667 } 627 if( rc!=0 ) return rc; 668 if( rc!=0 ) return rc; 628 if( bReadonly ){ 669 if( bReadonly ){ 629 nContent = testCountDatabase(pDb); 670 nContent = testCountDatabase(pDb); 630 } 671 } 631 672 > 673 #if 0 > 674 pLog = fopen("/tmp/speed.log", "w"); > 675 tdb_lsm_write_hook(pDb, do_speed_write_hook2, (void *)pLog); > 676 #endif > 677 632 for(i=0; i<aParam[ST_REPEAT] && rc==0; i++){ 678 for(i=0; i<aParam[ST_REPEAT] && rc==0; i++){ 633 int msWrite, msFetch, msScan; 679 int msWrite, msFetch, msScan; 634 int iFetch; 680 int iFetch; 635 int nWrite = aParam[ST_WRITE]; 681 int nWrite = aParam[ST_WRITE]; 636 682 637 if( bReadonly ){ 683 if( bReadonly ){ 638 msWrite = 0; 684 msWrite = 0; ................................................................................................................................................................................ 678 printf("%d %d %d\n", i, msWrite, msFetch); 724 printf("%d %d %d\n", i, msWrite, msFetch); 679 fflush(stdout); 725 fflush(stdout); 680 } 726 } 681 727 682 testClose(&pDb); 728 testClose(&pDb); 683 testDatasourceFree(pData); 729 testDatasourceFree(pData); 684 730 > 731 if( pLog ){ > 732 flushPrev(pLog); > 733 fclose(pLog); > 734 } 685 return rc; 735 return rc; 686 } 736 } 687 737 688 static void do_speed_write_hook2( < 689 void *pCtx, < 690 int bLog, < 691 i64 iOff, < 692 int nData, < 693 int nUs < 694 ){ < 695 FILE *pOut = (FILE *)pCtx; < 696 if( bLog ) return; < 697 < 698 if( nData==0 ){ < 699 fprintf(pOut, "s %s 0 0 %d\n", (bLog ? "l" : "d"), nUs); < 700 }else{ < 701 fprintf(pOut, "w %s %d %d %d\n", (bLog ? "l" : "d"), < 702 (int)iOff, nData, nUs < 703 ); < 704 } < 705 } < 706 < 707 int do_speed_tests(int nArg, char **azArg){ 738 int do_speed_tests(int nArg, char **azArg){ 708 739 709 struct DbSystem { 740 struct DbSystem { 710 const char *zLibrary; 741 const char *zLibrary; 711 const char *zColor; 742 const char *zColor; 712 } aSys[] = { 743 } aSys[] = { 713 { "sqlite3", "black" }, 744 { "sqlite3", "black" },
Changes to src/lsmInt.h
638 638 639 int lsmFsFileid(lsm_db *pDb, void **ppId, int *pnId); 639 int lsmFsFileid(lsm_db *pDb, void **ppId, int *pnId); 640 640 641 /* Creating, populating, gobbling and deleting sorted runs. */ 641 /* Creating, populating, gobbling and deleting sorted runs. */ 642 void lsmFsGobble(lsm_db *, Segment *, Pgno *, int); 642 void lsmFsGobble(lsm_db *, Segment *, Pgno *, int); 643 int lsmFsSortedDelete(FileSystem *, Snapshot *, int, Segment *); 643 int lsmFsSortedDelete(FileSystem *, Snapshot *, int, Segment *); 644 int lsmFsSortedFinish(FileSystem *, Segment *); 644 int lsmFsSortedFinish(FileSystem *, Segment *); 645 int lsmFsSortedAppend(FileSystem *, Snapshot *, Segment *, Page **); | 645 int lsmFsSortedAppend(FileSystem *, Snapshot *, Segment *, int, Page **); 646 int lsmFsSortedPadding(FileSystem *, Snapshot *, Segment *); 646 int lsmFsSortedPadding(FileSystem *, Snapshot *, Segment *); 647 647 648 /* Functions to retrieve the lsm_env pointer from a FileSystem or Page object */ 648 /* Functions to retrieve the lsm_env pointer from a FileSystem or Page object */ 649 lsm_env *lsmFsEnv(FileSystem *); 649 lsm_env *lsmFsEnv(FileSystem *); 650 lsm_env *lsmPageEnv(Page *); 650 lsm_env *lsmPageEnv(Page *); 651 FileSystem *lsmPageFS(Page *); 651 FileSystem *lsmPageFS(Page *); 652 652 ................................................................................................................................................................................ 751 751 752 int lsmSaveCursors(lsm_db *pDb); 752 int lsmSaveCursors(lsm_db *pDb); 753 int lsmRestoreCursors(lsm_db *pDb); 753 int lsmRestoreCursors(lsm_db *pDb); 754 754 755 void lsmSortedDumpStructure(lsm_db *pDb, Snapshot *, int, int, const char *); 755 void lsmSortedDumpStructure(lsm_db *pDb, Snapshot *, int, int, const char *); 756 void lsmFsDumpBlocklists(lsm_db *); 756 void lsmFsDumpBlocklists(lsm_db *); 757 757 > 758 void lsmSortedExpandBtreePage(Page *pPg, int nOrig); 758 759 759 void lsmPutU32(u8 *, u32); 760 void lsmPutU32(u8 *, u32); 760 u32 lsmGetU32(u8 *); 761 u32 lsmGetU32(u8 *); 761 762 762 /* 763 /* 763 ** Functions from "lsm_varint.c". 764 ** Functions from "lsm_varint.c". 764 */ 765 */
Changes to src/lsm_file.c
726 return pPage->aData; 726 return pPage->aData; 727 } 727 } 728 728 729 /* 729 /* 730 ** Return the page number of a page. 730 ** Return the page number of a page. 731 */ 731 */ 732 Pgno lsmFsPageNumber(Page *pPage){ 732 Pgno lsmFsPageNumber(Page *pPage){ 733 assert( (pPage->flags & PAGE_DIRTY)==0 ); | 733 /* assert( (pPage->flags & PAGE_DIRTY)==0 ); */ 734 return pPage ? pPage->iPg : 0; 734 return pPage ? pPage->iPg : 0; 735 } 735 } 736 736 737 /* 737 /* 738 ** Page pPg is currently part of the LRU list belonging to pFS. Remove 738 ** Page pPg is currently part of the LRU list belonging to pFS. Remove 739 ** it from the list. pPg->pLruNext and pPg->pLruPrev are cleared by this 739 ** it from the list. pPg->pLruNext and pPg->pLruPrev are cleared by this 740 ** operation. 740 ** operation. ................................................................................................................................................................................ 1497 ** to it. The page is writable until either lsmFsPagePersist() is called on 1497 ** to it. The page is writable until either lsmFsPagePersist() is called on 1498 ** it or the ref-count drops to zero. 1498 ** it or the ref-count drops to zero. 1499 */ 1499 */ 1500 int lsmFsSortedAppend( 1500 int lsmFsSortedAppend( 1501 FileSystem *pFS, 1501 FileSystem *pFS, 1502 Snapshot *pSnapshot, 1502 Snapshot *pSnapshot, 1503 Segment *p, 1503 Segment *p, > 1504 int bDefer, 1504 Page **ppOut 1505 Page **ppOut 1505 ){ 1506 ){ 1506 int rc = LSM_OK; 1507 int rc = LSM_OK; 1507 Page *pPg = 0; 1508 Page *pPg = 0; 1508 *ppOut = 0; 1509 *ppOut = 0; 1509 int iApp = 0; 1510 int iApp = 0; 1510 int iNext = 0; 1511 int iNext = 0; 1511 int iPrev = p->iLastPg; 1512 int iPrev = p->iLastPg; 1512 1513 1513 if( pFS->pCompress ){ | 1514 if( pFS->pCompress || bDefer ){ 1514 /* In compressed database mode the page is not assigned a page number 1515 /* In compressed database mode the page is not assigned a page number 1515 ** or location in the database file at this point. This will be done 1516 ** or location in the database file at this point. This will be done 1516 ** by the lsmFsPagePersist() call. */ 1517 ** by the lsmFsPagePersist() call. */ 1517 rc = fsPageBuffer(pFS, 1, &pPg); 1518 rc = fsPageBuffer(pFS, 1, &pPg); 1518 if( rc==LSM_OK ){ 1519 if( rc==LSM_OK ){ 1519 pPg->pFS = pFS; 1520 pPg->pFS = pFS; 1520 pPg->pSeg = p; 1521 pPg->pSeg = p; 1521 pPg->iPg = 0; 1522 pPg->iPg = 0; 1522 pPg->flags |= PAGE_DIRTY; 1523 pPg->flags |= PAGE_DIRTY; 1523 pPg->nData = pFS->nPagesize; 1524 pPg->nData = pFS->nPagesize; 1524 assert( pPg->aData ); 1525 assert( pPg->aData ); > 1526 if( pFS->pCompress==0 ) pPg->nData -= 4; 1525 1527 1526 pPg->nRef = 1; 1528 pPg->nRef = 1; 1527 pFS->nOut++; 1529 pFS->nOut++; 1528 } 1530 } 1529 }else{ 1531 }else{ 1530 if( iPrev==0 ){ 1532 if( iPrev==0 ){ 1531 iApp = findAppendPoint(pFS); 1533 iApp = findAppendPoint(pFS); ................................................................................................................................................................................ 1535 if( rc!=LSM_OK ) return rc; 1537 if( rc!=LSM_OK ) return rc; 1536 iApp = fsFirstPageOnBlock(pFS, iNext); 1538 iApp = fsFirstPageOnBlock(pFS, iNext); 1537 }else{ 1539 }else{ 1538 iApp = iPrev + 1; 1540 iApp = iPrev + 1; 1539 } 1541 } 1540 1542 1541 /* If this is the first page allocated, or if the page allocated is the 1543 /* If this is the first page allocated, or if the page allocated is the 1542 ** last in the block, allocate a new block here. */ | 1544 ** last in the block, also allocate the next block here. */ 1543 if( iApp==0 || fsIsLast(pFS, iApp) ){ 1545 if( iApp==0 || fsIsLast(pFS, iApp) ){ 1544 int iNew; /* New block number */ 1546 int iNew; /* New block number */ 1545 1547 1546 rc = lsmBlockAllocate(pFS->pDb, &iNew); 1548 rc = lsmBlockAllocate(pFS->pDb, &iNew); 1547 if( rc!=LSM_OK ) return rc; 1549 if( rc!=LSM_OK ) return rc; 1548 if( iApp==0 ){ 1550 if( iApp==0 ){ 1549 iApp = fsFirstPageOnBlock(pFS, iNew); 1551 iApp = fsFirstPageOnBlock(pFS, iNew); ................................................................................................................................................................................ 1859 1861 1860 pPg->nCompress = pFS->nBuffer; 1862 pPg->nCompress = pFS->nBuffer; 1861 return p->xCompress(p->pCtx, 1863 return p->xCompress(p->pCtx, 1862 (char *)pFS->aOBuffer, &pPg->nCompress, 1864 (char *)pFS->aOBuffer, &pPg->nCompress, 1863 (const char *)pPg->aData, pPg->nData 1865 (const char *)pPg->aData, pPg->nData 1864 ); 1866 ); 1865 } 1867 } > 1868 > 1869 static int fsAppendPage( > 1870 FileSystem *pFS, > 1871 Segment *pSeg, > 1872 Pgno *piNew, > 1873 int *piPrev, > 1874 int *piNext > 1875 ){ > 1876 Pgno iPrev = pSeg->iLastPg; > 1877 int rc; > 1878 assert( iPrev!=0 ); > 1879 > 1880 *piPrev = 0; > 1881 *piNext = 0; > 1882 > 1883 if( fsIsLast(pFS, iPrev) ){ > 1884 /* Grab the first page on the next block (which has already be > 1885 ** allocated). In this case set *piPrev to tell the caller to set > 1886 ** the "previous block" pointer in the first 4 bytes of the page. > 1887 */ > 1888 int iNext; > 1889 int iBlk = fsPageToBlock(pFS, iPrev); > 1890 rc = fsBlockNext(pFS, iBlk, &iNext); > 1891 if( rc!=LSM_OK ) return rc; > 1892 *piNew = fsFirstPageOnBlock(pFS, iNext); > 1893 *piPrev = iBlk; > 1894 }else{ > 1895 *piNew = iPrev+1; > 1896 if( fsIsLast(pFS, *piNew) ){ > 1897 /* Allocate the next block here. */ > 1898 int iBlk; > 1899 rc = lsmBlockAllocate(pFS->pDb, &iBlk); > 1900 if( rc!=LSM_OK ) return rc; > 1901 *piNext = iBlk; > 1902 } > 1903 } > 1904 > 1905 pSeg->nSize++; > 1906 pSeg->iLastPg = *piNew; > 1907 return LSM_OK; > 1908 } 1866 1909 1867 /* 1910 /* 1868 ** If the page passed as an argument is dirty, update the database file 1911 ** If the page passed as an argument is dirty, update the database file 1869 ** (or mapping of the database file) with its current contents and mark 1912 ** (or mapping of the database file) with its current contents and mark 1870 ** the page as clean. 1913 ** the page as clean. 1871 ** 1914 ** 1872 ** Return LSM_OK if the operation is a success, or an LSM error code 1915 ** Return LSM_OK if the operation is a success, or an LSM error code ................................................................................................................................................................................ 1898 pPg->pHashNext = pFS->apHash[iHash]; 1941 pPg->pHashNext = pFS->apHash[iHash]; 1899 pFS->apHash[iHash] = pPg; 1942 pFS->apHash[iHash] = pPg; 1900 1943 1901 pPg->pSeg->nSize += (sizeof(aSz) * 2) + pPg->nCompress; 1944 pPg->pSeg->nSize += (sizeof(aSz) * 2) + pPg->nCompress; 1902 1945 1903 }else{ 1946 }else{ 1904 i64 iOff; /* Offset to write within database file */ 1947 i64 iOff; /* Offset to write within database file */ > 1948 > 1949 if( pPg->iPg==0 ){ > 1950 /* No page number has been assigned yet. This occurs with pages used > 1951 ** in the b-tree hierarchy. */ > 1952 int iPrev = 0; > 1953 int iNext = 0; > 1954 int iHash; > 1955 > 1956 assert( pPg->pSeg->iFirst ); > 1957 assert( pPg->flags & PAGE_FREE ); > 1958 assert( (pPg->flags & PAGE_HASPREV)==0 ); > 1959 assert( pPg->nData==pFS->nPagesize-4 ); > 1960 > 1961 rc = fsAppendPage(pFS, pPg->pSeg, &pPg->iPg, &iPrev, &iNext); > 1962 if( rc!=LSM_OK ) return rc; > 1963 > 1964 iHash = fsHashKey(pFS->nHash, pPg->iPg); > 1965 pPg->pHashNext = pFS->apHash[iHash]; > 1966 pFS->apHash[iHash] = pPg; > 1967 > 1968 if( iPrev ){ > 1969 assert( iNext==0 ); > 1970 memmove(&pPg->aData[4], pPg->aData, pPg->nData); > 1971 lsmPutU32(pPg->aData, iPrev); > 1972 pPg->flags |= PAGE_HASPREV; > 1973 pPg->aData += 4; > 1974 }else if( iNext ){ > 1975 assert( iPrev==0 ); > 1976 lsmPutU32(&pPg->aData[pPg->nData], iNext); > 1977 }else{ > 1978 int nData = pPg->nData; > 1979 pPg->nData += 4; > 1980 lsmSortedExpandBtreePage(pPg, nData); > 1981 } > 1982 } > 1983 1905 iOff = (i64)pFS->nPagesize * (i64)(pPg->iPg-1); 1984 iOff = (i64)pFS->nPagesize * (i64)(pPg->iPg-1); 1906 if( pFS->bUseMmap==0 ){ 1985 if( pFS->bUseMmap==0 ){ 1907 u8 *aData = pPg->aData - (pPg->flags & PAGE_HASPREV); 1986 u8 *aData = pPg->aData - (pPg->flags & PAGE_HASPREV); 1908 rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, aData, pFS->nPagesize); 1987 rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, aData, pFS->nPagesize); 1909 }else if( pPg->flags & PAGE_FREE ){ 1988 }else if( pPg->flags & PAGE_FREE ){ 1910 fsGrowMapping(pFS, iOff + pFS->nPagesize, &rc); 1989 fsGrowMapping(pFS, iOff + pFS->nPagesize, &rc); 1911 if( rc==LSM_OK ){ 1990 if( rc==LSM_OK ){ 1912 u8 *aTo = &((u8 *)(pFS->pMap))[iOff]; 1991 u8 *aTo = &((u8 *)(pFS->pMap))[iOff]; > 1992 u8 *aFrom = pPg->aData - (pPg->flags & PAGE_HASPREV); 1913 memcpy(aTo, pPg->aData, pFS->nPagesize); | 1993 memcpy(aTo, aFrom, pFS->nPagesize); 1914 lsmFree(pFS->pEnv, pPg->aData); | 1994 lsmFree(pFS->pEnv, aFrom); 1915 pPg->aData = aTo; | 1995 pPg->aData = aTo + (pPg->flags & PAGE_HASPREV); 1916 pPg->flags &= ~PAGE_FREE; 1996 pPg->flags &= ~PAGE_FREE; 1917 fsPageAddToLru(pFS, pPg); 1997 fsPageAddToLru(pFS, pPg); 1918 } 1998 } 1919 } 1999 } 1920 } 2000 } 1921 pPg->flags &= ~PAGE_DIRTY; 2001 pPg->flags &= ~PAGE_DIRTY; 1922 pFS->nWrite++; 2002 pFS->nWrite++;
Changes to src/lsm_main.c
470 ** 470 ** 471 ** Given the context in which this function is called (as a result of an 471 ** Given the context in which this function is called (as a result of an 472 ** lsm_info(LSM_INFO_TREE_SIZE) request), neither of these are considered to 472 ** lsm_info(LSM_INFO_TREE_SIZE) request), neither of these are considered to 473 ** be problems. 473 ** be problems. 474 */ 474 */ 475 *pnNew = (int)p->root.nByte; 475 *pnNew = (int)p->root.nByte; 476 if( p->iOldShmid ){ 476 if( p->iOldShmid ){ 477 if( p->iLogOff==lsmCheckpointLogOffset(pShm->aSnap1) ){ | 477 if( p->iOldLog==lsmCheckpointLogOffset(pShm->aSnap1) ){ 478 *pnOld = 0; 478 *pnOld = 0; 479 }else{ 479 }else{ 480 *pnOld = (int)p->oldroot.nByte; 480 *pnOld = (int)p->oldroot.nByte; 481 } 481 } 482 }else{ 482 }else{ 483 *pnOld = 0; 483 *pnOld = 0; 484 } 484 }
Changes to src/lsm_sorted.c
3090 Page **apHier = pMW->hier.apHier; 3090 Page **apHier = pMW->hier.apHier; 3091 int nHier = pMW->hier.nHier; 3091 int nHier = pMW->hier.nHier; 3092 3092 3093 pSeg = &pMW->pLevel->lhs; 3093 pSeg = &pMW->pLevel->lhs; 3094 3094 3095 for(i=0; rc==LSM_OK && i<nHier; i++){ 3095 for(i=0; rc==LSM_OK && i<nHier; i++){ 3096 Page *pNew = 0; 3096 Page *pNew = 0; 3097 rc = lsmFsSortedAppend(pDb->pFS, pDb->pWorker, pSeg, &pNew); | 3097 rc = lsmFsSortedAppend(pDb->pFS, pDb->pWorker, pSeg, 1, &pNew); 3098 assert( rc==LSM_OK ); 3098 assert( rc==LSM_OK ); 3099 3099 3100 if( rc==LSM_OK ){ 3100 if( rc==LSM_OK ){ 3101 u8 *a1; int n1; 3101 u8 *a1; int n1; 3102 u8 *a2; int n2; 3102 u8 *a2; int n2; 3103 3103 3104 a1 = fsPageData(pNew, &n1); 3104 a1 = fsPageData(pNew, &n1); 3105 a2 = fsPageData(apHier[i], &n2); 3105 a2 = fsPageData(apHier[i], &n2); > 3106 3106 assert( n1==n2 || n1+4==n2 || n2+4==n1 ); | 3107 assert( n1==n2 || n1+4==n2 ); 3107 3108 > 3109 if( n1==n2 ){ > 3110 memcpy(a1, a2, n2); > 3111 }else{ > 3112 int nEntry = pageGetNRec(a2, n2); > 3113 int iEof1 = SEGMENT_EOF(n1, nEntry); > 3114 int iEof2 = SEGMENT_EOF(n2, nEntry); > 3115 > 3116 memcpy(a1, a2, iEof2 - 4); > 3117 memcpy(&a1[iEof1], &a2[iEof2], n2 - iEof2); > 3118 } > 3119 > 3120 lsmFsPageRelease(apHier[i]); > 3121 apHier[i] = pNew; > 3122 > 3123 #if 0 > 3124 assert( n1==n2 || n1+4==n2 || n2+4==n1 ); 3108 if( n1>=n2 ){ 3125 if( n1>=n2 ){ 3109 /* If n1 (size of the new page) is equal to or greater than n2 (the 3126 /* If n1 (size of the new page) is equal to or greater than n2 (the 3110 ** size of the old page), then copy the data into the new page. If 3127 ** size of the old page), then copy the data into the new page. If 3111 ** n1==n2, this could be done with a single memcpy(). However, 3128 ** n1==n2, this could be done with a single memcpy(). However, 3112 ** since sometimes n1>n2, the page content and footer must be copied 3129 ** since sometimes n1>n2, the page content and footer must be copied 3113 ** separately. */ 3130 ** separately. */ 3114 int nEntry = pageGetNRec(a2, n2); 3131 int nEntry = pageGetNRec(a2, n2); ................................................................................................................................................................................ 3121 }else{ 3138 }else{ 3122 lsmPutU16(&a1[SEGMENT_FLAGS_OFFSET(n1)], SEGMENT_BTREE_FLAG); 3139 lsmPutU16(&a1[SEGMENT_FLAGS_OFFSET(n1)], SEGMENT_BTREE_FLAG); 3123 lsmPutU16(&a1[SEGMENT_NRECORD_OFFSET(n1)], 0); 3140 lsmPutU16(&a1[SEGMENT_NRECORD_OFFSET(n1)], 0); 3124 lsmPutU64(&a1[SEGMENT_POINTER_OFFSET(n1)], 0); 3141 lsmPutU64(&a1[SEGMENT_POINTER_OFFSET(n1)], 0); 3125 i = i - 1; 3142 i = i - 1; 3126 lsmFsPageRelease(pNew); 3143 lsmFsPageRelease(pNew); 3127 } 3144 } > 3145 #endif 3128 } 3146 } 3129 } 3147 } 3130 3148 3131 #ifdef LSM_DEBUG 3149 #ifdef LSM_DEBUG 3132 if( rc==LSM_OK ){ 3150 if( rc==LSM_OK ){ 3133 for(i=0; i<nHier; i++) assert( lsmFsPageWritable(apHier[i]) ); 3151 for(i=0; i<nHier; i++) assert( lsmFsPageWritable(apHier[i]) ); 3134 } 3152 } ................................................................................................................................................................................ 3306 nRec = pageGetNRec(aData, nData); 3324 nRec = pageGetNRec(aData, nData); 3307 nFree = SEGMENT_EOF(nData, nRec) - mergeWorkerPageOffset(aData, nData); 3325 nFree = SEGMENT_EOF(nData, nRec) - mergeWorkerPageOffset(aData, nData); 3308 if( nByte<=nFree ) break; 3326 if( nByte<=nFree ) break; 3309 3327 3310 /* Otherwise, this page is full. Set the right-hand-child pointer 3328 /* Otherwise, this page is full. Set the right-hand-child pointer 3311 ** to iPtr and release it. */ 3329 ** to iPtr and release it. */ 3312 lsmPutU64(&aData[SEGMENT_POINTER_OFFSET(nData)], iPtr); 3330 lsmPutU64(&aData[SEGMENT_POINTER_OFFSET(nData)], iPtr); > 3331 assert( lsmFsPageNumber(pOld)==0 ); 3313 rc = lsmFsPagePersist(pOld); 3332 rc = lsmFsPagePersist(pOld); 3314 if( rc==LSM_OK ){ 3333 if( rc==LSM_OK ){ 3315 iPtr = lsmFsPageNumber(pOld); 3334 iPtr = lsmFsPageNumber(pOld); 3316 lsmFsPageRelease(pOld); 3335 lsmFsPageRelease(pOld); 3317 } 3336 } 3318 } 3337 } 3319 3338 3320 /* Allocate a new page for apHier[iLevel]. */ 3339 /* Allocate a new page for apHier[iLevel]. */ 3321 p->apHier[iLevel] = 0; 3340 p->apHier[iLevel] = 0; 3322 if( rc==LSM_OK ){ 3341 if( rc==LSM_OK ){ 3323 rc = lsmFsSortedAppend( 3342 rc = lsmFsSortedAppend( 3324 pDb->pFS, pDb->pWorker, pSeg, &p->apHier[iLevel] | 3343 pDb->pFS, pDb->pWorker, pSeg, 1, &p->apHier[iLevel] 3325 ); 3344 ); 3326 } 3345 } 3327 if( rc!=LSM_OK ) return rc; 3346 if( rc!=LSM_OK ) return rc; 3328 3347 3329 aData = fsPageData(p->apHier[iLevel], &nData); 3348 aData = fsPageData(p->apHier[iLevel], &nData); 3330 memset(aData, 0, nData); 3349 memset(aData, 0, nData); 3331 lsmPutU16(&aData[SEGMENT_FLAGS_OFFSET(nData)], SEGMENT_BTREE_FLAG); 3350 lsmPutU16(&aData[SEGMENT_FLAGS_OFFSET(nData)], SEGMENT_BTREE_FLAG); ................................................................................................................................................................................ 3526 ){ 3545 ){ 3527 int rc = LSM_OK; /* Return code */ 3546 int rc = LSM_OK; /* Return code */ 3528 Page *pNext = 0; /* New page appended to run */ 3547 Page *pNext = 0; /* New page appended to run */ 3529 lsm_db *pDb = pMW->pDb; /* Database handle */ 3548 lsm_db *pDb = pMW->pDb; /* Database handle */ 3530 Segment *pSeg; /* Run to append to */ 3549 Segment *pSeg; /* Run to append to */ 3531 3550 3532 pSeg = &pMW->pLevel->lhs; 3551 pSeg = &pMW->pLevel->lhs; 3533 rc = lsmFsSortedAppend(pDb->pFS, pDb->pWorker, pSeg, &pNext); | 3552 rc = lsmFsSortedAppend(pDb->pFS, pDb->pWorker, pSeg, 0, &pNext); 3534 assert( rc!=LSM_OK || pSeg->iFirst>0 || pMW->pDb->compress.xCompress ); 3553 assert( rc!=LSM_OK || pSeg->iFirst>0 || pMW->pDb->compress.xCompress ); 3535 3554 3536 if( rc==LSM_OK ){ 3555 if( rc==LSM_OK ){ 3537 u8 *aData; /* Data buffer belonging to page pNext */ 3556 u8 *aData; /* Data buffer belonging to page pNext */ 3538 int nData; /* Size of aData[] in bytes */ 3557 int nData; /* Size of aData[] in bytes */ 3539 3558 3540 rc = mergeWorkerPersistAndRelease(pMW); 3559 rc = mergeWorkerPersistAndRelease(pMW); ................................................................................................................................................................................ 4126 assert( rc!=LSM_OK || pDb->pWorker->freelist.nEntry==0 ); 4145 assert( rc!=LSM_OK || pDb->pWorker->freelist.nEntry==0 ); 4127 lsmDbSnapshotSetLevel(pDb->pWorker, pNext); 4146 lsmDbSnapshotSetLevel(pDb->pWorker, pNext); 4128 sortedFreeLevel(pDb->pEnv, pNew); 4147 sortedFreeLevel(pDb->pEnv, pNew); 4129 }else{ 4148 }else{ 4130 if( pDel ) pDel->iRoot = 0; 4149 if( pDel ) pDel->iRoot = 0; 4131 4150 4132 #if 0 4151 #if 0 4133 lsmSortedDumpStructure(pDb, pDb->pWorker, 0, 0, "new-toplevel"); | 4152 lsmSortedDumpStructure(pDb, pDb->pWorker, 1, 0, "new-toplevel"); 4134 #endif 4153 #endif 4135 4154 4136 if( freelist.nEntry ){ 4155 if( freelist.nEntry ){ 4137 Freelist *p = &pDb->pWorker->freelist; 4156 Freelist *p = &pDb->pWorker->freelist; 4138 lsmFree(pDb->pEnv, p->aEntry); 4157 lsmFree(pDb->pEnv, p->aEntry); 4139 memcpy(p, &freelist, sizeof(freelist)); 4158 memcpy(p, &freelist, sizeof(freelist)); 4140 freelist.aEntry = 0; 4159 freelist.aEntry = 0; ................................................................................................................................................................................ 4565 /* Clean up the MergeWorker object initialized above. If no error 4584 /* Clean up the MergeWorker object initialized above. If no error 4566 ** has occurred, invoke the work-hook to inform the application that 4585 ** has occurred, invoke the work-hook to inform the application that 4567 ** the database structure has changed. */ 4586 ** the database structure has changed. */ 4568 mergeWorkerShutdown(&mergeworker, &rc); 4587 mergeWorkerShutdown(&mergeworker, &rc); 4569 if( rc==LSM_OK ) sortedInvokeWorkHook(pDb); 4588 if( rc==LSM_OK ) sortedInvokeWorkHook(pDb); 4570 4589 4571 #if 0 4590 #if 0 4572 lsmSortedDumpStructure(pDb, pDb->pWorker, 0, 0, "work"); | 4591 lsmSortedDumpStructure(pDb, pDb->pWorker, 1, 0, "work"); 4573 #endif 4592 #endif 4574 assertBtreeOk(pDb, &pLevel->lhs); 4593 assertBtreeOk(pDb, &pLevel->lhs); 4575 assertRunInOrder(pDb, &pLevel->lhs); 4594 assertRunInOrder(pDb, &pLevel->lhs); 4576 4595 4577 /* If bFlush is true and the database is no longer considered "full", 4596 /* If bFlush is true and the database is no longer considered "full", 4578 ** break out of the loop even if nRemaining is still greater than 4597 ** break out of the loop even if nRemaining is still greater than 4579 ** zero. The caller has an in-memory tree to flush to disk. */ 4598 ** zero. The caller has an in-memory tree to flush to disk. */ ................................................................................................................................................................................ 4658 u32 nUnsync; 4677 u32 nUnsync; 4659 int nPgsz; 4678 int nPgsz; 4660 4679 4661 lsmCheckpointSynced(pDb, 0, 0, &nSync); 4680 lsmCheckpointSynced(pDb, 0, 0, &nSync); 4662 nUnsync = lsmCheckpointNWrite(pDb->pShmhdr->aSnap1, 0); 4681 nUnsync = lsmCheckpointNWrite(pDb->pShmhdr->aSnap1, 0); 4663 nPgsz = lsmCheckpointPgsz(pDb->pShmhdr->aSnap1); 4682 nPgsz = lsmCheckpointPgsz(pDb->pShmhdr->aSnap1); 4664 4683 4665 nMax = LSM_MIN(nMax, (pDb->nAutockpt/nPgsz) - (nUnsync-nSync)); | 4684 nMax = LSM_MIN(nMax, (pDb->nAutockpt/nPgsz) - (int)(nUnsync-nSync)); 4666 if( nMax<nRem ){ 4685 if( nMax<nRem ){ 4667 bCkpt = 1; 4686 bCkpt = 1; 4668 nRem = LSM_MAX(nMax, 0); 4687 nRem = LSM_MAX(nMax, 0); 4669 } 4688 } 4670 } 4689 } 4671 4690 4672 /* If there exists in-memory data ready to be flushed to disk, attempt 4691 /* If there exists in-memory data ready to be flushed to disk, attempt ................................................................................................................................................................................ 4726 }else{ 4745 }else{ 4727 int rcdummy = LSM_BUSY; 4746 int rcdummy = LSM_BUSY; 4728 lsmFinishWork(pDb, 0, &rcdummy); 4747 lsmFinishWork(pDb, 0, &rcdummy); 4729 } 4748 } 4730 assert( pDb->pWorker==0 ); 4749 assert( pDb->pWorker==0 ); 4731 4750 4732 if( rc==LSM_OK ){ 4751 if( rc==LSM_OK ){ 4733 if( pnWrite ) *pnWrite = (nMax - nRem); | 4752 *pnWrite = (nMax - nRem); 4734 if( pbCkpt ) *pbCkpt = (bCkpt && nRem<=0); | 4753 *pbCkpt = (bCkpt && nRem<=0); 4735 }else{ 4754 }else{ 4736 if( pnWrite ) *pnWrite = 0; | 4755 *pnWrite = 0; 4737 if( pbCkpt ) *pbCkpt = 0; | 4756 *pbCkpt = 0; 4738 } 4757 } 4739 4758 4740 return rc; 4759 return rc; 4741 } 4760 } 4742 4761 4743 static int doLsmWork(lsm_db *pDb, int nMerge, int nPage, int *pnWrite){ 4762 static int doLsmWork(lsm_db *pDb, int nMerge, int nPage, int *pnWrite){ 4744 int rc; 4763 int rc; ................................................................................................................................................................................ 5334 void lsmSortedSaveTreeCursors(lsm_db *pDb){ 5353 void lsmSortedSaveTreeCursors(lsm_db *pDb){ 5335 MultiCursor *pCsr; 5354 MultiCursor *pCsr; 5336 for(pCsr=pDb->pCsr; pCsr; pCsr=pCsr->pNext){ 5355 for(pCsr=pDb->pCsr; pCsr; pCsr=pCsr->pNext){ 5337 lsmTreeCursorSave(pCsr->apTreeCsr[0]); 5356 lsmTreeCursorSave(pCsr->apTreeCsr[0]); 5338 lsmTreeCursorSave(pCsr->apTreeCsr[1]); 5357 lsmTreeCursorSave(pCsr->apTreeCsr[1]); 5339 } 5358 } 5340 } 5359 } > 5360 > 5361 void lsmSortedExpandBtreePage(Page *pPg, int nOrig){ > 5362 u8 *aData; > 5363 int nData; > 5364 int nEntry; > 5365 int iHdr; > 5366 > 5367 aData = lsmFsPageData(pPg, &nData); > 5368 nEntry = pageGetNRec(aData, nOrig); > 5369 iHdr = SEGMENT_EOF(nOrig, nEntry); > 5370 memmove(&aData[iHdr + (nData-nOrig)], &aData[iHdr], nOrig-iHdr); > 5371 } 5341 5372 5342 #ifdef LSM_DEBUG_EXPENSIVE 5373 #ifdef LSM_DEBUG_EXPENSIVE 5343 static void assertRunInOrder(lsm_db *pDb, Segment *pSeg){ 5374 static void assertRunInOrder(lsm_db *pDb, Segment *pSeg){ 5344 Page *pPg = 0; 5375 Page *pPg = 0; 5345 Blob blob1 = {0, 0, 0, 0}; 5376 Blob blob1 = {0, 0, 0, 0}; 5346 Blob blob2 = {0, 0, 0, 0}; 5377 Blob blob2 = {0, 0, 0, 0}; 5347 5378
Changes to tool/lsmperf.tcl
186 append script $data3 186 append script $data3 187 append script $data4 187 append script $data4 188 188 189 append script "pause -1\n" 189 append script "pause -1\n" 190 exec_gnuplot_script $script $zPng 190 exec_gnuplot_script $script $zPng 191 } 191 } 192 192 193 do_write_test x.png 600 50000 50000 20 { | 193 do_write_test x.png 100 50000 50000 20 { 194 lsm-mt-1 "mmap=1 multi_proc=0 safety=0 threads=3 autowork=0 block_size=1M" | 194 lsm safety=0 195 } 195 } 196 196 > 197 > 198 #lsm "mmap=1 multi_proc=0 page_size=4096 block_size=2097152 autocheckpoint=419 > 199 #lsm-mt "mmap=1 multi_proc=0 threads=2 autowork=0 autocheckpoint=4196000" > 200 > 201 # lsm "safety=1 multi_proc=0" > 202 197 # lsm-mt "mmap=1 multi_proc=0 threads=2 autowork=0 autocheckpoint=8192000" 203 # lsm-mt "mmap=1 multi_proc=0 threads=2 autowork=0 autocheckpoint=8192000" 198 # lsm-mt "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0" 204 # lsm-mt "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0" 199 # lsm-st "mmap=1 multi_proc=0 safety=1 threads=1 autowork=1" 205 # lsm-st "mmap=1 multi_proc=0 safety=1 threads=1 autowork=1" 200 # lsm-mt "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0" 206 # lsm-mt "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0" 201 # lsm-mt "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0" 207 # lsm-mt "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0" 202 # LevelDB leveldb 208 # LevelDB leveldb 203 # lsm-st "mmap=1 multi_proc=0 safety=1 threads=1 autowork=1" 209 # lsm-st "mmap=1 multi_proc=0 safety=1 threads=1 autowork=1" 204 # LevelDB leveldb 210 # LevelDB leveldb 205 # SQLite sqlite3 211 # SQLite sqlite3 206 212 207 213 208 214 209 215
Changes to www/lsmusr.wiki
519 if any other client has written to the database since the current clients 519 if any other client has written to the database since the current clients 520 read-transaction was opened, it will not be possible to upgrade to a 520 read-transaction was opened, it will not be possible to upgrade to a 521 write-transaction. 521 write-transaction. 522 522 523 <p>Write-transactions may be opened either implicitly or explicitly. If any 523 <p>Write-transactions may be opened either implicitly or explicitly. If any 524 of the following functions are called to write to the database when there 524 of the following functions are called to write to the database when there 525 is no write-transaction open, then an implicit write-transaction is opened and 525 is no write-transaction open, then an implicit write-transaction is opened and 526 close (committed) within the function: | 526 closed (committed) within the call: 527 527 528 <ul> 528 <ul> 529 <li> lsm_insert() 529 <li> lsm_insert() 530 <li> lsm_delete() 530 <li> lsm_delete() 531 <li> lsm_delete_range() 531 <li> lsm_delete_range() 532 </ul> 532 </ul> 533 533