Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Avoid writing delete markers to the oldest segment in an FTS index. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
1baeb1cee61d9c56c718b50af034a24f |
User & Date: | dan 2014-08-06 20:04:14.831 |
Context
2014-08-07
| ||
18:47 | Add "segment promotion" to fts5. This prevents the FTS index from growing indefinitely as data is added and deleted. (check-in: ba359d78e1 user: dan tags: fts5) | |
2014-08-06
| ||
20:04 | Avoid writing delete markers to the oldest segment in an FTS index. (check-in: 1baeb1cee6 user: dan tags: fts5) | |
16:30 | Add support for savepoints to fts5. (check-in: 3b19eba042 user: dan tags: fts5) | |
Changes
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
636 637 638 639 640 641 642 | fts5DecodeRowid(iKey, &iIdx, &iSegid, &iHeight, &iPgno); if( iSegid==0 ){ if( iKey==FTS5_AVERAGES_ROWID ){ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); }else{ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, | | | 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 | fts5DecodeRowid(iKey, &iIdx, &iSegid, &iHeight, &iPgno); if( iSegid==0 ){ if( iKey==FTS5_AVERAGES_ROWID ){ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); }else{ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure idx=%d}", (int)(iKey-10) ); } } else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx idx=%d segid=%d pgno=%d)", iIdx, iSegid, iPgno ); |
︙ | ︙ | |||
1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 | fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter); for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ int iSeg; /* Used to iterate through segments */ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); } | > | 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 | fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter); for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ int iSeg; /* Used to iterate through segments */ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); assert( pLvl->nMerge<=pLvl->nSeg ); for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); } |
︙ | ︙ | |||
1224 1225 1226 1227 1228 1229 1230 | if( (a[iOff-1] & 0x80)==0 ) break; } getVarint(&a[iOff], (u64*)&iVal); pIter->iRowid += iVal; pIter->iLeafPgno--; | > | > | 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 | if( (a[iOff-1] & 0x80)==0 ) break; } getVarint(&a[iOff], (u64*)&iVal); pIter->iRowid += iVal; pIter->iLeafPgno--; while( iOff>pIter->iFirstOff && a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0 ){ iOff--; pIter->iLeafPgno--; } pIter->iOff = iOff; } return pIter->bEof; |
︙ | ︙ | |||
2831 2832 2833 2834 2835 2836 2837 | Fts5Index *p, Fts5SegWriter *pWriter, /* Writer object */ int *pnHeight, /* OUT: Height of the b-tree */ int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; *pnLeaf = pWriter->aWriter[0].pgno; | > > > > | | | | | | | | | | > | | > > | 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 | Fts5Index *p, Fts5SegWriter *pWriter, /* Writer object */ int *pnHeight, /* OUT: Height of the b-tree */ int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; *pnLeaf = pWriter->aWriter[0].pgno; if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){ *pnLeaf = 0; *pnHeight = 0; }else{ fts5WriteFlushLeaf(p, pWriter); if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ fts5WriteBtreeGrow(p, pWriter); } if( pWriter->nWriter>1 ){ fts5WriteBtreeNEmpty(p, pWriter); } *pnHeight = pWriter->nWriter; for(i=1; i<pWriter->nWriter; i++){ Fts5PageWriter *pPg = &pWriter->aWriter[i]; fts5DataWrite(p, FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno), pPg->buf.p, pPg->buf.n ); } } for(i=0; i<pWriter->nWriter; i++){ Fts5PageWriter *pPg = &pWriter->aWriter[i]; fts5BufferFree(&pPg->term); fts5BufferFree(&pPg->buf); } sqlite3_free(pWriter->aWriter); |
︙ | ︙ | |||
2966 2967 2968 2969 2970 2971 2972 | Fts5StructureLevel *pLvlOut = &pStruct->aLevel[iLvl+1]; Fts5MultiSegIter *pIter = 0; /* Iterator to read input data */ int nRem = *pnRem; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; | | > | 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 | Fts5StructureLevel *pLvlOut = &pStruct->aLevel[iLvl+1]; Fts5MultiSegIter *pIter = 0; /* Iterator to read input data */ int nRem = *pnRem; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; int bRequireDoclistTerm = 0; /* Doclist terminator (0x00) required */ int bOldest; /* True if the output segment is the oldest */ assert( iLvl<pStruct->nLevel ); assert( pLvl->nMerge<=pLvl->nSeg ); memset(&writer, 0, sizeof(Fts5SegWriter)); memset(&term, 0, sizeof(Fts5Buffer)); writer.iIdx = iIdx; |
︙ | ︙ | |||
2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 | pLvlOut->nSeg++; pSeg->pgnoFirst = 1; pSeg->iSegid = iSegid; /* Read input from all segments in the input level */ nInput = pLvl->nSeg; } #if 0 fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl); fflush(stdout); #endif for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ]; Fts5ChunkIter sPos; /* Used to iterate through position list */ | > > > > > > > > | | < | | > > | > | | | | | | | | | | | < | | | | | | | | | > > > | 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 | pLvlOut->nSeg++; pSeg->pgnoFirst = 1; pSeg->iSegid = iSegid; /* Read input from all segments in the input level */ nInput = pLvl->nSeg; } bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); #if 0 fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl); fflush(stdout); #endif for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ]; Fts5ChunkIter sPos; /* Used to iterate through position list */ /* If the segment being written is the oldest in the entire index and ** the position list is empty (i.e. the entry is a delete marker), no ** entry need be written to the output. */ fts5ChunkIterInit(p, pSeg, &sPos); if( bOldest==0 || sPos.nRem>0 ){ int nTerm; const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ if( writer.nLeafWritten>nRem ){ fts5ChunkIterRelease(&sPos); break; } /* This is a new term. Append a term to the output segment. */ if( bRequireDoclistTerm ){ fts5WriteAppendZerobyte(p, &writer); } fts5WriteAppendTerm(p, &writer, nTerm, pTerm); fts5BufferSet(&p->rc, &term, nTerm, pTerm); bRequireDoclistTerm = 1; } /* Append the rowid to the output */ fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); /* Copy the position list from input to output */ fts5WriteAppendPoslistInt(p, &writer, sPos.nRem); for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){ int iOff = 0; while( iOff<sPos.n ){ int iVal; iOff += getVarint32(&sPos.p[iOff], iVal); fts5WriteAppendPoslistInt(p, &writer, iVal); } } } fts5ChunkIterRelease(&sPos); } /* Flush the last leaf page to disk. Set the output segment b-tree height ** and last leaf page number at the same time. */ fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast); if( fts5MultiIterEof(p, pIter) ){ |
︙ | ︙ | |||
3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 | /* Remove the redundant segments from the input level */ if( pLvl->nSeg!=nInput ){ int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); } pLvl->nSeg -= nInput; pLvl->nMerge = 0; }else{ fts5TrimSegments(p, pIter); pLvl->nMerge = nInput; } fts5MultiIterFree(p, pIter); fts5BufferFree(&term); *pnRem -= writer.nLeafWritten; | > > > > | 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 | /* Remove the redundant segments from the input level */ if( pLvl->nSeg!=nInput ){ int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); } pLvl->nSeg -= nInput; pLvl->nMerge = 0; if( pSeg->pgnoLast==0 ){ pLvlOut->nSeg--; } }else{ assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 ); fts5TrimSegments(p, pIter); pLvl->nMerge = nInput; } fts5MultiIterFree(p, pIter); fts5BufferFree(&term); *pnRem -= writer.nLeafWritten; |
︙ | ︙ | |||
3091 3092 3093 3094 3095 3096 3097 | nWrite = pStruct->nWriteCounter; nWork = ((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit); pStruct->nWriteCounter += nLeaf; nRem = p->nWorkUnit * nWork * pStruct->nLevel; while( nRem>0 ){ int iLvl; /* To iterate through levels */ | | > | > > > > > > | 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 | nWrite = pStruct->nWriteCounter; nWork = ((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit); pStruct->nWriteCounter += nLeaf; nRem = p->nWorkUnit * nWork * pStruct->nLevel; while( nRem>0 ){ int iLvl; /* To iterate through levels */ int iBestLvl = 0; /* Level offering the most input segments */ int nBest = 0; /* Number of input segments on best level */ /* Set iBestLvl to the level to read input segments from. */ assert( pStruct->nLevel>0 ); for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; if( pLvl->nMerge ){ if( pLvl->nMerge>nBest ){ iBestLvl = iLvl; nBest = pLvl->nMerge; } break; } if( pLvl->nSeg>nBest ){ nBest = pLvl->nSeg; iBestLvl = iLvl; } } /* If nBest is still 0, then the index must be empty. */ #ifdef SQLITE_DEBUG for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ assert( pStruct->aLevel[iLvl].nSeg==0 ); } #endif if( nBest<p->nMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break; fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem); assert( nRem==0 || p->rc==SQLITE_OK ); } } |
︙ | ︙ | |||
3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 | i64 cksum1 = 13; i64 cksum2 = 13; for(fts5DlidxIterInit(p, 0, iIdx, iSegid, iLeaf, &pDlidx); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(pDlidx) ){ cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); cksum1 = (cksum1 ^ pDlidx->iRowid); } fts5DlidxIterFree(pDlidx); pDlidx = 0; for(fts5DlidxIterInit(p, 1, iIdx, iSegid, iLeaf, &pDlidx); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterPrev(pDlidx) ){ cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); cksum2 = (cksum2 ^ pDlidx->iRowid); } fts5DlidxIterFree(pDlidx); pDlidx = 0; if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; | > > | 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 | i64 cksum1 = 13; i64 cksum2 = 13; for(fts5DlidxIterInit(p, 0, iIdx, iSegid, iLeaf, &pDlidx); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(pDlidx) ){ assert( pDlidx->iLeafPgno>iLeaf ); cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); cksum1 = (cksum1 ^ pDlidx->iRowid); } fts5DlidxIterFree(pDlidx); pDlidx = 0; for(fts5DlidxIterInit(p, 1, iIdx, iSegid, iLeaf, &pDlidx); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterPrev(pDlidx) ){ assert( pDlidx->iLeafPgno>iLeaf ); cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); cksum2 = (cksum2 ^ pDlidx->iRowid); } fts5DlidxIterFree(pDlidx); pDlidx = 0; if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; |
︙ | ︙ |
Changes to test/fts5aa.test.
︙ | ︙ | |||
46 47 48 49 50 51 52 | } do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); } do_execsql_test 2.2 { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } { | | | 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | } do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); } do_execsql_test 2.2 { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } { {{structure idx=0} {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}} } do_execsql_test 2.3 { INSERT INTO t1(t1) VALUES('integrity-check'); } #------------------------------------------------------------------------- # |
︙ | ︙ |
Added test/fts5aj.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # Specifically, this tests that, provided the amount of data remains # constant, the FTS index does not grow indefinitely as rows are inserted # and deleted, # set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5aj # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts3 { finish_test return } proc doc {} { set dict [list a b c d e f g h i j k l m n o p q r s t u v w x y z] set res [list] for {set i 0} {$i < 20} {incr i} { lappend res [lindex $dict [expr int(rand() * 26)]] } set res } proc structure {} { set val [db one {SELECT fts5_decode(rowid,block) FROM t1_data WHERE rowid=10}] foreach lvl [lrange $val 1 end] { lappend res [expr [llength $lvl]-2] } set res } expr srand(0) do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); INSERT INTO t1(t1) VALUES('pgsz=64'); } for {set iTest 0} {$iTest < 50000} {incr iTest} { if {$iTest > 1000} { execsql { DELETE FROM t1 WHERE rowid=($iTest-1000) } } set new [doc] execsql { INSERT INTO t1 VALUES($new) } if {$iTest==10000} { set sz1 [db one {SELECT count(*) FROM t1_data}] } if {0==($iTest % 1000)} { set sz [db one {SELECT count(*) FROM t1_data}] set s [structure] do_test 1.$iTest.$sz.{$s} {} {} } } #db eval { SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} do_execsql_test 2.0 { INSERT INTO t1(t1) VALUES('integrity-check') } finish_test |
Changes to test/permutations.test.
︙ | ︙ | |||
222 223 224 225 226 227 228 | fts4growth.test fts4growth2.test } test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files { fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test | | | 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 | fts4growth.test fts4growth2.test } test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files { fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test fts5af.test fts5ag.test fts5ah.test fts5ai.test fts5aj.test } test_suite "nofaultsim" -prefix "" -description { "Very" quick test suite. Runs in less than 5 minutes on a workstation. This test suite is the same as the "quick" tests, except that some files that test malloc and IO errors are omitted. } -files [ |
︙ | ︙ |