Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix a problem preventing delete markers from ever being removed from the FTS index. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts4-experimental |
Files: | files | file ages | folders |
SHA1: |
7f47ae5c5ddb1227484ddae7c6960183 |
User & Date: | dan 2014-05-13 20:11:37.423 |
Context
2014-05-14
| ||
15:58 | Fix various problems to do with segment promotion. Add test file fts4growth2.test, containing tests to check that the FTS index does not grow indefinitely as the table is updated. Allow the user to configure the number of segments merged simultaneously by the automerge option. (check-in: 21491a9bc6 user: dan tags: fts4-experimental) | |
2014-05-13
| ||
20:11 | Fix a problem preventing delete markers from ever being removed from the FTS index. (check-in: 7f47ae5c5d user: dan tags: fts4-experimental) | |
2014-05-12
| ||
20:04 | Experimental code to prevent FTS indexes from growing indefinitely as the table is updated. (check-in: b3b505a4dd user: dan tags: fts4-experimental) | |
Changes
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
378 379 380 381 382 383 384 | /* Return segments in order from oldest to newest.*/ /* 37 */ "SELECT level, idx, end_block " "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?" "ORDER BY level DESC, idx ASC", /* Update statements used while promoting segments */ | | > | | 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 | /* Return segments in order from oldest to newest.*/ /* 37 */ "SELECT level, idx, end_block " "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?" "ORDER BY level DESC, idx ASC", /* Update statements used while promoting segments */ /* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? " "WHERE level=? AND idx=?", /* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1" }; int rc = SQLITE_OK; sqlite3_stmt *pStmt; assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); assert( eStmt<SizeofArray(azSql) && eStmt>=0 ); |
︙ | ︙ | |||
3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 | int rc; /* Return code */ int iIdx = 0; /* Index of new segment */ sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */ SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ Fts3SegFilter filter; /* Segment term filter condition */ Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ int bIgnoreEmpty = 0; /* True to ignore empty segments */ assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel==FTS3_SEGCURSOR_PENDING || iLevel>=0 ); assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); assert( iIndex>=0 && iIndex<p->nIndex ); rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr); if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; if( iLevel==FTS3_SEGCURSOR_ALL ){ /* This call is to merge all segments in the database to a single ** segment. The level of the new segment is equal to the numerically ** greatest segment level currently present in the database for this ** index. The idx of the new segment is always 0. */ if( csr.nSegment==1 ){ rc = SQLITE_DONE; goto finished; } | > > > > > > | < < < > > | > | > | | | > | 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 | int rc; /* Return code */ int iIdx = 0; /* Index of new segment */ sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */ SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ Fts3SegFilter filter; /* Segment term filter condition */ Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ int bIgnoreEmpty = 0; /* True to ignore empty segments */ i64 iMaxLevel = 0; /* Max level number for this index/langid */ assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel==FTS3_SEGCURSOR_PENDING || iLevel>=0 ); assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); assert( iIndex>=0 && iIndex<p->nIndex ); rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr); if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; if( iLevel!=FTS3_SEGCURSOR_PENDING ){ rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel); if( rc!=SQLITE_OK ) goto finished; } if( iLevel==FTS3_SEGCURSOR_ALL ){ /* This call is to merge all segments in the database to a single ** segment. The level of the new segment is equal to the numerically ** greatest segment level currently present in the database for this ** index. The idx of the new segment is always 0. */ if( csr.nSegment==1 ){ rc = SQLITE_DONE; goto finished; } iNewLevel = iMaxLevel; bIgnoreEmpty = 1; }else{ /* This call is to merge all segments at level iLevel. find the next ** available segment index at level iLevel+1. The call to ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to ** a single iLevel+2 segment if necessary. */ assert( FTS3_SEGCURSOR_PENDING==-1 ); iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1); rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel); } if( rc!=SQLITE_OK ) goto finished; assert( csr.nSegment>0 ); assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) ); assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) ); memset(&filter, 0, sizeof(Fts3SegFilter)); filter.flags = FTS3_SEGMENT_REQUIRE_POS; filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0); rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); while( SQLITE_OK==rc ){ rc = sqlite3Fts3SegReaderStep(p, &csr); if( rc!=SQLITE_ROW ) break; rc = fts3SegWriterAdd(p, &pWriter, 1, csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist); } if( rc!=SQLITE_OK ) goto finished; assert( pWriter || bIgnoreEmpty ); if( iLevel!=FTS3_SEGCURSOR_PENDING ){ rc = fts3DeleteSegdir( p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment ); if( rc!=SQLITE_OK ) goto finished; } if( pWriter ){ rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); if( rc==SQLITE_OK ){ rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData); } } finished: fts3SegWriterFree(pWriter); sqlite3Fts3SegReaderFinish(&csr); return rc; } |
︙ | ︙ | |||
4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 | pCsr = (Fts3MultiSegReader *)&pFilter[1]; rc = fts3IncrmergeHintLoad(p, &hint); while( rc==SQLITE_OK && nRem>0 ){ const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex; sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */ int bUseHint = 0; /* True if attempting to append */ /* Search the %_segdir table for the absolute level with the smallest ** relative level number that contains at least nMin segments, if any. ** If one is found, set iAbsLevel to the absolute level number and ** nSeg to nMin. If no level with at least nMin segments can be found, ** set nSeg to -1. */ | > | 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 | pCsr = (Fts3MultiSegReader *)&pFilter[1]; rc = fts3IncrmergeHintLoad(p, &hint); while( rc==SQLITE_OK && nRem>0 ){ const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex; sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */ int bUseHint = 0; /* True if attempting to append */ int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ /* Search the %_segdir table for the absolute level with the smallest ** relative level number that contains at least nMin segments, if any. ** If one is found, set iAbsLevel to the absolute level number and ** nSeg to nMin. If no level with at least nMin segments can be found, ** set nSeg to -1. */ |
︙ | ︙ | |||
4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 | ** indexes of absolute level iAbsLevel. If this cursor is opened using ** the 'hint' parameters, it is possible that there are less than nSeg ** segments available in level iAbsLevel. In this case, no work is ** done on iAbsLevel - fall through to the next iteration of the loop ** to start work on some other level. */ memset(pWriter, 0, nAlloc); pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; if( rc==SQLITE_OK ){ rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); } if( SQLITE_OK==rc && pCsr->nSegment==nSeg && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) ){ | > > > > > > < < < | | | | | | < | 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 | ** indexes of absolute level iAbsLevel. If this cursor is opened using ** the 'hint' parameters, it is possible that there are less than nSeg ** segments available in level iAbsLevel. In this case, no work is ** done on iAbsLevel - fall through to the next iteration of the loop ** to start work on some other level. */ memset(pWriter, 0, nAlloc); pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; if( rc==SQLITE_OK ){ rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); assert( bUseHint==1 || bUseHint==0 ); if( (iIdx-bUseHint)==0 ) pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY; } if( rc==SQLITE_OK ){ rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); } if( SQLITE_OK==rc && pCsr->nSegment==nSeg && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) ){ if( bUseHint && iIdx>0 ){ const char *zKey = pCsr->zTerm; int nKey = pCsr->nTerm; rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); }else{ rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); } if( rc==SQLITE_OK && pWriter->nLeafEst ){ fts3LogMerge(nSeg, iAbsLevel); do { rc = fts3IncrmergeAppend(p, pWriter, pCsr); if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr); |
︙ | ︙ |
Changes to test/fts3d.test.
︙ | ︙ | |||
209 210 211 212 213 214 215 | SELECT OFFSETS(t1) FROM t1 WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid; } } [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \ {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \ {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}] | > | | | | | | | | | | 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 | SELECT OFFSETS(t1) FROM t1 WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid; } } [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \ {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \ {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}] puts [db eval {SELECT c FROM t1 } ] check_terms_all fts3d-4.1 {a four is test that this was} check_doclist_all fts3d-4.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]} check_doclist_all fts3d-4.1.2 four {} check_doclist_all fts3d-4.1.3 is {[1 0[1]] [3 0[1]]} #check_doclist_all fts3d-4.1.4 one {} check_doclist_all fts3d-4.1.5 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist_all fts3d-4.1.6 that {[2 0[0]]} check_doclist_all fts3d-4.1.7 this {[1 0[0]] [3 0[0]]} #check_doclist_all fts3d-4.1.8 three {} #check_doclist_all fts3d-4.1.9 two {} check_doclist_all fts3d-4.1.10 was {[2 0[1]]} check_terms fts3d-4.2 0 0 {a four test that was} check_doclist fts3d-4.2.1 0 0 a {[2 0[2]]} check_doclist fts3d-4.2.2 0 0 four {[2]} check_doclist fts3d-4.2.3 0 0 test {[2 0[3]]} check_doclist fts3d-4.2.4 0 0 that {[2 0[0]]} check_doclist fts3d-4.2.5 0 0 was {[2 0[1]]} check_terms fts3d-4.3 0 1 {a four is test this} check_doclist fts3d-4.3.1 0 1 a {[3 0[2]]} check_doclist fts3d-4.3.2 0 1 four {[3]} check_doclist fts3d-4.3.3 0 1 is {[3 0[1]]} check_doclist fts3d-4.3.4 0 1 test {[3 0[3]]} check_doclist fts3d-4.3.5 0 1 this {[3 0[0]]} check_terms fts3d-4.4 1 0 {a four is test that this was} check_doclist fts3d-4.4.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]} check_doclist fts3d-4.4.2 1 0 four {[2 0[4]] [3 0[4]]} check_doclist fts3d-4.4.3 1 0 is {[1 0[1]] [3 0[1]]} #check_doclist fts3d-4.4.4 1 0 one {[1] [2] [3]} check_doclist fts3d-4.4.5 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist fts3d-4.4.6 1 0 that {[2 0[0]]} check_doclist fts3d-4.4.7 1 0 this {[1 0[0]] [3 0[0]]} #check_doclist fts3d-4.4.8 1 0 three {[1] [2] [3]} #check_doclist fts3d-4.4.9 1 0 two {[1] [2] [3]} check_doclist fts3d-4.4.10 1 0 was {[2 0[1]]} # Optimize should leave the result in the level of the highest-level # prior segment. do_test fts3d-4.5 { execsql { SELECT OPTIMIZE(t1) FROM t1 LIMIT 1; |
︙ | ︙ |
Changes to test/fts4growth.test.
︙ | ︙ | |||
147 148 149 150 151 152 153 | do_execsql_test 2.8 { SELECT sum(length(block)) FROM x2_segdir, x2_segments WHERE blockid BETWEEN start_block AND leaves_end_block AND level=3 } {86120} #-------------------------------------------------------------------------- | > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 | do_execsql_test 2.8 { SELECT sum(length(block)) FROM x2_segdir, x2_segments WHERE blockid BETWEEN start_block AND leaves_end_block AND level=3 } {86120} #-------------------------------------------------------------------------- # Test that delete markers are removed from FTS segments when possible. # It is only possible to remove delete markers when the output of the # merge operation will become the oldest segment in the index. # # 3.1 - when the oldest segment is created by an 'optimize'. # 3.2 - when the oldest segment is created by an incremental merge. # 3.3 - by a crisis merge. # proc insert_doc {args} { foreach iDoc $args { set L [lindex { {In your eagerness to engage the Trojans,} {don’t any of you charge ahead of others,} {trusting in your strength and horsemanship.} {And don’t lag behind. That will hurt our charge.} {Any man whose chariot confronts an enemy’s} {should thrust with his spear at him from there.} {That’s the most effective tactic, the way} {men wiped out city strongholds long ago —} {their chests full of that style and spirit.} } [expr $iDoc%9]] execsql { REPLACE INTO x3(docid, content) VALUES($iDoc, $L) } } } proc delete_doc {args} { foreach iDoc $args { execsql { DELETE FROM x3 WHERE docid = $iDoc } } } proc second {x} { lindex $x 1 } db func second second do_execsql_test 3.0 { CREATE VIRTUAL TABLE x3 USING fts4 } do_test 3.1.1 { db transaction { insert_doc 1 2 3 4 5 6 } execsql { SELECT level, idx, second(end_block) FROM x3_segdir } } {0 0 412} do_test 3.1.2 { delete_doc 1 2 3 4 5 6 execsql { SELECT count(*) FROM x3_segdir } } {0} do_test 3.1.3 { db transaction { insert_doc 1 2 3 4 5 6 7 8 9 delete_doc 9 8 7 } execsql { SELECT level, idx, second(end_block) FROM x3_segdir } } {0 0 591 0 1 65 0 2 72 0 3 76} do_test 3.1.4 { execsql { INSERT INTO x3(x3) VALUES('optimize') } execsql { SELECT level, idx, second(end_block) FROM x3_segdir } } {0 0 412} do_test 3.2.1 { execsql { DELETE FROM x3 } insert_doc 8 7 6 5 4 3 2 1 delete_doc 7 8 execsql { SELECT count(*) FROM x3_segdir } } {10} do_test 3.2.2 { execsql { INSERT INTO x3(x3) VALUES('merge=500,10') } execsql { SELECT level, idx, second(end_block) FROM x3_segdir } } {1 0 412} # This assumes the crisis merge happens when there are already 16 # segments and one more is added. # do_test 3.3.1 { execsql { DELETE FROM x3 } insert_doc 1 2 3 4 5 6 7 8 9 10 11 delete_doc 11 10 9 8 7 execsql { SELECT count(*) FROM x3_segdir } } {16} do_test 3.3.2 { insert_doc 12 execsql { SELECT level, idx, second(end_block) FROM x3_segdir WHERE level=1 } } {1 0 412} #-------------------------------------------------------------------------- do_execsql_test 4.1 { DROP TABLE IF EXISTS x4; DROP TABLE IF EXISTS t1; CREATE TABLE t1(docid, words); CREATE VIRTUAL TABLE x4 USING fts4(words); } do_test 4.2 { fts_kjv_genesis execsql { INSERT INTO x4 SELECT words FROM t1 } execsql { INSERT INTO x4 SELECT words FROM t1 } } {} do_execsql_test 4.3 { SELECT level, idx, second(end_block) FROM x4_segdir } {0 0 117483 0 1 118006} do_execsql_test 4.4 { INSERT INTO x4(x4) VALUES('merge=10,2'); SELECT count(*) FROM x4_segdir; } {3} breakpoint do_execsql_test 4.5 { INSERT INTO x4(x4) VALUES('merge=10,2'); SELECT count(*) FROM x4_segdir; } {3} if 0 { do_execsql_test 3.1 { DROP TABLE IF EXISTS x2; DROP TABLE IF EXISTS t1; CREATE TABLE t1(docid, words); CREATE VIRTUAL TABLE x2 USING fts4; } fts_kjv_genesis |
︙ | ︙ | |||
170 171 172 173 174 175 176 | } #do_test 3.2 { #t1_to_x2 #execsql {SELECT level, count(*) FROM x2_segdir GROUP BY level} #} {0 13 1 15 2 5} | | | | | | | | | < < > > > | 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 | } #do_test 3.2 { #t1_to_x2 #execsql {SELECT level, count(*) FROM x2_segdir GROUP BY level} #} {0 13 1 15 2 5} proc second {x} { lindex $x 1 } db func second second for {set i 0} {$i <1000} {incr i} { t1_to_x2 db eval { SELECT level, group_concat( second(end_block), ' ' ) AS c FROM x2_segdir GROUP BY level; } { puts "$i.$level: $c" } } } finish_test |