Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix a performance problem in the FTS4 auxiliary functions triggered by an OR clause in the full-text query. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
fa0f2f0e3e79ae653118b901e1cca772 |
User & Date: | dan 2013-09-30 18:14:45.059 |
Context
2013-09-30
| ||
19:05 | Add some timing tests to the amatch test script. (check-in: ad71c72be5 user: drh tags: trunk) | |
18:16 | Merge trunk changes with this branch. (check-in: e294a9c7c5 user: dan tags: fts4-docid-range-constraints) | |
18:14 | Fix a performance problem in the FTS4 auxiliary functions triggered by an OR clause in the full-text query. (check-in: fa0f2f0e3e user: dan tags: trunk) | |
17:37 | Fix memory leaks in the amatch extension. Add a few simple test cases. (check-in: 604134732e user: drh tags: trunk) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
5264 5265 5266 5267 5268 5269 5270 5271 5272 | return SQLITE_OK; } iDocid = pExpr->iDocid; pIter = pPhrase->doclist.pList; if( iDocid!=pCsr->iPrevId || pExpr->bEof ){ int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ int bOr = 0; u8 bEof = 0; | > > | > | > > > > > | > > | > > | > > | > > > | | > > > > > > > > > > > > > > > > | | | | | | | > > > > | | | | | > | 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 | return SQLITE_OK; } iDocid = pExpr->iDocid; pIter = pPhrase->doclist.pList; if( iDocid!=pCsr->iPrevId || pExpr->bEof ){ int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ int iMul; /* +1 if csr dir matches index dir, else -1 */ int bOr = 0; u8 bEof = 0; u8 bTreeEof = 0; Fts3Expr *p; /* Used to iterate from pExpr to root */ Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ /* Check if this phrase descends from an OR expression node. If not, ** return NULL. Otherwise, the entry that corresponds to docid ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the ** tree that the node is part of has been marked as EOF, but the node ** itself is not EOF, then it may point to an earlier entry. */ pNear = pExpr; for(p=pExpr->pParent; p; p=p->pParent){ if( p->eType==FTSQUERY_OR ) bOr = 1; if( p->eType==FTSQUERY_NEAR ) pNear = p; if( p->bEof ) bTreeEof = 1; } if( bOr==0 ) return SQLITE_OK; /* This is the descendent of an OR node. In this case we cannot use ** an incremental phrase. Load the entire doclist for the phrase ** into memory in this case. */ if( pPhrase->bIncr ){ int rc = SQLITE_OK; int bEofSave = pExpr->bEof; fts3EvalRestart(pCsr, pExpr, &rc); while( rc==SQLITE_OK && !pExpr->bEof ){ fts3EvalNextRow(pCsr, pExpr, &rc); if( bEofSave==0 && pExpr->iDocid==iDocid ) break; } pIter = pPhrase->doclist.pList; assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); if( rc!=SQLITE_OK ) return rc; } iMul = ((pCsr->bDesc==bDescDoclist) ? 1 : -1); while( bTreeEof==1 && pNear->bEof==0 && (DOCID_CMP(pNear->iDocid, pCsr->iPrevId) * iMul)<0 ){ int rc = SQLITE_OK; fts3EvalNextRow(pCsr, pExpr, &rc); if( rc!=SQLITE_OK ) return rc; iDocid = pExpr->iDocid; pIter = pPhrase->doclist.pList; } bEof = (pPhrase->doclist.nAll==0); assert( bDescDoclist==0 || bDescDoclist==1 ); assert( pCsr->bDesc==0 || pCsr->bDesc==1 ); if( bEof==0 ){ if( pCsr->bDesc==bDescDoclist ){ int dummy; if( pNear->bEof ){ /* This expression is already at EOF. So position it to point to the ** last entry in the doclist at pPhrase->doclist.aAll[]. Variable ** iDocid is already set for this entry, so all that is required is ** to set pIter to point to the first byte of the last position-list ** in the doclist. ** ** It would also be correct to set pIter and iDocid to zero. In ** this case, the first call to sqltie3Fts4DoclistPrev() below ** would also move the iterator to point to the last entry in the ** doclist. However, this is expensive, as to do so it has to ** iterate through the entire doclist from start to finish (since ** it does not know the docid for the last entry). */ pIter = &pPhrase->doclist.aAll[pPhrase->doclist.nAll-1]; fts3ReversePoslist(pPhrase->doclist.aAll, &pIter); } while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ sqlite3Fts3DoclistPrev( bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, &pIter, &iDocid, &dummy, &bEof ); } }else{ if( pNear->bEof ){ pIter = 0; iDocid = 0; } while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ sqlite3Fts3DoclistNext( bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, &pIter, &iDocid, &bEof ); } } } if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0; } if( pIter==0 ) return SQLITE_OK; |
︙ | ︙ |
Changes to test/fts3snippet.test.
︙ | ︙ | |||
12 13 14 15 16 17 18 19 20 21 22 23 24 25 | # The tests in this file test the FTS3 auxillary functions offsets(), # snippet() and matchinfo() work. At time of writing, running this file # provides full coverage of fts3_snippet.c. # set testdir [file dirname $argv0] source $testdir/tester.tcl # If SQLITE_ENABLE_FTS3 is not defined, omit this file. ifcapable !fts3 { finish_test ; return } source $testdir/fts3_common.tcl set sqlite_fts3_enable_parentheses 1 set DO_MALLOC_TEST 0 | > | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | # The tests in this file test the FTS3 auxillary functions offsets(), # snippet() and matchinfo() work. At time of writing, running this file # provides full coverage of fts3_snippet.c. # set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts3snippet # If SQLITE_ENABLE_FTS3 is not defined, omit this file. ifcapable !fts3 { finish_test ; return } source $testdir/fts3_common.tcl set sqlite_fts3_enable_parentheses 1 set DO_MALLOC_TEST 0 |
︙ | ︙ | |||
134 135 136 137 138 139 140 | forcedelete test.db sqlite3 db test.db sqlite3_db_config_lookaside db 0 0 0 db eval "PRAGMA encoding = \"$enc\"" # Set variable $T to the test name prefix for this iteration of the loop. # | | | 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | forcedelete test.db sqlite3 db test.db sqlite3_db_config_lookaside db 0 0 0 db eval "PRAGMA encoding = \"$enc\"" # Set variable $T to the test name prefix for this iteration of the loop. # set T "fts3snippet-1.$enc" ########################################################################## # Test the offset function. # do_test $T.1.1 { execsql { CREATE VIRTUAL TABLE ft USING fts3; |
︙ | ︙ | |||
454 455 456 457 458 459 460 461 462 463 | do_select_test $T.10.5 { SELECT length(matchinfo(ft)), typeof(matchinfo(ft)) FROM ft; } {0 blob 0 blob 0 blob} do_select_test $T.10.6 { SELECT length(matchinfo(ft)), typeof(matchinfo(ft)) FROM ft WHERE rowid = $r } {0 blob} } set sqlite_fts3_enable_parentheses 0 finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 | do_select_test $T.10.5 { SELECT length(matchinfo(ft)), typeof(matchinfo(ft)) FROM ft; } {0 blob 0 blob 0 blob} do_select_test $T.10.6 { SELECT length(matchinfo(ft)), typeof(matchinfo(ft)) FROM ft WHERE rowid = $r } {0 blob} } #------------------------------------------------------------------------- # Test an interaction between the snippet() function and OR clauses. # do_execsql_test 2.1 { CREATE VIRTUAL TABLE t2 USING fts4; INSERT INTO t2 VALUES('one two three four five'); INSERT INTO t2 VALUES('two three four five one'); INSERT INTO t2 VALUES('three four five one two'); INSERT INTO t2 VALUES('four five one two three'); INSERT INTO t2 VALUES('five one two three four'); } do_execsql_test 2.2 { SELECT snippet(t2, '[', ']') FROM t2 WHERE t2 MATCH 'one OR (four AND six)' } { {[one] two three [four] five} {two three [four] five [one]} {three [four] five [one] two} {[four] five [one] two three} {five [one] two three [four]} } do_execsql_test 2.3 { SELECT snippet(t2, '[', ']') FROM t2 WHERE t2 MATCH 'one OR (four AND six)' ORDER BY docid DESC } { {five [one] two three [four]} {[four] five [one] two three} {three [four] five [one] two} {two three [four] five [one]} {[one] two three [four] five} } do_execsql_test 2.4 { INSERT INTO t2 VALUES('six'); } do_execsql_test 2.5 { SELECT snippet(t2, '[', ']') FROM t2 WHERE t2 MATCH 'one OR (four AND six)' } { {[one] two three [four] five} {two three [four] five [one]} {three [four] five [one] two} {[four] five [one] two three} {five [one] two three [four]} } do_execsql_test 2.6 { SELECT snippet(t2, '[', ']') FROM t2 WHERE t2 MATCH 'one OR (four AND six)' ORDER BY docid DESC } { {five [one] two three [four]} {[four] five [one] two three} {three [four] five [one] two} {two three [four] five [one]} {[one] two three [four] five} } set sqlite_fts3_enable_parentheses 0 finish_test |