Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix problems to do with using both OR and NEAR operators in a single expression. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts3-prefix-search |
Files: | files | file ages | folders |
SHA1: | 4e8dd19eef04777d800977faf1859a40 |
User & Date: | dan 2011-06-09 10:48:02.352 |
Context
2011-06-13
| ||
09:11 | Fix a bug exposed by combining matchinfo(), NEAR and "ORDER BY rowid DESC". (check-in: 5f6b87f420 user: dan tags: fts3-prefix-search) | |
2011-06-09
| ||
10:48 | Fix problems to do with using both OR and NEAR operators in a single expression. (check-in: 4e8dd19eef user: dan tags: fts3-prefix-search) | |
2011-06-08
| ||
18:39 | Fix various issues to do with deferred tokens, NEAR expressions and matchinfo(). (check-in: 3972a787df user: dan tags: fts3-prefix-search) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
3759 3760 3761 3762 3763 3764 3765 | } } fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc); return rc; } | | > | | | < > > | 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 | } } fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc); return rc; } static void fts3EvalZeroPoslist(Fts3Phrase *pPhrase){ if( pPhrase->doclist.bFreeList ){ sqlite3_free(pPhrase->doclist.pList); } pPhrase->doclist.pList = 0; pPhrase->doclist.nList = 0; pPhrase->doclist.bFreeList = 0; } static int fts3EvalNearTrim2( int nNear, char *aTmp, /* Temporary space to use */ char **paPoslist, /* IN/OUT: Position list */ int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ ){ int nParam1 = nNear + pPhrase->nToken; int nParam2 = nNear + *pnToken; int nNew; char *p2; char *pOut; int res; assert( pPhrase->doclist.pList ); p2 = pOut = pPhrase->doclist.pList; res = fts3PoslistNearMerge( &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 ); if( res ){ nNew = (pOut - pPhrase->doclist.pList) - 1; |
︙ | ︙ | |||
3955 3956 3957 3958 3959 3960 3961 | Fts3Expr *pLeft = pExpr->pLeft; Fts3Expr *pRight = pExpr->pRight; if( pRight->bStart==0 ){ fts3EvalNext(pCsr, pRight, pRc); assert( *pRc!=SQLITE_OK || pRight->bStart ); } | | | | < > | | | | | | > | > > > | | > > | > | > > > > | > | 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 | Fts3Expr *pLeft = pExpr->pLeft; Fts3Expr *pRight = pExpr->pRight; if( pRight->bStart==0 ){ fts3EvalNext(pCsr, pRight, pRc); assert( *pRc!=SQLITE_OK || pRight->bStart ); } fts3EvalNext(pCsr, pLeft, pRc); if( pLeft->bEof==0 ){ while( !*pRc && !pRight->bEof && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 ){ fts3EvalNext(pCsr, pRight, pRc); } } pExpr->iDocid = pLeft->iDocid; pExpr->bEof = pLeft->bEof; break; } default: { Fts3Phrase *pPhrase = pExpr->pPhrase; fts3EvalZeroPoslist(pPhrase); *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); pExpr->iDocid = pPhrase->doclist.iDocid; break; } } } } static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){ int bHit = 1; if( *pRc==SQLITE_OK ){ switch( pExpr->eType ){ case FTSQUERY_NEAR: case FTSQUERY_AND: bHit = ( fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) && fts3EvalNearTest(pExpr, pRc) ); /* If the NEAR expression does not match any rows, zero the doclist for ** all phrases involved in the NEAR. 
This is because the snippet(), ** offsets() and matchinfo() functions are not supposed to recognize ** any instances of phrases that are part of unmatched NEAR queries. ** For example if this expression: ** ** ... MATCH 'a OR (b NEAR c)' ** ** is matched against a row containing: ** ** 'a b d e' ** ** then any snippet() should only highlight the "a" term, not the "b" ** (as "b" is part of a non-matching NEAR clause). */ if( bHit==0 && pExpr->eType==FTSQUERY_NEAR && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) ){ Fts3Expr *p; for(p=pExpr; p->pPhrase==0; p=p->pLeft){ if( p->pRight->iDocid==pCsr->iPrevId ){ fts3EvalZeroPoslist(p->pRight->pPhrase); } } if( p->iDocid==pCsr->iPrevId ){ fts3EvalZeroPoslist(p->pPhrase); } } break; case FTSQUERY_OR: { int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc); int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc); bHit = bHit1 || bHit2; break; } case FTSQUERY_NOT: bHit = ( fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) ); break; default: { if( pCsr->pDeferred && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred) ){ Fts3Phrase *pPhrase = pExpr->pPhrase; assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 ); if( pExpr->bDeferred ){ fts3EvalZeroPoslist(pPhrase); } *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); bHit = (pPhrase->doclist.pList!=0); pExpr->iDocid = pCsr->iPrevId; }else{ bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId); } break; |
︙ | ︙ | |||
4112 4113 4114 4115 4116 4117 4118 | Fts3Expr *pExpr, int *pRc ){ if( pExpr && *pRc==SQLITE_OK ){ Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase ){ | | | 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 | Fts3Expr *pExpr, int *pRc ){ if( pExpr && *pRc==SQLITE_OK ){ Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase ){ fts3EvalZeroPoslist(pPhrase); if( pPhrase->bIncr ){ sqlite3Fts3EvalPhraseCleanup(pPhrase); memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); *pRc = sqlite3Fts3EvalStart(pCsr, pExpr, 0); }else{ pPhrase->doclist.pNextDocid = 0; pPhrase->doclist.iDocid = 0; |
︙ | ︙ | |||
4168 4169 4170 4171 4172 4173 4174 | } fts3EvalUpdateCounts(pCsr, pExpr->pLeft, pRc); fts3EvalUpdateCounts(pCsr, pExpr->pRight, pRc); } } | | | 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 | } fts3EvalUpdateCounts(pCsr, pExpr->pLeft, pRc); fts3EvalUpdateCounts(pCsr, pExpr->pRight, pRc); } } static int fts3EvalGatherStats( Fts3Cursor *pCsr, Fts3Expr *pExpr ){ int rc = SQLITE_OK; /* Return code */ assert( pExpr->eType==FTSQUERY_PHRASE ); if( pExpr->aMI==0 ){ |
︙ | ︙ | |||
4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 | /* Find the root of the NEAR expression */ pRoot = pExpr; while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ pRoot = pRoot->pParent; } iDocid = pRoot->iDocid; bEof = pRoot->bEof; /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ for(p=pRoot; p; p=p->pLeft){ Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); assert( pE->aMI==0 ); pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32)); if( !pE->aMI ) return SQLITE_NOMEM; | > | 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 | /* Find the root of the NEAR expression */ pRoot = pExpr; while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ pRoot = pRoot->pParent; } iDocid = pRoot->iDocid; bEof = pRoot->bEof; assert( pRoot->bStart ); /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ for(p=pRoot; p; p=p->pLeft){ Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); assert( pE->aMI==0 ); pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32)); if( !pE->aMI ) return SQLITE_NOMEM; |
︙ | ︙ | |||
4232 4233 4234 4235 4236 4237 4238 4239 | pCsr->isEof = 0; pCsr->iPrevId = iPrevId; if( bEof ){ pRoot->bEof = bEof; }else{ fts3EvalRestart(pCsr, pRoot, &rc); | > > > > > > < > < > | 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 | pCsr->isEof = 0; pCsr->iPrevId = iPrevId; if( bEof ){ pRoot->bEof = bEof; }else{ /* Caution: pRoot may iterate through docids in ascending or descending ** order. For this reason, even though it seems more defensive, the ** do loop can not be written: ** ** do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK ); */ fts3EvalRestart(pCsr, pRoot, &rc); do { fts3EvalNext(pCsr, pRoot, &rc); assert( pRoot->bEof==0 ); }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); fts3EvalLoadDeferred(pCsr, &rc); } } return rc; } /* |
︙ | ︙ | |||
4289 4290 4291 4292 4293 4294 4295 | if( pExpr->bDeferred ){ assert( pCsr->nDoc>0 ); for(iCol=0; iCol<pTab->nColumn; iCol++){ aiOut[iCol*3 + 1] = pCsr->nDoc; aiOut[iCol*3 + 2] = pCsr->nDoc; } }else{ | | | 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 | if( pExpr->bDeferred ){ assert( pCsr->nDoc>0 ); for(iCol=0; iCol<pTab->nColumn; iCol++){ aiOut[iCol*3 + 1] = pCsr->nDoc; aiOut[iCol*3 + 2] = pCsr->nDoc; } }else{ rc = fts3EvalGatherStats(pCsr, pExpr); if( rc==SQLITE_OK ){ assert( pExpr->aMI ); for(iCol=0; iCol<pTab->nColumn; iCol++){ aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1]; aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2]; } } |
︙ | ︙ | |||
4368 4369 4370 4371 4372 4373 4374 | ** * the contents of pPhrase->doclist, and ** * any Fts3MultiSegReader objects held by phrase tokens. */ void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ if( pPhrase ){ int i; sqlite3_free(pPhrase->doclist.aAll); | | | 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 | ** * the contents of pPhrase->doclist, and ** * any Fts3MultiSegReader objects held by phrase tokens. */ void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ if( pPhrase ){ int i; sqlite3_free(pPhrase->doclist.aAll); fts3EvalZeroPoslist(pPhrase); memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); for(i=0; i<pPhrase->nToken; i++){ fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); pPhrase->aToken[i].pSegcsr = 0; } } } #endif |
Changes to test/fts3rnd.test.
︙ | ︙ | |||
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 | proc do_orderbydocid_test {tn sql res} { uplevel [list do_select_test $tn.asc "$sql ORDER BY docid ASC" $res] uplevel [list do_select_test $tn.desc "$sql ORDER BY docid DESC" \ [lsort -int -dec $res] ] } foreach {nodesize order} { 50 DESC 50 ASC 500 ASC 1000 DESC 2000 ASC } { catch { array unset ::t1 } set testname "$nodesize/$order" # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows. # db transaction { catchsql { DROP TABLE t1 } execsql "CREATE VIRTUAL TABLE t1 USING fts4(a, b, c, order=$order)" execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')" for {set i 0} {$i < 100} {incr i} { insert_row $i } } | > > | | 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 | proc do_orderbydocid_test {tn sql res} { uplevel [list do_select_test $tn.asc "$sql ORDER BY docid ASC" $res] uplevel [list do_select_test $tn.desc "$sql ORDER BY docid DESC" \ [lsort -int -dec $res] ] } set NUM_TRIALS 100 foreach {nodesize order} { 50 DESC 50 ASC 500 ASC 1000 DESC 2000 ASC } { catch { array unset ::t1 } set testname "$nodesize/$order" # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows. # db transaction { catchsql { DROP TABLE t1 } execsql "CREATE VIRTUAL TABLE t1 USING fts4(a, b, c, order=$order)" execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')" for {set i 0} {$i < 100} {incr i} { insert_row $i } } for {set iTest 1} {$iTest <= $NUM_TRIALS} {incr iTest} { catchsql COMMIT set DO_MALLOC_TEST 0 set nRep 10 if {$iTest==100 && $nodesize==50} { set DO_MALLOC_TEST 1 set nRep 2 |
︙ | ︙ | |||
330 331 332 333 334 335 336 | # Pick 10 terms from the vocabulary. Check that the results of querying # the database for the set of documents containing each of these terms # is the same as the result obtained by scanning the contents of the Tcl # array for each term. # for {set i 0} {$i < 10} {incr i} { set term [random_term] | | | | 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 | # Pick 10 terms from the vocabulary. Check that the results of querying # the database for the set of documents containing each of these terms # is the same as the result obtained by scanning the contents of the Tcl # array for each term. # for {set i 0} {$i < 10} {incr i} { set term [random_term] do_select_test 1.$i.asc { SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term ORDER BY docid ASC } [simple_token_matchinfo $term 0] do_select_test 1.$i.desc { SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term ORDER BY docid DESC } [simple_token_matchinfo $term 1] } # This time, use the first two characters of each term as a term prefix # to query for. Test that querying the Tcl array produces the same results |
︙ | ︙ | |||
428 429 430 431 432 433 434 | } [$proc [simple_phrase $term1] [simple_phrase $term2]] } } # Set operations on NEAR queries. # foreach {tn op proc} { | | | | | 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 | } [$proc [simple_phrase $term1] [simple_phrase $term2]] } } # Set operations on NEAR queries. # foreach {tn op proc} { 11 OR setop_or 12 NOT setop_not 13 AND setop_and } { for {set i 0} {$i < $nRep} {incr i} { set term1 [random_term] set term2 [random_term] set term3 [random_term] set term4 [random_term] set match "$term1 NEAR $term2 $op $term3 NEAR $term4" |
︙ | ︙ |