Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | If a token within an FTS query is prefixed with a '^' character, it must be the first token in a column of data to match. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts4-content |
Files: | files | file ages | folders |
SHA1: | 63ac33c860eb32ce96699f06bf83121c |
User & Date: | dan 2011-10-18 19:39:41.203 |
Context
2011-10-19
09:40 | Fix a problem in FTS to do with ^ tokens and the snippet() function. (check-in: 2c03b24f4c user: dan tags: fts4-content) |
2011-10-18
19:39 | If a token within an FTS query is prefixed with a '^' character, it must be the first token in a column of data to match. (check-in: 63ac33c860 user: dan tags: fts4-content) |
12:49 | Cherrypick patch [3126754c72] from the trunk into the content= branch. (check-in: f9b5b21708 user: dan tags: fts4-content) |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 | fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); } } *pnRight = p - aOut; } /* ** Merge all doclists in the TermSelect.aaOutput[] array into a single ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all ** other doclists (except the aaOutput[0] one) and return SQLITE_OK. ** ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 | fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); } } *pnRight = p - aOut; } /* ** When this function is called, pList points to a doclist containing position ** data, length *pnList bytes. This removes all entries from the doclist that ** do not correspond to the first token in a column and overwrites pList ** with the result. *pnList is set to the length of the new doclist before ** returning. ** ** If bDescDoclist is true, then both the input and output are in descending ** order. Otherwise, ascending. 
*/ static void fts3DoclistFirstFilter( int bDescDoclist, /* True if pList is a descending doclist */ char *pList, /* Buffer containing doclist */ int *pnList /* IN/OUT: Size of doclist */ ){ char *p = pList; char *pOut = pList; char *pEnd = &pList[*pnList]; sqlite3_int64 iDoc; sqlite3_int64 iPrev; int bFirstOut = 0; fts3GetDeltaVarint3(&p, pEnd, 0, &iDoc); while( p ){ int bWritten = 0; if( *p!=0x01 ){ if( *p==0x02 ){ fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); *pOut++ = 0x02; bWritten = 1; } fts3ColumnlistCopy(0, &p); } while( *p==0x01 ){ sqlite3_int64 iCol; p++; p += sqlite3Fts3GetVarint(p, &iCol); if( *p==0x02 ){ if( bWritten==0 ){ fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); bWritten = 1; } pOut += sqlite3Fts3PutVarint(pOut, iCol); *pOut++ = 0x02; } fts3ColumnlistCopy(0, &p); } if( bWritten ){ *pOut++ = 0x00; } assert( *p==0x00 ); p++; fts3GetDeltaVarint3(&p, pEnd, bDescDoclist, &iDoc); } *pnList = (pOut - pList); } /* ** Merge all doclists in the TermSelect.aaOutput[] array into a single ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all ** other doclists (except the aaOutput[0] one) and return SQLITE_OK. ** ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is |
︙ | ︙ | |||
3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 | Fts3Table *pTab, /* FTS Table pointer */ Fts3Phrase *p, /* Phrase to merge pList/nList into */ int iToken, /* Token pList/nList corresponds to */ char *pList, /* Pointer to doclist */ int nList /* Number of bytes in pList */ ){ assert( iToken!=p->iDoclistToken ); if( pList==0 ){ sqlite3_free(p->doclist.aAll); p->doclist.aAll = 0; p->doclist.nAll = 0; } | > > > > | 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 | Fts3Table *pTab, /* FTS Table pointer */ Fts3Phrase *p, /* Phrase to merge pList/nList into */ int iToken, /* Token pList/nList corresponds to */ char *pList, /* Pointer to doclist */ int nList /* Number of bytes in pList */ ){ assert( iToken!=p->iDoclistToken ); if( p->aToken[iToken].bFirst ){ fts3DoclistFirstFilter(pTab->bDescIdx, pList, &nList); } if( pList==0 ){ sqlite3_free(p->doclist.aAll); p->doclist.aAll = 0; p->doclist.nAll = 0; } |
︙ | ︙ | |||
3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 | Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; if( pCsr->bDesc==pTab->bDescIdx && bOptOk==1 && p->nToken==1 && pFirst->pSegcsr && pFirst->pSegcsr->bLookup ){ /* Use the incremental approach. */ int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); rc = sqlite3Fts3MsrIncrStart( pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n); p->bIncr = 1; | > | 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 | Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; if( pCsr->bDesc==pTab->bDescIdx && bOptOk==1 && p->nToken==1 && pFirst->pSegcsr && pFirst->pSegcsr->bLookup && pFirst->bFirst==0 ){ /* Use the incremental approach. */ int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); rc = sqlite3Fts3MsrIncrStart( pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n); p->bIncr = 1; |
︙ | ︙ |
Changes to ext/fts3/fts3Int.h.
︙ | ︙ | |||
306 307 308 309 310 311 312 313 314 315 316 317 318 319 | ** For a sequence of tokens contained in double-quotes (i.e. "one two three") ** nToken will be the number of tokens in the string. */ struct Fts3PhraseToken { char *z; /* Text of the token */ int n; /* Number of bytes in buffer z */ int isPrefix; /* True if token ends with a "*" character */ /* Variables above this point are populated when the expression is ** parsed (by code in fts3_expr.c). Below this point the variables are ** used when evaluating the expression. */ Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */ }; | > | 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 | ** For a sequence of tokens contained in double-quotes (i.e. "one two three") ** nToken will be the number of tokens in the string. */ struct Fts3PhraseToken { char *z; /* Text of the token */ int n; /* Number of bytes in buffer z */ int isPrefix; /* True if token ends with a "*" character */ int bFirst; /* True if token must appear at position 0 */ /* Variables above this point are populated when the expression is ** parsed (by code in fts3_expr.c). Below this point the variables are ** used when evaluating the expression. */ Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */ }; |
︙ | ︙ |
Changes to ext/fts3/fts3_expr.c.
︙ | ︙ | |||
176 177 178 179 180 181 182 | pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); if( iEnd<n && z[iEnd]=='*' ){ pRet->pPhrase->aToken[0].isPrefix = 1; iEnd++; } | > > | > > | > > > > > > | > > | 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 | pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); if( iEnd<n && z[iEnd]=='*' ){ pRet->pPhrase->aToken[0].isPrefix = 1; iEnd++; } while( 1 ){ if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){ pParse->isNot = 1; iStart--; }else if( iStart>0 && z[iStart-1]=='^' ){ pRet->pPhrase->aToken[0].bFirst = 1; iStart--; }else{ break; } } } nConsumed = iEnd; } pModule->xClose(pCursor); } |
︙ | ︙ | |||
277 278 279 280 281 282 283 284 285 286 287 288 289 290 | memset(pToken, 0, sizeof(Fts3PhraseToken)); memcpy(&zTemp[nTemp], zByte, nByte); nTemp += nByte; pToken->n = nByte; pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*'); nToken = ii+1; } } pModule->xClose(pCursor); pCursor = 0; } | > | 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 | memset(pToken, 0, sizeof(Fts3PhraseToken)); memcpy(&zTemp[nTemp], zByte, nByte); nTemp += nByte; pToken->n = nByte; pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*'); pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^'); nToken = ii+1; } } pModule->xClose(pCursor); pCursor = 0; } |
︙ | ︙ |
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 | int iPos; /* Position of token in zText */ pTC->pTokenizer = pT; rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ Fts3PhraseToken *pPT = pDef->pToken; if( (pDef->iCol>=p->nColumn || pDef->iCol==i) && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken)) && (0==memcmp(zToken, pPT->z, pPT->n)) ){ fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); } } } | > | 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 | int iPos; /* Position of token in zText */ pTC->pTokenizer = pT; rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ Fts3PhraseToken *pPT = pDef->pToken; if( (pDef->iCol>=p->nColumn || pDef->iCol==i) && (pPT->bFirst==0 || iPos==0) && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken)) && (0==memcmp(zToken, pPT->z, pPT->n)) ){ fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); } } } |
︙ | ︙ |
Changes to test/fts3defer.test.
︙ | ︙ | |||
422 423 424 425 426 427 428 429 430 431 432 433 434 435 | SELECT rowid FROM t1 WHERE t1 MATCH '"zm azavwm"' } {15 26 92 96} if {$fts3_simple_deferred_tokens_only==0} { do_select_test 6.2.3 { SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk" OR "zm azavwm"' } {8 15 26 92 96} } } set testprefix fts3defer do_execsql_test 3.1 { CREATE VIRTUAL TABLE x1 USING fts4(a, b); INSERT INTO x1 VALUES('a b c', 'd e f'); | > > > > > > > | 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 | SELECT rowid FROM t1 WHERE t1 MATCH '"zm azavwm"' } {15 26 92 96} if {$fts3_simple_deferred_tokens_only==0} { do_select_test 6.2.3 { SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk" OR "zm azavwm"' } {8 15 26 92 96} } do_select_test 7.1 { SELECT rowid FROM t1 WHERE t1 MATCH '^zm mjpavjuhw' } {56 62} do_select_test 7.2 { SELECT rowid FROM t1 WHERE t1 MATCH '^azavwm zm' } {43} } set testprefix fts3defer do_execsql_test 3.1 { CREATE VIRTUAL TABLE x1 USING fts4(a, b); INSERT INTO x1 VALUES('a b c', 'd e f'); |
︙ | ︙ |
Added test/fts3first.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | # 2011 October 18 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** set testdir [file dirname $argv0] source $testdir/tester.tcl source $testdir/malloc_common.tcl ifcapable !fts3 { finish_test return } do_execsql_test 1.0 { CREATE VIRTUAL TABLE x1 USING FTS4(a, b, c); INSERT INTO x1(docid,a,b,c) VALUES(0, 'K H D S T', 'V M N Y K', 'S Z N Q S'); INSERT INTO x1(docid,a,b,c) VALUES(1, 'K N J L W', 'S Z W J Q', 'D U W S E'); INSERT INTO x1(docid,a,b,c) VALUES(2, 'B P M O I', 'R P H W S', 'R J L L E'); INSERT INTO x1(docid,a,b,c) VALUES(3, 'U R Q M L', 'M J K A V', 'Q W J T J'); INSERT INTO x1(docid,a,b,c) VALUES(4, 'N J C Y N', 'R U D X V', 'B O U A Q'); INSERT INTO x1(docid,a,b,c) VALUES(5, 'Q L X L U', 'I F N X S', 'U Q A N Y'); INSERT INTO x1(docid,a,b,c) VALUES(6, 'M R G U T', 'U V I Q P', 'X Y D L S'); INSERT INTO x1(docid,a,b,c) VALUES(7, 'D Y P O I', 'X J P K R', 'V O T H V'); INSERT INTO x1(docid,a,b,c) VALUES(8, 'R Y D L R', 'U U E S J', 'N W L M R'); INSERT INTO x1(docid,a,b,c) VALUES(9, 'Z P F N P', 'W A X D U', 'V A E Q A'); INSERT INTO x1(docid,a,b,c) VALUES(10, 'Q I A Q M', 'N D K H C', 'A H T Q Z'); INSERT INTO x1(docid,a,b,c) VALUES(11, 'T E R Q B', 'C I B C B', 'F Z U W R'); INSERT INTO x1(docid,a,b,c) VALUES(12, 'E S V U W', 'T P F W H', 'A M D J Q'); INSERT INTO x1(docid,a,b,c) VALUES(13, 'X 
S B T Y', 'U D N D P', 'X Z Y G F'); INSERT INTO x1(docid,a,b,c) VALUES(14, 'K H A B L', 'S R C C Z', 'D W E H J'); INSERT INTO x1(docid,a,b,c) VALUES(15, 'C E U C C', 'W F M N M', 'T Z U X T'); INSERT INTO x1(docid,a,b,c) VALUES(16, 'Q G C G H', 'H N N B H', 'B Q I H Y'); INSERT INTO x1(docid,a,b,c) VALUES(17, 'Q T S K B', 'W B D Y N', 'V J P E C'); INSERT INTO x1(docid,a,b,c) VALUES(18, 'A J M O Q', 'L G Y Y A', 'G N M R N'); INSERT INTO x1(docid,a,b,c) VALUES(19, 'T R Y P Y', 'N V Y B X', 'L Z T N T'); CREATE VIRTUAL TABLE x2 USING FTS4(a, b, c, order=DESC); INSERT INTO x2(docid, a, b, c) SELECT docid, a, b, c FROM x1; } foreach x {1 2} { foreach {tn match res} { 1 "^K" {0 1 14} 2 "^S" {0 1 14} 3 "^W" {9 15 17} 4 "^J" {} 5 "^E" {12} 6 "V ^-E" {0 3 4 6 7 9 17 19} 7 "V -^E" {0 3 4 6 7 9 17 19} 8 "^-E V" {0 3 4 6 7 9 17 19} 9 "-^E V" {0 3 4 6 7 9 17 19} 10 "V" {0 3 4 6 7 9 12 17 19} 11 {"^K H"} {0 14} 12 {"K H"} {0 10 14} 13 {"K ^H"} {} } { set rev [list] for {set ii [expr [llength $res]-1]} {$ii>=0} {incr ii -1} { lappend rev [lindex $res $ii] } do_execsql_test 1.$x.$tn.1 {SELECT docid FROM x1 WHERE x1 MATCH $match} $res do_execsql_test 1.$x.$tn.2 {SELECT docid FROM x2 WHERE x2 MATCH $match} $rev } do_execsql_test 1.$x.[expr $tn+1] { INSERT INTO x1(x1) VALUES('optimize'); INSERT INTO x2(x2) VALUES('optimize'); } {} } finish_test |
Changes to test/permutations.test.
︙ | ︙ | |||
181 182 183 184 185 186 187 188 189 190 191 192 193 194 | fts3near.test fts3query.test fts3shared.test fts3snippet.test fts3sort.test fts3fault.test fts3malloc.test fts3matchinfo.test fts3aux1.test fts3comp1.test fts3auto.test fts4aa.test fts4content.test fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test fts3corrupt2.test } lappend ::testsuitelist xxx #------------------------------------------------------------------------- # Define the coverage related test suites: # | > | 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | fts3near.test fts3query.test fts3shared.test fts3snippet.test fts3sort.test fts3fault.test fts3malloc.test fts3matchinfo.test fts3aux1.test fts3comp1.test fts3auto.test fts4aa.test fts4content.test fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test fts3corrupt2.test fts3first.test } lappend ::testsuitelist xxx #------------------------------------------------------------------------- # Define the coverage related test suites: # |
︙ | ︙ |