Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix the way parenthesis in MATCH expressions are handled by FTS if the tokenizer considers them to be token characters. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
e21bf7a2ade6373e94ea403c665f78e1 |
User & Date: | dan 2014-05-07 19:59:36 |
References
2014-10-09
| ||
15:08 | Allow FTS tokenizers to choose whether or not to consider the "*" character part of tokens or not. This restores the pre-[e21bf7a2ad] behaviour. Also fix a problem causing FTS to interpret tokens beginning with "*" characters as EOF. (check-in: 49dfee7c user: dan tags: trunk) | |
Context
2014-05-07
| ||
20:24 | A better fix for the group_concat() problem. (check-in: 1c086dee user: drh tags: trunk) | |
19:59 | Fix the way parenthesis in MATCH expressions are handled by FTS if the tokenizer considers them to be token characters. (check-in: e21bf7a2 user: dan tags: trunk) | |
18:23 | Make sure the group_concat() function returns an empty string, not a NULL, if it has at least one input row. Fix for ticket [55746f9e65f8587]. (check-in: d01cedaa user: drh tags: trunk) | |
Changes
Changes to ext/fts3/fts3_expr.c.
︙ | ︙ | |||
181 182 183 184 185 186 187 | int *pnConsumed /* OUT: Number of bytes consumed */ ){ sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; int rc; sqlite3_tokenizer_cursor *pCursor; Fts3Expr *pRet = 0; | > | > > > > | > > | < < < < < < < < | < < < < < < < < < < < < < < < < | 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 | int *pnConsumed /* OUT: Number of bytes consumed */ ){ sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; int rc; sqlite3_tokenizer_cursor *pCursor; Fts3Expr *pRet = 0; int i = 0; /* Set variable i to the maximum number of bytes of input to tokenize. */ for(i=0; i<n; i++){ if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break; if( z[i]=='*' || z[i]=='"' ) break; } *pnConsumed = i; rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); if( rc==SQLITE_OK ){ const char *zToken; int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; int nByte; /* total space to allocate */ rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); if( rc==SQLITE_OK ){ nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; pRet = (Fts3Expr *)fts3MallocZero(nByte); if( !pRet ){ rc = SQLITE_NOMEM; }else{ pRet->eType = FTSQUERY_PHRASE; pRet->pPhrase = (Fts3Phrase *)&pRet[1]; |
︙ | ︙ | |||
248 249 250 251 252 253 254 | iStart--; }else{ break; } } } | | > > < | 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 | iStart--; }else{ break; } } } *pnConsumed = iEnd; }else if( i && rc==SQLITE_DONE ){ rc = SQLITE_OK; } pModule->xClose(pCursor); } *ppExpr = pRet; return rc; } /* ** Enlarge a memory allocation. If an out-of-memory allocation occurs, |
︙ | ︙ | |||
504 505 506 507 508 509 510 511 512 513 514 515 516 517 | *pnConsumed = (int)((zInput - z) + ii + 1); if( ii==nInput ){ return SQLITE_ERROR; } return getNextString(pParse, &zInput[1], ii-1, ppExpr); } /* If control flows to this point, this must be a regular token, or ** the end of the input. Read a regular token using the sqlite3_tokenizer ** interface. Before doing so, figure out if there is an explicit ** column specifier for the token. ** ** TODO: Strangely, it is not possible to associate a column specifier | > > > > > > > > > > > > > > > | 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 | *pnConsumed = (int)((zInput - z) + ii + 1); if( ii==nInput ){ return SQLITE_ERROR; } return getNextString(pParse, &zInput[1], ii-1, ppExpr); } if( sqlite3_fts3_enable_parentheses ){ if( *zInput=='(' ){ int nConsumed = 0; pParse->nNest++; rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } *pnConsumed = (int)(zInput - z) + 1 + nConsumed; return rc; }else if( *zInput==')' ){ pParse->nNest--; *pnConsumed = (zInput - z) + 1; *ppExpr = 0; return SQLITE_DONE; } } /* If control flows to this point, this must be a regular token, or ** the end of the input. Read a regular token using the sqlite3_tokenizer ** interface. Before doing so, figure out if there is an explicit ** column specifier for the token. ** ** TODO: Strangely, it is not possible to associate a column specifier |
︙ | ︙ | |||
622 623 624 625 626 627 628 629 630 | const char *zIn = z; int rc = SQLITE_OK; int isRequirePhrase = 1; while( rc==SQLITE_OK ){ Fts3Expr *p = 0; int nByte = 0; rc = getNextNode(pParse, zIn, nIn, &p, &nByte); if( rc==SQLITE_OK ){ | > > > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | > > < | 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 | const char *zIn = z; int rc = SQLITE_OK; int isRequirePhrase = 1; while( rc==SQLITE_OK ){ Fts3Expr *p = 0; int nByte = 0; rc = getNextNode(pParse, zIn, nIn, &p, &nByte); assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); if( rc==SQLITE_OK ){ if( p ){ int isPhrase; if( !sqlite3_fts3_enable_parentheses && p->eType==FTSQUERY_PHRASE && pParse->isNot ){ /* Create an implicit NOT operator. */ Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); if( !pNot ){ sqlite3Fts3ExprFree(p); rc = SQLITE_NOMEM; goto exprparse_out; } pNot->eType = FTSQUERY_NOT; pNot->pRight = p; p->pParent = pNot; if( pNotBranch ){ pNot->pLeft = pNotBranch; pNotBranch->pParent = pNot; } pNotBranch = pNot; p = pPrev; }else{ int eType = p->eType; isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); /* The isRequirePhrase variable is set to true if a phrase or ** an expression contained in parenthesis is required. If a ** binary operator (AND, OR, NOT or NEAR) is encounted when ** isRequirePhrase is set, this is a syntax error. */ if( !isPhrase && isRequirePhrase ){ sqlite3Fts3ExprFree(p); rc = SQLITE_ERROR; goto exprparse_out; } if( isPhrase && !isRequirePhrase ){ /* Insert an implicit AND operator. */ Fts3Expr *pAnd; assert( pRet && pPrev ); pAnd = fts3MallocZero(sizeof(Fts3Expr)); if( !pAnd ){ sqlite3Fts3ExprFree(p); rc = SQLITE_NOMEM; goto exprparse_out; } pAnd->eType = FTSQUERY_AND; insertBinaryOperator(&pRet, pPrev, pAnd); pPrev = pAnd; } /* This test catches attempts to make either operand of a NEAR ** operator something other than a phrase. For example, either of ** the following: ** ** (bracketed expression) NEAR phrase ** phrase NEAR (bracketed expression) ** ** Return an error in either case. */ if( pPrev && ( (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) )){ sqlite3Fts3ExprFree(p); rc = SQLITE_ERROR; goto exprparse_out; } if( isPhrase ){ if( pRet ){ assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); pPrev->pRight = p; p->pParent = pPrev; }else{ pRet = p; } }else{ insertBinaryOperator(&pRet, pPrev, p); } isRequirePhrase = !isPhrase; } pPrev = p; } assert( nByte>0 ); } assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); nIn -= nByte; zIn += nByte; } if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ rc = SQLITE_ERROR; } if( rc==SQLITE_DONE ){ |
︙ | ︙ |
Changes to test/fts3defer2.test.
︙ | ︙ | |||
54 55 56 57 58 59 60 61 62 63 64 65 66 67 | do_execsql_test 1.2.0 { SELECT content FROM t1 WHERE t1 MATCH 'f (e a)'; } {{a b c d e f a x y}} do_execsql_test 1.2.1 { SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } {{a b c d e f a x y}} do_execsql_test 1.2.2 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } [list \ {a b c d [e] [f] [a] x y} \ {0 1 8 1 0 0 10 1 0 2 12 1} \ | > | 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | do_execsql_test 1.2.0 { SELECT content FROM t1 WHERE t1 MATCH 'f (e a)'; } {{a b c d e f a x y}} do_execsql_test 1.2.1 { SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } {{a b c d e f a x y}} do_execsql_test 1.2.2 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } [list \ {a b c d [e] [f] [a] x y} \ {0 1 8 1 0 0 10 1 0 2 12 1} \ |
︙ | ︙ |
Changes to test/fts3expr.test.
︙ | ︙ | |||
505 506 507 508 509 510 511 512 | do_test fts3expr-8.5 { test_fts3expr "((blah.))" } {PHRASE 3 0 blah} do_test fts3expr-8.6 { test_fts3expr "(((blah,)))" } {PHRASE 3 0 blah} do_test fts3expr-8.7 { test_fts3expr "((((blah!))))" } {PHRASE 3 0 blah} do_test fts3expr-8.8 { test_fts3expr "(,(blah-),)" } {PHRASE 3 0 blah} set sqlite_fts3_enable_parentheses 0 finish_test | > > > > > | 505 506 507 508 509 510 511 512 513 514 515 516 517 | do_test fts3expr-8.5 { test_fts3expr "((blah.))" } {PHRASE 3 0 blah} do_test fts3expr-8.6 { test_fts3expr "(((blah,)))" } {PHRASE 3 0 blah} do_test fts3expr-8.7 { test_fts3expr "((((blah!))))" } {PHRASE 3 0 blah} do_test fts3expr-8.8 { test_fts3expr "(,(blah-),)" } {PHRASE 3 0 blah} set sqlite_fts3_enable_parentheses 0 do_test fts3expr-9.1 { test_fts3expr "f (e NEAR/2 a)" } {AND {PHRASE 3 0 f} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}} finish_test |
Added test/fts3expr4.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | # 2014 May 7 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS3 module. # set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts3expr4 # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts3||!icu { finish_test return } set sqlite_fts3_enable_parentheses 1 proc test_icu_fts3expr {expr} { db one {SELECT fts3_exprtest('icu', $expr, 'a', 'b', 'c')} } proc do_icu_expr_test {tn expr res} { uplevel [list do_test $tn [list test_icu_fts3expr $expr] $res] } #------------------------------------------------------------------------- # do_icu_expr_test 1.1 "abcd" {PHRASE 3 0 abcd} do_icu_expr_test 1.2 " tag " {PHRASE 3 0 tag} do_icu_expr_test 1.3 {"x y z"} {PHRASE 3 0 x y z} do_icu_expr_test 1.4 {x OR y} {OR {PHRASE 3 0 x} {PHRASE 3 0 y}} do_icu_expr_test 1.5 {(x OR y)} {OR {PHRASE 3 0 x} {PHRASE 3 0 y}} do_icu_expr_test 1.6 { "(x OR y)" } {PHRASE 3 0 ( x or y )} # In "col:word", if "col" is not the name of a column, the entire thing # is passed to the tokenizer. # do_icu_expr_test 1.7 {a:word} {PHRASE 0 0 word} do_icu_expr_test 1.8 {d:word} {PHRASE 3 0 d:word} set sqlite_fts3_enable_parentheses 0 do_icu_expr_test 2.1 { f (e NEAR/2 a) } {AND {AND {AND {PHRASE 3 0 f} {PHRASE 3 0 (}} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}} {PHRASE 3 0 )}} finish_test |