SQLite

Check-in [e21bf7a2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix the way parentheses in MATCH expressions are handled by FTS if the tokenizer considers them to be token characters.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: e21bf7a2ade6373e94ea403c665f78e1ad22143f
User & Date: dan 2014-05-07 19:59:36
References
2014-10-09
15:08
Allow FTS tokenizers to choose whether or not to consider the "*" character part of tokens. This restores the pre-[e21bf7a2ad] behaviour. Also fix a problem causing FTS to interpret tokens beginning with "*" characters as EOF. (check-in: 49dfee7c user: dan tags: trunk)
Context
2014-05-07
20:24
A better fix for the group_concat() problem. (check-in: 1c086dee user: drh tags: trunk)
19:59
Fix the way parentheses in MATCH expressions are handled by FTS if the tokenizer considers them to be token characters. (check-in: e21bf7a2 user: dan tags: trunk)
18:23
Make sure the group_concat() function returns an empty string, not a NULL, if it has at least one input row. Fix for ticket [55746f9e65f8587]. (check-in: d01cedaa user: drh tags: trunk)
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3_expr.c.

181
182
183
184
185
186
187

188




189


190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
  int *pnConsumed                         /* OUT: Number of bytes consumed */
){
  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  sqlite3_tokenizer_cursor *pCursor;
  Fts3Expr *pRet = 0;

  int nConsumed = 0;







  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
  if( rc==SQLITE_OK ){
    const char *zToken;
    int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
    int nByte;                               /* total space to allocate */

    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);

    if( (rc==SQLITE_OK || rc==SQLITE_DONE) && sqlite3_fts3_enable_parentheses ){
      int i;
      if( rc==SQLITE_DONE ) iStart = n;
      for(i=0; i<iStart; i++){
        if( z[i]=='(' ){
          pParse->nNest++;
          rc = fts3ExprParse(pParse, &z[i+1], n-i-1, &pRet, &nConsumed);
          if( rc==SQLITE_OK && !pRet ){
            rc = SQLITE_DONE;
          }
          nConsumed = (int)(i + 1 + nConsumed);
          break;
        }

        if( z[i]==')' ){
          rc = SQLITE_DONE;
          pParse->nNest--;
          nConsumed = i+1;
          break;
        }
      }
    }

    if( nConsumed==0 && rc==SQLITE_OK ){
      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)fts3MallocZero(nByte);
      if( !pRet ){
        rc = SQLITE_NOMEM;
      }else{
        pRet->eType = FTSQUERY_PHRASE;
        pRet->pPhrase = (Fts3Phrase *)&pRet[1];







>
|
>
>
>
>
|
>
>
|






<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203








204
















205
206
207
208
209
210
211
  int *pnConsumed                         /* OUT: Number of bytes consumed */
){
  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  sqlite3_tokenizer_cursor *pCursor;
  Fts3Expr *pRet = 0;
  int i = 0;

  /* Set variable i to the maximum number of bytes of input to tokenize. */
  for(i=0; i<n; i++){
    if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break;
    if( z[i]=='*' || z[i]=='"' ) break;
  }

  *pnConsumed = i;
  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor);
  if( rc==SQLITE_OK ){
    const char *zToken;
    int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
    int nByte;                               /* total space to allocate */

    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);








    if( rc==SQLITE_OK ){
















      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)fts3MallocZero(nByte);
      if( !pRet ){
        rc = SQLITE_NOMEM;
      }else{
        pRet->eType = FTSQUERY_PHRASE;
        pRet->pPhrase = (Fts3Phrase *)&pRet[1];
248
249
250
251
252
253
254
255


256
257
258
259
260
261
262
263
264
265
266
267
268
            iStart--;
          }else{
            break;
          }
        }

      }
      nConsumed = iEnd;


    }

    pModule->xClose(pCursor);
  }
  
  *pnConsumed = nConsumed;
  *ppExpr = pRet;
  return rc;
}


/*
** Enlarge a memory allocation.  If an out-of-memory allocation occurs,







|
>
>





<







231
232
233
234
235
236
237
238
239
240
241
242
243
244
245

246
247
248
249
250
251
252
            iStart--;
          }else{
            break;
          }
        }

      }
      *pnConsumed = iEnd;
    }else if( i && rc==SQLITE_DONE ){
      rc = SQLITE_OK;
    }

    pModule->xClose(pCursor);
  }
  

  *ppExpr = pRet;
  return rc;
}


/*
** Enlarge a memory allocation.  If an out-of-memory allocation occurs,
504
505
506
507
508
509
510















511
512
513
514
515
516
517
    *pnConsumed = (int)((zInput - z) + ii + 1);
    if( ii==nInput ){
      return SQLITE_ERROR;
    }
    return getNextString(pParse, &zInput[1], ii-1, ppExpr);
  }

















  /* If control flows to this point, this must be a regular token, or 
  ** the end of the input. Read a regular token using the sqlite3_tokenizer
  ** interface. Before doing so, figure out if there is an explicit
  ** column specifier for the token. 
  **
  ** TODO: Strangely, it is not possible to associate a column specifier







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
    *pnConsumed = (int)((zInput - z) + ii + 1);
    if( ii==nInput ){
      return SQLITE_ERROR;
    }
    return getNextString(pParse, &zInput[1], ii-1, ppExpr);
  }

  if( sqlite3_fts3_enable_parentheses ){
    if( *zInput=='(' ){
      int nConsumed = 0;
      pParse->nNest++;
      rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed);
      if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; }
      *pnConsumed = (int)(zInput - z) + 1 + nConsumed;
      return rc;
    }else if( *zInput==')' ){
      pParse->nNest--;
      *pnConsumed = (zInput - z) + 1;
      *ppExpr = 0;
      return SQLITE_DONE;
    }
  }

  /* If control flows to this point, this must be a regular token, or 
  ** the end of the input. Read a regular token using the sqlite3_tokenizer
  ** interface. Before doing so, figure out if there is an explicit
  ** column specifier for the token. 
  **
  ** TODO: Strangely, it is not possible to associate a column specifier
622
623
624
625
626
627
628

629

630

631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711


712
713
714
715
716
717
718
719
720
721
722
723
724
725
  const char *zIn = z;
  int rc = SQLITE_OK;
  int isRequirePhrase = 1;

  while( rc==SQLITE_OK ){
    Fts3Expr *p = 0;
    int nByte = 0;

    rc = getNextNode(pParse, zIn, nIn, &p, &nByte);

    if( rc==SQLITE_OK ){

      int isPhrase;

      if( !sqlite3_fts3_enable_parentheses 
       && p->eType==FTSQUERY_PHRASE && pParse->isNot 
      ){
        /* Create an implicit NOT operator. */
        Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
        if( !pNot ){
          sqlite3Fts3ExprFree(p);
          rc = SQLITE_NOMEM;
          goto exprparse_out;
        }
        pNot->eType = FTSQUERY_NOT;
        pNot->pRight = p;
        p->pParent = pNot;
        if( pNotBranch ){
          pNot->pLeft = pNotBranch;
          pNotBranch->pParent = pNot;
        }
        pNotBranch = pNot;
        p = pPrev;
      }else{
        int eType = p->eType;
        isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);

        /* The isRequirePhrase variable is set to true if a phrase or
        ** an expression contained in parentheses is required. If a
        ** binary operator (AND, OR, NOT or NEAR) is encountered when
        ** isRequirePhrase is set, this is a syntax error.
        */
        if( !isPhrase && isRequirePhrase ){
          sqlite3Fts3ExprFree(p);
          rc = SQLITE_ERROR;
          goto exprparse_out;
        }
  
        if( isPhrase && !isRequirePhrase ){
          /* Insert an implicit AND operator. */
          Fts3Expr *pAnd;
          assert( pRet && pPrev );
          pAnd = fts3MallocZero(sizeof(Fts3Expr));
          if( !pAnd ){
            sqlite3Fts3ExprFree(p);
            rc = SQLITE_NOMEM;
            goto exprparse_out;
          }
          pAnd->eType = FTSQUERY_AND;
          insertBinaryOperator(&pRet, pPrev, pAnd);
          pPrev = pAnd;
        }

        /* This test catches attempts to make either operand of a NEAR
        ** operator something other than a phrase. For example, either of
        ** the following:
        **
        **    (bracketed expression) NEAR phrase
        **    phrase NEAR (bracketed expression)
        **
        ** Return an error in either case.
        */
        if( pPrev && (
            (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
         || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
        )){
          sqlite3Fts3ExprFree(p);
          rc = SQLITE_ERROR;
          goto exprparse_out;
        }
  
        if( isPhrase ){
          if( pRet ){
            assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
            pPrev->pRight = p;
            p->pParent = pPrev;
          }else{
            pRet = p;
          }
        }else{
          insertBinaryOperator(&pRet, pPrev, p);
        }
        isRequirePhrase = !isPhrase;


      }
      assert( nByte>0 );
    }
    assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
    nIn -= nByte;
    zIn += nByte;
    pPrev = p;
  }

  if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
    rc = SQLITE_ERROR;
  }

  if( rc==SQLITE_DONE ){







>

>

>
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|


|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>






<







621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721

722
723
724
725
726
727
728
  const char *zIn = z;
  int rc = SQLITE_OK;
  int isRequirePhrase = 1;

  while( rc==SQLITE_OK ){
    Fts3Expr *p = 0;
    int nByte = 0;

    rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
    assert( nByte>0 || (rc!=SQLITE_OK && p==0) );
    if( rc==SQLITE_OK ){
      if( p ){
        int isPhrase;

        if( !sqlite3_fts3_enable_parentheses 
            && p->eType==FTSQUERY_PHRASE && pParse->isNot 
        ){
          /* Create an implicit NOT operator. */
          Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
          if( !pNot ){
            sqlite3Fts3ExprFree(p);
            rc = SQLITE_NOMEM;
            goto exprparse_out;
          }
          pNot->eType = FTSQUERY_NOT;
          pNot->pRight = p;
          p->pParent = pNot;
          if( pNotBranch ){
            pNot->pLeft = pNotBranch;
            pNotBranch->pParent = pNot;
          }
          pNotBranch = pNot;
          p = pPrev;
        }else{
          int eType = p->eType;
          isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);

          /* The isRequirePhrase variable is set to true if a phrase or
          ** an expression contained in parentheses is required. If a
          ** binary operator (AND, OR, NOT or NEAR) is encountered when
          ** isRequirePhrase is set, this is a syntax error.
          */
          if( !isPhrase && isRequirePhrase ){
            sqlite3Fts3ExprFree(p);
            rc = SQLITE_ERROR;
            goto exprparse_out;
          }

          if( isPhrase && !isRequirePhrase ){
            /* Insert an implicit AND operator. */
            Fts3Expr *pAnd;
            assert( pRet && pPrev );
            pAnd = fts3MallocZero(sizeof(Fts3Expr));
            if( !pAnd ){
              sqlite3Fts3ExprFree(p);
              rc = SQLITE_NOMEM;
              goto exprparse_out;
            }
            pAnd->eType = FTSQUERY_AND;
            insertBinaryOperator(&pRet, pPrev, pAnd);
            pPrev = pAnd;
          }

          /* This test catches attempts to make either operand of a NEAR
           ** operator something other than a phrase. For example, either of
           ** the following:
           **
           **    (bracketed expression) NEAR phrase
           **    phrase NEAR (bracketed expression)
           **
           ** Return an error in either case.
           */
          if( pPrev && (
            (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
         || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
          )){
            sqlite3Fts3ExprFree(p);
            rc = SQLITE_ERROR;
            goto exprparse_out;
          }

          if( isPhrase ){
            if( pRet ){
              assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
              pPrev->pRight = p;
              p->pParent = pPrev;
            }else{
              pRet = p;
            }
          }else{
            insertBinaryOperator(&pRet, pPrev, p);
          }
          isRequirePhrase = !isPhrase;
        }
        pPrev = p;
      }
      assert( nByte>0 );
    }
    assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
    nIn -= nByte;
    zIn += nByte;

  }

  if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
    rc = SQLITE_ERROR;
  }

  if( rc==SQLITE_DONE ){

Changes to test/fts3defer2.test.

54
55
56
57
58
59
60

61
62
63
64
65
66
67
do_execsql_test 1.2.0 {
  SELECT content FROM t1 WHERE t1 MATCH 'f (e a)';
} {{a b c d e f a x y}}

do_execsql_test 1.2.1 {
  SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} {{a b c d e f a x y}}


do_execsql_test 1.2.2 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal'))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} [list                              \
   {a b c d [e] [f] [a] x y}         \
   {0 1 8 1 0 0 10 1 0 2 12 1}       \







>







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
do_execsql_test 1.2.0 {
  SELECT content FROM t1 WHERE t1 MATCH 'f (e a)';
} {{a b c d e f a x y}}

do_execsql_test 1.2.1 {
  SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} {{a b c d e f a x y}}


do_execsql_test 1.2.2 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal'))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} [list                              \
   {a b c d [e] [f] [a] x y}         \
   {0 1 8 1 0 0 10 1 0 2 12 1}       \

Changes to test/fts3expr.test.

505
506
507
508
509
510
511





512
do_test fts3expr-8.5 { test_fts3expr "((blah.))" } {PHRASE 3 0 blah}
do_test fts3expr-8.6 { test_fts3expr "(((blah,)))" } {PHRASE 3 0 blah}
do_test fts3expr-8.7 { test_fts3expr "((((blah!))))" } {PHRASE 3 0 blah}

do_test fts3expr-8.8 { test_fts3expr "(,(blah-),)" } {PHRASE 3 0 blah}

set sqlite_fts3_enable_parentheses 0





finish_test







>
>
>
>
>

505
506
507
508
509
510
511
512
513
514
515
516
517
do_test fts3expr-8.5 { test_fts3expr "((blah.))" } {PHRASE 3 0 blah}
do_test fts3expr-8.6 { test_fts3expr "(((blah,)))" } {PHRASE 3 0 blah}
do_test fts3expr-8.7 { test_fts3expr "((((blah!))))" } {PHRASE 3 0 blah}

do_test fts3expr-8.8 { test_fts3expr "(,(blah-),)" } {PHRASE 3 0 blah}

set sqlite_fts3_enable_parentheses 0

do_test fts3expr-9.1 {
  test_fts3expr "f (e NEAR/2 a)"
} {AND {PHRASE 3 0 f} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}}

finish_test

Added test/fts3expr4.test.



















































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# 2014 May 7
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS3 module.
#

# Derive the test directory from the path of the running script and load
# the shared test harness from it.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# Name prefix for this test file (consumed by the harness -- confirm the
# exact use in tester.tcl).
set testprefix fts3expr4

# If either FTS3 or ICU support is not available in this build, omit this
# file: finish immediately without running any of the tests below.
ifcapable !fts3||!icu {
  finish_test
  return
}

# Turn on the sqlite_fts3_enable_parentheses setting for the tests in
# section 1; it is set back to 0 before section 2.
set sqlite_fts3_enable_parentheses 1

# Run the SQL function fts3_exprtest() on the query expression $expr,
# with 'icu' as its first argument and 'a', 'b', 'c' as the trailing
# arguments (apparently column names -- see tests 1.7 and 1.8), and
# return the single value produced by the query.
proc test_icu_fts3expr {expr} {
  set parsed [db one {SELECT fts3_exprtest('icu', $expr, 'a', 'b', 'c')}]
  return $parsed
}

# Register test case $tn: in the caller's scope, invoke do_test with a
# script that calls test_icu_fts3expr on $expr, and with $res as the
# expected result.
proc do_icu_expr_test {tn expr res} {
  set script [list test_icu_fts3expr $expr]
  set cmd    [list do_test $tn $script $res]
  uplevel 1 $cmd
}

#-------------------------------------------------------------------------
# Section 1: expressions evaluated through test_icu_fts3expr while
# sqlite_fts3_enable_parentheses is set to 1.
#
# 1.1-1.3: a bare token, a token surrounded by whitespace, and a quoted
#          phrase.
# 1.4-1.5: an OR expression with and without enclosing parentheses; both
#          expect the same tree.
# 1.6:     inside a quoted phrase, the expected result lists "(" and ")"
#          as tokens of the phrase.
#
do_icu_expr_test 1.1 "abcd"    {PHRASE 3 0 abcd}
do_icu_expr_test 1.2 " tag "   {PHRASE 3 0 tag}
do_icu_expr_test 1.3 {"x y z"} {PHRASE 3 0 x y z}
do_icu_expr_test 1.4 {x OR y}       {OR {PHRASE 3 0 x} {PHRASE 3 0 y}}
do_icu_expr_test 1.5 {(x OR y)}     {OR {PHRASE 3 0 x} {PHRASE 3 0 y}}
do_icu_expr_test 1.6 { "(x OR y)" } {PHRASE 3 0 ( x or y )}

# In "col:word", if "col" is not the name of a column, the entire thing
# is passed to the tokenizer.
#
do_icu_expr_test 1.7 {a:word} {PHRASE 0 0 word}
do_icu_expr_test 1.8 {d:word} {PHRASE 3 0 d:word}

# Section 2: the same kind of check with sqlite_fts3_enable_parentheses
# set back to 0. The expected tree for 2.1 contains "(" and ")" as
# separate phrases joined to the other terms by AND.
set sqlite_fts3_enable_parentheses 0

do_icu_expr_test 2.1 {
  f (e NEAR/2 a)
} {AND {AND {AND {PHRASE 3 0 f} {PHRASE 3 0 (}} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}} {PHRASE 3 0 )}}

finish_test