/ Check-in [7389b9ec]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Modify fts3 to support a more complex expression syntax that allows parenthesis. The new syntax is not entirely backwards compatible, so is disabled by default. Use -DSQLITE_ENABLE_FTS3_PARENTHESIS to enable it. (CVS 6034)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:7389b9ecb80294569845c40a23e0c832d07f7a45
User & Date: danielk1977 2008-12-17 15:18:18
Context
2008-12-17
15:49
Fix some strict-aliasing problems in fts3_expr.c. (CVS 6035) check-in: 20a4ca5d user: danielk1977 tags: trunk
15:18
Modify fts3 to support a more complex expression syntax that allows parenthesis. The new syntax is not entirely backwards compatible, so is disabled by default. Use -DSQLITE_ENABLE_FTS3_PARENTHESIS to enable it. (CVS 6034) check-in: 7389b9ec user: danielk1977 tags: trunk
2008-12-16
18:15
Version 3.6.7 (CVS 6033) check-in: f4f40370 user: drh tags: trunk, release
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

281
282
283
284
285
286
287

288
289
290
291
292
293
294
...
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
....
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
....
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
....
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200

3201
3202
3203
3204
3205
3206
3207
....
3228
3229
3230
3231
3232
3233
3234







































3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282


3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297

3298
3299
3300
3301
3302
3303
3304
....
3320
3321
3322
3323
3324
3325
3326





3327


3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343



3344
3345



3346
3347
3348



















3349



3350
3351



3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363














3364
3365

3366
3367
3368
3369
3370
3371
3372
3373
3374




3375
3376
3377
3378
3379

3380
3381


3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392



3393
3394
3395

3396
3397


3398
3399
3400
3401

3402
3403
3404
3405

3406







3407
3408
3409
3410
3411
3412
3413
3414
3415
3416

3417
3418
3419
3420
3421

3422
3423
3424
3425
3426
3427
3428
....
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439


3440
3441
3442
3443
3444

3445
3446
3447
3448
3449
3450
3451
....
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
....
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649


3650
3651
3652
3653
3654
3655
3656
....
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720






3721
3722
3723
3724
3725
3726
3727

3728
3729
3730
3731
3732
3733
3734
3735
3736

3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749


3750
3751
3752
3753
3754

3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815


3816
3817
3818
3819
3820
3821



3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852





3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865





3866
3867

3868
3869


3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905



3906
3907


3908
3909




3910
3911
3912
3913
3914
3915
3916
3917
3918



3919
3920
3921
3922
3923
3924





3925

3926
3927
3928
3929
3930



3931



3932



3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949


3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
....
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055




4056
4057
4058

4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069

4070
4071
4072
4073
4074
4075
4076
....
4142
4143
4144
4145
4146
4147
4148

4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
....
6037
6038
6039
6040
6041
6042
6043
6044


6045

6046


6047
6048
6049
6050
6051
6052
6053
6054
6055

6056
6057
6058
6059
6060
6061
6062
....
7165
7166
7167
7168
7169
7170
7171




7172
7173
7174
7175
7176
7177
7178
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "fts3.h"

#include "fts3_hash.h"
#include "fts3_tokenizer.h"
#ifndef SQLITE_CORE 
# include "sqlite3ext.h"
  SQLITE_EXTENSION_INIT1
#endif

................................................................................

#if 0
# define FTSTRACE(A)  printf A; fflush(stdout)
#else
# define FTSTRACE(A)
#endif

/*
** Default span for NEAR operators.
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10

/* It is not safe to call isspace(), tolower(), or isalnum() on
** hi-bit-set characters.  This is the same solution used in the
** tokenizer.
*/
/* TODO(shess) The snippet-generation code should be using the
** tokenizer-generated tokens rather than doing its own local
** tokenization.
................................................................................
  return rc;
}

/* end utility functions */

/* Forward reference */
typedef struct fulltext_vtab fulltext_vtab;

/* A single term in a query is represented by an instances of
** the following structure. Each word which may match against
** document content is a term. Operators, like NEAR or OR, are
** not terms. Query terms are organized as a flat list stored
** in the Query.pTerms array.
**
** If the QueryTerm.nPhrase variable is non-zero, then the QueryTerm
** is the first in a contiguous string of terms that are either part
** of the same phrase, or connected by the NEAR operator.
**
** If the QueryTerm.nNear variable is non-zero, then the token is followed 
** by a NEAR operator with span set to (nNear-1). For example, the 
** following query:
**
** The QueryTerm.iPhrase variable stores the index of the token within
** its phrase, indexed starting at 1, or 1 if the token is not part 
** of any phrase.
**
** For example, the data structure used to represent the following query:
**
**     ... MATCH 'sqlite NEAR/5 google NEAR/2 "search engine"'
**
** is:
**
**     {nPhrase=4, iPhrase=1, nNear=6, pTerm="sqlite"},
**     {nPhrase=0, iPhrase=1, nNear=3, pTerm="google"},
**     {nPhrase=0, iPhrase=1, nNear=0, pTerm="search"},
**     {nPhrase=0, iPhrase=2, nNear=0, pTerm="engine"},
**
** compiling the FTS3 syntax to Query structures is done by the parseQuery()
** function.
*/
typedef struct QueryTerm {
  short int nPhrase; /* How many following terms are part of the same phrase */
  short int iPhrase; /* This is the i-th term of a phrase. */
  short int iColumn; /* Column of the index that must match this term */
  short int nNear;   /* term followed by a NEAR operator with span=(nNear-1) */
  signed char isOr;  /* this term is preceded by "OR" */
  signed char isNot; /* this term is preceded by "-" */
  signed char isPrefix; /* this term is followed by "*" */
  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
  int nTerm;         /* Number of bytes in pTerm[] */
} QueryTerm;


/* A query string is parsed into a Query structure.
 *
 * We could, in theory, allow query strings to be complicated
 * nested expressions with precedence determined by parentheses.
 * But none of the major search engines do this.  (Perhaps the
 * feeling is that an parenthesized expression is two complex of
 * an idea for the average user to grasp.)  Taking our lead from
 * the major search engines, we will allow queries to be a list
 * of terms (with an implied AND operator) or phrases in double-quotes,
 * with a single optional "-" before each non-phrase term to designate
 * negation and an optional OR connector.
 *
 * OR binds more tightly than the implied AND, which is what the
 * major search engines seem to do.  So, for example:
 * 
 *    [one two OR three]     ==>    one AND (two OR three)
 *    [one OR two three]     ==>    (one OR two) AND three
 *
 * A "-" before a term matches all entries that lack that term.
 * The "-" must occur immediately before the term with in intervening
 * space.  This is how the search engines do it.
 *
 * A NOT term cannot be the right-hand operand of an OR.  If this
 * occurs in the query string, the NOT is ignored:
 *
 *    [one OR -two]          ==>    one OR two
 *
 */
typedef struct Query {
  fulltext_vtab *pFts;  /* The full text index */
  int nTerms;           /* Number of terms in the query */
  QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
  int nextIsOr;         /* Set the isOr flag on the next inserted term */
  int nextIsNear;       /* Set the isOr flag on the next inserted term */
  int nextColumn;       /* Next word parsed must be in this column */
  int dfltColumn;       /* The default column */
} Query;


/*
** An instance of the following structure keeps track of generated
** matching-word offset information and snippets.
*/
typedef struct Snippet {
  int nMatch;     /* Total number of matches */
................................................................................
** is destroyed by xClose.
*/
typedef struct fulltext_cursor {
  sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
  QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
  sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
  int eof;                         /* True if at End Of Results */
  Query q;                         /* Parsed query string */
  Snippet snippet;                 /* Cached snippet for the current row */
  int iColumn;                     /* Column being searched */
  DataBuffer result;               /* Doclist results from fulltextQuery */
  DLReader reader;                 /* Result reader if result not empty */
} fulltext_cursor;

static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}

static const sqlite3_module fts3Module;   /* forward declaration */

/* Return a dynamically generated statement of the form
 *   insert into %_content (docid, ...) values (?, ...)
................................................................................
    FTSTRACE(("FTS3 Open %p: %p\n", pVTab, c));
    return SQLITE_OK;
  }else{
    return SQLITE_NOMEM;
  }
}


/* Free all of the dynamically allocated memory held by *q
*/
static void queryClear(Query *q){
  int i;
  for(i = 0; i < q->nTerms; ++i){
    sqlite3_free(q->pTerms[i].pTerm);
  }
  sqlite3_free(q->pTerms);
  CLEAR(q);
}

/* Free all of the dynamically allocated memory held by the
** Snippet
*/
static void snippetClear(Snippet *p){
  sqlite3_free(p->aMatch);
  sqlite3_free(p->zOffset);
  sqlite3_free(p->zSnippet);
  CLEAR(p);
}

/*
** Append a single entry to the p->aMatch[] log.
*/
static void snippetAppendMatch(
  Snippet *p,               /* Append the entry to this snippet */
  int iCol, int iTerm,      /* The column and query term */
  int iToken,               /* Matching token in document */
................................................................................
}

/*
** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
*/
#define FTS3_ROTOR_SZ   (32)
#define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1)








































/*
** Add entries to pSnippet->aMatch[] for every match that occurs against
** document zDoc[0..nDoc-1] which is stored in column iColumn.
*/
static void snippetOffsetsOfColumn(
  Query *pQuery,
  Snippet *pSnippet,
  int iColumn,
  const char *zDoc,
  int nDoc
){
  const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
  sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
  sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
  fulltext_vtab *pVtab;                /* The full text index */
  int nColumn;                         /* Number of columns in the index */
  const QueryTerm *aTerm;              /* Query string terms */
  int nTerm;                           /* Number of query string terms */  
  int i, j;                            /* Loop counters */
  int rc;                              /* Return code */
  unsigned int match, prevMatch;       /* Phrase search bitmasks */
  const char *zToken;                  /* Next token from the tokenizer */
  int nToken;                          /* Size of zToken */
  int iBegin, iEnd, iPos;              /* Offsets of beginning and end */

  /* The following variables keep a circular buffer of the last
  ** few tokens */
  unsigned int iRotor = 0;             /* Index of current token */
  int iRotorBegin[FTS3_ROTOR_SZ];      /* Beginning offset of token */
  int iRotorLen[FTS3_ROTOR_SZ];        /* Length of token */

  pVtab = pQuery->pFts;
  nColumn = pVtab->nColumn;
  pTokenizer = pVtab->pTokenizer;
  pTModule = pTokenizer->pModule;
  rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
  if( rc ) return;
  pTCursor->pTokenizer = pTokenizer;
  aTerm = pQuery->pTerms;
  nTerm = pQuery->nTerms;
  if( nTerm>=FTS3_ROTOR_SZ ){
    nTerm = FTS3_ROTOR_SZ - 1;
  }
  prevMatch = 0;
  while(1){
    rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
    if( rc ) break;


    iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
    iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
    match = 0;
    for(i=0; i<nTerm; i++){
      int iCol;
      iCol = aTerm[i].iColumn;
      if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
      if( aTerm[i].nTerm>nToken ) continue;
      if( !aTerm[i].isPrefix && aTerm[i].nTerm<nToken ) continue;
      assert( aTerm[i].nTerm<=nToken );
      if( memcmp(aTerm[i].pTerm, zToken, aTerm[i].nTerm) ) continue;
      if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
      match |= 1<<i;
      if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
        for(j=aTerm[i].iPhrase-1; j>=0; j--){

          int k = (iRotor-j) & FTS3_ROTOR_MASK;
          snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j,
                iRotorBegin[k], iRotorLen[k]);
        }
      }
    }
    prevMatch = match<<1;
................................................................................
** and the query is:
** 
**     A NEAR/0 E
**
** then when this function is called the Snippet contains token offsets
** 0, 4 and 5. This function removes the "0" entry (because the first A
** is not near enough to an E).





*/


static void trimSnippetOffsetsForNear(Query *pQuery, Snippet *pSnippet){
  int ii;
  int iDir = 1;

  while(iDir>-2) {
    assert( iDir==1 || iDir==-1 );
    for(ii=0; ii<pSnippet->nMatch; ii++){
      int jj;
      int nNear;
      struct snippetMatch *pMatch = &pSnippet->aMatch[ii];
      QueryTerm *pQueryTerm = &pQuery->pTerms[pMatch->iTerm];

      if( (pMatch->iTerm+iDir)<0 
       || (pMatch->iTerm+iDir)>=pQuery->nTerms
      ){
        continue;



      }
     



      nNear = pQueryTerm->nNear;
      if( iDir<0 ){
        nNear = pQueryTerm[-1].nNear;



















      }



  
      if( pMatch->iTerm>=0 && nNear ){



        int isOk = 0;
        int iNextTerm = pMatch->iTerm+iDir;
        int iPrevTerm = iNextTerm;

        int iEndToken;
        int iStartToken;

        if( iDir<0 ){
          int nPhrase = 1;
          iStartToken = pMatch->iToken;
          while( (pMatch->iTerm+nPhrase)<pQuery->nTerms 
              && pQuery->pTerms[pMatch->iTerm+nPhrase].iPhrase>1 














          ){
            nPhrase++;

          }
          iEndToken = iStartToken + nPhrase - 1;
        }else{
          iEndToken   = pMatch->iToken;
          iStartToken = pMatch->iToken+1-pQueryTerm->iPhrase;
        }

        while( pQuery->pTerms[iNextTerm].iPhrase>1 ){
          iNextTerm--;




        }
        while( (iPrevTerm+1)<pQuery->nTerms && 
               pQuery->pTerms[iPrevTerm+1].iPhrase>1 
        ){
          iPrevTerm++;

        }
  


        for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
          struct snippetMatch *p = &pSnippet->aMatch[jj];
          if( p->iCol==pMatch->iCol && ((
               p->iTerm==iNextTerm && 
               p->iToken>iEndToken && 
               p->iToken<=iEndToken+nNear
          ) || (
               p->iTerm==iPrevTerm && 
               p->iToken<iStartToken && 
               p->iToken>=iStartToken-nNear
          ))){



            isOk = 1;
          }
        }

        if( !isOk ){
          for(jj=1-pQueryTerm->iPhrase; jj<=0; jj++){


            pMatch[jj].iTerm = -1;
          }
          ii = -1;
          iDir = 1;

        }
      }
    }
    iDir -= 2;

  }







}

/*
** Compute all offsets for the current row of the query.  
** If the offsets have already been computed, this routine is a no-op.
*/
static void snippetAllOffsets(fulltext_cursor *p){
  int nColumn;
  int iColumn, i;
  int iFirst, iLast;

  fulltext_vtab *pFts;

  if( p->snippet.nMatch ) return;
  if( p->q.nTerms==0 ) return;
  pFts = p->q.pFts;

  nColumn = pFts->nColumn;
  iColumn = (p->iCursorType - QUERY_FULLTEXT);
  if( iColumn<0 || iColumn>=nColumn ){
    iFirst = 0;
    iLast = nColumn-1;
  }else{
    iFirst = iColumn;
................................................................................
    iLast = iColumn;
  }
  for(i=iFirst; i<=iLast; i++){
    const char *zDoc;
    int nDoc;
    zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
    nDoc = sqlite3_column_bytes(p->pStmt, i+1);
    snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
  }

  trimSnippetOffsetsForNear(&p->q, &p->snippet);


}

/*
** Convert the information in the aMatch[] array of the snippet
** into the string zOffset[0..nOffset-1].

*/
static void snippetOffsetText(Snippet *p){
  int i;
  int cnt = 0;
  StringBuffer sb;
  char zBuf[200];
  if( p->zOffset ) return;
................................................................................
  nMatch = pCursor->snippet.nMatch;
  initStringBuffer(&sb);

  for(i=0; i<nMatch; i++){
    aMatch[i].snStatus = SNIPPET_IGNORE;
  }
  nDesired = 0;
  for(i=0; i<pCursor->q.nTerms; i++){
    for(j=0; j<nMatch; j++){
      if( aMatch[j].iTerm==i ){
        aMatch[j].snStatus = SNIPPET_DESIRED;
        nDesired++;
        break;
      }
    }
................................................................................
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  FTSTRACE(("FTS3 Close %p\n", c));
  sqlite3_finalize(c->pStmt);
  queryClear(&c->q);
  snippetClear(&c->snippet);
  if( c->result.nData!=0 ) dlrDestroy(&c->reader);


  dataBufferDestroy(&c->result);
  sqlite3_free(c);
  return SQLITE_OK;
}

static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
................................................................................
** another file, term_select() could be pushed above
** docListOfTerm().
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm, int isPrefix,
                      DocListType iType, DataBuffer *out);

/* Return a DocList corresponding to the query term *pTerm.  If *pTerm
** is the first term of a phrase query, go ahead and evaluate the phrase
** query and return the doclist for the entire phrase query.
**
** The resulting DL_DOCIDS doclist is stored in pResult, which is
** overwritten.
*/
static int docListOfTerm(
  fulltext_vtab *v,    /* The full text index */
  int iColumn,         /* column to restrict to.  No restriction if >=nColumn */
  QueryTerm *pQTerm,   /* Term we are looking for, or 1st term of a phrase */
  DataBuffer *pResult  /* Write the result here */
){
  DataBuffer left, right, new;
  int i, rc;







  /* No phrase search if no position info. */
  assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS );

  /* This code should never be called with buffered updates. */
  assert( v->nPendingData<0 );


  dataBufferInit(&left, 0);
  rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix,
                  (0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS), &left);
  if( rc ) return rc;
  for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){
    /* If this token is connected to the next by a NEAR operator, and
    ** the next token is the start of a phrase, then set nPhraseRight
    ** to the number of tokens in the phrase. Otherwise leave it at 1.
    */

    int nPhraseRight = 1;
    while( (i+nPhraseRight)<=pQTerm->nPhrase 
        && pQTerm[i+nPhraseRight].nNear==0 
    ){
      nPhraseRight++;
    }

    dataBufferInit(&right, 0);
    rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
                    pQTerm[i].isPrefix, DL_POSITIONS, &right);
    if( rc ){
      dataBufferDestroy(&left);
      return rc;


    }
    dataBufferInit(&new, 0);
    docListPhraseMerge(left.pData, left.nData, right.pData, right.nData,
                       pQTerm[i-1].nNear, pQTerm[i-1].iPhrase + nPhraseRight,
                       ((i<pQTerm->nPhrase) ? DL_POSITIONS : DL_DOCIDS),

                       &new);
    dataBufferDestroy(&left);
    dataBufferDestroy(&right);
    left = new;
  }
  *pResult = left;
  return SQLITE_OK;
}

/* Add a new term pTerm[0..nTerm-1] to the query *q.
*/
static void queryAdd(Query *q, const char *pTerm, int nTerm){
  QueryTerm *t;
  ++q->nTerms;
  q->pTerms = sqlite3_realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
  if( q->pTerms==0 ){
    q->nTerms = 0;
    return;
  }
  t = &q->pTerms[q->nTerms - 1];
  CLEAR(t);
  t->pTerm = sqlite3_malloc(nTerm+1);
  memcpy(t->pTerm, pTerm, nTerm);
  t->pTerm[nTerm] = 0;
  t->nTerm = nTerm;
  t->isOr = q->nextIsOr;
  t->isPrefix = 0;
  q->nextIsOr = 0;
  t->iColumn = q->nextColumn;
  q->nextColumn = q->dfltColumn;
}

/*
** Check to see if the string zToken[0...nToken-1] matches any
** column name in the virtual table.   If it does,
** return the zero-indexed column number.  If not, return -1.
*/
static int checkColumnSpecifier(
  fulltext_vtab *pVtab,    /* The virtual table */
  const char *zToken,      /* Text of the token */
  int nToken               /* Number of characters in the token */
){
  int i;
  for(i=0; i<pVtab->nColumn; i++){
    if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
        && pVtab->azColumn[i][nToken]==0 ){
      return i;
    }
  }
  return -1;
}

/*
** Parse the text at zSegment[0..nSegment-1].  Add additional terms
** to the query being assemblied in pQuery.
**
** inPhrase is true if zSegment[0..nSegement-1] is contained within
** double-quotes.  If inPhrase is true, then the first term
** is marked with the number of terms in the phrase less one and
** OR and "-" syntax is ignored.  If inPhrase is false, then every
** term found is marked with nPhrase=0 and OR and "-" syntax is significant.


*/
static int tokenizeSegment(
  sqlite3_tokenizer *pTokenizer,          /* The tokenizer to use */
  const char *zSegment, int nSegment,     /* Query expression being parsed */
  int inPhrase,                           /* True if within "..." */
  Query *pQuery                           /* Append results here */



){
  const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
  sqlite3_tokenizer_cursor *pCursor;
  int firstIndex = pQuery->nTerms;
  int iCol;
  int nTerm = 1;
  
  int rc = pModule->xOpen(pTokenizer, zSegment, nSegment, &pCursor);
  if( rc!=SQLITE_OK ) return rc;
  pCursor->pTokenizer = pTokenizer;

  while( 1 ){
    const char *zToken;
    int nToken, iBegin, iEnd, iPos;

    rc = pModule->xNext(pCursor,
                        &zToken, &nToken,
                        &iBegin, &iEnd, &iPos);
    if( rc!=SQLITE_OK ) break;
    if( !inPhrase &&
        zSegment[iEnd]==':' &&
         (iCol = checkColumnSpecifier(pQuery->pFts, zToken, nToken))>=0 ){
      pQuery->nextColumn = iCol;
      continue;
    }
    if( !inPhrase && pQuery->nTerms>0 && nToken==2 
     && zSegment[iBegin+0]=='O'
     && zSegment[iBegin+1]=='R' 
    ){
      pQuery->nextIsOr = 1;
      continue;





    }
    if( !inPhrase && pQuery->nTerms>0 && !pQuery->nextIsOr && nToken==4 
      && memcmp(&zSegment[iBegin], "NEAR", 4)==0
    ){
      QueryTerm *pTerm = &pQuery->pTerms[pQuery->nTerms-1];
      if( (iBegin+6)<nSegment 
       && zSegment[iBegin+4] == '/'
       && isdigit(zSegment[iBegin+5])
      ){
        int k;
        pTerm->nNear = 0;
        for(k=5; (iBegin+k)<=nSegment && isdigit(zSegment[iBegin+k]); k++){
          pTerm->nNear = pTerm->nNear*10 + (zSegment[iBegin+k] - '0');





        }
        pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);

      } else {
        pTerm->nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;


      }
      pTerm->nNear++;
      continue;
    }

    queryAdd(pQuery, zToken, nToken);
    if( !inPhrase && iBegin>0 && zSegment[iBegin-1]=='-' ){
      pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
    }
    if( iEnd<nSegment && zSegment[iEnd]=='*' ){
      pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1;
    }
    pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
    if( inPhrase ){
      nTerm++;
    }
  }

  if( inPhrase && pQuery->nTerms>firstIndex ){
    pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
  }

  return pModule->xClose(pCursor);
}

/* Parse a query string, yielding a Query object pQuery.
**
** The calling function will need to queryClear() to clean up
** the dynamically allocated memory held by pQuery.
*/
static int parseQuery(
  fulltext_vtab *v,        /* The fulltext index */
  const char *zInput,      /* Input text of the query string */
  int nInput,              /* Size of the input text */
  int dfltColumn,          /* Default column of the index to match against */
  Query *pQuery            /* Write the parse results here. */



){
  int iInput, inPhrase = 0;


  int ii;
  QueryTerm *aTerm;





  if( zInput==0 ) nInput = 0;
  if( nInput<0 ) nInput = strlen(zInput);
  pQuery->nTerms = 0;
  pQuery->pTerms = NULL;
  pQuery->nextIsOr = 0;
  pQuery->nextColumn = dfltColumn;
  pQuery->dfltColumn = dfltColumn;
  pQuery->pFts = v;




  for(iInput=0; iInput<nInput; ++iInput){
    int i;
    for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
    if( i>iInput ){
      tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,





                       pQuery);

    }
    iInput = i;
    if( i<nInput ){
      assert( zInput[i]=='"' );
      inPhrase = !inPhrase;



    }



  }




  if( inPhrase ){
    /* unmatched quote */
    queryClear(pQuery);
    return SQLITE_ERROR;
  }

  /* Modify the values of the QueryTerm.nPhrase variables to account for
  ** the NEAR operator. For the purposes of QueryTerm.nPhrase, phrases
  ** and tokens connected by the NEAR operator are handled as a single
  ** phrase. See comments above the QueryTerm structure for details.
  */
  aTerm = pQuery->pTerms;
  for(ii=0; ii<pQuery->nTerms; ii++){
    if( aTerm[ii].nNear || aTerm[ii].nPhrase ){
      while (aTerm[ii+aTerm[ii].nPhrase].nNear) {
        aTerm[ii].nPhrase += (1 + aTerm[ii+aTerm[ii].nPhrase+1].nPhrase);


      }
    }
  }

  return SQLITE_OK;
}

/* TODO(shess) Refactor the code to remove this forward decl. */
static int flushPendingTerms(fulltext_vtab *v);

/* Perform a full-text query using the search expression in
** zInput[0..nInput-1].  Return a list of matching documents
................................................................................
*/
static int fulltextQuery(
  fulltext_vtab *v,      /* The full text index */
  int iColumn,           /* Match against this column by default */
  const char *zInput,    /* The query string */
  int nInput,            /* Number of bytes in zInput[] */
  DataBuffer *pResult,   /* Write the result doclist here */
  Query *pQuery          /* Put parsed query string here */
){
  int i, iNext, rc;
  DataBuffer left, right, or, new;
  int nNot = 0;
  QueryTerm *aTerm;

  /* TODO(shess) Instead of flushing pendingTerms, we could query for
  ** the relevant term and merge the doclist into what we receive from
  ** the database.  Wait and see if this is a common issue, first.
  **
  ** A good reason not to flush is to not generate update-related
  ** error codes from here.
  */

  /* Flush any buffered updates before executing the query. */
  rc = flushPendingTerms(v);
  if( rc!=SQLITE_OK ) return rc;

  /* TODO(shess) I think that the queryClear() calls below are not
  ** necessary, because fulltextClose() already clears the query.
  */
  rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
  if( rc!=SQLITE_OK ) return rc;

  /* Empty or NULL queries return no results. */
  if( pQuery->nTerms==0 ){
    dataBufferInit(pResult, 0);
    return SQLITE_OK;
  }

  /* Merge AND terms. */
  /* TODO(shess) I think we can early-exit if( i>nNot && left.nData==0 ). */
  aTerm = pQuery->pTerms;
  for(i = 0; i<pQuery->nTerms; i=iNext){
    if( aTerm[i].isNot ){
      /* Handle all NOT terms in a separate pass */
      nNot++;
      iNext = i + aTerm[i].nPhrase+1;
      continue;
    }
    iNext = i + aTerm[i].nPhrase + 1;
    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right);
    if( rc ){
      if( i!=nNot ) dataBufferDestroy(&left);
      queryClear(pQuery);
      return rc;
    }
    while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
      rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or);
      iNext += aTerm[iNext].nPhrase + 1;
      if( rc ){
        if( i!=nNot ) dataBufferDestroy(&left);
        dataBufferDestroy(&right);
        queryClear(pQuery);
        return rc;
      }
      dataBufferInit(&new, 0);
      docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new);
      dataBufferDestroy(&right);
      dataBufferDestroy(&or);
      right = new;
    }
    if( i==nNot ){           /* first term processed. */
      left = right;
    }else{
      dataBufferInit(&new, 0);
      docListAndMerge(left.pData, left.nData, right.pData, right.nData, &new);
      dataBufferDestroy(&right);
      dataBufferDestroy(&left);
      left = new;
    }
  }

  if( nNot==pQuery->nTerms ){
    /* We do not yet know how to handle a query of only NOT terms */
    return SQLITE_ERROR;
  }

  /* Do the EXCEPT terms */
  for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){
    if( !aTerm[i].isNot ) continue;
    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right);




    if( rc ){
      queryClear(pQuery);
      dataBufferDestroy(&left);

      return rc;
    }
    dataBufferInit(&new, 0);
    docListExceptMerge(left.pData, left.nData, right.pData, right.nData, &new);
    dataBufferDestroy(&right);
    dataBufferDestroy(&left);
    left = new;
  }

  *pResult = left;
  return rc;

}

/*
** This is the xFilter interface for the virtual table.  See
** the virtual table xFilter method documentation for additional
** information.
**
................................................................................
    case QUERY_DOCID:
      rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
      if( rc!=SQLITE_OK ) return rc;
      break;

    default:   /* full-text search */
    {

      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
      assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
      assert( argc==1 );
      queryClear(&c->q);
      if( c->result.nData!=0 ){
        /* This case happens if the same cursor is used repeatedly. */
        dlrDestroy(&c->reader);
        dataBufferReset(&c->result);
      }else{
        dataBufferInit(&c->result, 0);
      }
      rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q);
      if( rc!=SQLITE_OK ) return rc;
      if( c->result.nData!=0 ){
        dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData);
      }
      break;
    }
  }
................................................................................
  dataBufferDestroy(&result);
  return rc;
}

/* Scan the database and merge together the posting lists for the term
** into *out.
*/
static int termSelect(fulltext_vtab *v, int iColumn,


                      const char *pTerm, int nTerm, int isPrefix,

                      DocListType iType, DataBuffer *out){


  DataBuffer doclist;
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  /* This code should never be called with buffered updates. */
  assert( v->nPendingData<0 );

  dataBufferInit(&doclist, 0);


  /* Traverse the segments from oldest to newest so that newer doclist
  ** elements for given docids overwrite older elements.
  */
  while( (rc = sqlite3_step(s))==SQLITE_ROW ){
    const char *pData = sqlite3_column_blob(s, 2);
    const int nData = sqlite3_column_bytes(s, 2);
................................................................................
    if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
     || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) 
     || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
    ){
      rc = SQLITE_NOMEM;
    }
  }





  /* Create the virtual table wrapper around the hash-table and overload 
  ** the two scalar functions. If this is successful, register the
  ** module with sqlite.
  */
  if( SQLITE_OK==rc 
   && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))







>







 







<
<
<
<
<







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|






|







 







<
<
<
<
<
<
<
<
<
<
<
<









>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






|










<
<













|






<
<
<
<
|

<
|
<
>
>



|
|
|
|
|
|
|
|
|
|
|
|
>







 







>
>
>
>
>

>
>
|
|
<
<
<
<
<
<
<
<
<
<
<
<
|
<
>
>
>
|
|
>
>
>
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
|
<
>
>
>
|
<
<
<
<
<
<
<
<
<
<
<
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<
>
|
<
<
<
<
|
|
<
<
>
>
>
>
|
<
<
<
<
>
|
|
>
>
|
|
<
|
<
<
<
<
<
<
<
>
>
>
|
|
|
>
|
<
>
>
|
|
<
<
>
|
|
|
<
>
|
>
>
>
>
>
>
>










>
|

|
|
<
>







 







|


|
>
>




|
>







 







|







 







|

|
>
>







 







|
|
<




|
|
|
|
|

<
|
>
>
>
>
>
>
|
<
<


|

>
|
<
<
<
<
|
|
|
<
>
|
|
|
<
<
<
<
|
<
<
<
<
<
>
>
|
<
|
<
<
>
|
|
|
<
|
<
<
|
|
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
|
<
<
<

<
<
<
<
<
<
<
<
>
>

|
<
<
<
<
>
>
>

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
|
<
<
<
<
<
<
>
>
>
>
>
|
<
<
<
<
<
<
<
<
<
<
<
<
>
>
>
>
>
|
<
>
|
<
>
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
>
>
>
|
<
>
>
|
<
>
>
>
>
|
<
<
<
<
<
<
<
<
>
>
>
|
<
<
<
<
<
>
>
>
>
>
|
>
|
<
<
<
<
>
>
>
|
>
>
>
|
>
>
>
|
<
<
<
<
|
|
<
<
<
<
<
<
<
<
<
<
>
>
|
|
|
<
|







 







|

<
<
|
<











|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
>
>
>
>
|
<
<
>
|
|
<
<
<
<
<
|
<
<
<
>







 







>



<







|







 







|
>
>
|
>
|
>
>









>







 







>
>
>
>







281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
...
309
310
311
312
313
314
315





316
317
318
319
320
321
322
....
1779
1780
1781
1782
1783
1784
1785




















































































1786
1787
1788
1789
1790
1791
1792
....
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
....
3085
3086
3087
3088
3089
3090
3091












3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
....
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191


3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211




3212
3213

3214

3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
....
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271












3272

3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307

3308
3309
3310
3311











3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326

3327
3328




3329
3330


3331
3332
3333
3334
3335




3336
3337
3338
3339
3340
3341
3342

3343







3344
3345
3346
3347
3348
3349
3350
3351

3352
3353
3354
3355


3356
3357
3358
3359

3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383

3384
3385
3386
3387
3388
3389
3390
3391
....
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
....
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
....
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
....
3667
3668
3669
3670
3671
3672
3673
3674
3675

3676
3677
3678
3679
3680
3681
3682
3683
3684
3685

3686
3687
3688
3689
3690
3691
3692
3693


3694
3695
3696
3697
3698
3699




3700
3701
3702

3703
3704
3705
3706




3707





3708
3709
3710

3711


3712
3713
3714
3715

3716


3717
3718









3719



























3720
3721
3722



3723








3724
3725
3726
3727




3728
3729
3730
3731

















3732





3733






3734
3735
3736
3737
3738
3739












3740
3741
3742
3743
3744
3745

3746
3747

3748
3749
3750



































3751
3752
3753
3754

3755
3756
3757

3758
3759
3760
3761
3762








3763
3764
3765
3766





3767
3768
3769
3770
3771
3772
3773
3774




3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786




3787
3788










3789
3790
3791
3792
3793

3794
3795
3796
3797
3798
3799
3800
3801
....
3806
3807
3808
3809
3810
3811
3812
3813
3814


3815

3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827




























3828
3829








3830


























3831
3832
3833
3834
3835


3836
3837
3838





3839



3840
3841
3842
3843
3844
3845
3846
3847
....
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923

3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
....
5808
5809
5810
5811
5812
5813
5814
5815
5816
5817
5818
5819
5820
5821
5822
5823
5824
5825
5826
5827
5828
5829
5830
5831
5832
5833
5834
5835
5836
5837
5838
5839
....
6942
6943
6944
6945
6946
6947
6948
6949
6950
6951
6952
6953
6954
6955
6956
6957
6958
6959
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "fts3.h"
#include "fts3_expr.h"
#include "fts3_hash.h"
#include "fts3_tokenizer.h"
#ifndef SQLITE_CORE 
# include "sqlite3ext.h"
  SQLITE_EXTENSION_INIT1
#endif

................................................................................

#if 0
# define FTSTRACE(A)  printf A; fflush(stdout)
#else
# define FTSTRACE(A)
#endif






/* It is not safe to call isspace(), tolower(), or isalnum() on
** hi-bit-set characters.  This is the same solution used in the
** tokenizer.
*/
/* TODO(shess) The snippet-generation code should be using the
** tokenizer-generated tokens rather than doing its own local
** tokenization.
................................................................................
  return rc;
}

/* end utility functions */

/* Forward reference */
typedef struct fulltext_vtab fulltext_vtab;





















































































/*
** An instance of the following structure keeps track of generated
** matching-word offset information and snippets.
*/
typedef struct Snippet {
  int nMatch;     /* Total number of matches */
................................................................................
** is destroyed by xClose.
*/
typedef struct fulltext_cursor {
  sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
  QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
  sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
  int eof;                         /* True if at End Of Results */
  Fts3Expr *pExpr;                 /* Parsed MATCH query string */
  Snippet snippet;                 /* Cached snippet for the current row */
  int iColumn;                     /* Column being searched */
  DataBuffer result;               /* Doclist results from fulltextQuery */
  DLReader reader;                 /* Result reader if result not empty */
} fulltext_cursor;

static fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}

static const sqlite3_module fts3Module;   /* forward declaration */

/* Return a dynamically generated statement of the form
 *   insert into %_content (docid, ...) values (?, ...)
................................................................................
    FTSTRACE(("FTS3 Open %p: %p\n", pVTab, c));
    return SQLITE_OK;
  }else{
    return SQLITE_NOMEM;
  }
}













/* Free all of the dynamically allocated memory held by the
** Snippet
*/
static void snippetClear(Snippet *p){
  sqlite3_free(p->aMatch);
  sqlite3_free(p->zOffset);
  sqlite3_free(p->zSnippet);
  CLEAR(p);
}

/*
** Append a single entry to the p->aMatch[] log.
*/
static void snippetAppendMatch(
  Snippet *p,               /* Append the entry to this snippet */
  int iCol, int iTerm,      /* The column and query term */
  int iToken,               /* Matching token in document */
................................................................................
}

/*
** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
*/
#define FTS3_ROTOR_SZ   (32)
#define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1)

/*
** Function to iterate through the tokens of a compiled expression.
*/
static int nextExprToken(Fts3Expr **ppExpr, int *piToken){
  Fts3Expr *p = *ppExpr;
  int iToken = *piToken;
  if( iToken<0 ){
    /* In this case the expression p is the root of an expression tree.
    ** Move to the first token in the expression tree.
    */
    while( p->pLeft ){
      p = p->pLeft;
    }
    iToken = 0;
  }else{
    assert(p && p->eType==FTSQUERY_PHRASE );
    if( iToken<(p->pPhrase->nToken-1) ){
      iToken++;
    }else{
      iToken = 0;
      while( p->pParent && p->pParent->pLeft!=p ){
        assert( p->pParent->pRight==p );
        p = p->pParent;
      }
      p = p->pParent;
      if( p ){
        p = p->pRight;
        while( p->pLeft ){
          p = p->pLeft;
        }
      }
    }
  }

  *ppExpr = p;
  *piToken = iToken;
  return p?1:0;
}

/*
** Add entries to pSnippet->aMatch[] for every match that occurs against
** document zDoc[0..nDoc-1] which is stored in column iColumn.
*/
static void snippetOffsetsOfColumn(
  fulltext_cursor *pCur,
  Snippet *pSnippet,
  int iColumn,
  const char *zDoc,
  int nDoc
){
  const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
  sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
  sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
  fulltext_vtab *pVtab;                /* The full text index */
  int nColumn;                         /* Number of columns in the index */


  int i, j;                            /* Loop counters */
  int rc;                              /* Return code */
  unsigned int match, prevMatch;       /* Phrase search bitmasks */
  const char *zToken;                  /* Next token from the tokenizer */
  int nToken;                          /* Size of zToken */
  int iBegin, iEnd, iPos;              /* Offsets of beginning and end */

  /* The following variables keep a circular buffer of the last
  ** few tokens */
  unsigned int iRotor = 0;             /* Index of current token */
  int iRotorBegin[FTS3_ROTOR_SZ];      /* Beginning offset of token */
  int iRotorLen[FTS3_ROTOR_SZ];        /* Length of token */

  pVtab = cursor_vtab(pCur);
  nColumn = pVtab->nColumn;
  pTokenizer = pVtab->pTokenizer;
  pTModule = pTokenizer->pModule;
  rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
  if( rc ) return;
  pTCursor->pTokenizer = pTokenizer;





  prevMatch = 0;

  while( !pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos) ){

    Fts3Expr *pIter = pCur->pExpr;
    int iIter = -1;
    iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
    iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
    match = 0;
    for(i=0; i<(FTS3_ROTOR_SZ-1) && nextExprToken(&pIter, &iIter); i++){
      int nPhrase = pIter->pPhrase->nToken;   /* Tokens in current phrase */
      struct PhraseToken *pToken = &pIter->pPhrase->aToken[iIter];
      int iCol = pIter->pPhrase->iColumn;
      if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
      if( pToken->n>nToken ) continue;
      if( !pToken->isPrefix && pToken->n<nToken ) continue;
      assert( pToken->n<=nToken );
      if( memcmp(pToken->z, zToken, pToken->n) ) continue;
      if( iIter>0 && (prevMatch & (1<<i))==0 ) continue;
      match |= 1<<i;
      if( i==(FTS3_ROTOR_SZ-2) || nPhrase==iIter+1 ){
        for(j=nPhrase-1; j>=0; j--){
          int k = (iRotor-j) & FTS3_ROTOR_MASK;
          snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j,
                iRotorBegin[k], iRotorLen[k]);
        }
      }
    }
    prevMatch = match<<1;
................................................................................
** and the query is:
** 
**     A NEAR/0 E
**
** then when this function is called the Snippet contains token offsets
** 0, 4 and 5. This function removes the "0" entry (because the first A
** is not near enough to an E).
**
** When this function is called, the value pointed to by parameter piLeft is
** the integer id of the left-most token in the expression tree headed by
** pExpr. This function increments *piLeft by the total number of tokens
** in the expression tree headed by pExpr.
*/
static int trimSnippetOffsets(
  Fts3Expr *pExpr, 
  Snippet *pSnippet,
  int *piLeft












){

  if( pExpr ){
    if( trimSnippetOffsets(pExpr->pLeft, pSnippet, piLeft) ){
      return 1;
    }

    switch( pExpr->eType ){
      case FTSQUERY_PHRASE:
        *piLeft += pExpr->pPhrase->nToken;
        break;
      case FTSQUERY_NEAR: {
        /* The right-hand-side of a NEAR operator is always a phrase. The
        ** left-hand-side is either a phrase or an expression tree that is 
        ** itself headed by a NEAR operator. The following initializations
        ** set local variable iLeft to the token number of the left-most
        ** token in the right-hand phrase, and iRight to the right most
        ** token in the same phrase. For example, if we had:
        **
        **     <col> MATCH '"abc def" NEAR/2 "ghi jkl"'
        **
        ** then iLeft will be set to 2 (token number of ghi) and nToken will
        ** be set to 4.
        */
        Fts3Expr *pLeft = pExpr->pLeft;
        Fts3Expr *pRight = pExpr->pRight;
        int iLeft = *piLeft;
        int nNear = pExpr->nNear;
        int nToken = pRight->pPhrase->nToken;
        int jj, ii;
        if( pLeft->eType==FTSQUERY_NEAR ){
          pLeft = pLeft->pRight;
        }
        assert( pRight->eType==FTSQUERY_PHRASE );
        assert( pLeft->eType==FTSQUERY_PHRASE );
        nToken += pLeft->pPhrase->nToken;


        for(ii=0; ii<pSnippet->nMatch; ii++){
          struct snippetMatch *p = &pSnippet->aMatch[ii];
          if( p->iTerm==iLeft ){
            int isOk = 0;











            /* Snippet ii is an occurence of query term iLeft in the document.
            ** It occurs at position (p->iToken) of the document. We now
            ** search for an instance of token (iLeft-1) somewhere in the 
            ** range (p->iToken - nNear)...(p->iToken + nNear + nToken) within 
            ** the set of snippetMatch structures. If one is found, proceed. 
            ** If one cannot be found, then remove snippets ii..(ii+N-1) 
            ** from the matching snippets, where N is the number of tokens 
            ** in phrase pRight->pPhrase.
            */
            for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
              struct snippetMatch *p2 = &pSnippet->aMatch[jj];
              if( p2->iTerm==(iLeft-1) ){
                if( p2->iToken>=(p->iToken-nNear-1) 
                 && p2->iToken<(p->iToken+nNear+nToken) 
                ){

                  isOk = 1;
                }




              }
            }


            if( !isOk ){
              int kk;
              for(kk=0; kk<pRight->pPhrase->nToken; kk++){
                pSnippet->aMatch[kk+ii].iTerm = -2;
              }




              return 1;
            }
          }
          if( p->iTerm==(iLeft-1) ){
            int isOk = 0;
            for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
              struct snippetMatch *p2 = &pSnippet->aMatch[jj];

              if( p2->iTerm==iLeft ){







                if( p2->iToken<=(p->iToken+nNear+1) 
                 && p2->iToken>(p->iToken-nNear-nToken) 
                ){
                  isOk = 1;
                }
              }
            }
            if( !isOk ){

              int kk;
              for(kk=0; kk<pLeft->pPhrase->nToken; kk++){
                pSnippet->aMatch[ii-kk].iTerm = -2;
              }


              return 1;
            }
          }
        }

        break;
      }
    }

    if( trimSnippetOffsets(pExpr->pRight, pSnippet, piLeft) ){
      return 1;
    }
  }
  return 0;
}

/*
** Compute all offsets for the current row of the query.  
** If the offsets have already been computed, this routine is a no-op.
*/
static void snippetAllOffsets(fulltext_cursor *p){
  int nColumn;
  int iColumn, i;
  int iFirst, iLast;
  int iTerm = 0;
  fulltext_vtab *pFts = cursor_vtab(p);

  if( p->snippet.nMatch || p->pExpr==0 ){
    return;

  }
  nColumn = pFts->nColumn;
  iColumn = (p->iCursorType - QUERY_FULLTEXT);
  if( iColumn<0 || iColumn>=nColumn ){
    iFirst = 0;
    iLast = nColumn-1;
  }else{
    iFirst = iColumn;
................................................................................
    iLast = iColumn;
  }
  for(i=iFirst; i<=iLast; i++){
    const char *zDoc;
    int nDoc;
    zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
    nDoc = sqlite3_column_bytes(p->pStmt, i+1);
    snippetOffsetsOfColumn(p, &p->snippet, i, zDoc, nDoc);
  }

  while( trimSnippetOffsets(p->pExpr, &p->snippet, &iTerm) ){
    iTerm = 0;
  }
}

/*
** Convert the information in the aMatch[] array of the snippet
** into the string zOffset[0..nOffset-1]. This string is used as
** the return of the SQL offsets() function.
*/
static void snippetOffsetText(Snippet *p){
  int i;
  int cnt = 0;
  StringBuffer sb;
  char zBuf[200];
  if( p->zOffset ) return;
................................................................................
  nMatch = pCursor->snippet.nMatch;
  initStringBuffer(&sb);

  for(i=0; i<nMatch; i++){
    aMatch[i].snStatus = SNIPPET_IGNORE;
  }
  nDesired = 0;
  for(i=0; i<FTS3_ROTOR_SZ; i++){
    for(j=0; j<nMatch; j++){
      if( aMatch[j].iTerm==i ){
        aMatch[j].snStatus = SNIPPET_DESIRED;
        nDesired++;
        break;
      }
    }
................................................................................
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  FTSTRACE(("FTS3 Close %p\n", c));
  sqlite3_finalize(c->pStmt);
  sqlite3Fts3ExprFree(c->pExpr);
  snippetClear(&c->snippet);
  if( c->result.nData!=0 ){
    dlrDestroy(&c->reader);
  }
  dataBufferDestroy(&c->result);
  sqlite3_free(c);
  return SQLITE_OK;
}

static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
................................................................................
** another file, term_select() could be pushed above
** docListOfTerm().
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm, int isPrefix,
                      DocListType iType, DataBuffer *out);

/* 
** Return a DocList corresponding to the phrase *pPhrase.

**
** The resulting DL_DOCIDS doclist is stored in pResult, which is
** overwritten.
*/
static int docListOfPhrase(
  fulltext_vtab *pTab,   /* The full text index */
  Fts3Phrase *pPhrase,   /* Phrase to return a doclist corresponding to */
  DocListType eListType, /* Either DL_DOCIDS or DL_POSITIONS */
  DataBuffer *pResult    /* Write the result here */
){

  int ii;
  int rc = SQLITE_OK;
  int iCol = pPhrase->iColumn;
  DocListType eType = eListType;
  assert( eType==DL_POSITIONS || eType==DL_DOCIDS );
  if( pPhrase->nToken>1 ){
    eType = DL_POSITIONS;
  }



  /* This code should never be called with buffered updates. */
  assert( pTab->nPendingData<0 );

  for(ii=0; rc==SQLITE_OK && ii<pPhrase->nToken; ii++){
    DataBuffer tmp;




    struct PhraseToken *p = &pPhrase->aToken[ii];
    rc = termSelect(pTab, iCol, p->z, p->n, p->isPrefix, eType, &tmp);
    if( rc==SQLITE_OK ){

      if( ii==0 ){
        *pResult = tmp;
      }else{
        DataBuffer res = *pResult;




        dataBufferInit(pResult, 0);





        if( ii==(pPhrase->nToken-1) ){
          eType = eListType;
        }

        docListPhraseMerge(


          res.pData, res.nData, tmp.pData, tmp.nData, 0, 0, eType, pResult
        );
        dataBufferDestroy(&res);
        dataBufferDestroy(&tmp);

      }


    }
  }





































  return rc;
}




/*








** Evaluate the full-text expression pExpr against fts3 table pTab. Write
** the results into pRes.
*/
static int evalFts3Expr(




  fulltext_vtab *pTab,           /* Fts3 Virtual table object */
  Fts3Expr *pExpr,               /* Parsed fts3 expression */
  DataBuffer *pRes               /* OUT: Write results of the expression here */
){

















  int rc = SQLITE_OK;












  /* Initialize the output buffer. If this is an empty query (pExpr==0), 
  ** this is all that needs to be done. Empty queries produce empty 
  ** result sets.
  */
  dataBufferInit(pRes, 0);













  if( pExpr ){
    if( pExpr->eType==FTSQUERY_PHRASE ){
      DocListType eType = DL_DOCIDS;
      if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){
        eType = DL_POSITIONS;
      }

      rc = docListOfPhrase(pTab, pExpr->pPhrase, eType, pRes);
    }else{

      DataBuffer lhs;
      DataBuffer rhs;




































      dataBufferInit(&rhs, 0);
      if( SQLITE_OK==(rc = evalFts3Expr(pTab, pExpr->pLeft, &lhs)) 
       && SQLITE_OK==(rc = evalFts3Expr(pTab, pExpr->pRight, &rhs)) 
      ){

        switch( pExpr->eType ){
          case FTSQUERY_NEAR: {
            int nToken;

            Fts3Expr *pLeft;
            DocListType eType = DL_DOCIDS;
            if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){
              eType = DL_POSITIONS;
            }








            pLeft = pExpr->pLeft;
            while( pLeft->eType==FTSQUERY_NEAR ){ 
              pLeft=pLeft->pRight;
            }





            assert( pExpr->pRight->eType==FTSQUERY_PHRASE );
            assert( pLeft->eType==FTSQUERY_PHRASE );
            nToken = pLeft->pPhrase->nToken + pExpr->pRight->pPhrase->nToken;
            docListPhraseMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, 
                pExpr->nNear+1, nToken, eType, pRes
            );
            break;
          }




          case FTSQUERY_NOT: {
            docListExceptMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData,pRes);
            break;
          }
          case FTSQUERY_AND: {
            docListAndMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, pRes);
            break;
          }
          case FTSQUERY_OR: {
            docListOrMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, pRes);
            break;
          }




        }
      }










      dataBufferDestroy(&lhs);
      dataBufferDestroy(&rhs);
    }
  }


  return rc;
}

/* TODO(shess) Refactor the code to remove this forward decl. */
static int flushPendingTerms(fulltext_vtab *v);

/* Perform a full-text query using the search expression in
** zInput[0..nInput-1].  Return a list of matching documents
................................................................................
*/
static int fulltextQuery(
  fulltext_vtab *v,      /* The full text index */
  int iColumn,           /* Match against this column by default */
  const char *zInput,    /* The query string */
  int nInput,            /* Number of bytes in zInput[] */
  DataBuffer *pResult,   /* Write the result doclist here */
  Fts3Expr **ppExpr        /* Put parsed query string here */
){


  int rc;


  /* TODO(shess) Instead of flushing pendingTerms, we could query for
  ** the relevant term and merge the doclist into what we receive from
  ** the database.  Wait and see if this is a common issue, first.
  **
  ** A good reason not to flush is to not generate update-related
  ** error codes from here.
  */

  /* Flush any buffered updates before executing the query. */
  rc = flushPendingTerms(v);
  if( rc!=SQLITE_OK ){




























    return rc;
  }



































  /* Parse the query passed to the MATCH operator. */
  rc = sqlite3Fts3ExprParse(v->pTokenizer, 
      v->azColumn, v->nColumn, iColumn, zInput, nInput, ppExpr
  );
  if( rc!=SQLITE_OK ){


    assert( 0==(*ppExpr) );
    return rc;
  }









  return evalFts3Expr(v, *ppExpr, pResult);
}

/*
** This is the xFilter interface for the virtual table.  See
** the virtual table xFilter method documentation for additional
** information.
**
................................................................................
    case QUERY_DOCID:
      rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
      if( rc!=SQLITE_OK ) return rc;
      break;

    default:   /* full-text search */
    {
      int iCol = idxNum-QUERY_FULLTEXT;
      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
      assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
      assert( argc==1 );

      if( c->result.nData!=0 ){
        /* This case happens if the same cursor is used repeatedly. */
        dlrDestroy(&c->reader);
        dataBufferReset(&c->result);
      }else{
        dataBufferInit(&c->result, 0);
      }
      rc = fulltextQuery(v, iCol, zQuery, -1, &c->result, &c->pExpr);
      if( rc!=SQLITE_OK ) return rc;
      if( c->result.nData!=0 ){
        dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData);
      }
      break;
    }
  }
................................................................................
  dataBufferDestroy(&result);
  return rc;
}

/* Scan the database and merge together the posting lists for the term
** into *out.
*/
static int termSelect(
  fulltext_vtab *v, 
  int iColumn,
  const char *pTerm, int nTerm,             /* Term to query for */
  int isPrefix,                             /* True for a prefix search */
  DocListType iType, 
  DataBuffer *out                           /* Write results here */
){
  DataBuffer doclist;
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  /* This code should never be called with buffered updates. */
  assert( v->nPendingData<0 );

  dataBufferInit(&doclist, 0);
  dataBufferInit(out, 0);

  /* Traverse the segments from oldest to newest so that newer doclist
  ** elements for given docids overwrite older elements.
  */
  while( (rc = sqlite3_step(s))==SQLITE_ROW ){
    const char *pData = sqlite3_column_blob(s, 2);
    const int nData = sqlite3_column_bytes(s, 2);
................................................................................
    if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
     || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) 
     || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
    ){
      rc = SQLITE_NOMEM;
    }
  }

#ifdef SQLITE_TEST
  sqlite3Fts3ExprInitTestInterface(db);
#endif

  /* Create the virtual table wrapper around the hash-table and overload 
  ** the two scalar functions. If this is successful, register the
  ** module with sqlite.
  */
  if( SQLITE_OK==rc 
   && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))

Added ext/fts3/fts3_expr.c.

































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
/*
** 2008 Nov 28
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This module contains code that implements a parser for fts3 query strings
** (the right-hand argument to the MATCH operator). Because the supported 
** syntax is relatively simple, the whole tokenizer/parser system is
** hand-coded. The public interface to this module is declared in source
** code file "fts3_expr.h".
*/

/*
** By default, this module parses the legacy syntax that has been 
** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
** is defined, then it uses the new syntax. The differences between
** the new and the old syntaxes are:
**
**  a) The new syntax supports parenthesis. The old does not.
**
**  b) The new syntax supports the AND and NOT operators. The old does not.
**
**  c) The old syntax supports the "-" token qualifier. This is not 
**     supported by the new syntax (it is replaced by the NOT operator).
**
**  d) When using the old syntax, the OR operator has a greater precedence
**     than an implicit AND. When using the new, both implicity and explicit
**     AND operators have a higher precedence than OR.
**
** If compiled with SQLITE_TEST defined, then this module exports the
** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
** to zero causes the module to use the old syntax. If it is set to 
** non-zero the new syntax is activated. This is so both syntaxes can
** be tested using a single build of testfixture.
*/
#ifdef SQLITE_TEST
int sqlite3_fts3_enable_parentheses = 0;
#else
# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS 
#  define sqlite3_fts3_enable_parentheses 1
# else
#  define sqlite3_fts3_enable_parentheses 0
# endif
#endif

/*
** Default span for NEAR operators.
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10

#include "fts3_expr.h"
#include "sqlite3.h"
#include <ctype.h>
#include <string.h>
#include <assert.h>

typedef struct ParseContext ParseContext;
struct ParseContext {
  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
  const char **azCol;                 /* Array of column names for fts3 table */
  int nCol;                           /* Number of entries in azCol[] */
  int iDefaultCol;                    /* Default column to query */
  sqlite3_context *pCtx;              /* Write error message here */
  int nNest;                          /* Number of nested brackets */
};

/*
** This function is equivalent to the standard isspace() function. 
**
** The standard isspace() can be awkward to use safely, because although it
** is defined to accept an argument of type int, its behaviour when passed
** an integer that falls outside of the range of the unsigned char type
** is undefined (and sometimes, "undefined" means segfault). This wrapper
** is defined to accept an argument of type char, and always returns 0 for
** any values that fall outside of the range of the unsigned char type (i.e.
** negative values).
*/
static int safe_isspace(char c){
  return (c&0x80)==0 ? isspace(c) : 0;
}

/*
** Extract the next token from buffer z (length n) using the tokenizer
** and other information (column names etc.) in pParse. Create an Fts3Expr
** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
** single token and set *ppExpr to point to it. If the end of the buffer is
** reached before a token is found, set *ppExpr to zero. It is the
** responsibility of the caller to eventually deallocate the allocated 
** Fts3Expr structure (if any) by passing it to sqlite3_free().
**
** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
** fails.
*/
static int getNextToken(
  ParseContext *pParse,                   /* fts3 query parse context */
  int iCol,                               /* Value for Fts3Phrase.iColumn */
  const char *z, int n,                   /* Input string */
  Fts3Expr **ppExpr,                      /* OUT: expression */
  int *pnConsumed                         /* OUT: Number of bytes consumed */
){
  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  sqlite3_tokenizer_cursor *pCursor;
  Fts3Expr *pRet = 0;
  int nConsumed = 0;

  rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
  if( rc==SQLITE_OK ){
    const char *zToken;
    int nToken, iStart, iEnd, iPosition;
    int nByte;                               /* total space to allocate */

    pCursor->pTokenizer = pTokenizer;
    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);

    if( rc==SQLITE_OK ){
      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)sqlite3_malloc(nByte);
      if( !pRet ){
        rc = SQLITE_NOMEM;
      }else{
        memset(pRet, 0, nByte);
        pRet->eType = FTSQUERY_PHRASE;
        pRet->pPhrase = (Fts3Phrase *)&pRet[1];
        pRet->pPhrase->nToken = 1;
        pRet->pPhrase->iColumn = iCol;
        pRet->pPhrase->aToken[0].n = nToken;
        pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);

        if( iEnd<n && z[iEnd]=='*' ){
          pRet->pPhrase->aToken[0].isPrefix = 1;
          iEnd++;
        }
        if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
          pRet->pPhrase->isNot = 1;
        }
      }
    }
    nConsumed = iEnd;

    pModule->xClose(pCursor);
  }
  
  *pnConsumed = nConsumed;
  *ppExpr = pRet;
  return rc;
}

void realloc_or_free(void **ppOrig, int nNew){
  void *pRet = sqlite3_realloc(*ppOrig, nNew);
  if( !pRet ){
    sqlite3_free(*ppOrig);
  }
  *ppOrig = pRet;
}

/*
** Buffer zInput, length nInput, contains the contents of a quoted string
** that appeared as part of an fts3 query expression. Neither quote character
** is included in the buffer. This function attempts to tokenize the entire
** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE 
** containing the results.
**
** If successful, SQLITE_OK is returned and *ppExpr set to point at the
** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
** to 0.
*/
static int getNextString(
  ParseContext *pParse,                   /* fts3 query parse context */
  const char *zInput, int nInput,         /* Input string */
  Fts3Expr **ppExpr                       /* OUT: expression */
){
  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  Fts3Expr *p = 0;
  sqlite3_tokenizer_cursor *pCursor = 0;
  char *zTemp = 0;
  int nTemp = 0;

  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
  if( rc==SQLITE_OK ){
    int ii;
    pCursor->pTokenizer = pTokenizer;
    for(ii=0; rc==SQLITE_OK; ii++){
      const char *zToken;
      int nToken, iBegin, iEnd, iPos;
      rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
      if( rc==SQLITE_OK ){
        int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
        realloc_or_free((void **)&p, nByte+ii*sizeof(struct PhraseToken));
        realloc_or_free((void **)&zTemp, nTemp + nToken);
        if( !p || !zTemp ){
          goto no_mem;
        }
        if( ii==0 ){
          memset(p, 0, nByte);
          p->pPhrase = (Fts3Phrase *)&p[1];
          p->eType = FTSQUERY_PHRASE;
          p->pPhrase->iColumn = pParse->iDefaultCol;
        }
        p->pPhrase = (Fts3Phrase *)&p[1];
        p->pPhrase->nToken = ii+1;
        p->pPhrase->aToken[ii].n = nToken;
        memcpy(&zTemp[nTemp], zToken, nToken);
        nTemp += nToken;
        if( iEnd<nInput && zInput[iEnd]=='*' ){
          p->pPhrase->aToken[ii].isPrefix = 1;
        }else{
          p->pPhrase->aToken[ii].isPrefix = 0;
        }
      }
    }

    pModule->xClose(pCursor);
    pCursor = 0;
  }

  if( rc==SQLITE_DONE ){
    int jj;
    char *zNew;
    int nNew = 0;
    int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
    nByte += (p->pPhrase->nToken-1) * sizeof(struct PhraseToken);
    realloc_or_free((void **)&p, nByte + nTemp);
    if( !p ){
      goto no_mem;
    }
    p->pPhrase = (Fts3Phrase *)&p[1];
    zNew = &(((char *)p)[nByte]);
    memcpy(zNew, zTemp, nTemp);
    for(jj=0; jj<p->pPhrase->nToken; jj++){
      p->pPhrase->aToken[jj].z = &zNew[nNew];
      nNew += p->pPhrase->aToken[jj].n;
    }
    sqlite3_free(zTemp);
    rc = SQLITE_OK;
  }

  *ppExpr = p;
  return rc;
no_mem:

  if( pCursor ){
    pModule->xClose(pCursor);
  }
  sqlite3_free(zTemp);
  sqlite3_free(p);
  *ppExpr = 0;
  return SQLITE_NOMEM;
}

/*
** Function getNextNode(), which is called by fts3ExprParse(), may itself
** call fts3ExprParse(). So this forward declaration is required.
*/
static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);

/*
** The output variable *ppExpr is populated with an allocated Fts3Expr 
** structure, or set to 0 if the end of the input buffer is reached.
**
** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
** If SQLITE_ERROR is returned, pContext is populated with an error message.
*/
static int getNextNode(
  ParseContext *pParse,                   /* fts3 query parse context */
  const char *z, int n,                   /* Input string */
  Fts3Expr **ppExpr,                      /* OUT: expression */
  int *pnConsumed                         /* OUT: Number of bytes consumed */
){
  struct Fts3Keyword {
    char *z;
    int n;
    int eType;
  } aKeyword[] = {
    { "OR" ,  2, FTSQUERY_OR   },
    { "AND",  3, FTSQUERY_AND  },
    { "NOT",  3, FTSQUERY_NOT  },
    { "NEAR", 4, FTSQUERY_NEAR }
  };
  int ii;
  int iCol;
  int iColLen;
  int rc;
  Fts3Expr *pRet = 0;

  const char *zInput = z;
  int nInput = n;

  /* Skip over any whitespace before checking for a keyword, an open or
  ** close bracket, or a quoted string. 
  */
  while( nInput>0 && safe_isspace(*zInput) ){
    nInput--;
    zInput++;
  }

  /* See if we are dealing with a keyword. */
  for(ii=0; ii<sizeof(aKeyword)/sizeof(struct Fts3Keyword); ii++){
    struct Fts3Keyword *pKey = &aKeyword[ii];

    if( (0==sqlite3_fts3_enable_parentheses)
     && (pKey->eType==FTSQUERY_AND || pKey->eType==FTSQUERY_NOT) 
    ){
      continue;
    }

    if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
      int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
      int nKey = pKey->n;
      char cNext;

      /* If this is a "NEAR" keyword, check for an explicit nearness. */
      if( pKey->eType==FTSQUERY_NEAR ){
        assert( nKey==4 );
        if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
          nNear = 0;
          for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){
            nNear = nNear * 10 + (zInput[nKey] - '0');
          }
        }
      }

      /* At this point this is probably a keyword. But for that to be true,
      ** the next byte must contain either whitespace, an open or close
      ** bracket, a quote character, or EOF. 
      */
      cNext = zInput[nKey];
      if( safe_isspace(cNext) 
       || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
      ){
        pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr));
        memset(pRet, 0, sizeof(Fts3Expr));
        pRet->eType = pKey->eType;
        pRet->nNear = nNear;
        *ppExpr = pRet;
        *pnConsumed = (zInput - z) + nKey;
        return SQLITE_OK;
      }

      /* Turns out that wasn't a keyword after all. This happens if the
      ** user has supplied a token such as "ORacle". Continue.
      */
    }
  }

  /* Check for an open bracket. */
  if( sqlite3_fts3_enable_parentheses ){
    if( *zInput=='(' ){
      int nConsumed;
      int rc;
      pParse->nNest++;
      rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
      *pnConsumed = (zInput - z) + 1 + nConsumed;
      return rc;
    }
  
    /* Check for a close bracket. */
    if( *zInput==')' ){
      pParse->nNest--;
      *pnConsumed = (zInput - z) + 1;
      return SQLITE_DONE;
    }
  }

  /* See if we are dealing with a quoted phrase. If this is the case, then
  ** search for the closing quote and pass the whole string to getNextString()
  ** for processing. This is easy to do, as fts3 has no syntax for escaping
  ** a quote character embedded in a string.
  */
  if( *zInput=='"' ){
    for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
    *pnConsumed = (zInput - z) + ii + 1;
    if( ii==nInput ){
      return SQLITE_ERROR;
    }
    return getNextString(pParse, &zInput[1], ii-1, ppExpr);
  }


  /* If control flows to this point, this must be a regular token, or 
  ** the end of the input. Read a regular token using the sqlite3_tokenizer
  ** interface. Before doing so, figure out if there is an explicit
  ** column specifier for the token. 
  **
  ** TODO: Strangely, it is not possible to associate a column specifier
  ** with a quoted phrase, only with a single token. Not sure if this was
  ** an implementation artifact or an intentional decision when fts3 was
  ** first implemented. Whichever it was, this module duplicates the 
  ** limitation.
  */
  iCol = pParse->iDefaultCol;
  iColLen = 0;
  for(ii=0; ii<pParse->nCol; ii++){
    const char *zStr = pParse->azCol[ii];
    int nStr = strlen(zStr);
    if( nInput>nStr && zInput[nStr]==':' && memcmp(zStr, zInput, nStr)==0 ){
      iCol = ii;
      iColLen = ((zInput - z) + nStr + 1);
      break;
    }
  }
  rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
  *pnConsumed += iColLen;
  return rc;
}

/*
** The argument is an Fts3Expr structure for a binary operator (any type
** except an FTSQUERY_PHRASE). Return an integer value representing the
** precedence of the operator. Lower values have a higher precedence (i.e.
** group more tightly). For example, in the C language, the == operator
** groups more tightly than ||, and would therefore have a higher precedence.
**
** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
** is defined), the order of the operators in precedence from highest to
** lowest is:
**
**   NEAR
**   NOT
**   AND (including implicit ANDs)
**   OR
**
** Note that when using the old query syntax, the OR operator has a higher
** precedence than the AND operator.
*/
static int opPrecedence(Fts3Expr *p){
  assert( p->eType!=FTSQUERY_PHRASE );
  if( sqlite3_fts3_enable_parentheses ){
    return p->eType;
  }else if( p->eType==FTSQUERY_NEAR ){
    return 1;
  }else if( p->eType==FTSQUERY_OR ){
    return 2;
  }
  assert( p->eType==FTSQUERY_AND );
  return 3;
}

/*
** Argument ppHead contains a pointer to the current head of a query 
** expression tree being parsed. pPrev is the expression node most recently
** inserted into the tree. This function adds pNew, which is always a binary
** operator node, into the expression tree based on the relative precedence
** of pNew and the existing nodes of the tree. This may result in the head
** of the tree changing, in which case *ppHead is set to the new root node.
*/
static void insertBinaryOperator(
  Fts3Expr **ppHead,       /* Pointer to the root node of a tree */
  Fts3Expr *pPrev,         /* Node most recently inserted into the tree */
  Fts3Expr *pNew           /* New binary node to insert into expression tree */
){
  Fts3Expr *pSplit = pPrev;
  while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
    pSplit = pSplit->pParent;
  }

  if( pSplit->pParent ){
    assert( pSplit->pParent->pRight==pSplit );
    pSplit->pParent->pRight = pNew;
    pNew->pParent = pSplit->pParent;
  }else{
    *ppHead = pNew;
  }
  pNew->pLeft = pSplit;
  pSplit->pParent = pNew;
}

/*
** Parse the fts3 query expression found in buffer z, length n. This function
** returns either when the end of the buffer is reached or an unmatched 
** closing bracket - ')' - is encountered.
**
** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
** parsed form of the expression and *pnConsumed is set to the number of
** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
** (out of memory error) or SQLITE_ERROR (parse error) is returned.
*/
static int fts3ExprParse(
  ParseContext *pParse,                   /* fts3 query parse context */
  const char *z, int n,                   /* Text of MATCH query */
  Fts3Expr **ppExpr,                      /* OUT: Parsed query structure */
  int *pnConsumed                         /* OUT: Number of bytes consumed */
){
  Fts3Expr *pRet = 0;
  Fts3Expr *pPrev = 0;
  Fts3Expr *pNotBranch = 0;               /* Only used in legacy parse mode */
  int nIn = n;
  const char *zIn = z;
  int rc = SQLITE_OK;
  int isRequirePhrase = 1;

  while( rc==SQLITE_OK ){
    Fts3Expr *p = 0;
    int nByte;
    rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
    if( rc==SQLITE_OK ){
      int isPhrase;

      if( !sqlite3_fts3_enable_parentheses 
       && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot 
      ){
        /* Create an implicit NOT operator. */
        Fts3Expr *pNot = sqlite3_malloc(sizeof(Fts3Expr));
        if( !pNot ){
          sqlite3Fts3ExprFree(p);
          rc = SQLITE_NOMEM;
          goto exprparse_out;
        }
        memset(pNot, 0, sizeof(Fts3Expr));
        pNot->eType = FTSQUERY_NOT;
        pNot->pRight = p;
        if( pNotBranch ){
          pNotBranch->pLeft = p;
          pNot->pRight = pNotBranch;
        }
        pNotBranch = pNot;
      }else{
        assert( p->eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot );
        isPhrase = (p->eType==FTSQUERY_PHRASE || p->pLeft);
        if( !isPhrase && isRequirePhrase ){
          sqlite3Fts3ExprFree(p);
          rc = SQLITE_ERROR;
          goto exprparse_out;
        }
  
        if( isPhrase && !isRequirePhrase ){
          /* Insert an implicit AND operator. */
          Fts3Expr *pAnd;
          assert( pRet && pPrev );
          pAnd = sqlite3_malloc(sizeof(Fts3Expr));
          if( !pAnd ){
            sqlite3Fts3ExprFree(p);
            rc = SQLITE_NOMEM;
            goto exprparse_out;
          }
          memset(pAnd, 0, sizeof(Fts3Expr));
          pAnd->eType = FTSQUERY_AND;
          insertBinaryOperator(&pRet, pPrev, pAnd);
          pPrev = pAnd;
        }

        if( pPrev && (
            (pPrev->eType==FTSQUERY_NEAR && p->eType!=FTSQUERY_PHRASE)
         || (p->eType==FTSQUERY_NEAR && pPrev->eType!=FTSQUERY_PHRASE) 
        )){
          /* This is an attempt to do "phrase NEAR (bracketed expression)"
          ** or "(bracketed expression) NEAR phrase", both of which are
          ** illegal. Return an error.
          */
          sqlite3Fts3ExprFree(p);
          rc = SQLITE_ERROR;
          goto exprparse_out;
        }
  
        if( isPhrase ){
          if( pRet ){
            assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
            pPrev->pRight = p;
            p->pParent = pPrev;
          }else{
            pRet = p;
          }
        }else{
          insertBinaryOperator(&pRet, pPrev, p);
        }
        isRequirePhrase = !isPhrase;
      }
      assert( nByte>0 );
    }
    nIn -= nByte;
    zIn += nByte;
    pPrev = p;
  }

  if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
    rc = SQLITE_ERROR;
  }

  if( rc==SQLITE_DONE ){
    rc = SQLITE_OK;
    if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
      if( !pRet ){
        rc = SQLITE_ERROR;
      }else{
        pNotBranch->pLeft = pRet;
        pRet = pNotBranch;
      }
    }
  }
  *pnConsumed = n - nIn;

exprparse_out:
  if( rc!=SQLITE_OK ){
    sqlite3Fts3ExprFree(pRet);
    sqlite3Fts3ExprFree(pNotBranch);
    pRet = 0;
  }
  *ppExpr = pRet;
  return rc;
}

/*
** Parameters z and n contain a pointer to and length of a buffer containing
** an fts3 query expression, respectively. This function attempts to parse the
** query expression and create a tree of Fts3Expr structures representing the
** parsed expression. If successful, *ppExpr is set to point to the head
** of the parsed expression tree and SQLITE_OK is returned. If an error
** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
** error) is returned and *ppExpr is set to 0.
**
** If parameter n is a negative number, then z is assumed to point to a
** nul-terminated string and the length is determined using strlen().
**
** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
** use to normalize query tokens while parsing the expression. The azCol[]
** array, which is assumed to contain nCol entries, should contain the names
** of each column in the target fts3 table, in order from left to right. 
** Column names must be nul-terminated strings.
**
** The iDefaultCol parameter should be passed the index of the table column
** that appears on the left-hand-side of the MATCH operator (the default
** column to match against for tokens for which a column name is not explicitly
** specified as part of the query string), or -1 if tokens may by default
** match any table column.
*/
int sqlite3Fts3ExprParse(
  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
  char **azCol,                       /* Array of column names for fts3 table */
  int nCol,                           /* Number of entries in azCol[] */
  int iDefaultCol,                    /* Default column to query */
  const char *z, int n,               /* Text of MATCH query */
  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
){
  int nParsed;
  int rc;
  ParseContext sParse;
  sParse.pTokenizer = pTokenizer;
  sParse.azCol = (const char **)azCol;
  sParse.nCol = nCol;
  sParse.iDefaultCol = iDefaultCol;
  sParse.nNest = 0;
  if( z==0 ){
    *ppExpr = 0;
    return SQLITE_OK;
  }
  if( n<0 ){
    n = strlen(z);
  }
  rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);

  /* Check for mismatched parenthesis */
  if( rc==SQLITE_OK && sParse.nNest ){
    rc = SQLITE_ERROR;
    sqlite3Fts3ExprFree(*ppExpr);
    *ppExpr = 0;
  }

  return rc;
}

/*
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
*/
void sqlite3Fts3ExprFree(Fts3Expr *p){
  if( p ){
    sqlite3Fts3ExprFree(p->pLeft);
    sqlite3Fts3ExprFree(p->pRight);
    sqlite3_free(p);
  }
}

/****************************************************************************
*****************************************************************************
** Everything after this point is just test code.
*/

#ifdef SQLITE_TEST

#include <stdio.h>

/*
** Function to query the hash-table of tokenizers (see README.tokenizers).
*/
static int queryTokenizer(
  sqlite3 *db, 
  const char *zName,  
  const sqlite3_tokenizer_module **pp
){
  int rc;
  sqlite3_stmt *pStmt;
  const char zSql[] = "SELECT fts3_tokenizer(?)";

  *pp = 0;
  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
  if( SQLITE_ROW==sqlite3_step(pStmt) ){
    if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
      memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
    }
  }

  return sqlite3_finalize(pStmt);
}

/*
** This function is part of the test interface for the query parser. It
** writes a text representation of the query expression pExpr into the
** buffer pointed to by argument zBuf. It is assumed that zBuf is large 
** enough to store the required text representation.
*/
static void exprToString(Fts3Expr *pExpr, char *zBuf){
  switch( pExpr->eType ){
    case FTSQUERY_PHRASE: {
      Fts3Phrase *pPhrase = pExpr->pPhrase;
      int i;
      zBuf += sprintf(zBuf, "PHRASE %d %d", pPhrase->iColumn, pPhrase->isNot);
      for(i=0; i<pPhrase->nToken; i++){
        zBuf += sprintf(zBuf," %.*s",pPhrase->aToken[i].n,pPhrase->aToken[i].z);
        zBuf += sprintf(zBuf,"%s", (pPhrase->aToken[i].isPrefix?"+":""));
      }
      return;
    }

    case FTSQUERY_NEAR:
      zBuf += sprintf(zBuf, "NEAR/%d ", pExpr->nNear);
      break;
    case FTSQUERY_NOT:
      zBuf += sprintf(zBuf, "NOT ");
      break;
    case FTSQUERY_AND:
      zBuf += sprintf(zBuf, "AND ");
      break;
    case FTSQUERY_OR:
      zBuf += sprintf(zBuf, "OR ");
      break;
  }

  zBuf += sprintf(zBuf, "{");
  exprToString(pExpr->pLeft, zBuf);
  zBuf += strlen(zBuf);
  zBuf += sprintf(zBuf, "} ");

  zBuf += sprintf(zBuf, "{");
  exprToString(pExpr->pRight, zBuf);
  zBuf += strlen(zBuf);
  zBuf += sprintf(zBuf, "}");
}

/*
** This is the implementation of a scalar SQL function used to test the 
** expression parser. It should be called as follows:
**
**   fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
**
** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
** to parse the query expression (see README.tokenizers). The second argument
** is the query expression to parse. Each subsequent argument is the name
** of a column of the fts3 table that the query expression may refer to.
** For example:
**
**   SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
*/
static void fts3ExprTest(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  sqlite3_tokenizer_module const *pModule = 0;
  sqlite3_tokenizer *pTokenizer;
  int rc;
  char **azCol = 0;
  const char *zExpr;
  int nExpr;
  int nCol;
  int ii;
  Fts3Expr *pExpr;
  sqlite3 *db = sqlite3_context_db_handle(context);

  if( argc<3 ){
    sqlite3_result_error(context, 
        "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
    );
    return;
  }

  rc = queryTokenizer(db, (const char *)sqlite3_value_text(argv[0]), &pModule);
  if( rc==SQLITE_NOMEM ){
    sqlite3_result_error_nomem(context);
    goto exprtest_out;
  }else if( !pModule ){
    sqlite3_result_error(context, "No such tokenizer module", -1);
    goto exprtest_out;
  }

  rc = pModule->xCreate(0, 0, &pTokenizer);
  assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
  if( rc==SQLITE_NOMEM ){
    sqlite3_result_error_nomem(context);
    goto exprtest_out;
  }
  pTokenizer->pModule = pModule;

  zExpr = (const char *)sqlite3_value_text(argv[1]);
  nExpr = sqlite3_value_bytes(argv[1]);
  nCol = argc-2;
  azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
  if( !azCol ){
    sqlite3_result_error_nomem(context);
    goto exprtest_out;
  }
  for(ii=0; ii<nCol; ii++){
    azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
  }

  rc = sqlite3Fts3ExprParse(
      pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr
  );
  if( rc==SQLITE_NOMEM ){
    sqlite3_result_error_nomem(context);
    goto exprtest_out;
  }else if( rc==SQLITE_OK ){
    char zBuf[4096];
    exprToString(pExpr, zBuf);
    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
    sqlite3Fts3ExprFree(pExpr);
  }else{
    sqlite3_result_error(context, "Error parsing expression", -1);
  }

exprtest_out:
  if( pTokenizer ){
    rc = pModule->xDestroy(pTokenizer);
  }
  sqlite3_free(azCol);
}

/*
** Register the query expression parser test function fts3_exprtest() 
** with database connection db. 
*/
void sqlite3Fts3ExprInitTestInterface(sqlite3* db){
  sqlite3_create_function(
      db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
  );
}

#endif

Added ext/fts3/fts3_expr.h.















































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/*
** 2008 Nov 28
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
*/

#include "fts3_tokenizer.h"
#include "sqlite3.h"

/*
** The following describes the syntax supported by the fts3 MATCH
** operator in a similar format to that used by the lemon parser
** generator. This module does not use actually lemon, it uses a
** custom parser.
**
**   phrase ::= TOKEN.
**   phrase ::= TOKEN:COLUMN.
**   phrase ::= "TOKEN TOKEN TOKEN...".
**   phrase ::= phrase near phrase.
**
**   near ::= NEAR.
**   near ::= NEAR / INTEGER.
**
**   query ::= -TOKEN.
**   query ::= phrase.
**   query ::= LP query RP.
**   query ::= query NOT query.
**   query ::= query OR query.
**   query ::= query AND query.
*/

typedef struct Fts3Expr Fts3Expr;
typedef struct Fts3Phrase Fts3Phrase;

struct Fts3Phrase {
  int nToken;          /* Number of entries in aToken[] */
  int iColumn;         /* Index of column this phrase must match */
  int isNot;           /* Phrase prefixed by unary not (-) operator */
  struct PhraseToken {
    char *z;
    int n;             /* Number of bytes in buffer pointed to by z */
    int isPrefix;      /* True if token ends in with a "*" character */
  } aToken[1];
};

struct Fts3Expr {
  int eType;                 /* One of the FTSQUERY_XXX values defined below */
  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
  Fts3Expr *pParent;
  Fts3Expr *pLeft;
  Fts3Expr *pRight;
  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */
};

int sqlite3Fts3ExprParse(sqlite3_tokenizer *, char **, int, int, 
                         const char *, int, Fts3Expr **);
void sqlite3Fts3ExprFree(Fts3Expr *);

/*
** Candidate values for Fts3Query.eType. Note that the order of the first
** four values is in order of precedence when parsing expressions. For 
** example, the following:
**
**   "a OR b AND c NOT d NEAR e"
**
** is equivalent to:
**
**   "a OR (b AND (c NOT (d NEAR e)))"
*/
#define FTSQUERY_NEAR   1
#define FTSQUERY_NOT    2
#define FTSQUERY_AND    3
#define FTSQUERY_OR     4
#define FTSQUERY_PHRASE 5

#ifdef SQLITE_TEST
void sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
#endif

Changes to main.mk.

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
...
174
175
176
177
178
179
180


181
182
183
184
185
186
187
...
243
244
245
246
247
248
249

250
251
252
253
254
255
256
257
...
279
280
281
282
283
284
285

286
287
288
289
290
291
292
...
407
408
409
410
411
412
413



414
415
416
417
418
419
420
TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3

# Object files for the SQLite library.
#
LIBOBJ+= alter.o analyze.o attach.o auth.o \
         bitvec.o btmutex.o btree.o build.o \
         callback.o complete.o date.o delete.o expr.o fault.o \
         fts3.o fts3_hash.o fts3_icu.o fts3_porter.o \
         fts3_tokenizer.o fts3_tokenizer1.o \
         func.o global.o hash.o \
         icu.o insert.o journal.o legacy.o loadext.o \
         main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \
         memjournal.o \
         mutex.o mutex_noop.o mutex_os2.o mutex_unix.o mutex_w32.o \
         opcodes.o os.o os_os2.o os_unix.o os_win.o \
................................................................................
  $(TOP)/ext/fts2/fts2_porter.c \
  $(TOP)/ext/fts2/fts2_tokenizer.h \
  $(TOP)/ext/fts2/fts2_tokenizer.c \
  $(TOP)/ext/fts2/fts2_tokenizer1.c
SRC += \
  $(TOP)/ext/fts3/fts3.c \
  $(TOP)/ext/fts3/fts3.h \


  $(TOP)/ext/fts3/fts3_hash.c \
  $(TOP)/ext/fts3/fts3_hash.h \
  $(TOP)/ext/fts3/fts3_icu.c \
  $(TOP)/ext/fts3/fts3_porter.c \
  $(TOP)/ext/fts3/fts3_tokenizer.h \
  $(TOP)/ext/fts3/fts3_tokenizer.c \
  $(TOP)/ext/fts3/fts3_tokenizer1.c
................................................................................
  $(TOP)/src/expr.c $(TOP)/src/func.c $(TOP)/src/insert.c $(TOP)/src/os.c      \
  $(TOP)/src/os_os2.c $(TOP)/src/os_unix.c $(TOP)/src/os_win.c                 \
  $(TOP)/src/pager.c $(TOP)/src/pragma.c $(TOP)/src/prepare.c                  \
  $(TOP)/src/printf.c $(TOP)/src/random.c $(TOP)/src/pcache.c                  \
  $(TOP)/src/pcache1.c $(TOP)/src/select.c $(TOP)/src/tokenize.c               \
  $(TOP)/src/utf.c $(TOP)/src/util.c $(TOP)/src/vdbeapi.c $(TOP)/src/vdbeaux.c \
  $(TOP)/src/vdbe.c $(TOP)/src/vdbemem.c $(TOP)/src/where.c parse.c            \

  $(TOP)/ext/fts3/fts3.c $(TOP)/ext/fts3/fts3_tokenizer.c 

# Header files used by all library source files.
#
HDR = \
   $(TOP)/src/btree.h \
   $(TOP)/src/btreeInt.h \
   $(TOP)/src/hash.h \
................................................................................
  $(TOP)/ext/fts1/fts1_tokenizer.h
EXTHDR += \
  $(TOP)/ext/fts2/fts2.h \
  $(TOP)/ext/fts2/fts2_hash.h \
  $(TOP)/ext/fts2/fts2_tokenizer.h
EXTHDR += \
  $(TOP)/ext/fts3/fts3.h \

  $(TOP)/ext/fts3/fts3_hash.h \
  $(TOP)/ext/fts3/fts3_tokenizer.h
EXTHDR += \
  $(TOP)/ext/rtree/rtree.h
EXTHDR += \
  $(TOP)/ext/icu/sqliteicu.h

................................................................................
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer.c

fts2_tokenizer1.o:	$(TOP)/ext/fts2/fts2_tokenizer1.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer1.c

fts3.o:	$(TOP)/ext/fts3/fts3.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3.c




fts3_hash.o:	$(TOP)/ext/fts3/fts3_hash.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_hash.c

fts3_icu.o:	$(TOP)/ext/fts3/fts3_icu.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_icu.c








|







 







>
>







 







>
|







 







>







 







>
>
>







48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
...
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
...
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
...
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
...
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3

# Object files for the SQLite library.
#
LIBOBJ+= alter.o analyze.o attach.o auth.o \
         bitvec.o btmutex.o btree.o build.o \
         callback.o complete.o date.o delete.o expr.o fault.o \
         fts3.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \
         fts3_tokenizer.o fts3_tokenizer1.o \
         func.o global.o hash.o \
         icu.o insert.o journal.o legacy.o loadext.o \
         main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \
         memjournal.o \
         mutex.o mutex_noop.o mutex_os2.o mutex_unix.o mutex_w32.o \
         opcodes.o os.o os_os2.o os_unix.o os_win.o \
................................................................................
  $(TOP)/ext/fts2/fts2_porter.c \
  $(TOP)/ext/fts2/fts2_tokenizer.h \
  $(TOP)/ext/fts2/fts2_tokenizer.c \
  $(TOP)/ext/fts2/fts2_tokenizer1.c
SRC += \
  $(TOP)/ext/fts3/fts3.c \
  $(TOP)/ext/fts3/fts3.h \
  $(TOP)/ext/fts3/fts3_expr.c \
  $(TOP)/ext/fts3/fts3_expr.h \
  $(TOP)/ext/fts3/fts3_hash.c \
  $(TOP)/ext/fts3/fts3_hash.h \
  $(TOP)/ext/fts3/fts3_icu.c \
  $(TOP)/ext/fts3/fts3_porter.c \
  $(TOP)/ext/fts3/fts3_tokenizer.h \
  $(TOP)/ext/fts3/fts3_tokenizer.c \
  $(TOP)/ext/fts3/fts3_tokenizer1.c
................................................................................
  $(TOP)/src/expr.c $(TOP)/src/func.c $(TOP)/src/insert.c $(TOP)/src/os.c      \
  $(TOP)/src/os_os2.c $(TOP)/src/os_unix.c $(TOP)/src/os_win.c                 \
  $(TOP)/src/pager.c $(TOP)/src/pragma.c $(TOP)/src/prepare.c                  \
  $(TOP)/src/printf.c $(TOP)/src/random.c $(TOP)/src/pcache.c                  \
  $(TOP)/src/pcache1.c $(TOP)/src/select.c $(TOP)/src/tokenize.c               \
  $(TOP)/src/utf.c $(TOP)/src/util.c $(TOP)/src/vdbeapi.c $(TOP)/src/vdbeaux.c \
  $(TOP)/src/vdbe.c $(TOP)/src/vdbemem.c $(TOP)/src/where.c parse.c            \
  $(TOP)/ext/fts3/fts3.c $(TOP)/ext/fts3/fts3_expr.c                           \
  $(TOP)/ext/fts3/fts3_tokenizer.c 

# Header files used by all library source files.
#
HDR = \
   $(TOP)/src/btree.h \
   $(TOP)/src/btreeInt.h \
   $(TOP)/src/hash.h \
................................................................................
  $(TOP)/ext/fts1/fts1_tokenizer.h
EXTHDR += \
  $(TOP)/ext/fts2/fts2.h \
  $(TOP)/ext/fts2/fts2_hash.h \
  $(TOP)/ext/fts2/fts2_tokenizer.h
EXTHDR += \
  $(TOP)/ext/fts3/fts3.h \
  $(TOP)/ext/fts3/fts3_expr.h \
  $(TOP)/ext/fts3/fts3_hash.h \
  $(TOP)/ext/fts3/fts3_tokenizer.h
EXTHDR += \
  $(TOP)/ext/rtree/rtree.h
EXTHDR += \
  $(TOP)/ext/icu/sqliteicu.h

................................................................................
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer.c

fts2_tokenizer1.o:	$(TOP)/ext/fts2/fts2_tokenizer1.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer1.c

fts3.o:	$(TOP)/ext/fts3/fts3.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3.c

fts3_expr.o:	$(TOP)/ext/fts3/fts3_expr.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_expr.c

fts3_hash.o:	$(TOP)/ext/fts3/fts3_hash.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_hash.c

fts3_icu.o:	$(TOP)/ext/fts3/fts3_icu.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_icu.c

Changes to src/test1.c.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
....
4926
4927
4928
4929
4930
4931
4932



4933
4934
4935
4936
4937
4938
4939
....
5008
5009
5010
5011
5012
5013
5014




5015
5016
5017
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing all sorts of SQLite interfaces.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.337 2008/12/11 02:56:07 drh Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>

/*
................................................................................
  extern int sqlite3OSTrace;
  extern int sqlite3VdbeAddopTrace;
#endif
#ifdef SQLITE_TEST
  extern int sqlite3_enable_in_opt;
  extern char sqlite3_query_plan[];
  static char *query_plan = sqlite3_query_plan;



#endif

  for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
    Tcl_CreateCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
  }
  for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){
    Tcl_CreateObjCommand(interp, aObjCmd[i].zName, 
................................................................................
  Tcl_LinkVar(interp, "sqlite_sync_count",
      (char*)&sqlite3_sync_count, TCL_LINK_INT);
  Tcl_LinkVar(interp, "sqlite_fullsync_count",
      (char*)&sqlite3_fullsync_count, TCL_LINK_INT);
#ifdef SQLITE_TEST
  Tcl_LinkVar(interp, "sqlite_enable_in_opt",
      (char*)&sqlite3_enable_in_opt, TCL_LINK_INT);




#endif
  return TCL_OK;
}







|







 







>
>
>







 







>
>
>
>



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
....
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
....
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing all sorts of SQLite interfaces.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.338 2008/12/17 15:18:18 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>

/*
................................................................................
  extern int sqlite3OSTrace;
  extern int sqlite3VdbeAddopTrace;
#endif
#ifdef SQLITE_TEST
  extern int sqlite3_enable_in_opt;
  extern char sqlite3_query_plan[];
  static char *query_plan = sqlite3_query_plan;
#ifdef SQLITE_ENABLE_FTS3
  extern int sqlite3_fts3_enable_parentheses;
#endif
#endif

  for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
    Tcl_CreateCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
  }
  for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){
    Tcl_CreateObjCommand(interp, aObjCmd[i].zName, 
................................................................................
  Tcl_LinkVar(interp, "sqlite_sync_count",
      (char*)&sqlite3_sync_count, TCL_LINK_INT);
  Tcl_LinkVar(interp, "sqlite_fullsync_count",
      (char*)&sqlite3_fullsync_count, TCL_LINK_INT);
#ifdef SQLITE_TEST
  Tcl_LinkVar(interp, "sqlite_enable_in_opt",
      (char*)&sqlite3_enable_in_opt, TCL_LINK_INT);
#ifdef SQLITE_ENABLE_FTS3
  Tcl_LinkVar(interp, "sqlite_fts3_enable_parentheses",
      (char*)&sqlite3_fts3_enable_parentheses, TCL_LINK_INT);
#endif
#endif
  return TCL_OK;
}

Added test/fts3expr.test.





































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# 2006 September 9
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS3 module.
#
# $Id: fts3expr.test,v 1.1 2008/12/17 15:18:18 danielk1977 Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts3 {
  finish_test
  return
}

set sqlite_fts3_enable_parentheses 1

proc test_fts3expr {expr} {
  db one {SELECT fts3_exprtest('simple', $expr, 'a', 'b', 'c')}
}
do_test fts3expr-1.0 {
  test_fts3expr "abcd"
} {PHRASE 3 0 abcd}
do_test fts3expr-1.1 {
  test_fts3expr " tag "
} {PHRASE 3 0 tag}

do_test fts3expr-1.2 {
  test_fts3expr "ab AND cd"
} {AND {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.3 {
  test_fts3expr "ab OR cd"
} {OR {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.4 {
  test_fts3expr "ab NOT cd"
} {NOT {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.5 {
  test_fts3expr "ab NEAR cd"
} {NEAR/10 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6 {
  test_fts3expr "ab NEAR/5 cd"
} {NEAR/5 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}

do_test fts3expr-1.7 {
  test_fts3expr {"one two three"}
} {PHRASE 3 0 one two three}
do_test fts3expr-1.8 {
  test_fts3expr {zero "one two three" four}
} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
do_test fts3expr-1.9 {
  test_fts3expr {"one* two three*"}
} {PHRASE 3 0 one+ two three+}

do_test fts3expr-1.10 {
  test_fts3expr {one* two}
} {AND {PHRASE 3 0 one+} {PHRASE 3 0 two}}
do_test fts3expr-1.11 {
  test_fts3expr {one two*}
} {AND {PHRASE 3 0 one} {PHRASE 3 0 two+}}

do_test fts3expr-1.14 {
  test_fts3expr {a:one two}
} {AND {PHRASE 0 0 one} {PHRASE 3 0 two}}
do_test fts3expr-1.15 {
  test_fts3expr {one b:two}
} {AND {PHRASE 3 0 one} {PHRASE 1 0 two}}

proc strip_phrase_data {L} {
  if {[lindex $L 0] eq "PHRASE"} {
    return [lrange $L 3 end]
  }
  return [list \
    [lindex $L 0] \
    [strip_phrase_data [lindex $L 1]] \
    [strip_phrase_data [lindex $L 2]] \
  ]
}
proc test_fts3expr2 {expr} {
  strip_phrase_data [
    db one {SELECT fts3_exprtest('simple', $expr, 'a', 'b', 'c')}
  ]
}
do_test fts3expr-2.1 {
  test_fts3expr2 "ab OR cd AND ef"
} {OR ab {AND cd ef}}
do_test fts3expr-2.2 {
  test_fts3expr2 "cd AND ef OR ab"
} {OR {AND cd ef} ab}
do_test fts3expr-2.3 {
  test_fts3expr2 "ab AND cd AND ef OR gh"
} {OR {AND {AND ab cd} ef} gh}
do_test fts3expr-2.4 {
  test_fts3expr2 "ab AND cd OR ef AND gh"
} {OR {AND ab cd} {AND ef gh}}
do_test fts3expr-2.5 {
  test_fts3expr2 "ab cd"
} {AND ab cd}

do_test fts3expr-3.1 {
  test_fts3expr2 "(ab OR cd) AND ef"
} {AND {OR ab cd} ef}
do_test fts3expr-3.2 {
  test_fts3expr2 "ef AND (ab OR cd)"
} {AND ef {OR ab cd}}
do_test fts3expr-3.3 {
  test_fts3expr2 "(ab OR cd)"
} {OR ab cd}
do_test fts3expr-3.4 {
  test_fts3expr2 "(((ab OR cd)))"
} {OR ab cd}

#------------------------------------------------------------------------
# The following tests, fts3expr-4.*, test the parsers response to syntax
# errors in query expressions. This is done using a real fts3 table and
# MATCH clauses, not the parser test interface.
# 
do_test fts3expr-4.1 {
  execsql { CREATE VIRTUAL TABLE t1 USING fts3(a, b, c) }
} {}

# Mismatched parenthesis:
do_test fts3expr-4.2.1 {
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example AND (hello OR world))' }
} {1 {SQL logic error or missing database}}
do_test fts3expr-4.2.2 {
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example AND (hello OR world' }
} {1 {SQL logic error or missing database}}

# Unterminated quotation marks:
do_test fts3expr-4.3.1 {
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example OR "hello world' }
} {1 {SQL logic error or missing database}}
do_test fts3expr-4.3.2 {
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'example OR hello world"' }
} {1 {SQL logic error or missing database}}

# Binary operators without the required operands.
do_test fts3expr-4.4.1 {
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'OR hello world' }
} {1 {SQL logic error or missing database}}
do_test fts3expr-4.4.2 {
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'hello world OR' }
} {1 {SQL logic error or missing database}}
do_test fts3expr-4.4.3 {
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'one (hello world OR) two' }
} {1 {SQL logic error or missing database}}
do_test fts3expr-4.4.4 {
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'one (OR hello world) two' }
} {1 {SQL logic error or missing database}}

# NEAR operators with something other than phrases as arguments.
do_test fts3expr-4.5.1 {
  catchsql { SELECT * FROM t1 WHERE t1 MATCH '(hello OR world) NEAR one' }
} {1 {SQL logic error or missing database}}
do_test fts3expr-4.5.2 {
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'one NEAR (hello OR world)' }
} {1 {SQL logic error or missing database}}

#------------------------------------------------------------------------
# The following OOM tests are designed to cover cases in fts3_expr.c.
# 
source $testdir/malloc_common.tcl
do_malloc_test fts3expr-malloc-1 -sqlbody {
  SELECT fts3_exprtest('simple', 'a b c "d e f"', 'a', 'b', 'c')
}
do_malloc_test fts3expr-malloc-2 -tclprep {
  set sqlite_fts3_enable_parentheses 0
} -sqlbody {
  SELECT fts3_exprtest('simple', 'a -b', 'a', 'b', 'c')
} -cleanup {
  set sqlite_fts3_enable_parentheses 1
}

#------------------------------------------------------------------------
# The following tests are not very important. They cover error handling
# cases in the test code, which makes test coverage easier to measure.
# 
do_test fts3expr-5.1 {
  catchsql { SELECT fts3_exprtest('simple', 'a b') }
} {1 {Usage: fts3_exprtest(tokenizer, expr, col1, ...}}
do_test fts3expr-5.2 {
  catchsql { SELECT fts3_exprtest('doesnotexist', 'a b', 'c') }
} {1 {No such tokenizer module}}
do_test fts3expr-5.3 {
  catchsql { SELECT fts3_exprtest('simple', 'a b OR', 'c') }
} {1 {Error parsing expression}}

#------------------------------------------------------------------------
# The next set of tests verifies that things actually work as they are
# supposed to when using the new syntax.
# 
do_test fts3expr-6.1 {
  execsql {
    CREATE VIRTUAL TABLE t1 USING fts3(a);
  }
  for {set ii 1} {$ii < 32} {incr ii} {
    set v [list]
    if {$ii & 1}  { lappend v one }
    if {$ii & 2}  { lappend v two }
    if {$ii & 4}  { lappend v three }
    if {$ii & 8}  { lappend v four }
    if {$ii & 16} { lappend v five }
    execsql { INSERT INTO t1 VALUES($v) }
  }

  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'five four one' ORDER BY rowid}
} {25 27 29 31}

foreach {id expr res} {

  2 "five four NOT one" {24 26 28 30}

  3 "five AND four OR one" 
      {1 3 5 7 9 11 13 15 17 19 21 23 24 25 26 27 28 29 30 31}

  4 "five AND (four OR one)" {17 19 21 23 24 25 26 27 28 29 30 31}

  5 "five NOT (four OR one)" {16 18 20 22}

  6 "(five NOT (four OR one)) OR (five AND (four OR one))"
      {16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}

  7 "(five OR one) AND two AND three" {7 15 22 23 30 31}

  8 "five OR one AND two AND three" 
    {7 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}

  9 "five OR one two three" 
    {7 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}

  10 "five OR \"one two three\"" 
    {7 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31}

  11 "one two OR four five NOT three" {3 7 11 15 19 23 24 25 26 27 31}

  12 "(one two OR four five) NOT three" {3 11 19 24 25 26 27}

  13 "((((((one two OR four five)))))) NOT three" {3 11 19 24 25 26 27}

} {
  do_test fts3expr-6.$id {
    execsql { SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid }
  } $res
}

set sqlite_fts3_enable_parentheses 0
finish_test