/ Check-in [0b7e4ab8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Modify the fts5 custom tokenizer interface to permit synonym support. The fts5_api.iVersion value is now set to 2. Existing fts5 custom tokenizers (if there are such things) will need to be updated to use the new api version.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 0b7e4ab8abde3ae32459233df115c433dd58d2c1
User & Date: dan 2015-09-04 10:31:51
Context
2015-09-04
11:13
Enhance showfts5.tcl so that it can optionally display the number of terms in each segment. check-in: d648ddd9 user: dan tags: trunk
10:31
Modify the fts5 custom tokenizer interface to permit synonym support. The fts5_api.iVersion value is now set to 2. Existing fts5 custom tokenizers (if there are such things) will need to be updated to use the new api version. check-in: 0b7e4ab8 user: dan tags: trunk
10:24
Merge latest trunk changes. Closed-Leaf check-in: 443a5eb8 user: dan tags: fts5-incompatible
04:31
Simplification of the LRU list handling in pcache1. check-in: 05a3a2cd user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/extract_api_docs.tcl.

   104    104     set res "<dl>\n"
   105    105     foreach line [split [string trim $docs] "\n"] {
   106    106       regexp {[*][*](.*)} $line -> line
   107    107       if {[regexp {^ ?x.*:} $line]} {
   108    108         append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n"
   109    109         continue
   110    110       }
          111  +    if {[regexp {SYNONYM SUPPORT} $line]} {
          112  +      set line "</dl><h3>Synonym Support</h3>"
          113  +    }
   111    114       if {[string trim $line] == ""} {
   112    115         append res "<p>\n"
   113    116       } else {
   114    117         append res "$line\n"
   115    118       }
   116    119     }
   117         -  append res "</dl>\n"
   118    120   
   119    121     set res
   120    122   }
   121    123   
   122    124   proc get_api_docs {data} {
   123    125     # Initialize global array M as a map from Fts5StructureApi member name
   124    126     # to member definition. i.e.
................................................................................
   204    206     switch $::extract_api_docs_mode {
   205    207       fts5_api {
   206    208         output [get_fts5_struct $data "typedef struct fts5_api" "^\};"]
   207    209       }
   208    210   
   209    211       fts5_tokenizer {
   210    212         output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"]
          213  +      output [get_fts5_struct $data \
          214  +        "Flags that may be passed as the third argument to xTokenize()" \
          215  +        "#define FTS5_TOKEN_COLOCATED"
          216  +      ]
   211    217       }
   212    218   
   213    219       fts5_extension {
   214    220         output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"]
   215    221       }
   216    222   
   217    223       Fts5ExtensionApi {

Changes to ext/fts5/fts5.h.

   213    213     int (*xColumnCount)(Fts5Context*);
   214    214     int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
   215    215     int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
   216    216   
   217    217     int (*xTokenize)(Fts5Context*, 
   218    218       const char *pText, int nText, /* Text to tokenize */
   219    219       void *pCtx,                   /* Context passed to xToken() */
   220         -    int (*xToken)(void*, const char*, int, int, int)       /* Callback */
          220  +    int (*xToken)(void*, int, const char*, int, int, int)       /* Callback */
   221    221     );
   222    222   
   223    223     int (*xPhraseCount)(Fts5Context*);
   224    224     int (*xPhraseSize)(Fts5Context*, int iPhrase);
   225    225   
   226    226     int (*xInstCount)(Fts5Context*, int *pnInst);
   227    227     int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
................................................................................
   274    274   ** xDelete:
   275    275   **   This function is invoked to delete a tokenizer handle previously
   276    276   **   allocated using xCreate(). Fts5 guarantees that this function will
   277    277   **   be invoked exactly once for each successful call to xCreate().
   278    278   **
   279    279   ** xTokenize:
   280    280   **   This function is expected to tokenize the nText byte string indicated 
   281         -**   by argument pText. pText may not be nul-terminated. The first argument
   282         -**   passed to this function is a pointer to an Fts5Tokenizer object returned 
   283         -**   by an earlier call to xCreate().
          281  +**   by argument pText. pText may or may not be nul-terminated. The first
          282  +**   argument passed to this function is a pointer to an Fts5Tokenizer object
          283  +**   returned by an earlier call to xCreate().
          284  +**
          285  +**   The third argument indicates the reason that FTS5 is requesting
          286  +**   tokenization of the supplied text. This is always one of the following
          287  +**   four values:
          288  +**
          289  +**   <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
          290  +**            or removed from the FTS table. The tokenizer is being invoked to
          291  +**            determine the set of tokens to add to (or delete from) the
          292  +**            FTS index.
          293  +**
          294  +**       <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed 
          295  +**            against the FTS index. The tokenizer is being called to tokenize 
          296  +**            a bareword or quoted string specified as part of the query.
          297  +**
          298  +**       <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
          299  +**            FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
          300  +**            followed by a "*" character, indicating that the last token
          301  +**            returned by the tokenizer will be treated as a token prefix.
          302  +**
          303  +**       <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to 
          304  +**            satisfy an fts5_api.xTokenize() request made by an auxiliary
          305  +**            function. Or an fts5_api.xColumnSize() request made by the same
          306  +**            on a columnsize=0 database.  
          307  +**   </ul>
   284    308   **
   285    309   **   For each token in the input string, the supplied callback xToken() must
   286    310   **   be invoked. The first argument to it should be a copy of the pointer
   287         -**   passed as the second argument to xTokenize(). The next two arguments
   288         -**   are a pointer to a buffer containing the token text, and the size of
   289         -**   the token in bytes. The 4th and 5th arguments are the byte offsets of
   290         -**   the first byte of and first byte immediately following the text from 
          311  +**   passed as the second argument to xTokenize(). The third and fourth
          312  +**   arguments are a pointer to a buffer containing the token text, and the
          313  +**   size of the token in bytes. The 5th and 6th arguments are the byte offsets
          314  +**   of the first byte of and first byte immediately following the text from
   291    315   **   which the token is derived within the input.
          316  +**
          317  +**   The second argument passed to the xToken() callback ("tflags") should
          318  +**   normally be set to 0. The exception is if the tokenizer supports 
          319  +**   synonyms. In this case see the discussion below for details.
   292    320   **
   293    321   **   FTS5 assumes the xToken() callback is invoked for each token in the 
   294    322   **   order that they occur within the input text.
   295    323   **
   296    324   **   If an xToken() callback returns any value other than SQLITE_OK, then
   297    325   **   the tokenization should be abandoned and the xTokenize() method should
   298    326   **   immediately return a copy of the xToken() return value. Or, if the
   299    327   **   input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
   300    328   **   if an error occurs with the xTokenize() implementation itself, it
   301    329   **   may abandon the tokenization and return any error code other than
   302    330   **   SQLITE_OK or SQLITE_DONE.
   303    331   **
          332  +** SYNONYM SUPPORT
          333  +**
          334  +**   Custom tokenizers may also support synonyms. Consider a case in which a
          335  +**   user wishes to query for a phrase such as "first place". Using the 
          336  +**   built-in tokenizers, the FTS5 query 'first + place' will match instances
          337  +**   of "first place" within the document set, but not alternative forms
          338  +**   such as "1st place". In some applications, it would be better to match
          339  +**   all instances of "first place" or "1st place" regardless of which form
          340  +**   the user specified in the MATCH query text.
          341  +**
          342  +**   There are several ways to approach this in FTS5:
          343  +**
          344  +**   <ol><li> By mapping all synonyms to a single token. In this case, using
          345  +**            the above example, this means that the tokenizer returns the
          346  +**            same token for inputs "first" and "1st". Say that token is in
          347  +**            fact "first", so that when the user inserts the document "I won
          348  +**            1st place" entries are added to the index for tokens "i", "won",
          349  +**            "first" and "place". If the user then queries for '1st + place',
          350  +**            the tokenizer substitutes "first" for "1st" and the query works
          351  +**            as expected.
          352  +**
          353  +**       <li> By querying the FTS index for all synonyms of each query term.
          354  +**            In this case, when tokenizing query text, the tokenizer may 
          355  +**            provide multiple synonyms for a single term within the document.
          356  +**            FTS5 then queries the index for each synonym individually. For
          357  +**            example, faced with the query:
          358  +**
          359  +**   <codeblock>
          360  +**     ... MATCH 'first place'</codeblock>
          361  +**
          362  +**            the tokenizer offers both "1st" and "first" as synonyms for the
          363  +**            first token in the MATCH query and FTS5 effectively runs a query 
          364  +**            similar to:
          365  +**
          366  +**   <codeblock>
          367  +**     ... MATCH '(first OR 1st) place'</codeblock>
          368  +**
          369  +**            except that, for the purposes of auxiliary functions, the query
          370  +**            still appears to contain just two phrases - "(first OR 1st)" 
          371  +**            being treated as a single phrase.
          372  +**
          373  +**       <li> By adding multiple synonyms for a single term to the FTS index.
          374  +**            Using this method, when tokenizing document text, the tokenizer
          375  +**            provides multiple synonyms for each token. So that when a 
          376  +**            document such as "I won first place" is tokenized, entries are
          377  +**            added to the FTS index for "i", "won", "first", "1st" and
          378  +**            "place".
          379  +**
          380  +**            This way, even if the tokenizer does not provide synonyms
          381  +**            when tokenizing query text (it should not - to do so would be
          382  +**            inefficient), it doesn't matter if the user queries for 
          383  +**            'first + place' or '1st + place', as there are entries in the
          384  +**            FTS index corresponding to both forms of the first token.
          385  +**   </ol>
          386  +**
          387  +**   Whether it is parsing document or query text, any call to xToken that
          388  +**   specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit set
          389  +**   is considered to supply a synonym for the previous token. For example,
          390  +**   when parsing the document "I won first place", a tokenizer that supports
          391  +**   synonyms would call xToken() 5 times, as follows:
          392  +**
          393  +**   <codeblock>
          394  +**       xToken(pCtx, 0, "i",                      1,  0,  1);
          395  +**       xToken(pCtx, 0, "won",                    3,  2,  5);
          396  +**       xToken(pCtx, 0, "first",                  5,  6, 11);
          397  +**       xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3,  6, 11);
          398  +**       xToken(pCtx, 0, "place",                  5, 12, 17);
          399  +**</codeblock>
          400  +**
          401  +**   It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
          402  +**   xToken() is called. Multiple synonyms may be specified for a single token
          403  +**   by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. 
          404  +**   There is no limit to the number of synonyms that may be provided for a
          405  +**   single token.
          406  +**
          407  +**   In many cases, method (1) above is the best approach. It does not add 
          408  +**   extra data to the FTS index or require FTS5 to query for multiple terms,
          409  +**   so it is efficient in terms of disk space and query speed. However, it
          410  +**   does not support prefix queries very well. If, as suggested above, the
          411  +**   token "first" is substituted for "1st" by the tokenizer, then the query:
          412  +**
          413  +**   <codeblock>
          414  +**     ... MATCH '1s*'</codeblock>
          415  +**
          416  +**   will not match documents that contain the token "1st" (as the tokenizer
          417  +**   will probably not map "1s" to any prefix of "first").
          418  +**
          419  +**   For full prefix support, method (3) may be preferred. In this case, 
          420  +**   because the index contains entries for both "first" and "1st", prefix
          421  +**   queries such as 'fi*' or '1s*' will match correctly. However, because
          422  +**   extra entries are added to the FTS index, this method uses more space
          423  +**   within the database.
          424  +**
          425  +**   Method (2) offers a midpoint between (1) and (3). Using this method,
          426  +**   a query such as '1s*' will match documents that contain the literal 
          427  +**   token "1st", but not "first" (assuming the tokenizer is not able to
          428  +**   provide synonyms for prefixes). However, a non-prefix query like '1st'
          429  +**   will match against "1st" and "first". This method does not require
          430  +**   extra disk space, as no extra entries are added to the FTS index. 
          431  +**   On the other hand, it may require more CPU cycles to run MATCH queries,
          432  +**   as separate queries of the FTS index are required for each synonym.
          433  +**
          434  +**   When using methods (2) or (3), it is important that the tokenizer only
          435  +**   provide synonyms when tokenizing document text (method (3)) or query
          436  +**   text (method (2)), not both. Doing so will not cause any errors, but is
          437  +**   inefficient.
   304    438   */
   305    439   typedef struct Fts5Tokenizer Fts5Tokenizer;
   306    440   typedef struct fts5_tokenizer fts5_tokenizer;
   307    441   struct fts5_tokenizer {
   308    442     int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
   309    443     void (*xDelete)(Fts5Tokenizer*);
   310    444     int (*xTokenize)(Fts5Tokenizer*, 
   311    445         void *pCtx,
          446  +      int flags,            /* Mask of FTS5_TOKENIZE_* flags */
   312    447         const char *pText, int nText, 
   313    448         int (*xToken)(
   314    449           void *pCtx,         /* Copy of 2nd argument to xTokenize() */
          450  +        int tflags,         /* Mask of FTS5_TOKEN_* flags */
   315    451           const char *pToken, /* Pointer to buffer containing token */
   316    452           int nToken,         /* Size of token in bytes */
   317    453           int iStart,         /* Byte offset of token within input text */
   318    454           int iEnd            /* Byte offset of end of token within input text */
   319    455         )
   320    456     );
   321    457   };
   322    458   
          459  +/* Flags that may be passed as the third argument to xTokenize() */
          460  +#define FTS5_TOKENIZE_QUERY     0x0001
          461  +#define FTS5_TOKENIZE_PREFIX    0x0002
          462  +#define FTS5_TOKENIZE_DOCUMENT  0x0004
          463  +#define FTS5_TOKENIZE_AUX       0x0008
          464  +
          465  +/* Flags that may be passed by the tokenizer implementation back to FTS5
          466  +** as the second argument to the supplied xToken callback. */
          467  +#define FTS5_TOKEN_COLOCATED    0x0001      /* Same position as prev. token */
          468  +
   323    469   /*
   324    470   ** END OF CUSTOM TOKENIZERS
   325    471   *************************************************************************/
   326    472   
   327    473   /*************************************************************************
   328    474   ** FTS5 EXTENSION REGISTRATION API
   329    475   */
   330    476   typedef struct fts5_api fts5_api;
   331    477   struct fts5_api {
   332         -  int iVersion;                   /* Currently always set to 1 */
          478  +  int iVersion;                   /* Currently always set to 2 */
   333    479   
   334    480     /* Create a new tokenizer */
   335    481     int (*xCreateTokenizer)(
   336    482       fts5_api *pApi,
   337    483       const char *zName,
   338    484       void *pContext,
   339    485       fts5_tokenizer *pTokenizer,

Changes to ext/fts5/fts5Int.h.

   162    162   );
   163    163   void sqlite3Fts5ConfigFree(Fts5Config*);
   164    164   
   165    165   int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);
   166    166   
   167    167   int sqlite3Fts5Tokenize(
   168    168     Fts5Config *pConfig,            /* FTS5 Configuration object */
          169  +  int flags,                      /* FTS5_TOKENIZE_* flags */
   169    170     const char *pText, int nText,   /* Text to tokenize */
   170    171     void *pCtx,                     /* Context passed to xToken() */
   171         -  int (*xToken)(void*, const char*, int, int, int)    /* Callback */
          172  +  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
   172    173   );
   173    174   
   174    175   void sqlite3Fts5Dequote(char *z);
   175    176   
   176    177   /* Load the contents of the %_config table */
   177    178   int sqlite3Fts5ConfigLoad(Fts5Config*, int);
   178    179   
................................................................................
   230    231   struct Fts5PoslistReader {
   231    232     /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
   232    233     int iCol;                       /* If (iCol>=0), this column only */
   233    234     const u8 *a;                    /* Position list to iterate through */
   234    235     int n;                          /* Size of buffer at a[] in bytes */
   235    236     int i;                          /* Current offset in a[] */
   236    237   
          238  +  u8 bFlag;                       /* For client use (any custom purpose) */
          239  +
   237    240     /* Output variables */
   238         -  int bEof;                       /* Set to true at EOF */
          241  +  u8 bEof;                        /* Set to true at EOF */
   239    242     i64 iPos;                       /* (iCol<<32) + iPos */
   240    243   };
   241    244   int sqlite3Fts5PoslistReaderInit(
   242    245     int iCol,                       /* If (iCol>=0), this column only */
   243    246     const u8 *a, int n,             /* Poslist buffer to iterate through */
   244    247     Fts5PoslistReader *pIter        /* Iterator object to initialize */
   245    248   );
................................................................................
   377    380   /*
   378    381   ** Retrieve and clear the current error code, respectively.
   379    382   */
   380    383   int sqlite3Fts5IndexErrcode(Fts5Index*);
   381    384   void sqlite3Fts5IndexReset(Fts5Index*);
   382    385   
   383    386   /*
   384         -** Get or set the "averages" record.
          387  +** Get or set the "averages" values.
   385    388   */
   386         -int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf);
          389  +int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
   387    390   int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);
   388    391   
   389    392   /*
   390    393   ** Functions called by the storage module as part of integrity-check.
   391    394   */
   392    395   u64 sqlite3Fts5IndexCksum(Fts5Config*,i64,int,int,const char*,int);
   393    396   int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum);
................................................................................
   592    595   /* Called during startup to register a UDF with SQLite */
   593    596   int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);
   594    597   
   595    598   int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
   596    599   int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
   597    600   int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);
   598    601   
   599         -int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**);
          602  +int sqlite3Fts5ExprClonePhrase(Fts5Config*, Fts5Expr*, int, Fts5Expr**);
   600    603   
   601    604   /*******************************************
   602    605   ** The fts5_expr.c API above this point is used by the other hand-written
   603    606   ** C code in this module. The interfaces below this point are called by
   604    607   ** the parser code in fts5parse.y.  */
   605    608   
   606    609   void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);

Changes to ext/fts5/fts5_aux.c.

   144    144   }
   145    145   
   146    146   /*
   147    147   ** Tokenizer callback used by implementation of highlight() function.
   148    148   */
   149    149   static int fts5HighlightCb(
   150    150     void *pContext,                 /* Pointer to HighlightContext object */
          151  +  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
   151    152     const char *pToken,             /* Buffer containing token */
   152    153     int nToken,                     /* Size of token in bytes */
   153    154     int iStartOff,                  /* Start offset of token */
   154    155     int iEndOff                     /* End offset of token */
   155    156   ){
   156    157     HighlightContext *p = (HighlightContext*)pContext;
   157    158     int rc = SQLITE_OK;
   158         -  int iPos = p->iPos++;
          159  +  int iPos;
          160  +
          161  +  if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
          162  +  iPos = p->iPos++;
   159    163   
   160    164     if( p->iRangeEnd>0 ){
   161    165       if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
   162    166       if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
   163    167     }
   164    168   
   165    169     if( iPos==p->iter.iStart ){

Changes to ext/fts5/fts5_config.c.

   641    641   ** the callback returned SQLITE_DONE, this is not an error and this function
   642    642   ** still returns SQLITE_OK. Or, if the tokenization was abandoned early
   643    643   ** because the callback returned another non-zero value, it is assumed
   644    644   ** to be an SQLite error code and returned to the caller.
   645    645   */
   646    646   int sqlite3Fts5Tokenize(
   647    647     Fts5Config *pConfig,            /* FTS5 Configuration object */
          648  +  int flags,                      /* FTS5_TOKENIZE_* flags */
   648    649     const char *pText, int nText,   /* Text to tokenize */
   649    650     void *pCtx,                     /* Context passed to xToken() */
   650         -  int (*xToken)(void*, const char*, int, int, int)    /* Callback */
          651  +  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
   651    652   ){
   652    653     if( pText==0 ) return SQLITE_OK;
   653         -  return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken);
          654  +  return pConfig->pTokApi->xTokenize(
          655  +      pConfig->pTok, pCtx, flags, pText, nText, xToken
          656  +  );
   654    657   }
   655    658   
   656    659   /*
   657    660   ** Argument pIn points to the first character in what is expected to be
   658    661   ** a comma-separated list of SQL literals followed by a ')' character.
   659    662   ** If it actually is this, return a pointer to the ')'. Otherwise, return
   660    663   ** NULL to indicate a parse error.

Changes to ext/fts5/fts5_expr.c.

    18     18   #include "fts5parse.h"
    19     19   
    20     20   /*
    21     21   ** All token types in the generated fts5parse.h file are greater than 0.
    22     22   */
    23     23   #define FTS5_EOF 0
    24     24   
           25  +#define FTS5_LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))
           26  +
    25     27   typedef struct Fts5ExprTerm Fts5ExprTerm;
    26     28   
    27     29   /*
    28     30   ** Functions generated by lemon from fts5parse.y.
    29     31   */
    30     32   void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
    31     33   void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
................................................................................
    69     71   ** An instance of the following structure represents a single search term
    70     72   ** or term prefix.
    71     73   */
    72     74   struct Fts5ExprTerm {
    73     75     int bPrefix;                    /* True for a prefix term */
    74     76     char *zTerm;                    /* nul-terminated term */
    75     77     Fts5IndexIter *pIter;           /* Iterator for this term */
           78  +  Fts5ExprTerm *pSynonym;         /* Pointer to first in list of synonyms */
    76     79   };
    77     80   
    78     81   /*
    79     82   ** A phrase. One or more terms that must appear in a contiguous sequence
    80     83   ** within a document for it to match.
    81     84   */
    82     85   struct Fts5ExprPhrase {
................................................................................
   177    180         }
   178    181         pToken->n = (z2 - z);
   179    182         break;
   180    183       }
   181    184   
   182    185       default: {
   183    186         const char *z2;
          187  +      if( sqlite3Fts5IsBareword(z[0])==0 ){
          188  +        sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
          189  +        return FTS5_EOF;
          190  +      }
   184    191         tok = FTS5_STRING;
   185    192         for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
   186    193         pToken->n = (z2 - z);
   187    194         if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 )  tok = FTS5_OR;
   188    195         if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
   189    196         if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
   190    197         break;
................................................................................
   240    247     }
   241    248   
   242    249     sqlite3_free(sParse.apPhrase);
   243    250     *pzErr = sParse.zErr;
   244    251     return sParse.rc;
   245    252   }
   246    253   
   247         -/*
   248         -** Create a new FTS5 expression by cloning phrase iPhrase of the
   249         -** expression passed as the second argument.
   250         -*/
   251         -int sqlite3Fts5ExprPhraseExpr(
   252         -  Fts5Config *pConfig,
   253         -  Fts5Expr *pExpr, 
   254         -  int iPhrase, 
   255         -  Fts5Expr **ppNew
   256         -){
   257         -  int rc = SQLITE_OK;             /* Return code */
   258         -  Fts5ExprPhrase *pOrig;          /* The phrase extracted from pExpr */
   259         -  Fts5ExprPhrase *pCopy;          /* Copy of pOrig */
   260         -  Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */
   261         -
   262         -  pOrig = pExpr->apExprPhrase[iPhrase];
   263         -  pCopy = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(&rc, 
   264         -      sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm
   265         -  );
   266         -  if( pCopy ){
   267         -    int i;                          /* Used to iterate through phrase terms */
   268         -    Fts5ExprPhrase **apPhrase;
   269         -    Fts5ExprNode *pNode;
   270         -    Fts5ExprNearset *pNear;
   271         -
   272         -    pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
   273         -    apPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 
   274         -        sizeof(Fts5ExprPhrase*)
   275         -    );
   276         -    pNode = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode));
   277         -    pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, 
   278         -        sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)
   279         -    );
   280         -
   281         -    for(i=0; i<pOrig->nTerm; i++){
   282         -      pCopy->aTerm[i].zTerm = sqlite3Fts5Strndup(&rc, pOrig->aTerm[i].zTerm,-1);
   283         -      pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
   284         -    }
   285         -
   286         -    if( rc==SQLITE_OK ){
   287         -      /* All the allocations succeeded. Put the expression object together. */
   288         -      pNew->pIndex = pExpr->pIndex;
   289         -      pNew->pRoot = pNode;
   290         -      pNew->nPhrase = 1;
   291         -      pNew->apExprPhrase = apPhrase;
   292         -      pNew->apExprPhrase[0] = pCopy;
   293         -
   294         -      pNode->eType = (pOrig->nTerm==1 ? FTS5_TERM : FTS5_STRING);
   295         -      pNode->pNear = pNear;
   296         -
   297         -      pNear->nPhrase = 1;
   298         -      pNear->apPhrase[0] = pCopy;
   299         -
   300         -      pCopy->nTerm = pOrig->nTerm;
   301         -      pCopy->pNode = pNode;
   302         -    }else{
   303         -      /* At least one allocation failed. Free them all. */
   304         -      for(i=0; i<pOrig->nTerm; i++){
   305         -        sqlite3_free(pCopy->aTerm[i].zTerm);
   306         -      }
   307         -      sqlite3_free(pCopy);
   308         -      sqlite3_free(pNear);
   309         -      sqlite3_free(pNode);
   310         -      sqlite3_free(apPhrase);
   311         -      sqlite3_free(pNew);
   312         -      pNew = 0;
   313         -    }
   314         -  }
   315         -
   316         -  *ppNew = pNew;
   317         -  return rc;
   318         -}
   319         -
   320    254   /*
   321    255   ** Free the expression node object passed as the only argument.
   322    256   */
   323    257   void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
   324    258     if( p ){
   325    259       int i;
   326    260       for(i=0; i<p->nChild; i++){
................................................................................
   345    279   static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
   346    280     int i;
   347    281     for(i=0; i<pColset->nCol; i++){
   348    282       if( pColset->aiCol[i]==iCol ) return 1;
   349    283     }
   350    284     return 0;
   351    285   }
          286  +
          287  +/*
          288  +** Argument pTerm must be a synonym iterator. Return the current rowid
          289  +** that it points to.
          290  +*/
          291  +static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
          292  +  i64 iRet = 0;
          293  +  int bRetValid = 0;
          294  +  Fts5ExprTerm *p;
          295  +
          296  +  assert( pTerm->pSynonym );
          297  +  assert( bDesc==0 || bDesc==1 );
          298  +  for(p=pTerm; p; p=p->pSynonym){
          299  +    if( 0==sqlite3Fts5IterEof(p->pIter) ){
          300  +      i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
          301  +      if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
          302  +        iRet = iRowid;
          303  +        bRetValid = 1;
          304  +      }
          305  +    }
          306  +  }
          307  +
          308  +  if( pbEof && bRetValid==0 ) *pbEof = 1;
          309  +  return iRet;
          310  +}
          311  +
          312  +/*
          313  +** Argument pTerm must be a synonym iterator.
          314  +*/
          315  +static int fts5ExprSynonymPoslist(
          316  +  Fts5ExprTerm *pTerm, 
          317  +  i64 iRowid,
          318  +  int *pbDel,                     /* OUT: Caller should sqlite3_free(*pa) */
          319  +  u8 **pa, int *pn
          320  +){
          321  +  Fts5PoslistWriter writer = {0};
          322  +  Fts5PoslistReader aStatic[4];
          323  +  Fts5PoslistReader *aIter = aStatic;
          324  +  int nIter = 0;
          325  +  int nAlloc = 4;
          326  +  int rc = SQLITE_OK;
          327  +  Fts5ExprTerm *p;
          328  +
          329  +  assert( pTerm->pSynonym );
          330  +  for(p=pTerm; p; p=p->pSynonym){
          331  +    Fts5IndexIter *pIter = p->pIter;
          332  +    if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
          333  +      const u8 *a;
          334  +      int n;
          335  +      i64 dummy;
          336  +      rc = sqlite3Fts5IterPoslist(pIter, &a, &n, &dummy);
          337  +      if( rc!=SQLITE_OK ) goto synonym_poslist_out;
          338  +      if( nIter==nAlloc ){
          339  +        int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
          340  +        Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
          341  +        if( aNew==0 ){
          342  +          rc = SQLITE_NOMEM;
          343  +          goto synonym_poslist_out;
          344  +        }
          345  +        memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
          346  +        nAlloc = nAlloc*2;
          347  +        if( aIter!=aStatic ) sqlite3_free(aIter);
          348  +        aIter = aNew;
          349  +      }
          350  +      sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[nIter]);
          351  +      assert( aIter[nIter].bEof==0 );
          352  +      nIter++;
          353  +    }
          354  +  }
          355  +
          356  +  assert( *pbDel==0 );
          357  +  if( nIter==1 ){
          358  +    *pa = (u8*)aIter[0].a;
          359  +    *pn = aIter[0].n;
          360  +  }else{
          361  +    Fts5PoslistWriter writer = {0};
          362  +    Fts5Buffer buf = {0,0,0};
          363  +    i64 iPrev = -1;
          364  +    while( 1 ){
          365  +      int i;
          366  +      i64 iMin = FTS5_LARGEST_INT64;
          367  +      for(i=0; i<nIter; i++){
          368  +        if( aIter[i].bEof==0 ){
          369  +          if( aIter[i].iPos==iPrev ){
          370  +            if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
          371  +          }
          372  +          if( aIter[i].iPos<iMin ){
          373  +            iMin = aIter[i].iPos;
          374  +          }
          375  +        }
          376  +      }
          377  +      if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
          378  +      rc = sqlite3Fts5PoslistWriterAppend(&buf, &writer, iMin);
          379  +      iPrev = iMin;
          380  +    }
          381  +    if( rc ){
          382  +      sqlite3_free(buf.p);
          383  +    }else{
          384  +      *pa = buf.p;
          385  +      *pn = buf.n;
          386  +      *pbDel = 1;
          387  +    }
          388  +  }
          389  +
          390  + synonym_poslist_out:
          391  +  if( aIter!=aStatic ) sqlite3_free(aIter);
          392  +  return rc;
          393  +}
          394  +
   352    395   
   353    396   /*
   354    397   ** All individual term iterators in pPhrase are guaranteed to be valid and
   355    398   ** pointing to the same rowid when this function is called. This function 
   356    399   ** checks if the current rowid really is a match, and if so populates
   357    400   ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
   358    401   ** is set to true if this is really a match, or false otherwise.
   359    402   **
   360    403   ** SQLITE_OK is returned if no error occurs, or an SQLite error code 
   361    404   ** otherwise. It is not considered an error if the current rowid is 
   362    405   ** not a match.
   363    406   */
   364    407   static int fts5ExprPhraseIsMatch(
   365         -  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
          408  +  Fts5ExprNode *pNode,            /* Node pPhrase belongs to */
   366    409     Fts5ExprColset *pColset,        /* Restrict matches to these columns */
   367    410     Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
   368    411     int *pbMatch                    /* OUT: Set to true if really a match */
   369    412   ){
   370    413     Fts5PoslistWriter writer = {0};
   371    414     Fts5PoslistReader aStatic[4];
   372    415     Fts5PoslistReader *aIter = aStatic;
................................................................................
   384    427     /* If the aStatic[] array is not large enough, allocate a large array
   385    428     ** using sqlite3_malloc(). This approach could be improved upon. */
   386    429     if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){
   387    430       int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
   388    431       aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
   389    432       if( !aIter ) return SQLITE_NOMEM;
   390    433     }
          434  +  memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
   391    435   
   392    436     /* Initialize a term iterator for each term in the phrase */
   393    437     for(i=0; i<pPhrase->nTerm; i++){
          438  +    Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
   394    439       i64 dummy;
   395         -    int n;
   396         -    const u8 *a;
   397         -    rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy);
   398         -    if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
   399         -      goto ismatch_out;
          440  +    int n = 0;
          441  +    int bFlag = 0;
          442  +    const u8 *a = 0;
          443  +    if( pTerm->pSynonym ){
          444  +      rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, &bFlag, (u8**)&a, &n);
          445  +    }else{
          446  +      rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy);
   400    447       }
          448  +    if( rc!=SQLITE_OK ) goto ismatch_out;
          449  +    sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]);
          450  +    aIter[i].bFlag = bFlag;
          451  +    if( aIter[i].bEof ) goto ismatch_out;
   401    452     }
   402    453   
   403    454     while( 1 ){
   404    455       int bMatch;
   405    456       i64 iPos = aIter[0].iPos;
   406    457       do {
   407    458         bMatch = 1;
................................................................................
   427    478       for(i=0; i<pPhrase->nTerm; i++){
   428    479         if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
   429    480       }
   430    481     }
   431    482   
   432    483    ismatch_out:
   433    484     *pbMatch = (pPhrase->poslist.n>0);
          485  +  for(i=0; i<pPhrase->nTerm; i++){
          486  +    if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
          487  +  }
   434    488     if( aIter!=aStatic ) sqlite3_free(aIter);
   435    489     return rc;
   436    490   }
   437    491   
   438    492   typedef struct Fts5LookaheadReader Fts5LookaheadReader;
   439    493   struct Fts5LookaheadReader {
   440    494     const u8 *a;                    /* Buffer containing position list */
................................................................................
   594    648   */
   595    649   static int fts5ExprNearAdvanceFirst(
   596    650     Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
   597    651     Fts5ExprNode *pNode,            /* FTS5_STRING or FTS5_TERM node */
   598    652     int bFromValid,
   599    653     i64 iFrom 
   600    654   ){
   601         -  Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
          655  +  Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
   602    656     int rc;
   603    657   
   604         -  assert( Fts5NodeIsString(pNode) );
   605         -  if( bFromValid ){
   606         -    rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
          658  +  if( pTerm->pSynonym ){
          659  +    int bEof = 1;
          660  +    Fts5ExprTerm *p;
          661  +
          662  +    /* Find the firstest rowid any synonym points to. */
          663  +    i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);
          664  +
          665  +    /* Advance each iterator that currently points to iRowid. Or, if iFrom
          666  +    ** is valid - each iterator that points to a rowid before iFrom.  */
          667  +    for(p=pTerm; p; p=p->pSynonym){
          668  +      if( sqlite3Fts5IterEof(p->pIter)==0 ){
          669  +        i64 ii = sqlite3Fts5IterRowid(p->pIter);
          670  +        if( ii==iRowid 
          671  +         || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) 
          672  +        ){
          673  +          if( bFromValid ){
          674  +            rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
          675  +          }else{
          676  +            rc = sqlite3Fts5IterNext(p->pIter);
          677  +          }
          678  +          if( rc!=SQLITE_OK ) break;
          679  +          if( sqlite3Fts5IterEof(p->pIter)==0 ){
          680  +            bEof = 0;
          681  +          }
          682  +        }else{
          683  +          bEof = 0;
          684  +        }
          685  +      }
          686  +    }
          687  +
          688  +    /* Set the EOF flag if either all synonym iterators are at EOF or an
          689  +    ** error has occurred.  */
          690  +    pNode->bEof = (rc || bEof);
   607    691     }else{
   608         -    rc = sqlite3Fts5IterNext(pIter);
          692  +    Fts5IndexIter *pIter = pTerm->pIter;
          693  +
          694  +    assert( Fts5NodeIsString(pNode) );
          695  +    if( bFromValid ){
          696  +      rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
          697  +    }else{
          698  +      rc = sqlite3Fts5IterNext(pIter);
          699  +    }
          700  +
          701  +    pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
   609    702     }
   610    703   
   611         -  pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
   612    704     return rc;
   613    705   }
   614    706   
   615    707   /*
   616    708   ** Advance iterator pIter until it points to a value equal to or laster
   617    709   ** than the initial value of *piLast. If this means the iterator points
   618    710   ** to a value laster than *piLast, update *piLast to the new lastest value.
................................................................................
   642    734       iRowid = sqlite3Fts5IterRowid(pIter);
   643    735       assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) );
   644    736     }
   645    737     *piLast = iRowid;
   646    738   
   647    739     return 0;
   648    740   }
          741  +
          742  +static int fts5ExprSynonymAdvanceto(
          743  +  Fts5ExprTerm *pTerm,            /* Term iterator to advance */
          744  +  int bDesc,                      /* True if iterator is "rowid DESC" */
          745  +  i64 *piLast,                    /* IN/OUT: Lastest rowid seen so far */
          746  +  int *pRc                        /* OUT: Error code */
          747  +){
          748  +  int rc = SQLITE_OK;
          749  +  i64 iLast = *piLast;
          750  +  Fts5ExprTerm *p;
          751  +  int bEof = 0;
          752  +
          753  +  for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){
          754  +    if( sqlite3Fts5IterEof(p->pIter)==0 ){
          755  +      i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
          756  +      if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
          757  +        rc = sqlite3Fts5IterNextFrom(p->pIter, iLast);
          758  +      }
          759  +    }
          760  +  }
          761  +
          762  +  if( rc!=SQLITE_OK ){
          763  +    *pRc = rc;
          764  +    bEof = 1;
          765  +  }else{
          766  +    *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
          767  +  }
          768  +  return bEof;
          769  +}
   649    770   
   650    771   /*
   651    772   ** IN/OUT parameter (*pa) points to a position list n bytes in size. If
   652    773   ** the position list contains entries for column iCol, then (*pa) is set
   653    774   ** to point to the sub-position-list for that column and the number of
   654    775   ** bytes in it returned. Or, if the argument position list does not
   655    776   ** contain any entries for column iCol, return 0.
................................................................................
   713    834     int i;
   714    835   
   715    836     /* Check that each phrase in the nearset matches the current row.
   716    837     ** Populate the pPhrase->poslist buffers at the same time. If any
   717    838     ** phrase is not a match, break out of the loop early.  */
   718    839     for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
   719    840       Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
   720         -    if( pPhrase->nTerm>1 || pNear->pColset ){
          841  +    if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){
   721    842         int bMatch = 0;
   722         -      rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
          843  +      rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch);
   723    844         if( bMatch==0 ) break;
   724    845       }else{
   725    846         rc = sqlite3Fts5IterPoslistBuffer(
   726    847             pPhrase->aTerm[0].pIter, &pPhrase->poslist
   727    848         );
   728    849       }
   729    850     }
................................................................................
   751    872     Fts5ExprColset *pColset = pNear->pColset;
   752    873     const u8 *pPos;
   753    874     int nPos;
   754    875     int rc;
   755    876   
   756    877     assert( pNode->eType==FTS5_TERM );
   757    878     assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );
          879  +  assert( pPhrase->aTerm[0].pSynonym==0 );
   758    880   
   759    881     rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);
   760    882   
   761    883     /* If the term may match any column, then this must be a match. 
   762    884     ** Return immediately in this case. Otherwise, try to find the
   763    885     ** part of the poslist that corresponds to the required column.
   764    886     ** If it can be found, return. If it cannot, the next iteration
................................................................................
   797    919   ){
   798    920     Fts5ExprNearset *pNear = pNode->pNear;
   799    921     Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
   800    922     int rc = SQLITE_OK;
   801    923     i64 iLast;                      /* Lastest rowid any iterator points to */
   802    924     int i, j;                       /* Phrase and token index, respectively */
   803    925     int bMatch;                     /* True if all terms are at the same rowid */
          926  +  const int bDesc = pExpr->bDesc;
   804    927   
   805         -  assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 );
          928  +  /* Check that this node should not be FTS5_TERM */
          929  +  assert( pNear->nPhrase>1 
          930  +       || pNear->apPhrase[0]->nTerm>1 
          931  +       || pNear->apPhrase[0]->aTerm[0].pSynonym
          932  +  );
   806    933   
   807    934     /* Initialize iLast, the "lastest" rowid any iterator points to. If the
   808    935     ** iterator skips through rowids in the default ascending order, this means
   809    936     ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
   810    937     ** means the minimum rowid.  */
   811         -  iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);
          938  +  if( pLeft->aTerm[0].pSynonym ){
          939  +    iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
          940  +  }else{
          941  +    iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);
          942  +  }
   812    943   
   813    944     do {
   814    945       bMatch = 1;
   815    946       for(i=0; i<pNear->nPhrase; i++){
   816    947         Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
   817    948         for(j=0; j<pPhrase->nTerm; j++){
   818         -        Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
   819         -        i64 iRowid = sqlite3Fts5IterRowid(pIter);
   820         -        if( iRowid!=iLast ) bMatch = 0;
   821         -        if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){
   822         -          return rc;
          949  +        Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
          950  +        if( pTerm->pSynonym ){
          951  +          Fts5ExprTerm *p;
          952  +          int bEof = 1;
          953  +          i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
          954  +          if( iRowid==iLast ) continue;
          955  +          bMatch = 0;
          956  +          if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
          957  +            pNode->bEof = 1;
          958  +            return rc;
          959  +          }
          960  +        }else{
          961  +          Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
          962  +          i64 iRowid = sqlite3Fts5IterRowid(pIter);
          963  +          if( iRowid==iLast ) continue;
          964  +          bMatch = 0;
          965  +          if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
          966  +            return rc;
          967  +          }
   823    968           }
   824    969         }
   825    970       }
   826    971     }while( bMatch==0 );
   827    972   
   828         -  pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
   829    973     pNode->iRowid = iLast;
          974  +  pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
   830    975   
   831    976     return rc;
   832    977   }
   833    978   
   834    979   /*
   835    980   ** Initialize all term iterators in the pNear object. If any term is found
   836         -** to match no documents at all, set *pbEof to true and return immediately,
   837         -** without initializing any further iterators.
          981  +** to match no documents at all, return immediately without initializing any
          982  +** further iterators.
   838    983   */
   839    984   static int fts5ExprNearInitAll(
   840    985     Fts5Expr *pExpr,
   841    986     Fts5ExprNode *pNode
   842    987   ){
   843    988     Fts5ExprNearset *pNear = pNode->pNear;
   844         -  Fts5ExprTerm *pTerm;
   845         -  Fts5ExprPhrase *pPhrase;
   846    989     int i, j;
   847    990     int rc = SQLITE_OK;
   848    991   
   849    992     for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
   850         -    pPhrase = pNear->apPhrase[i];
          993  +    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
   851    994       for(j=0; j<pPhrase->nTerm; j++){
   852         -      pTerm = &pPhrase->aTerm[j];
   853         -      if( pTerm->pIter ){
   854         -        sqlite3Fts5IterClose(pTerm->pIter);
   855         -        pTerm->pIter = 0;
          995  +      Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
          996  +      Fts5ExprTerm *p;
          997  +      int bEof = 1;
          998  +
          999  +      for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){
         1000  +        if( p->pIter ){
         1001  +          sqlite3Fts5IterClose(p->pIter);
         1002  +          p->pIter = 0;
         1003  +        }
         1004  +        rc = sqlite3Fts5IndexQuery(
         1005  +            pExpr->pIndex, p->zTerm, strlen(p->zTerm),
         1006  +            (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
         1007  +            (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
         1008  +            &p->pIter
         1009  +        );
         1010  +        assert( rc==SQLITE_OK || p->pIter==0 );
         1011  +        if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){
         1012  +          bEof = 0;
         1013  +        }
   856   1014         }
   857         -      rc = sqlite3Fts5IndexQuery(
   858         -          pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm),
   859         -          (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
   860         -          (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
   861         -          &pTerm->pIter
   862         -      );
   863         -      assert( rc==SQLITE_OK || pTerm->pIter==0 );
   864         -      if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){
         1015  +
         1016  +      if( bEof ){
   865   1017           pNode->bEof = 1;
   866         -        break;
         1018  +        return rc;
   867   1019         }
   868   1020       }
   869   1021     }
   870   1022   
   871   1023     return rc;
   872   1024   }
   873   1025   
................................................................................
  1025   1177       switch( pNode->eType ){
  1026   1178         case FTS5_STRING: {
  1027   1179           rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);
  1028   1180           break;
  1029   1181         };
  1030   1182   
  1031   1183         case FTS5_TERM: {
  1032         -        rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);
  1033         -        if( pNode->bEof==0 ){
         1184  +        Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
         1185  +        if( bFromValid ){
         1186  +          rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
         1187  +        }else{
         1188  +          rc = sqlite3Fts5IterNext(pIter);
         1189  +        }
         1190  +        if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){
  1034   1191             assert( rc==SQLITE_OK );
  1035   1192             rc = fts5ExprTokenTest(pExpr, pNode);
         1193  +        }else{
         1194  +          pNode->bEof = 1;
  1036   1195           }
  1037   1196           return rc;
  1038   1197         };
  1039   1198   
  1040   1199         case FTS5_AND: {
  1041   1200           Fts5ExprNode *pLeft = pNode->apChild[0];
  1042   1201           rc = fts5ExprNodeNext(pExpr, pLeft, bFromValid, iFrom);
................................................................................
  1262   1421   /*
  1263   1422   ** Free the phrase object passed as the only argument.
  1264   1423   */
  1265   1424   static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
  1266   1425     if( pPhrase ){
  1267   1426       int i;
  1268   1427       for(i=0; i<pPhrase->nTerm; i++){
         1428  +      Fts5ExprTerm *pSyn;
         1429  +      Fts5ExprTerm *pNext;
  1269   1430         Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
  1270   1431         sqlite3_free(pTerm->zTerm);
  1271         -      if( pTerm->pIter ){
  1272         -        sqlite3Fts5IterClose(pTerm->pIter);
         1432  +      sqlite3Fts5IterClose(pTerm->pIter);
         1433  +
         1434  +      for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
         1435  +        pNext = pSyn->pSynonym;
         1436  +        sqlite3Fts5IterClose(pSyn->pIter);
         1437  +        sqlite3_free(pSyn);
  1273   1438         }
  1274   1439       }
  1275   1440       if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
  1276   1441       sqlite3_free(pPhrase);
  1277   1442     }
  1278   1443   }
  1279   1444   
................................................................................
  1327   1492     }
  1328   1493     return pRet;
  1329   1494   }
  1330   1495   
  1331   1496   typedef struct TokenCtx TokenCtx;
  1332   1497   struct TokenCtx {
  1333   1498     Fts5ExprPhrase *pPhrase;
         1499  +  int rc;
  1334   1500   };
  1335   1501   
  1336   1502   /*
  1337   1503   ** Callback for tokenizing terms used by ParseTerm().
  1338   1504   */
  1339   1505   static int fts5ParseTokenize(
  1340   1506     void *pContext,                 /* Pointer to Fts5InsertCtx object */
         1507  +  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
  1341   1508     const char *pToken,             /* Buffer containing token */
  1342   1509     int nToken,                     /* Size of token in bytes */
  1343         -  int iStart,                     /* Start offset of token */
  1344         -  int iEnd                        /* End offset of token */
         1510  +  int iUnused1,                   /* Start offset of token */
         1511  +  int iUnused2                    /* End offset of token */
  1345   1512   ){
  1346   1513     int rc = SQLITE_OK;
  1347   1514     const int SZALLOC = 8;
  1348   1515     TokenCtx *pCtx = (TokenCtx*)pContext;
  1349   1516     Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
  1350         -  Fts5ExprTerm *pTerm;
  1351         -
  1352         -  if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
  1353         -    Fts5ExprPhrase *pNew;
  1354         -    int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
  1355         -
  1356         -    pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
  1357         -        sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
  1358         -    );
  1359         -    if( pNew==0 ) return SQLITE_NOMEM;
  1360         -    if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
  1361         -    pCtx->pPhrase = pPhrase = pNew;
  1362         -    pNew->nTerm = nNew - SZALLOC;
  1363         -  }
  1364         -
  1365         -  pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
  1366         -  memset(pTerm, 0, sizeof(Fts5ExprTerm));
  1367         -  pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
  1368         -
         1517  +
         1518  +  /* If an error has already occurred, this is a no-op */
         1519  +  if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
         1520  +
         1521  +  assert( pPhrase==0 || pPhrase->nTerm>0 );
         1522  +  if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
         1523  +    Fts5ExprTerm *pSyn;
         1524  +    int nByte = sizeof(Fts5ExprTerm) + nToken+1;
         1525  +    pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
         1526  +    if( pSyn==0 ){
         1527  +      rc = SQLITE_NOMEM;
         1528  +    }else{
         1529  +      memset(pSyn, 0, nByte);
         1530  +      pSyn->zTerm = (char*)&pSyn[1];
         1531  +      memcpy(pSyn->zTerm, pToken, nToken);
         1532  +      pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
         1533  +      pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
         1534  +    }
         1535  +  }else{
         1536  +    Fts5ExprTerm *pTerm;
         1537  +    if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
         1538  +      Fts5ExprPhrase *pNew;
         1539  +      int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
         1540  +
         1541  +      pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
         1542  +          sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
         1543  +      );
         1544  +      if( pNew==0 ){
         1545  +        rc = SQLITE_NOMEM;
         1546  +      }else{
         1547  +        if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
         1548  +        pCtx->pPhrase = pPhrase = pNew;
         1549  +        pNew->nTerm = nNew - SZALLOC;
         1550  +      }
         1551  +    }
         1552  +
         1553  +    if( rc==SQLITE_OK ){
         1554  +      pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
         1555  +      memset(pTerm, 0, sizeof(Fts5ExprTerm));
         1556  +      pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
         1557  +    }
         1558  +  }
         1559  +
         1560  +  pCtx->rc = rc;
  1369   1561     return rc;
  1370   1562   }
  1371   1563   
  1372   1564   
  1373   1565   /*
  1374   1566   ** Free the phrase object passed as the only argument.
  1375   1567   */
................................................................................
  1413   1605     char *z = 0;
  1414   1606   
  1415   1607     memset(&sCtx, 0, sizeof(TokenCtx));
  1416   1608     sCtx.pPhrase = pAppend;
  1417   1609   
  1418   1610     rc = fts5ParseStringFromToken(pToken, &z);
  1419   1611     if( rc==SQLITE_OK ){
         1612  +    int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0);
         1613  +    int n;
  1420   1614       sqlite3Fts5Dequote(z);
  1421         -    rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize);
         1615  +    n = strlen(z);
         1616  +    rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
  1422   1617     }
  1423   1618     sqlite3_free(z);
  1424         -  if( rc ){
         1619  +  if( rc || (rc = sCtx.rc) ){
  1425   1620       pParse->rc = rc;
  1426   1621       fts5ExprPhraseFree(sCtx.pPhrase);
  1427   1622       sCtx.pPhrase = 0;
  1428   1623     }else if( sCtx.pPhrase ){
  1429   1624   
  1430   1625       if( pAppend==0 ){
  1431   1626         if( (pParse->nPhrase % 8)==0 ){
................................................................................
  1445   1640       pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
  1446   1641       assert( sCtx.pPhrase->nTerm>0 );
  1447   1642       sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
  1448   1643     }
  1449   1644   
  1450   1645     return sCtx.pPhrase;
  1451   1646   }
         1647  +
         1648  +/*
         1649  +** Create a new FTS5 expression by cloning phrase iPhrase of the
         1650  +** expression passed as the second argument.
         1651  +*/
         1652  +int sqlite3Fts5ExprClonePhrase(
         1653  +  Fts5Config *pConfig,
         1654  +  Fts5Expr *pExpr, 
         1655  +  int iPhrase, 
         1656  +  Fts5Expr **ppNew
         1657  +){
         1658  +  int rc = SQLITE_OK;             /* Return code */
         1659  +  Fts5ExprPhrase *pOrig;          /* The phrase extracted from pExpr */
         1660  +  Fts5ExprPhrase *pCopy;          /* Copy of pOrig */
         1661  +  int i;                          /* Used to iterate through phrase terms */
         1662  +
         1663  +  Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */
         1664  +  Fts5ExprPhrase **apPhrase;      /* pNew->apPhrase */
         1665  +  Fts5ExprNode *pNode;            /* pNew->pRoot */
         1666  +  Fts5ExprNearset *pNear;         /* pNew->pRoot->pNear */
         1667  +
         1668  +  TokenCtx sCtx = {0,0};          /* Context object for fts5ParseTokenize */
         1669  +
         1670  +
         1671  +  pOrig = pExpr->apExprPhrase[iPhrase];
         1672  +
         1673  +  pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
         1674  +  if( rc==SQLITE_OK ){
         1675  +    pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 
         1676  +        sizeof(Fts5ExprPhrase*));
         1677  +  }
         1678  +  if( rc==SQLITE_OK ){
         1679  +    pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, 
         1680  +        sizeof(Fts5ExprNode));
         1681  +  }
         1682  +  if( rc==SQLITE_OK ){
         1683  +    pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, 
         1684  +        sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
         1685  +  }
         1686  +
         1687  +  for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
         1688  +    int tflags = 0;
         1689  +    Fts5ExprTerm *p;
         1690  +    for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
         1691  +      const char *zTerm = p->zTerm;
         1692  +      rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, strlen(zTerm), 0, 0);
         1693  +      tflags = FTS5_TOKEN_COLOCATED;
         1694  +    }
         1695  +    if( rc==SQLITE_OK ){
         1696  +      sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
         1697  +    }
         1698  +  }
         1699  +
         1700  +  if( rc==SQLITE_OK ){
         1701  +    /* All the allocations succeeded. Put the expression object together. */
         1702  +    pNew->pIndex = pExpr->pIndex;
         1703  +    pNew->nPhrase = 1;
         1704  +    pNew->apExprPhrase[0] = sCtx.pPhrase;
         1705  +    pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
         1706  +    pNew->pRoot->pNear->nPhrase = 1;
         1707  +    sCtx.pPhrase->pNode = pNew->pRoot;
         1708  +
         1709  +    if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){
         1710  +      pNew->pRoot->eType = FTS5_TERM;
         1711  +    }else{
         1712  +      pNew->pRoot->eType = FTS5_STRING;
         1713  +    }
         1714  +  }else{
         1715  +    sqlite3Fts5ExprFree(pNew);
         1716  +    fts5ExprPhraseFree(sCtx.pPhrase);
         1717  +    pNew = 0;
         1718  +  }
         1719  +
         1720  +  *ppNew = pNew;
         1721  +  return rc;
         1722  +}
         1723  +
  1452   1724   
  1453   1725   /*
  1454   1726   ** Token pTok has appeared in a MATCH expression where the NEAR operator
  1455   1727   ** is expected. If token pTok does not contain "NEAR", store an error
  1456   1728   ** in the pParse object.
  1457   1729   */
  1458   1730   void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
................................................................................
  1626   1898         pRet->eType = eType;
  1627   1899         pRet->pNear = pNear;
  1628   1900         if( eType==FTS5_STRING ){
  1629   1901           int iPhrase;
  1630   1902           for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
  1631   1903             pNear->apPhrase[iPhrase]->pNode = pRet;
  1632   1904           }
  1633         -        if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){
         1905  +        if( pNear->nPhrase==1 
         1906  +         && pNear->apPhrase[0]->nTerm==1 
         1907  +         && pNear->apPhrase[0]->aTerm[0].pSynonym==0
         1908  +        ){
  1634   1909             pRet->eType = FTS5_TERM;
  1635   1910           }
  1636   1911         }else{
  1637   1912           fts5ExprAddChildren(pRet, pLeft);
  1638   1913           fts5ExprAddChildren(pRet, pRight);
  1639   1914         }
  1640   1915       }
................................................................................
  1646   1921       sqlite3Fts5ParseNodeFree(pRight);
  1647   1922       sqlite3Fts5ParseNearsetFree(pNear);
  1648   1923     }
  1649   1924     return pRet;
  1650   1925   }
  1651   1926   
  1652   1927   static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
  1653         -  char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2);
         1928  +  int nByte = 0;
         1929  +  Fts5ExprTerm *p;
         1930  +  char *zQuoted;
         1931  +
         1932  +  /* Determine the maximum amount of space required. */
         1933  +  for(p=pTerm; p; p=p->pSynonym){
         1934  +    nByte += strlen(pTerm->zTerm) * 2 + 3 + 2;
         1935  +  }
         1936  +  zQuoted = sqlite3_malloc(nByte);
         1937  +
  1654   1938     if( zQuoted ){
  1655   1939       int i = 0;
  1656         -    char *zIn = pTerm->zTerm;
  1657         -    zQuoted[i++] = '"';
  1658         -    while( *zIn ){
  1659         -      if( *zIn=='"' ) zQuoted[i++] = '"';
  1660         -      zQuoted[i++] = *zIn++;
         1940  +    for(p=pTerm; p; p=p->pSynonym){
         1941  +      char *zIn = p->zTerm;
         1942  +      zQuoted[i++] = '"';
         1943  +      while( *zIn ){
         1944  +        if( *zIn=='"' ) zQuoted[i++] = '"';
         1945  +        zQuoted[i++] = *zIn++;
         1946  +      }
         1947  +      zQuoted[i++] = '"';
         1948  +      if( p->pSynonym ) zQuoted[i++] = '|';
  1661   1949       }
  1662         -    zQuoted[i++] = '"';
  1663   1950       if( pTerm->bPrefix ){
  1664   1951         zQuoted[i++] = ' ';
  1665   1952         zQuoted[i++] = '*';
  1666   1953       }
  1667   1954       zQuoted[i++] = '\0';
  1668   1955     }
  1669   1956     return zQuoted;

Changes to ext/fts5/fts5_index.c.

   289    289   #define FTS5_DATA_ZERO_PADDING 8
   290    290   #define FTS5_DATA_PADDING 20
   291    291   
   292    292   typedef struct Fts5Data Fts5Data;
   293    293   typedef struct Fts5DlidxIter Fts5DlidxIter;
   294    294   typedef struct Fts5DlidxLvl Fts5DlidxLvl;
   295    295   typedef struct Fts5DlidxWriter Fts5DlidxWriter;
   296         -typedef struct Fts5NodeIter Fts5NodeIter;
   297    296   typedef struct Fts5PageWriter Fts5PageWriter;
   298    297   typedef struct Fts5SegIter Fts5SegIter;
   299    298   typedef struct Fts5DoclistIter Fts5DoclistIter;
   300    299   typedef struct Fts5SegWriter Fts5SegWriter;
   301    300   typedef struct Fts5Structure Fts5Structure;
   302    301   typedef struct Fts5StructureLevel Fts5StructureLevel;
   303    302   typedef struct Fts5StructureSegment Fts5StructureSegment;
................................................................................
   522    521   
   523    522     i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
   524    523     Fts5CResult *aFirst;            /* Current merge state (see above) */
   525    524     Fts5SegIter aSeg[1];            /* Array of segment iterators */
   526    525   };
   527    526   
   528    527   
   529         -/*
   530         -** Object for iterating through the conents of a single internal node in 
   531         -** memory.
   532         -*/
   533         -struct Fts5NodeIter {
   534         -  /* Internal. Set and managed by fts5NodeIterXXX() functions. Except, 
   535         -  ** the EOF test for the iterator is (Fts5NodeIter.aData==0).  */
   536         -  const u8 *aData;
   537         -  int nData;
   538         -  int iOff;
   539         -
   540         -  /* Output variables */
   541         -  Fts5Buffer term;
   542         -  int nEmpty;
   543         -  int iChild;
   544         -  int bDlidx;
   545         -};
   546         -
   547    528   /*
   548    529   ** An instance of the following type is used to iterate through the contents
   549    530   ** of a doclist-index record.
   550    531   **
   551    532   ** pData:
   552    533   **   Record containing the doclist-index data.
   553    534   **
................................................................................
   569    550   };
   570    551   struct Fts5DlidxIter {
   571    552     int nLvl;
   572    553     int iSegid;
   573    554     Fts5DlidxLvl aLvl[1];
   574    555   };
   575    556   
   576         -
   577         -
   578         -/*
   579         -** The first argument passed to this macro is a pointer to an Fts5Buffer
   580         -** object.
   581         -*/
   582         -#define fts5BufferSize(pBuf,n) {                \
   583         -  if( pBuf->nSpace<n ) {                        \
   584         -    u8 *pNew = sqlite3_realloc(pBuf->p, n);     \
   585         -    if( pNew==0 ){                              \
   586         -      sqlite3_free(pBuf->p);                    \
   587         -    }                                           \
   588         -    pBuf->nSpace = n;                           \
   589         -    pBuf->p = pNew;                             \
   590         -  }                                             \
   591         -}
   592         -
   593    557   static void fts5PutU16(u8 *aOut, u16 iVal){
   594    558     aOut[0] = (iVal>>8);
   595    559     aOut[1] = (iVal&0xFF);
   596    560   }
   597    561   
   598    562   static u16 fts5GetU16(const u8 *aIn){
   599    563     return ((u16)aIn[0] << 8) + aIn[1];
................................................................................
   613    577   ** Compare the contents of the pLeft buffer with the pRight/nRight blob.
   614    578   **
   615    579   ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
   616    580   ** +ve if pRight is smaller than pLeft. In other words:
   617    581   **
   618    582   **     res = *pLeft - *pRight
   619    583   */
          584  +#ifdef SQLITE_DEBUG
   620    585   static int fts5BufferCompareBlob(
   621    586     Fts5Buffer *pLeft,              /* Left hand side of comparison */
   622    587     const u8 *pRight, int nRight    /* Right hand side of comparison */
   623    588   ){
   624    589     int nCmp = MIN(pLeft->n, nRight);
   625    590     int res = memcmp(pLeft->p, pRight, nCmp);
   626    591     return (res==0 ? (pLeft->n - nRight) : res);
   627    592   }
   628         -
          593  +#endif
   629    594   
   630    595   /*
   631    596   ** Compare the contents of the two buffers using memcmp(). If one buffer
   632    597   ** is a prefix of the other, it is considered the lesser.
   633    598   **
   634    599   ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
   635    600   ** +ve if pRight is smaller than pLeft. In other words:
................................................................................
   661    626     if( p->pReader ){
   662    627       sqlite3_blob *pReader = p->pReader;
   663    628       p->pReader = 0;
   664    629       sqlite3_blob_close(pReader);
   665    630     }
   666    631   }
   667    632   
   668         -static Fts5Data *fts5DataReadOrBuffer(
   669         -  Fts5Index *p, 
   670         -  Fts5Buffer *pBuf, 
   671         -  i64 iRowid
   672         -){
          633  +
          634  +/*
          635  +** Retrieve a record from the %_data table.
          636  +**
          637  +** If an error occurs, NULL is returned and an error left in the 
          638  +** Fts5Index object.
          639  +*/
          640  +static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
   673    641     Fts5Data *pRet = 0;
   674    642     if( p->rc==SQLITE_OK ){
   675    643       int rc = SQLITE_OK;
   676    644   
   677    645       if( p->pReader ){
   678    646         /* This call may return SQLITE_ABORT if there has been a savepoint
   679    647         ** rollback since it was last used. In this case a new blob handle
................................................................................
   685    653         p->pReader = pBlob;
   686    654         if( rc!=SQLITE_OK ){
   687    655           fts5CloseReader(p);
   688    656         }
   689    657         if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
   690    658       }
   691    659   
   692         -    /* If the blob handle is not yet open, open and seek it. Otherwise, use
   693         -    ** the blob_reopen() API to reseek the existing blob handle.  */
          660  +    /* If the blob handle is not open at this point, open it and seek 
          661  +    ** to the requested entry.  */
   694    662       if( p->pReader==0 && rc==SQLITE_OK ){
   695    663         Fts5Config *pConfig = p->pConfig;
   696    664         rc = sqlite3_blob_open(pConfig->db, 
   697    665             pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
   698    666         );
   699    667       }
   700    668   
................................................................................
   704    672       ** table, missing row, non-blob/text in block column - indicate 
   705    673       ** backing store corruption.  */
   706    674       if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
   707    675   
   708    676       if( rc==SQLITE_OK ){
   709    677         u8 *aOut = 0;               /* Read blob data into this buffer */
   710    678         int nByte = sqlite3_blob_bytes(p->pReader);
   711         -      if( pBuf ){
   712         -        fts5BufferSize(pBuf, MAX(nByte, p->pConfig->pgsz) + 20);
   713         -        pBuf->n = nByte;
   714         -        aOut = pBuf->p;
   715         -        if( aOut==0 ){
   716         -          rc = SQLITE_NOMEM;
   717         -        }
          679  +      int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
          680  +      pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
          681  +      if( pRet ){
          682  +        pRet->n = nByte;
          683  +        aOut = pRet->p = (u8*)&pRet[1];
   718    684         }else{
   719         -        int nSpace = nByte + FTS5_DATA_PADDING;
   720         -        pRet = (Fts5Data*)sqlite3_malloc(nSpace+sizeof(Fts5Data));
   721         -        if( pRet ){
   722         -          pRet->n = nByte;
   723         -          aOut = pRet->p = (u8*)&pRet[1];
   724         -        }else{
   725         -          rc = SQLITE_NOMEM;
   726         -        }
          685  +        rc = SQLITE_NOMEM;
   727    686         }
   728    687   
   729    688         if( rc==SQLITE_OK ){
   730    689           rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
   731    690         }
   732    691         if( rc!=SQLITE_OK ){
   733    692           sqlite3_free(pRet);
................................................................................
   734    693           pRet = 0;
   735    694         }
   736    695       }
   737    696       p->rc = rc;
   738    697       p->nRead++;
   739    698     }
   740    699   
   741         -  return pRet;
   742         -}
   743         -
   744         -/*
   745         -** Retrieve a record from the %_data table.
   746         -**
   747         -** If an error occurs, NULL is returned and an error left in the 
   748         -** Fts5Index object.
   749         -*/
   750         -static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
   751         -  Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid);
   752    700     assert( (pRet==0)==(p->rc!=SQLITE_OK) );
   753    701     return pRet;
   754    702   }
   755    703   
   756         -/*
   757         -** Read a record from the %_data table into the buffer supplied as the
   758         -** second argument.
   759         -**
   760         -** If an error occurs, an error is left in the Fts5Index object. If an
   761         -** error has already occurred when this function is called, it is a 
   762         -** no-op.
   763         -*/
   764         -static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){
   765         -  (void)fts5DataReadOrBuffer(p, pBuf, iRowid);
   766         -}
   767         -
   768    704   /*
   769    705   ** Release a reference to data record returned by an earlier call to
   770    706   ** fts5DataRead().
   771    707   */
   772    708   static void fts5DataRelease(Fts5Data *pData){
   773    709     sqlite3_free(pData);
   774    710   }
................................................................................
  1029    965   ** Fts5Index handle. If an error has already occurred when this function
  1030    966   ** is called, it is a no-op.
  1031    967   */
  1032    968   static Fts5Structure *fts5StructureRead(Fts5Index *p){
  1033    969     Fts5Config *pConfig = p->pConfig;
  1034    970     Fts5Structure *pRet = 0;        /* Object to return */
  1035    971     int iCookie;                    /* Configuration cookie */
          972  +  Fts5Data *pData;
  1036    973     Fts5Buffer buf = {0, 0, 0};
  1037    974   
  1038         -  fts5DataBuffer(p, &buf, FTS5_STRUCTURE_ROWID);
  1039         -  if( buf.p==0 ) return 0;
  1040         -  assert( buf.nSpace>=(buf.n + FTS5_DATA_ZERO_PADDING) );
  1041         -  memset(&buf.p[buf.n], 0, FTS5_DATA_ZERO_PADDING);
  1042         -  p->rc = fts5StructureDecode(buf.p, buf.n, &iCookie, &pRet);
  1043         -
          975  +  pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
          976  +  if( p->rc ) return 0;
          977  +  memset(&pData->p[pData->n], 0, FTS5_DATA_PADDING);
          978  +  p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet);
  1044    979     if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
  1045    980       p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
  1046    981     }
  1047    982   
  1048         -  fts5BufferFree(&buf);
          983  +  fts5DataRelease(pData);
  1049    984     if( p->rc!=SQLITE_OK ){
  1050    985       fts5StructureRelease(pRet);
  1051    986       pRet = 0;
  1052    987     }
  1053    988     return pRet;
  1054    989   }
  1055    990   
................................................................................
  1224   1159         szPromote = szSeg;
  1225   1160       }
  1226   1161       fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
  1227   1162     }
  1228   1163   }
  1229   1164   
  1230   1165   
  1231         -/*
  1232         -** If the pIter->iOff offset currently points to an entry indicating one
  1233         -** or more term-less nodes, advance past it and set pIter->nEmpty to
  1234         -** the number of empty child nodes.
  1235         -*/
  1236         -static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){
  1237         -  if( pIter->iOff<pIter->nData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){
  1238         -    pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01;
  1239         -    pIter->iOff++;
  1240         -    pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty);
  1241         -  }else{
  1242         -    pIter->nEmpty = 0;
  1243         -    pIter->bDlidx = 0;
  1244         -  }
  1245         -}
  1246         -
  1247         -/*
  1248         -** Advance to the next entry within the node.
  1249         -*/
  1250         -static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){
  1251         -  if( pIter->iOff>=pIter->nData ){
  1252         -    pIter->aData = 0;
  1253         -    pIter->iChild += pIter->nEmpty;
  1254         -  }else{
  1255         -    int nPre, nNew;
  1256         -    pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nPre);
  1257         -    pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nNew);
  1258         -    pIter->term.n = nPre-2;
  1259         -    fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff);
  1260         -    pIter->iOff += nNew;
  1261         -    pIter->iChild += (1 + pIter->nEmpty);
  1262         -    fts5NodeIterGobbleNEmpty(pIter);
  1263         -    if( *pRc ) pIter->aData = 0;
  1264         -  }
  1265         -}
  1266         -
  1267         -
  1268         -/*
  1269         -** Initialize the iterator object pIter to iterate through the internal
  1270         -** segment node in pData.
  1271         -*/
  1272         -static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){
  1273         -  memset(pIter, 0, sizeof(*pIter));
  1274         -  pIter->aData = aData;
  1275         -  pIter->nData = nData;
  1276         -  pIter->iOff = fts5GetVarint32(aData, pIter->iChild);
  1277         -  fts5NodeIterGobbleNEmpty(pIter);
  1278         -}
  1279         -
  1280         -/*
  1281         -** Free any memory allocated by the iterator object.
  1282         -*/
  1283         -static void fts5NodeIterFree(Fts5NodeIter *pIter){
  1284         -  fts5BufferFree(&pIter->term);
  1285         -}
  1286         -
  1287   1166   /*
  1288   1167   ** Advance the iterator passed as the only argument. If the end of the 
  1289   1168   ** doclist-index page is reached, return non-zero.
  1290   1169   */
  1291   1170   static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
  1292   1171     Fts5Data *pData = pLvl->pData;
  1293   1172   
................................................................................
  2037   1916         iOff += nPos;
  2038   1917       }
  2039   1918     }
  2040   1919   
  2041   1920     pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
  2042   1921   }
  2043   1922   
  2044         -#ifdef SQLITE_DEBUG
  2045         -static void fts5AssertNodeSeekOk(
  2046         -  Fts5Buffer *pNode,
  2047         -  const u8 *pTerm, int nTerm,     /* Term to search for */
  2048         -  int iExpectPg,
  2049         -  int bExpectDlidx
  2050         -){
  2051         -  int bDlidx;
  2052         -  int iPg;
  2053         -  int rc = SQLITE_OK;
  2054         -  Fts5NodeIter node;
  2055         -
  2056         -  fts5NodeIterInit(pNode->p, pNode->n, &node);
  2057         -  assert( node.term.n==0 );
  2058         -  iPg = node.iChild;
  2059         -  bDlidx = node.bDlidx;
  2060         -  for(fts5NodeIterNext(&rc, &node);
  2061         -      node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0;
  2062         -      fts5NodeIterNext(&rc, &node)
  2063         -  ){
  2064         -    iPg = node.iChild;
  2065         -    bDlidx = node.bDlidx;
  2066         -  }
  2067         -  fts5NodeIterFree(&node);
  2068         -
  2069         -  assert( rc!=SQLITE_OK || iPg==iExpectPg );
  2070         -  assert( rc!=SQLITE_OK || bDlidx==bExpectDlidx );
  2071         -}
  2072         -#else
  2073         -#define fts5AssertNodeSeekOk(v,w,x,y,z)
  2074         -#endif
  2075         -
  2076         -/*
  2077         -** Argument pNode is an internal b-tree node. This function searches
  2078         -** within the node for the largest term that is smaller than or equal
  2079         -** to (pTerm/nTerm).
  2080         -**
  2081         -** It returns the associated page number. Or, if (pTerm/nTerm) is smaller
  2082         -** than all terms within the node, the leftmost child page number. 
  2083         -**
  2084         -** Before returning, (*pbDlidx) is set to true if the last term on the
  2085         -** returned child page number has a doclist-index. Or left as is otherwise.
  2086         -*/
  2087         -static int fts5NodeSeek(
  2088         -  Fts5Buffer *pNode,              /* Node to search */
  2089         -  const u8 *pTerm, int nTerm,     /* Term to search for */
  2090         -  int *pbDlidx                    /* OUT: True if dlidx flag is set */
  2091         -){
  2092         -  int iPg;
  2093         -  u8 *pPtr = pNode->p;
  2094         -  u8 *pEnd = &pPtr[pNode->n];
  2095         -  int nMatch = 0;                 /* Number of bytes of pTerm already matched */
  2096         -  
  2097         -  assert( *pbDlidx==0 );
  2098         -
  2099         -  pPtr += fts5GetVarint32(pPtr, iPg);
  2100         -  while( pPtr<pEnd ){
  2101         -    int nEmpty = 0;
  2102         -    int nKeep;
  2103         -    int nNew;
  2104         -
  2105         -    /* If there is a "no terms" record at pPtr, read it now. Store the
  2106         -    ** number of termless pages in nEmpty. If it indicates a doclist-index, 
  2107         -    ** set (*pbDlidx) to true.*/
  2108         -    if( *pPtr<2 ){
  2109         -      *pbDlidx = (*pPtr==0x01);
  2110         -      pPtr++;
  2111         -      pPtr += fts5GetVarint32(pPtr, nEmpty);
  2112         -      if( pPtr>=pEnd ) break;
  2113         -    }
  2114         -
  2115         -    /* Read the next "term" pointer. Set nKeep to the number of bytes to
  2116         -    ** keep from the previous term, and nNew to the number of bytes of
  2117         -    ** new data that will be appended to it. */
  2118         -    nKeep = (int)*pPtr++;
  2119         -    nNew = (int)*pPtr++;
  2120         -    if( (nKeep | nNew) & 0x0080 ){
  2121         -      pPtr -= 2;
  2122         -      pPtr += fts5GetVarint32(pPtr, nKeep);
  2123         -      pPtr += fts5GetVarint32(pPtr, nNew);
  2124         -    }
  2125         -    nKeep -= 2;
  2126         -
  2127         -    /* Compare (pTerm/nTerm) to the current term on the node (the one described
  2128         -    ** by nKeep/nNew). If the node term is larger, break out of the while()
  2129         -    ** loop. 
  2130         -    **
  2131         -    ** Otherwise, if (pTerm/nTerm) is larger or the two terms are equal, 
  2132         -    ** leave variable nMatch set to the size of the largest prefix common to
  2133         -    ** both terms in bytes.  */
  2134         -    if( nKeep==nMatch ){
  2135         -      int nTst = MIN(nNew, nTerm-nMatch);
  2136         -      int i;
  2137         -      for(i=0; i<nTst; i++){
  2138         -        if( pTerm[nKeep+i]!=pPtr[i] ) break;
  2139         -      }
  2140         -      nMatch += i;
  2141         -      assert( nMatch<=nTerm );
  2142         -
  2143         -      if( i<nNew && (nMatch==nTerm || pPtr[i] > pTerm[nMatch]) ) break;
  2144         -    }else if( nKeep<nMatch ){
  2145         -      break;
  2146         -    }
  2147         -
  2148         -    iPg += 1 + nEmpty;
  2149         -    *pbDlidx = 0;
  2150         -    pPtr += nNew;
  2151         -  }
  2152         -
  2153         -  fts5AssertNodeSeekOk(pNode, pTerm, nTerm, iPg, *pbDlidx);
  2154         -  return iPg;
  2155         -}
  2156         -
  2157   1923   #define fts5IndexGetVarint32(a, iOff, nVal) {     \
  2158   1924     nVal = a[iOff++];                               \
  2159   1925     if( nVal & 0x80 ){                              \
  2160   1926       iOff--;                                       \
  2161   1927       iOff += fts5GetVarint32(&a[iOff], nVal);      \
  2162   1928     }                                               \
  2163   1929   }
................................................................................
  2673   2439       if( iLeafPgno<pIter->iLeafPgno ){
  2674   2440         pIter->iLeafPgno = iLeafPgno+1;
  2675   2441         fts5SegIterReverseNewPage(p, pIter);
  2676   2442         bMove = 0;
  2677   2443       }
  2678   2444     }
  2679   2445   
  2680         -  while( p->rc==SQLITE_OK ){
         2446  +  do{
  2681   2447       if( bMove ) fts5SegIterNext(p, pIter, 0);
  2682   2448       if( pIter->pLeaf==0 ) break;
  2683   2449       if( bRev==0 && pIter->iRowid>=iMatch ) break;
  2684   2450       if( bRev!=0 && pIter->iRowid<=iMatch ) break;
  2685   2451       bMove = 1;
  2686         -  }
         2452  +  }while( p->rc==SQLITE_OK );
  2687   2453   }
  2688   2454   
  2689   2455   
  2690   2456   /*
  2691   2457   ** Free the iterator object passed as the second argument.
  2692   2458   */
  2693   2459   static void fts5MultiIterFree(Fts5Index *p, Fts5IndexIter *pIter){
................................................................................
  4455   4221   /*
  4456   4222   ** The %_data table is completely empty when this function is called. This
  4457   4223   ** function populates it with the initial structure objects for each index,
  4458   4224   ** and the initial version of the "averages" record (a zero-byte blob).
  4459   4225   */
  4460   4226   int sqlite3Fts5IndexReinit(Fts5Index *p){
  4461   4227     Fts5Structure s;
  4462         -
  4463         -  assert( p->rc==SQLITE_OK );
  4464         -  p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0);
  4465         -
  4466   4228     memset(&s, 0, sizeof(Fts5Structure));
         4229  +  fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
  4467   4230     fts5StructureWrite(p, &s);
  4468         -
  4469   4231     return fts5IndexReturn(p);
  4470   4232   }
  4471   4233   
  4472   4234   /*
  4473   4235   ** Open a new Fts5Index handle. If the bCreate argument is true, create
  4474   4236   ** and initialize the underlying %_data table.
  4475   4237   **
................................................................................
  4783   4545       Fts5Index *pIndex = pIter->pIndex;
  4784   4546       fts5MultiIterFree(pIter->pIndex, pIter);
  4785   4547       fts5CloseReader(pIndex);
  4786   4548     }
  4787   4549   }
  4788   4550   
  4789   4551   /*
  4790         -** Read the "averages" record into the buffer supplied as the second 
  4791         -** argument. Return SQLITE_OK if successful, or an SQLite error code
  4792         -** if an error occurs.
         4552  +** Read and decode the "averages" record from the database. 
         4553  +**
         4554  +** Parameter anSize must point to an array of size nCol, where nCol is
         4555  +** the number of user defined columns in the FTS table.
  4793   4556   */
  4794         -int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){
  4795         -  assert( p->rc==SQLITE_OK );
  4796         -  fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID);
         4557  +int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
         4558  +  int nCol = p->pConfig->nCol;
         4559  +  Fts5Data *pData;
         4560  +
         4561  +  *pnRow = 0;
         4562  +  memset(anSize, 0, sizeof(i64) * nCol);
         4563  +  pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
         4564  +  if( p->rc==SQLITE_OK && pData->n ){
         4565  +    int i = 0;
         4566  +    int iCol;
         4567  +    i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
         4568  +    for(iCol=0; i<pData->n && iCol<nCol; iCol++){
         4569  +      i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
         4570  +    }
         4571  +  }
         4572  +
         4573  +  fts5DataRelease(pData);
  4797   4574     return fts5IndexReturn(p);
  4798   4575   }
  4799   4576   
  4800   4577   /*
  4801   4578   ** Replace the current "averages" record with the contents of the buffer 
  4802   4579   ** supplied as the second argument.
  4803   4580   */
................................................................................
  5483   5260     }else if( iSegid==0 ){
  5484   5261       if( iRowid==FTS5_AVERAGES_ROWID ){
  5485   5262         /* todo */
  5486   5263       }else{
  5487   5264         fts5DecodeStructure(&rc, &s, a, n);
  5488   5265       }
  5489   5266     }else{
  5490         -
  5491   5267       Fts5Buffer term;
         5268  +    int iTermOff = 0;
         5269  +    int iRowidOff = 0;
         5270  +    int iOff;
         5271  +    int nKeep = 0;
         5272  +
  5492   5273       memset(&term, 0, sizeof(Fts5Buffer));
  5493   5274   
  5494         -    if( iHeight==0 ){
  5495         -      int iTermOff = 0;
  5496         -      int iRowidOff = 0;
  5497         -      int iOff;
  5498         -      int nKeep = 0;
         5275  +    if( n>=4 ){
         5276  +      iRowidOff = fts5GetU16(&a[0]);
         5277  +      iTermOff = fts5GetU16(&a[2]);
         5278  +    }else{
         5279  +      sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
         5280  +      goto decode_out;
         5281  +    }
         5282  +
         5283  +    if( iRowidOff ){
         5284  +      iOff = iRowidOff;
         5285  +    }else if( iTermOff ){
         5286  +      iOff = iTermOff;
         5287  +    }else{
         5288  +      iOff = n;
         5289  +    }
         5290  +    fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
  5499   5291   
  5500         -      if( n>=4 ){
  5501         -        iRowidOff = fts5GetU16(&a[0]);
  5502         -        iTermOff = fts5GetU16(&a[2]);
  5503         -      }else{
  5504         -        sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
  5505         -        goto decode_out;
  5506         -      }
         5292  +    assert( iRowidOff==0 || iOff==iRowidOff );
         5293  +    if( iRowidOff ){
         5294  +      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
         5295  +    }
  5507   5296   
  5508         -      if( iRowidOff ){
  5509         -        iOff = iRowidOff;
  5510         -      }else if( iTermOff ){
  5511         -        iOff = iTermOff;
  5512         -      }else{
  5513         -        iOff = n;
  5514         -      }
  5515         -      fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
  5516         -
  5517         -      assert( iRowidOff==0 || iOff==iRowidOff );
  5518         -      if( iRowidOff ){
  5519         -        iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
  5520         -      }
         5297  +    assert( iTermOff==0 || iOff==iTermOff );
         5298  +    while( iOff<n ){
         5299  +      int nByte;
         5300  +      iOff += fts5GetVarint32(&a[iOff], nByte);
         5301  +      term.n= nKeep;
         5302  +      fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
         5303  +      iOff += nByte;
  5521   5304   
  5522         -      assert( iTermOff==0 || iOff==iTermOff );
  5523         -      while( iOff<n ){
  5524         -        int nByte;
  5525         -        iOff += fts5GetVarint32(&a[iOff], nByte);
  5526         -        term.n= nKeep;
  5527         -        fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
  5528         -        iOff += nByte;
  5529         -
  5530         -        sqlite3Fts5BufferAppendPrintf(
  5531         -            &rc, &s, " term=%.*s", term.n, (const char*)term.p
  5532         -        );
  5533         -        iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
  5534         -        if( iOff<n ){
  5535         -          iOff += fts5GetVarint32(&a[iOff], nKeep);
  5536         -        }
         5305  +      sqlite3Fts5BufferAppendPrintf(
         5306  +          &rc, &s, " term=%.*s", term.n, (const char*)term.p
         5307  +          );
         5308  +      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
         5309  +      if( iOff<n ){
         5310  +        iOff += fts5GetVarint32(&a[iOff], nKeep);
  5537   5311         }
  5538         -      fts5BufferFree(&term);
  5539         -    }else{
  5540         -      Fts5NodeIter ss;
  5541         -      for(fts5NodeIterInit(a, n, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){
  5542         -        if( ss.term.n==0 ){
  5543         -          sqlite3Fts5BufferAppendPrintf(&rc, &s, " left=%d", ss.iChild);
  5544         -        }else{
  5545         -          sqlite3Fts5BufferAppendPrintf(&rc,&s, " \"%.*s\"", 
  5546         -              ss.term.n, ss.term.p
  5547         -          );
  5548         -        }
  5549         -        if( ss.nEmpty ){
  5550         -          sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d%s", ss.nEmpty,
  5551         -              ss.bDlidx ? "*" : ""
  5552         -          );
  5553         -        }
  5554         -      }
  5555         -      fts5NodeIterFree(&ss);
  5556   5312       }
         5313  +    fts5BufferFree(&term);
  5557   5314     }
  5558   5315     
  5559   5316    decode_out:
  5560   5317     sqlite3_free(a);
  5561   5318     if( rc==SQLITE_OK ){
  5562   5319       sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
  5563   5320     }else{

Changes to ext/fts5/fts5_main.c.

  1494   1494     return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
  1495   1495   }
  1496   1496   
  1497   1497   static int fts5ApiTokenize(
  1498   1498     Fts5Context *pCtx, 
  1499   1499     const char *pText, int nText, 
  1500   1500     void *pUserData,
  1501         -  int (*xToken)(void*, const char*, int, int, int)
         1501  +  int (*xToken)(void*, int, const char*, int, int, int)
  1502   1502   ){
  1503   1503     Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  1504   1504     Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  1505         -  return sqlite3Fts5Tokenize(pTab->pConfig, pText, nText, pUserData, xToken);
         1505  +  return sqlite3Fts5Tokenize(
         1506  +      pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
         1507  +  );
  1506   1508   }
  1507   1509   
  1508   1510   static int fts5ApiPhraseCount(Fts5Context *pCtx){
  1509   1511     Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  1510   1512     return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
  1511   1513   }
  1512   1514   
................................................................................
  1651   1653       }
  1652   1654     }
  1653   1655     return rc;
  1654   1656   }
  1655   1657   
  1656   1658   static int fts5ColumnSizeCb(
  1657   1659     void *pContext,                 /* Pointer to int */
         1660  +  int tflags,
  1658   1661     const char *pToken,             /* Buffer containing token */
  1659   1662     int nToken,                     /* Size of token in bytes */
  1660   1663     int iStart,                     /* Start offset of token */
  1661   1664     int iEnd                        /* End offset of token */
  1662   1665   ){
  1663   1666     int *pCnt = (int*)pContext;
  1664         -  *pCnt = *pCnt + 1;
         1667  +  if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
         1668  +    (*pCnt)++;
         1669  +  }
  1665   1670     return SQLITE_OK;
  1666   1671   }
  1667   1672   
  1668   1673   static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
  1669   1674     Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  1670   1675     Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  1671   1676     Fts5Config *pConfig = pTab->pConfig;
................................................................................
  1687   1692         for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
  1688   1693           if( pConfig->abUnindexed[i]==0 ){
  1689   1694             const char *z; int n;
  1690   1695             void *p = (void*)(&pCsr->aColumnSize[i]);
  1691   1696             pCsr->aColumnSize[i] = 0;
  1692   1697             rc = fts5ApiColumnText(pCtx, i, &z, &n);
  1693   1698             if( rc==SQLITE_OK ){
  1694         -            rc = sqlite3Fts5Tokenize(pConfig, z, n, p, fts5ColumnSizeCb);
         1699  +            rc = sqlite3Fts5Tokenize(
         1700  +                pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb
         1701  +            );
  1695   1702             }
  1696   1703           }
  1697   1704         }
  1698   1705       }
  1699   1706       CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE);
  1700   1707     }
  1701   1708     if( iCol<0 ){
................................................................................
  1849   1856     rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
  1850   1857     if( rc==SQLITE_OK ){
  1851   1858       Fts5Config *pConf = pTab->pConfig;
  1852   1859       pNew->ePlan = FTS5_PLAN_MATCH;
  1853   1860       pNew->iFirstRowid = SMALLEST_INT64;
  1854   1861       pNew->iLastRowid = LARGEST_INT64;
  1855   1862       pNew->base.pVtab = (sqlite3_vtab*)pTab;
  1856         -    rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
         1863  +    rc = sqlite3Fts5ExprClonePhrase(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
  1857   1864     }
  1858   1865   
  1859   1866     if( rc==SQLITE_OK ){
  1860   1867       for(rc = fts5CursorFirst(pTab, pNew, 0);
  1861   1868           rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0;
  1862   1869           rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew)
  1863   1870       ){
................................................................................
  2340   2347     pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global));
  2341   2348     if( pGlobal==0 ){
  2342   2349       rc = SQLITE_NOMEM;
  2343   2350     }else{
  2344   2351       void *p = (void*)pGlobal;
  2345   2352       memset(pGlobal, 0, sizeof(Fts5Global));
  2346   2353       pGlobal->db = db;
  2347         -    pGlobal->api.iVersion = 1;
         2354  +    pGlobal->api.iVersion = 2;
  2348   2355       pGlobal->api.xCreateFunction = fts5CreateAux;
  2349   2356       pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
  2350   2357       pGlobal->api.xFindTokenizer = fts5FindTokenizer;
  2351   2358       rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
  2352   2359       if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
  2353   2360       if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
  2354   2361       if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api);

Changes to ext/fts5/fts5_storage.c.

   355    355   };
   356    356   
   357    357   /*
   358    358   ** Tokenization callback used when inserting tokens into the FTS index.
   359    359   */
   360    360   static int fts5StorageInsertCallback(
   361    361     void *pContext,                 /* Pointer to Fts5InsertCtx object */
          362  +  int tflags,
   362    363     const char *pToken,             /* Buffer containing token */
   363    364     int nToken,                     /* Size of token in bytes */
   364    365     int iStart,                     /* Start offset of token */
   365    366     int iEnd                        /* End offset of token */
   366    367   ){
   367    368     Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
   368    369     Fts5Index *pIdx = pCtx->pStorage->pIndex;
   369         -  int iPos = pCtx->szCol++;
   370         -  return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken);
          370  +  if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
          371  +    pCtx->szCol++;
          372  +  }
          373  +  return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
   371    374   }
   372    375   
   373    376   /*
   374    377   ** If a row with rowid iDel is present in the %_content table, add the
   375    378   ** delete-markers to the FTS index necessary to delete it. Do not actually
   376    379   ** remove the %_content row at this time though.
   377    380   */
................................................................................
   390    393         ctx.pStorage = p;
   391    394         ctx.iCol = -1;
   392    395         rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
   393    396         for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
   394    397           if( pConfig->abUnindexed[iCol-1] ) continue;
   395    398           ctx.szCol = 0;
   396    399           rc = sqlite3Fts5Tokenize(pConfig, 
          400  +            FTS5_TOKENIZE_DOCUMENT,
   397    401               (const char*)sqlite3_column_text(pSeek, iCol),
   398    402               sqlite3_column_bytes(pSeek, iCol),
   399    403               (void*)&ctx,
   400    404               fts5StorageInsertCallback
   401    405           );
   402    406           p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
   403    407         }
................................................................................
   447    451   **
   448    452   ** Return SQLITE_OK if successful, or an SQLite error code if an error
   449    453   ** occurs.
   450    454   */
   451    455   static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
   452    456     int rc = SQLITE_OK;
   453    457     if( p->bTotalsValid==0 ){
   454         -    int nCol = p->pConfig->nCol;
   455         -    Fts5Buffer buf;
   456         -    memset(&buf, 0, sizeof(buf));
   457         -
   458         -    memset(p->aTotalSize, 0, sizeof(i64) * nCol);
   459         -    p->nTotalRow = 0;
   460         -    rc = sqlite3Fts5IndexGetAverages(p->pIndex, &buf);
   461         -    if( rc==SQLITE_OK && buf.n ){
   462         -      int i = 0;
   463         -      int iCol;
   464         -      i += fts5GetVarint(&buf.p[i], (u64*)&p->nTotalRow);
   465         -      for(iCol=0; i<buf.n && iCol<nCol; iCol++){
   466         -        i += fts5GetVarint(&buf.p[i], (u64*)&p->aTotalSize[iCol]);
   467         -      }
   468         -    }
   469         -    sqlite3_free(buf.p);
          458  +    rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize);
   470    459       p->bTotalsValid = bCache;
   471    460     }
   472    461     return rc;
   473    462   }
   474    463   
   475    464   /*
   476    465   ** Store the current contents of the p->nTotalRow and p->aTotalSize[] 
................................................................................
   561    550       ctx.iCol = -1;
   562    551   
   563    552       rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
   564    553       for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){
   565    554         if( pConfig->abUnindexed[iCol] ) continue;
   566    555         ctx.szCol = 0;
   567    556         rc = sqlite3Fts5Tokenize(pConfig, 
          557  +        FTS5_TOKENIZE_DOCUMENT,
   568    558           (const char*)sqlite3_value_text(apVal[iCol]),
   569    559           sqlite3_value_bytes(apVal[iCol]),
   570    560           (void*)&ctx,
   571    561           fts5StorageInsertCallback
   572    562         );
   573    563         p->aTotalSize[iCol] -= (i64)ctx.szCol;
   574    564       }
................................................................................
   650    640   
   651    641       sqlite3Fts5BufferZero(&buf);
   652    642       rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid);
   653    643       for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
   654    644         ctx.szCol = 0;
   655    645         if( pConfig->abUnindexed[ctx.iCol]==0 ){
   656    646           rc = sqlite3Fts5Tokenize(pConfig, 
          647  +            FTS5_TOKENIZE_DOCUMENT,
   657    648               (const char*)sqlite3_column_text(pScan, ctx.iCol+1),
   658    649               sqlite3_column_bytes(pScan, ctx.iCol+1),
   659    650               (void*)&ctx,
   660    651               fts5StorageInsertCallback
   661    652           );
   662    653         }
   663    654         sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
................................................................................
   767    758       rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid);
   768    759       ctx.pStorage = p;
   769    760     }
   770    761     for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
   771    762       ctx.szCol = 0;
   772    763       if( pConfig->abUnindexed[ctx.iCol]==0 ){
   773    764         rc = sqlite3Fts5Tokenize(pConfig, 
          765  +          FTS5_TOKENIZE_DOCUMENT,
   774    766             (const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
   775    767             sqlite3_value_bytes(apVal[ctx.iCol+2]),
   776    768             (void*)&ctx,
   777    769             fts5StorageInsertCallback
   778    770         );
   779    771       }
   780    772       sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
................................................................................
   834    826   };
   835    827   
   836    828   /*
   837    829   ** Tokenization callback used by integrity check.
   838    830   */
   839    831   static int fts5StorageIntegrityCallback(
   840    832     void *pContext,                 /* Pointer to Fts5InsertCtx object */
          833  +  int tflags,
   841    834     const char *pToken,             /* Buffer containing token */
   842    835     int nToken,                     /* Size of token in bytes */
   843    836     int iStart,                     /* Start offset of token */
   844    837     int iEnd                        /* End offset of token */
   845    838   ){
   846    839     Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
   847         -  int iPos = pCtx->szCol++;
          840  +  if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
          841  +    pCtx->szCol++;
          842  +  }
   848    843     pCtx->cksum ^= sqlite3Fts5IndexCksum(
   849         -      pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
          844  +      pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken
   850    845     );
   851    846     return SQLITE_OK;
   852    847   }
   853    848   
   854    849   /*
   855    850   ** Check that the contents of the FTS index match that of the %_content
   856    851   ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return
................................................................................
   877    872     rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
   878    873     if( rc==SQLITE_OK ){
   879    874       int rc2;
   880    875       while( SQLITE_ROW==sqlite3_step(pScan) ){
   881    876         int i;
   882    877         ctx.iRowid = sqlite3_column_int64(pScan, 0);
   883    878         ctx.szCol = 0;
   884         -      rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
          879  +      if( pConfig->bColumnsize ){
          880  +        rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
          881  +      }
   885    882         for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
   886    883           if( pConfig->abUnindexed[i] ) continue;
   887    884           ctx.iCol = i;
   888    885           ctx.szCol = 0;
   889         -        rc = sqlite3Fts5Tokenize(
   890         -            pConfig, 
          886  +        rc = sqlite3Fts5Tokenize(pConfig, 
          887  +            FTS5_TOKENIZE_DOCUMENT,
   891    888               (const char*)sqlite3_column_text(pScan, i+1),
   892    889               sqlite3_column_bytes(pScan, i+1),
   893    890               (void*)&ctx,
   894    891               fts5StorageIntegrityCallback
   895    892           );
   896         -        if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;
          893  +        if( pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
          894  +          rc = FTS5_CORRUPT;
          895  +        }
   897    896           aTotalSize[i] += ctx.szCol;
   898    897         }
   899    898         if( rc!=SQLITE_OK ) break;
   900    899       }
   901    900       rc2 = sqlite3_reset(pScan);
   902    901       if( rc==SQLITE_OK ) rc = rc2;
   903    902     }
................................................................................
   914    913     /* Check that the %_docsize and %_content tables contain the expected
   915    914     ** number of rows.  */
   916    915     if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
   917    916       i64 nRow;
   918    917       rc = fts5StorageCount(p, "content", &nRow);
   919    918       if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
   920    919     }
   921         -  if( rc==SQLITE_OK ){
          920  +  if( rc==SQLITE_OK && pConfig->bColumnsize ){
   922    921       i64 nRow;
   923    922       rc = fts5StorageCount(p, "docsize", &nRow);
   924    923       if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
   925    924     }
   926    925   
   927    926     /* Pass the expected checksum down to the FTS index module. It will
   928    927     ** verify, amongst other things, that it matches the checksum generated by
................................................................................
   998    997   ** each table column. This function reads the %_docsize record for the
   999    998   ** specified rowid and populates aCol[] with the results.
  1000    999   **
  1001   1000   ** An SQLite error code is returned if an error occurs, or SQLITE_OK
  1002   1001   ** otherwise.
  1003   1002   */
  1004   1003   int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
  1005         -  int nCol = p->pConfig->nCol;
  1006         -  sqlite3_stmt *pLookup = 0;
  1007         -  int rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
         1004  +  int nCol = p->pConfig->nCol;    /* Number of user columns in table */
         1005  +  sqlite3_stmt *pLookup = 0;      /* Statement to query %_docsize */
         1006  +  int rc;                         /* Return Code */
         1007  +
         1008  +  assert( p->pConfig->bColumnsize );
         1009  +  rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
  1008   1010     if( rc==SQLITE_OK ){
  1009   1011       int bCorrupt = 1;
  1010   1012       sqlite3_bind_int64(pLookup, 1, iRowid);
  1011   1013       if( SQLITE_ROW==sqlite3_step(pLookup) ){
  1012   1014         const u8 *aBlob = sqlite3_column_blob(pLookup, 0);
  1013   1015         int nBlob = sqlite3_column_bytes(pLookup, 0);
  1014   1016         if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){

Changes to ext/fts5/fts5_tcl.c.

   137    137   typedef struct F5tAuxData F5tAuxData;
   138    138   struct F5tAuxData {
   139    139     Tcl_Obj *pObj;
   140    140   };
   141    141   
   142    142   static int xTokenizeCb(
   143    143     void *pCtx, 
          144  +  int tflags,
   144    145     const char *zToken, int nToken, 
   145    146     int iStart, int iEnd
   146    147   ){
   147    148     F5tFunction *p = (F5tFunction*)pCtx;
   148    149     Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript);
   149    150     int rc;
   150    151   
................................................................................
   580    581     Tcl_Obj *pRet;
   581    582     int bSubst;
   582    583     const char *zInput;
   583    584   };
   584    585   
   585    586   static int xTokenizeCb2(
   586    587     void *pCtx, 
          588  +  int tflags,
   587    589     const char *zToken, int nToken, 
   588    590     int iStart, int iEnd
   589    591   ){
   590    592     F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
   591    593     if( p->bSubst ){
   592    594       Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
   593    595       Tcl_ListObjAppendElement(
................................................................................
   662    664     }
   663    665   
   664    666     pRet = Tcl_NewObj();
   665    667     Tcl_IncrRefCount(pRet);
   666    668     ctx.bSubst = (objc==5);
   667    669     ctx.pRet = pRet;
   668    670     ctx.zInput = zText;
   669         -  rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2);
          671  +  rc = tokenizer.xTokenize(
          672  +      pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, zText, nText, xTokenizeCb2
          673  +  );
   670    674     tokenizer.xDelete(pTok);
   671    675     if( rc!=SQLITE_OK ){
   672    676       Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0);
   673    677       Tcl_DecrRefCount(pRet);
   674    678       return TCL_ERROR;
   675    679     }
   676    680   
................................................................................
   684    688   /*************************************************************************
   685    689   ** Start of tokenizer wrapper.
   686    690   */
   687    691   
   688    692   typedef struct F5tTokenizerContext F5tTokenizerContext;
   689    693   typedef struct F5tTokenizerCb F5tTokenizerCb;
   690    694   typedef struct F5tTokenizerModule F5tTokenizerModule;
   691         -typedef struct F5tTokenizerModule F5tTokenizerInstance;
          695  +typedef struct F5tTokenizerInstance F5tTokenizerInstance;
   692    696   
   693    697   struct F5tTokenizerContext {
   694    698     void *pCtx;
   695         -  int (*xToken)(void*, const char*, int, int, int);
          699  +  int (*xToken)(void*, int, const char*, int, int, int);
   696    700   };
   697    701   
   698    702   struct F5tTokenizerModule {
   699    703     Tcl_Interp *interp;
          704  +  Tcl_Obj *pScript;
          705  +  F5tTokenizerContext *pContext;
          706  +};
          707  +
          708  +struct F5tTokenizerInstance {
          709  +  Tcl_Interp *interp;
   700    710     Tcl_Obj *pScript;
   701    711     F5tTokenizerContext *pContext;
   702    712   };
   703    713   
   704    714   static int f5tTokenizerCreate(
   705    715     void *pCtx, 
   706    716     const char **azArg, 
................................................................................
   744    754     Tcl_DecrRefCount(pInst->pScript);
   745    755     ckfree((char *)pInst);
   746    756   }
   747    757   
   748    758   static int f5tTokenizerTokenize(
   749    759     Fts5Tokenizer *p, 
   750    760     void *pCtx,
          761  +  int flags,
   751    762     const char *pText, int nText, 
   752         -  int (*xToken)(void*, const char*, int, int, int)
          763  +  int (*xToken)(void*, int, const char*, int, int, int)
   753    764   ){
   754    765     F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
   755    766     void *pOldCtx;
   756         -  int (*xOldToken)(void*, const char*, int, int, int);
          767  +  int (*xOldToken)(void*, int, const char*, int, int, int);
   757    768     Tcl_Obj *pEval;
   758    769     int rc;
          770  +  const char *zFlags;
   759    771   
   760    772     pOldCtx = pInst->pContext->pCtx;
   761    773     xOldToken = pInst->pContext->xToken;
   762    774   
          775  +  pInst->pContext->pCtx = pCtx;
          776  +  pInst->pContext->xToken = xToken;
          777  +
          778  +  assert( 
          779  +      flags==FTS5_TOKENIZE_DOCUMENT
          780  +   || flags==FTS5_TOKENIZE_AUX
          781  +   || flags==FTS5_TOKENIZE_QUERY
          782  +   || flags==(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)
          783  +  );
   763    784     pEval = Tcl_DuplicateObj(pInst->pScript);
   764    785     Tcl_IncrRefCount(pEval);
   765         -  rc = Tcl_ListObjAppendElement(
   766         -      pInst->interp, pEval, Tcl_NewStringObj(pText, nText)
   767         -  );
   768         -  if( rc==TCL_OK ){
   769         -    rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);
          786  +  switch( flags ){
          787  +    case FTS5_TOKENIZE_DOCUMENT:
          788  +      zFlags = "document";
          789  +      break;
          790  +    case FTS5_TOKENIZE_AUX:
          791  +      zFlags = "aux";
          792  +      break;
          793  +    case FTS5_TOKENIZE_QUERY:
          794  +      zFlags = "query";
          795  +      break;
          796  +    case (FTS5_TOKENIZE_PREFIX | FTS5_TOKENIZE_QUERY):
          797  +      zFlags = "prefixquery";
          798  +      break;
          799  +    default:
          800  +      assert( 0 );
          801  +      zFlags = "invalid";
          802  +      break;
   770    803     }
          804  +
          805  +  Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(zFlags, -1));
          806  +  Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(pText,nText));
          807  +  rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);
   771    808     Tcl_DecrRefCount(pEval);
   772    809   
   773    810     pInst->pContext->pCtx = pOldCtx;
   774    811     pInst->pContext->xToken = xOldToken;
   775    812     return rc;
   776    813   }
   777    814   
   778    815   /*
   779         -** sqlite3_fts5_token TEXT START END POS
          816  +** sqlite3_fts5_token ?-colocated? TEXT START END
   780    817   */
   781    818   static int f5tTokenizerReturn(
   782    819     void * clientData,
   783    820     Tcl_Interp *interp,
   784    821     int objc,
   785    822     Tcl_Obj *CONST objv[]
   786    823   ){
   787    824     F5tTokenizerContext *p = (F5tTokenizerContext*)clientData;
   788    825     int iStart;
   789    826     int iEnd;
   790    827     int nToken;
          828  +  int tflags = 0;
   791    829     char *zToken;
   792    830     int rc;
   793    831   
   794         -  assert( p );
   795         -  if( objc!=4 ){
   796         -    Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END");
          832  +  if( objc==5 ){
          833  +    int nArg;
          834  +    char *zArg = Tcl_GetStringFromObj(objv[1], &nArg);
          835  +    if( nArg<=10 && nArg>=2 && memcmp("-colocated", zArg, nArg)==0 ){
          836  +      tflags |= FTS5_TOKEN_COLOCATED;
          837  +    }else{
          838  +      goto usage;
          839  +    }
          840  +  }else if( objc!=4 ){
          841  +    goto usage;
          842  +  }
          843  +
          844  +  zToken = Tcl_GetStringFromObj(objv[objc-3], &nToken);
          845  +  if( Tcl_GetIntFromObj(interp, objv[objc-2], &iStart) 
          846  +   || Tcl_GetIntFromObj(interp, objv[objc-1], &iEnd) 
          847  +  ){
   797    848       return TCL_ERROR;
   798    849     }
          850  +
   799    851     if( p->xToken==0 ){
   800    852       Tcl_AppendResult(interp, 
   801    853           "sqlite3_fts5_token may only be used by tokenizer callback", 0
   802    854       );
   803    855       return TCL_ERROR;
   804    856     }
   805    857   
   806         -  zToken = Tcl_GetStringFromObj(objv[1], &nToken);
   807         -  if( Tcl_GetIntFromObj(interp, objv[2], &iStart) 
   808         -   || Tcl_GetIntFromObj(interp, objv[3], &iEnd) 
   809         -  ){
   810         -    return TCL_ERROR;
   811         -  }
   812         -
   813         -  rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd);
          858  +  rc = p->xToken(p->pCtx, tflags, zToken, nToken, iStart, iEnd);
   814    859     Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
   815    860     return TCL_OK;
          861  +
          862  + usage:
          863  +  Tcl_WrongNumArgs(interp, 1, objv, "?-colocated? TEXT START END");
          864  +  return TCL_ERROR;
   816    865   }
   817    866   
   818    867   static void f5tDelTokenizer(void *pCtx){
   819    868     F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx;
   820    869     Tcl_DecrRefCount(pMod->pScript);
   821    870     ckfree((char *)pMod);
   822    871   }

Changes to ext/fts5/fts5_test_mi.c.

   348    348     Fts5Context *pFts,              /* First arg to pass to pApi functions */
   349    349     sqlite3_context *pCtx,          /* Context for returning result/error */
   350    350     int nVal,                       /* Number of values in apVal[] array */
   351    351     sqlite3_value **apVal           /* Array of trailing arguments */
   352    352   ){
   353    353     const char *zArg;
   354    354     Fts5MatchinfoCtx *p;
   355         -  int rc;
          355  +  int rc = SQLITE_OK;
   356    356   
   357    357     if( nVal>0 ){
   358    358       zArg = (const char*)sqlite3_value_text(apVal[0]);
   359    359     }else{
   360    360       zArg = "pcx";
   361    361     }
   362    362   
   363    363     p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
   364    364     if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
   365    365       p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
   366         -    pApi->xSetAuxdata(pFts, p, sqlite3_free);
   367         -    if( p==0 ) return;
          366  +    if( p==0 ){
          367  +      rc = SQLITE_NOMEM;
          368  +    }else{
          369  +      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
          370  +    }
   368    371     }
   369    372   
   370         -  rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
          373  +  if( rc==SQLITE_OK ){
          374  +    rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
          375  +  }
   371    376     if( rc!=SQLITE_OK ){
   372    377       sqlite3_result_error_code(pCtx, rc);
   373    378     }else{
   374    379       /* No error has occurred, so return a copy of the array of integers. */
   375    380       int nByte = p->nRet * sizeof(u32);
   376    381       sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
   377    382     }

Changes to ext/fts5/fts5_tokenize.c.

   112    112   
   113    113   /*
   114    114   ** Tokenize some text using the ascii tokenizer.
   115    115   */
   116    116   static int fts5AsciiTokenize(
   117    117     Fts5Tokenizer *pTokenizer,
   118    118     void *pCtx,
          119  +  int flags,
   119    120     const char *pText, int nText,
   120         -  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
          121  +  int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
   121    122   ){
   122    123     AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
   123    124     int rc = SQLITE_OK;
   124    125     int ie;
   125    126     int is = 0;
   126    127   
   127    128     char aFold[64];
................................................................................
   154    155           break;
   155    156         }
   156    157         nFold = nByte*2;
   157    158       }
   158    159       asciiFold(pFold, &pText[is], nByte);
   159    160   
   160    161       /* Invoke the token callback */
   161         -    rc = xToken(pCtx, pFold, nByte, is, ie);
          162  +    rc = xToken(pCtx, 0, pFold, nByte, is, ie);
   162    163       is = ie+1;
   163    164     }
   164    165     
   165    166     if( pFold!=aFold ) sqlite3_free(pFold);
   166    167     if( rc==SQLITE_DONE ) rc = SQLITE_OK;
   167    168     return rc;
   168    169   }
................................................................................
   381    382     assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
   382    383     return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode);
   383    384   }
   384    385   
   385    386   static int fts5UnicodeTokenize(
   386    387     Fts5Tokenizer *pTokenizer,
   387    388     void *pCtx,
          389  +  int flags,
   388    390     const char *pText, int nText,
   389         -  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
          391  +  int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
   390    392   ){
   391    393     Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
   392    394     int rc = SQLITE_OK;
   393    395     unsigned char *a = p->aTokenChar;
   394    396   
   395    397     unsigned char *zTerm = (unsigned char*)&pText[nText];
   396    398     unsigned char *zCsr = (unsigned char *)pText;
................................................................................
   471    473           }
   472    474           zCsr++;
   473    475         }
   474    476         ie = zCsr - (unsigned char*)pText;
   475    477       }
   476    478   
   477    479       /* Invoke the token callback */
   478         -    rc = xToken(pCtx, aFold, zOut-aFold, is, ie);
          480  +    rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie); 
   479    481     }
   480    482     
   481    483    tokenize_done:
   482    484     if( rc==SQLITE_DONE ) rc = SQLITE_OK;
   483    485     return rc;
   484    486   }
   485    487   
................................................................................
   549    551     *ppOut = (Fts5Tokenizer*)pRet;
   550    552     return rc;
   551    553   }
   552    554   
   553    555   typedef struct PorterContext PorterContext;
   554    556   struct PorterContext {
   555    557     void *pCtx;
   556         -  int (*xToken)(void*, const char*, int, int, int);
          558  +  int (*xToken)(void*, int, const char*, int, int, int);
   557    559     char *aBuf;
   558    560   };
   559    561   
   560    562   typedef struct PorterRule PorterRule;
   561    563   struct PorterRule {
   562    564     const char *zSuffix;
   563    565     int nSuffix;
................................................................................
  1114   1116         *pnBuf = nBuf-1;
  1115   1117       }
  1116   1118     }
  1117   1119   }
  1118   1120   
  1119   1121   static int fts5PorterCb(
  1120   1122     void *pCtx, 
         1123  +  int tflags,
  1121   1124     const char *pToken, 
  1122   1125     int nToken, 
  1123   1126     int iStart, 
  1124   1127     int iEnd
  1125   1128   ){
  1126   1129     PorterContext *p = (PorterContext*)pCtx;
  1127   1130   
................................................................................
  1171   1174     /* Step 5b. */
  1172   1175     if( nBuf>1 && aBuf[nBuf-1]=='l' 
  1173   1176      && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) 
  1174   1177     ){
  1175   1178       nBuf--;
  1176   1179     }
  1177   1180   
  1178         -  return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd);
         1181  +  return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd);
  1179   1182   
  1180   1183    pass_through:
  1181         -  return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd);
         1184  +  return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
  1182   1185   }
  1183   1186   
  1184   1187   /*
  1185   1188   ** Tokenize using the porter tokenizer.
  1186   1189   */
  1187   1190   static int fts5PorterTokenize(
  1188   1191     Fts5Tokenizer *pTokenizer,
  1189   1192     void *pCtx,
         1193  +  int flags,
  1190   1194     const char *pText, int nText,
  1191         -  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
         1195  +  int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
  1192   1196   ){
  1193   1197     PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
  1194   1198     PorterContext sCtx;
  1195   1199     sCtx.xToken = xToken;
  1196   1200     sCtx.pCtx = pCtx;
  1197   1201     sCtx.aBuf = p->aBuf;
  1198   1202     return p->tokenizer.xTokenize(
  1199         -      p->pTokenizer, (void*)&sCtx, pText, nText, fts5PorterCb
         1203  +      p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb
  1200   1204     );
  1201   1205   }
  1202   1206   
  1203   1207   /*
  1204   1208   ** Register all built-in tokenizers with FTS5.
  1205   1209   */
  1206   1210   int sqlite3Fts5TokenizerInit(fts5_api *pApi){
................................................................................
  1221   1225           aBuiltin[i].zName,
  1222   1226           (void*)pApi,
  1223   1227           &aBuiltin[i].x,
  1224   1228           0
  1225   1229       );
  1226   1230     }
  1227   1231   
  1228         -  return SQLITE_OK;
         1232  +  return rc;
  1229   1233   }
  1230   1234   
  1231   1235   

Changes to ext/fts5/test/fts5_common.tcl.

   290    290   proc OR {args} {
   291    291     sort_poslist [concat {*}$args]
   292    292   }
   293    293   proc NOT {a b} {
   294    294     if {[llength $b]>0} { return [list] }
   295    295     return $a
   296    296   }
          297  +
          298  +#-------------------------------------------------------------------------
          299  +# The [fts5_tokenize_split] command is similar to [split], except that it
          300  +# also provides the start and end offsets of each token. For example:
          301  +#
          302  +#   [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
          303  +#
          304  +
          305  +proc gobble_whitespace {textvar} {
          306  +  upvar $textvar t
          307  +  regexp {([ ]*)(.*)} $t -> space t
          308  +  return [string length $space]
          309  +}
          310  +
          311  +proc gobble_text {textvar wordvar} {
          312  +  upvar $textvar t
          313  +  upvar $wordvar w
          314  +  regexp {([^ ]*)(.*)} $t -> w t
          315  +  return [string length $w]
          316  +}
          317  +
          318  +proc fts5_tokenize_split {text} {
          319  +  set token ""
          320  +  set ret [list]
          321  +  set iOff [gobble_whitespace text]
          322  +  while {[set nToken [gobble_text text word]]} {
          323  +    lappend ret $word $iOff [expr $iOff+$nToken]
          324  +    incr iOff $nToken
          325  +    incr iOff [gobble_whitespace text]
          326  +  }
          327  +
          328  +  set ret
          329  +}
   297    330   

Changes to ext/fts5/test/fts5aa.test.

   339    339   } {}
   340    340   
   341    341   do_execsql_test 13.5 {
   342    342     SELECT rowid FROM t1 WHERE t1 MATCH 'o';
   343    343   } {1}
   344    344   
   345    345   do_execsql_test 13.6 {
   346         -  SELECT rowid FROM t1 WHERE t1 MATCH '.';
          346  +  SELECT rowid FROM t1 WHERE t1 MATCH '""';
   347    347   } {}
   348    348   
   349    349   #-------------------------------------------------------------------------
   350    350   #
   351    351   reset_db
   352    352   do_execsql_test 14.1 {
   353    353     CREATE VIRTUAL TABLE t1 USING fts5(x, y);
................................................................................
   501    501   }
   502    502   do_execsql_test 18.2 {
   503    503     SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
   504    504   } {1 1}
   505    505   do_execsql_test 18.3 {
   506    506     SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
   507    507   } {1 1}
          508  +
          509  +#--------------------------------------------------------------------
          510  +# fts5 table in the temp schema.
          511  +#
          512  +reset_db
          513  +do_execsql_test 19.0 {
          514  +  CREATE VIRTUAL TABLE temp.t1 USING fts5(x);
          515  +  INSERT INTO t1 VALUES('x y z');
          516  +  INSERT INTO t1 VALUES('w x 1');
          517  +  SELECT rowid FROM t1 WHERE t1 MATCH 'x';
          518  +} {1 2}
          519  +
          520  +#--------------------------------------------------------------------
          521  +# Test that 6 and 7 byte varints can be read.
          522  +#
          523  +reset_db
          524  +do_execsql_test 20.0 {
          525  +  CREATE VIRTUAL TABLE temp.tmp USING fts5(x);
          526  +}
          527  +set ::ids [list \
          528  +  0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43]
          529  +]
          530  +do_test 20.1 {
          531  +  foreach id $::ids {
          532  +    execsql { INSERT INTO tmp(rowid, x) VALUES($id, 'x y z') }
          533  +  }
          534  +  execsql { SELECT rowid FROM tmp WHERE tmp MATCH 'y' }
          535  +} $::ids
          536  +
          537  +
   508    538   
   509    539   finish_test
   510    540   
   511    541   

Changes to ext/fts5/test/fts5columnsize.test.

   130    130   }
   131    131   do_execsql_test 3.2.1 {
   132    132     SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a'
   133    133   } {
   134    134     1 {-1 0 -1} 2 {-1 0 -1}
   135    135   }
   136    136   
          137  +#-------------------------------------------------------------------------
          138  +# Test the integrity-check
          139  +#
          140  +do_execsql_test 4.1.1 {
          141  +  CREATE VIRTUAL TABLE t5 USING fts5(x, columnsize=0);
          142  +  INSERT INTO t5 VALUES('1 2 3 4');
          143  +  INSERT INTO t5 VALUES('2 4 6 8');
          144  +}
          145  +
          146  +breakpoint
          147  +do_execsql_test 4.1.2 {
          148  +  INSERT INTO t5(t5) VALUES('integrity-check');
          149  +}
   137    150   
   138    151   finish_test

Changes to ext/fts5/test/fts5ea.test.

    83     83   #-------------------------------------------------------------------------
    84     84   # Experiment with a tokenizer that considers " to be a token character.
    85     85   #
    86     86   do_execsql_test 4.0 {
    87     87     SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"');
    88     88   } {{"a" AND """"}}
    89     89   
           90  +#-------------------------------------------------------------------------
           91  +# Check that an unsupported operator character (|) is reported as a syntax error.
           92  +#
           93  +do_catchsql_test 5.0 {
           94  +  SELECT fts5_expr('abc | def');
           95  +} {1 {fts5: syntax error near "|"}}
    90     96   
    91     97   
    92     98   
    93     99   finish_test

Changes to ext/fts5/test/fts5eb.test.

    26     26   
    27     27   proc do_syntax_test {tn expr res} {
    28     28     set ::se_expr $expr
    29     29     do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res]
    30     30   }
    31     31   
    32     32   foreach {tn expr res} {
    33         -  1  {abc}                           {"abc"}
    34         -  2  {abc .}                         {"abc"}
    35         -  3  {.}                             {}
    36         -  4  {abc OR .}                      {"abc"}
    37         -  5  {abc NOT .}                     {"abc"}
    38         -  6  {abc AND .}                     {"abc"}
    39         -  7  {. OR abc}                      {"abc"}
    40         -  8  {. NOT abc}                     {"abc"}
    41         -  9  {. AND abc}                     {"abc"}
    42         -  10 {abc + . + def}                 {"abc" + "def"}
    43         -  11 {abc . def}                     {"abc" AND "def"}
    44         -  12 {r+e OR w}                      {"r" + "e" OR "w"}
           33  +  1  {abc}                            {"abc"}
           34  +  2  {abc ""}                         {"abc"}
           35  +  3  {""}                             {}
           36  +  4  {abc OR ""}                      {"abc"}
           37  +  5  {abc NOT ""}                     {"abc"}
           38  +  6  {abc AND ""}                     {"abc"}
           39  +  7  {"" OR abc}                      {"abc"}
           40  +  8  {"" NOT abc}                     {"abc"}
           41  +  9  {"" AND abc}                     {"abc"}
           42  +  10 {abc + "" + def}                 {"abc" + "def"}
           43  +  11 {abc "" def}                     {"abc" AND "def"}
           44  +  12 {r+e OR w}                       {"r" + "e" OR "w"}
    45     45   } {
    46     46     do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
    47     47   }
    48     48   
    49     49   do_catchsql_test 2.1 {
    50     50     SELECT fts5_expr()
    51     51   } {1 {wrong number of arguments to function fts5_expr}}

Changes to ext/fts5/test/fts5fault6.test.

    17     17   set testprefix fts5fault6
    18     18   
    19     19   # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
    20     20   ifcapable !fts5 {
    21     21     finish_test
    22     22     return
    23     23   }
           24  +
    24     25   
    25     26   #-------------------------------------------------------------------------
    26     27   # OOM while rebuilding an FTS5 table.
    27     28   #
    28     29   do_execsql_test 1.0 {
    29     30     CREATE VIRTUAL TABLE tt USING fts5(a, b);
    30     31     INSERT INTO tt VALUES('c d c g g f', 'a a a d g a');
................................................................................
   144    145     db eval { 
   145    146       CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc");
   146    147     }
   147    148   } -test {
   148    149     faultsim_test_result {0 {}}
   149    150   }
   150    151   
          152  +#-------------------------------------------------------------------------
          153  +#
          154  +# 5.2.* OOM while running a query that includes synonyms and matchinfo().
          155  +#
          156  +# 5.3.* OOM while running a query that returns a row containing instances
          157  +#       of more than 4 synonyms for a single term.
          158  +#
          159  +proc mit {blob} {
          160  +  set scan(littleEndian) i*
          161  +  set scan(bigEndian) I*
          162  +  binary scan $blob $scan($::tcl_platform(byteOrder)) r
          163  +  return $r
          164  +}
          165  +proc tcl_tokenize {tflags text} {
          166  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
          167  +    sqlite3_fts5_token $w $iStart $iEnd
          168  +    if {$tflags=="query" && [string length $w]==1} {
          169  +      for {set i 2} {$i < 7} {incr i} {
          170  +        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
          171  +      }
          172  +    }
          173  +  }
          174  +}
          175  +proc tcl_create {args} { return "tcl_tokenize" }
          176  +reset_db
          177  +sqlite3_fts5_create_tokenizer db tcl tcl_create
          178  +db func mit mit
          179  +sqlite3_fts5_register_matchinfo db
          180  +do_test 5.0 {
          181  +  execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl) }
          182  +  execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 32) }
          183  +  foreach {rowid text} {
          184  +    1 {aaaa cc b aaaaa cc aa} 
          185  +    2 {aa aa bb a bbb}
          186  +    3 {bb aaaaa aaaaa b aaaa aaaaa}
          187  +    4 {aa a b aaaa aa}
          188  +    5 {aa b ccc aaaaa cc}
          189  +    6 {aa aaaaa bbbb cc aaa}
          190  +    7 {aaaaa aa aa ccccc bb}
          191  +    8 {ccc bbbbb ccccc bbb c}
          192  +    9 {cccccc bbbb a aaa cccc c}
          193  +
          194  +    20 {ddd f ddd eeeee fff ffff eeee ddd fff eeeee dddddd eeee}
          195  +    21 {fffff eee dddd fffff dd ee ee eeeee eee eeeeee ee dd e}
          196  +    22 {fffff d eeee dddd fffff dddddd ffff ddddd eeeee ee eee dddd ddddd}
          197  +    23 {ddddd fff ddd eeeee ffff eeee ddd ff ff ffffff eeeeee dddd ffffff}
          198  +    24 {eee dd ee dddd dddd eeeeee e eee fff ffff}
          199  +    25 {ddddd ffffff dddddd fff ddd ddddd ddd f eeee fff dddd f}
          200  +    26 {f ffff fff fff eeeeee dddd d dddddd ddddd eee ff eeeee}
          201  +    27 {eee fff dddddd eeeee eeeee dddd ddddd ffff f eeeee eee dddddd ddddd d}
          202  +    28 {dd ddddd d ddd d fff d dddd ee dddd ee ddd dddddd dddddd}
          203  +    29 {eeee dddd ee dddd eeee dddd dd fffff f ddd eeeee ddd ee}
          204  +    30 {ff ffffff eeeeee eeeee eee ffffff ff ffff f fffff eeeee}
          205  +    31 {fffff eeeeee dddd eeee eeee eeeeee eee fffff d ddddd ffffff ffff dddddd}
          206  +    32 {dddddd fffff ee eeeeee eeee ee fff dddd fff eeee ffffff eeeeee ffffff}
          207  +    33 {ddddd eeee dd ffff dddddd fff eeee ddddd ffff eeee ddd}
          208  +    34 {ee dddd ddddd dddddd eeee eeeeee f dd ee dddddd ffffff}
          209  +    35 {ee dddd dd eeeeee ddddd eee d eeeeee dddddd eee dddd fffff}
          210  +    36 {eee ffffff ffffff e fffff eeeee ff dddddd dddddd fff}
          211  +    37 {eeeee fffff dddddd dddd ffffff fff f dd ee dd dd eeeee}
          212  +    38 {eeeeee ee d ff eeeeee eeeeee eee eeeee ee ffffff dddd eeee dddddd ee}
          213  +    39 {eeeeee ddd fffff e dddd ee eee eee ffffff ee f d dddd}
          214  +    40 {ffffff dddddd eee ee ffffff eee eeee ddddd ee eeeeee f}
          215  +    41 {ddd ddd fff fffff ee fffff f fff ddddd fffff}
          216  +    42 {dddd ee ff d f ffffff fff ffffff ff dd dddddd f eeee}
          217  +    43 {d dd fff fffff d f fff e dddd ee ee}
          218  +    44 {ff ffff eee ddd d dd ffff dddd d eeee d eeeeee}
          219  +    45 {eeee f eeeee ee e ffff f ddd e fff}
          220  +    46 {ffff d ffff eeee ffff eeeee f ffff ddddd eee}
          221  +    47 {dd dd dddddd ddddd fffff dddddd ddd ddddd eeeeee ffff eeee eee ee}
          222  +    48 {ffff ffff e dddd ffffff dd dd dddd f fffff}
          223  +    49 {ffffff d dddddd ffff eeeee f ffff ffff d dd fffff eeeee}
          224  +
          225  +    50 {x e}
          226  +  } {
          227  +    execsql { INSERT INTO t1(rowid, a) VALUES($rowid, $text) }
          228  +  }
          229  +} {}
          230  +
          231  +set res [list {*}{
          232  +  1 {3 24 8 2 12 6}
          233  +  5 {2 24 8 2 12 6}
          234  +  6 {3 24 8 1 12 6}
          235  +  7 {3 24 8 1 12 6}
          236  +  9 {2 24 8 3 12 6}
          237  +}]
          238  +do_execsql_test 5.1.1 {
          239  +  SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
          240  +} $res
          241  +do_execsql_test 5.1.2 {
          242  +  SELECT count(*) FROM t1 WHERE t1 MATCH 'd e f'
          243  +} 29
          244  +
          245  +faultsim_save_and_close
          246  +do_faultsim_test 5.2 -faults oom* -prep {
          247  +  faultsim_restore_and_reopen
          248  +  sqlite3_fts5_create_tokenizer db tcl tcl_create
          249  +  sqlite3_fts5_register_matchinfo db
          250  +  db func mit mit
          251  +} -body {
          252  +  db eval { 
          253  +    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
          254  +  }
          255  +} -test {
          256  +  faultsim_test_result [list 0 $::res]
          257  +}
          258  +
          259  +do_faultsim_test 5.3 -faults oom* -prep {
          260  +  faultsim_restore_and_reopen
          261  +  sqlite3_fts5_create_tokenizer db tcl tcl_create
          262  +} -body {
          263  +  db eval { 
          264  +    SELECT count(*) FROM t1 WHERE t1 MATCH 'd AND e AND f'
          265  +  }
          266  +} -test {
          267  +  faultsim_test_result {0 29}
          268  +}
          269  +
          270  +do_faultsim_test 5.4 -faults oom* -prep {
          271  +  faultsim_restore_and_reopen
          272  +  sqlite3_fts5_create_tokenizer db tcl tcl_create
          273  +} -body {
          274  +  db eval { 
          275  +    SELECT count(*) FROM t1 WHERE t1 MATCH 'x + e'
          276  +  }
          277  +} -test {
          278  +  faultsim_test_result {0 1}
          279  +}
          280  +
          281  +#-------------------------------------------------------------------------
          282  +catch { db close }
          283  +breakpoint
          284  +do_faultsim_test 6 -faults oom* -prep {
          285  +  sqlite_orig db test.db
          286  +  sqlite3_db_config_lookaside db 0 0 0
          287  +} -body {
          288  +  load_static_extension db fts5
          289  +} -test {
          290  +  faultsim_test_result {0 {}} {1 {initialization of fts5 failed: }}
          291  +  if {$testrc==0} {
          292  +    db eval { CREATE VIRTUAL TABLE temp.t1 USING fts5(x) }
          293  +  }
          294  +  db close
          295  +}
   151    296   finish_test
   152    297   

Added ext/fts5/test/fts5fault7.test.

            1  +# 2015 September 3
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +#
           12  +# This file is focused on OOM errors.
           13  +#
           14  +
           15  +source [file join [file dirname [info script]] fts5_common.tcl]
           16  +source $testdir/malloc_common.tcl
           17  +set testprefix fts5fault2
           18  +
           19  +# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
           20  +ifcapable !fts5 {
           21  +  finish_test
           22  +  return
           23  +}
           24  +
           25  +#-------------------------------------------------------------------------
           26  +# Test fault-injection on a query that uses xColumnSize() on columnsize=0
           27  +# table.
           28  +#
           29  +do_execsql_test 1.0 {
           30  +  CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0);
           31  +  INSERT INTO t1 VALUES('a b c d e f g');
           32  +  INSERT INTO t1 VALUES('a b c d');
           33  +  INSERT INTO t1 VALUES('a b c d e f g h i j');
           34  +}
           35  +
           36  +
           37  +fts5_aux_test_functions db
           38  +do_faultsim_test 1 -faults oom* -body {
           39  +  execsql { SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH 'b' }
           40  +} -test {
           41  +  faultsim_test_result {0 {7 4 10}} {1 SQLITE_NOMEM}
           42  +}
           43  +
           44  +finish_test
           45  +

Changes to ext/fts5/test/fts5matchinfo.test.

   351    351      GROUP BY t10.rowid
   352    352      ORDER BY 1;
   353    353   } {1 1 one 2 2 two 3 3 three}
   354    354     
   355    355   #---------------------------------------------------------------------------
   356    356   # Test the 'y' matchinfo flag
   357    357   #
   358         -set sqlite_fts3_enable_parentheses 1
   359    358   reset_db
          359  +sqlite3_fts5_register_matchinfo db
   360    360   do_execsql_test 11.0 {
   361         -  CREATE VIRTUAL TABLE tt USING fts3(x, y);
          361  +  CREATE VIRTUAL TABLE tt USING fts5(x, y);
   362    362     INSERT INTO tt VALUES('c d a c d d', 'e a g b d a');   -- 1
   363    363     INSERT INTO tt VALUES('c c g a e b', 'c g d g e c');   -- 2
   364    364     INSERT INTO tt VALUES('b e f d e g', 'b a c b c g');   -- 3
   365    365     INSERT INTO tt VALUES('a c f f g d', 'd b f d e g');   -- 4
   366    366     INSERT INTO tt VALUES('g a c f c f', 'd g g b c c');   -- 5
   367    367     INSERT INTO tt VALUES('g a c e b b', 'd b f b g g');   -- 6
   368    368     INSERT INTO tt VALUES('f d a a f c', 'e e a d c f');   -- 7
................................................................................
   428    428       SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
   429    429     } $r2
   430    430   
   431    431     do_execsql_test 11.1.$tn.2  {
   432    432       SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
   433    433     } $r2
   434    434   }
   435         -set sqlite_fts3_enable_parentheses 0
   436    435   
   437    436   #---------------------------------------------------------------------------
   438    437   # Test the 'b' matchinfo flag
   439    438   #
   440         -set sqlite_fts3_enable_parentheses 1
   441    439   reset_db
          440  +sqlite3_fts5_register_matchinfo db
   442    441   db func mit mit
   443    442   
   444    443   do_test 12.0 {
   445    444     set cols [list]
   446    445     for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" }
   447         -  execsql "CREATE VIRTUAL TABLE tt USING fts3([join $cols ,])"
          446  +  execsql "CREATE VIRTUAL TABLE tt USING fts5([join $cols ,])"
   448    447   } {}
   449    448   
   450    449   do_execsql_test 12.1 {
   451    450     INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc');
   452    451     SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc';
   453    452   } [list [list [expr 1<<4] [expr 1<<(45-32)]]]
   454    453   
   455         -set sqlite_fts3_enable_parentheses 0
   456    454   finish_test
   457    455   

Added ext/fts5/test/fts5synonym.test.

            1  +# 2014 Dec 20
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +#
           12  +# Tests focusing on custom tokenizers that support synonyms.
           13  +#
           14  +
           15  +source [file join [file dirname [info script]] fts5_common.tcl]
           16  +set testprefix fts5synonym
           17  +
           18  +# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
           19  +ifcapable !fts5 {
           20  +  finish_test
           21  +  return
           22  +}
           23  +
           24  +foreach S {
           25  +  {zero 0}
           26  +  {one 1 i}
           27  +  {two 2 ii}
           28  +  {three 3 iii}
           29  +  {four 4 iv}
           30  +  {five 5 v}
           31  +  {six 6 vi}
           32  +  {seven 7 vii}
           33  +  {eight 8 viii}
           34  +  {nine 9 ix}
           35  +} {
           36  +  foreach s $S {
           37  +    set o [list]
           38  +    foreach x $S {if {$x!=$s} {lappend o $x}}
           39  +    set ::syn($s) $o
           40  +  }
           41  +}
           42  +
           43  +proc tcl_tokenize {tflags text} {
           44  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
           45  +    sqlite3_fts5_token $w $iStart $iEnd
           46  +  }
           47  +}
           48  +
           49  +proc tcl_create {args} {
           50  +  return "tcl_tokenize"
           51  +}
           52  +
           53  +sqlite3_fts5_create_tokenizer db tcl tcl_create
           54  +
           55  +#-------------------------------------------------------------------------
           56  +# Warm body test for the code in fts5_tcl.c.
           57  +#
           58  +do_execsql_test 1.0 {
           59  +  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
           60  +  INSERT INTO ft VALUES('abc def ghi');
           61  +  INSERT INTO ft VALUES('jkl mno pqr');
           62  +  SELECT rowid, x FROM ft WHERE ft MATCH 'def';
           63  +  SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
           64  +} {1 {abc def ghi} {jkl mno pqr} 2}
           65  +
           66  +#-------------------------------------------------------------------------
           67  +# Test a tokenizer that supports synonyms by adding extra entries to the
           68  +# FTS index.
           69  +#
           70  +
           71  +proc tcl_tokenize {tflags text} {
           72  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
           73  +    sqlite3_fts5_token $w $iStart $iEnd
           74  +    if {$tflags=="document" && [info exists ::syn($w)]} {
           75  +      foreach s $::syn($w) {
           76  +        sqlite3_fts5_token -colo $s $iStart $iEnd
           77  +      }
           78  +    }
           79  +  }
           80  +}
           81  +reset_db
           82  +sqlite3_fts5_create_tokenizer db tcl tcl_create
           83  +
           84  +do_execsql_test 2.0 {
           85  +  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
           86  +  INSERT INTO ft VALUES('one two three');
           87  +  INSERT INTO ft VALUES('four five six');
           88  +  INSERT INTO ft VALUES('eight nine ten');
           89  +} {}
           90  +
           91  +foreach {tn expr res} {
           92  +  1 "3" 1
           93  +  2 "eight OR 8 OR 5" {2 3}
           94  +  3 "10" {}
           95  +  4 "1*" {1}
           96  +  5 "1 + 2" {1}
           97  +} {
           98  +  do_execsql_test 2.1.$tn {
           99  +    SELECT rowid FROM ft WHERE ft MATCH $expr
          100  +  } $res
          101  +}
          102  +
          103  +#-------------------------------------------------------------------------
          104  +# Test some broken tokenizers:
          105  +#
          106  +#   3.1.*: A tokenizer that declares the very first token to be colocated.
          107  +#
          108  +#   3.2.*: A tokenizer that reports two identical tokens at the same position.
          109  +#          This is allowed.
          110  +#
          111  +reset_db
          112  +sqlite3_fts5_create_tokenizer db tcl tcl_create
          113  +proc tcl_tokenize {tflags text} {
          114  +  set bColo 1
          115  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
          116  +    if {$bColo} {
          117  +      sqlite3_fts5_token -colo $w $iStart $iEnd
          118  +      set bColo 0
          119  +    } {
          120  +      sqlite3_fts5_token $w $iStart $iEnd
          121  +    }
          122  +  }
          123  +}
          124  +do_execsql_test 3.1.0 {
          125  +  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
          126  +  INSERT INTO ft VALUES('one two three');
          127  +  CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
          128  +  SELECT * FROM vv;
          129  +} {
          130  +  one 1 1   three 1 1   two 1 1
          131  +}
          132  +
          133  +do_execsql_test 3.1.1 {
          134  +  INSERT INTO ft(ft) VALUES('integrity-check');
          135  +} {}
          136  +
          137  +proc tcl_tokenize {tflags text} {
          138  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
          139  +    sqlite3_fts5_token $w $iStart $iEnd
          140  +  }
          141  +}
          142  +
          143  +do_execsql_test 3.1.2 {
          144  +  SELECT rowid FROM ft WHERE ft MATCH 'one two three'
          145  +} {1}
          146  +
          147  +reset_db
          148  +sqlite3_fts5_create_tokenizer db tcl tcl_create
          149  +proc tcl_tokenize {tflags text} {
          150  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
          151  +    sqlite3_fts5_token $w $iStart $iEnd
          152  +    sqlite3_fts5_token -colo $w $iStart $iEnd
          153  +  }
          154  +}
          155  +do_execsql_test 3.2.0 {
          156  +  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
          157  +  INSERT INTO ft VALUES('one one two three');
          158  +  CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
          159  +  SELECT * FROM vv;
          160  +} {
          161  +  one 1 4   three 1 2   two 1 2
          162  +}
          163  +do_execsql_test 3.2.1 {
          164  +  SELECT rowid FROM ft WHERE ft MATCH 'one';
          165  +} {1}
          166  +do_execsql_test 3.2.2 {
          167  +  SELECT rowid FROM ft WHERE ft MATCH 'one two three';
          168  +} {1}
          169  +do_execsql_test 3.2.3 {
          170  +  SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
          171  +} {1}
          172  +do_execsql_test 3.2.4 {
          173  +  SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
          174  +} {1}
          175  +do_execsql_test 3.2.5 {
          176  +  SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
          177  +} {}
          178  +
          179  +#-------------------------------------------------------------------------
          180  +# Check that expressions with synonyms can be parsed and executed.
          181  +#
          182  +reset_db
          183  +sqlite3_fts5_create_tokenizer db tcl tcl_create
          184  +proc tcl_tokenize {tflags text} {
          185  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
          186  +    sqlite3_fts5_token $w $iStart $iEnd
          187  +    if {$tflags=="query" && [info exists ::syn($w)]} {
          188  +      foreach s $::syn($w) {
          189  +        sqlite3_fts5_token -colo $s $iStart $iEnd
          190  +      }
          191  +    }
          192  +  }
          193  +}
          194  +
          195  +foreach {tn expr res} {
          196  +  1  {abc}                           {"abc"}
          197  +  2  {one}                           {"one"|"i"|"1"}
          198  +  3  {3}                             {"3"|"iii"|"three"}
          199  +  4  {3*}                            {"3"|"iii"|"three" *}
          200  +} {
          201  +  do_execsql_test 4.1.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res]
          202  +}
          203  +
          204  +do_execsql_test 4.2.1 {
          205  +  CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tcl);
          206  +  INSERT INTO xx VALUES('one two');
          207  +  INSERT INTO xx VALUES('three four');
          208  +}
          209  +
          210  +do_execsql_test 4.2.2 {
          211  +  SELECT rowid FROM xx WHERE xx MATCH '2'
          212  +} {1}
          213  +
          214  +do_execsql_test 4.2.3 {
          215  +  SELECT rowid FROM xx WHERE xx MATCH '3'
          216  +} {2}
          217  +
          218  +do_test 5.0 {
          219  +  execsql { 
          220  +    CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tcl)
          221  +  }
          222  +  foreach {rowid a b} {
          223  +    1 {four v 4 i three} {1 3 five five 4 one}
          224  +    2 {5 1 3 4 i} {2 2 v two 4}
          225  +    3 {5 i 5 2 four 4 1} {iii ii five two 1}
          226  +    4 {ii four 4 one 5 three five} {one 5 1 iii 4 3}
          227  +    5 {three i v i four 4 1} {ii five five five iii}
          228  +    6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv}
          229  +    7 {ii ii two three 2 5} {iii i ii iii iii one one}
          230  +    8 {2 ii i two 3 three 2} {two iv v iii 3 five}
          231  +    9 {i 2 iv 3 five four v} {iii 4 three i three ii 1}
          232  +  } {
          233  +    execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) }
          234  +  }
          235  +} {}
          236  +
          237  +
          238  +foreach {tn q res} {
          239  +  1 {one} {
          240  +    1 {four v 4 [i] three} {[1] 3 five five 4 [one]}
          241  +    2 {5 [1] 3 4 [i]} {2 2 v two 4}
          242  +    3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]}
          243  +    4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3}
          244  +    5 {three [i] v [i] four 4 [1]} {ii five five five iii}
          245  +    6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv}
          246  +    7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]}
          247  +    8 {2 ii [i] two 3 three 2} {two iv v iii 3 five}
          248  +    9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]}
          249  +  }
          250  +  2 {five four} {
          251  +    1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one}
          252  +    2 {[5] 1 3 [4] i} {2 2 [v] two [4]}
          253  +    3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1}
          254  +    4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3}
          255  +    5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii}
          256  +    8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]}
          257  +    9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1}
          258  +  }
          259  +  3 {one OR two OR iii OR 4 OR v} {
          260  +    1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]}
          261  +    2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]}
          262  +    3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]}
          263  +    4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]}
          264  +    5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]}
          265  +    6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]}
          266  +    7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]}
          267  +    8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]}
          268  +    9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]}
          269  +  }
          270  +
          271  +  4 {5 + 1} {
          272  +    2 {[5 1] 3 4 i} {2 2 v two 4} 
          273  +    3 {[5 i] 5 2 four 4 1} {iii ii five two 1} 
          274  +    4 {ii four 4 one 5 three five} {one [5 1] iii 4 3} 
          275  +    5 {three i [v i] four 4 1} {ii five five five iii}
          276  +  }
          277  +
          278  +  5 {one + two + three} {
          279  +    7 {ii ii two three 2 5} {iii [i ii iii] iii one one}
          280  +    8 {2 ii [i two 3] three 2} {two iv v iii 3 five}
          281  +  }
          282  +
          283  +  6 {"v v"} {
          284  +    1 {four v 4 i three} {1 3 [five five] 4 one}
          285  +    5 {three i v i four 4 1} {ii [five five five] iii}
          286  +  }
          287  +} {
          288  +  do_execsql_test 5.1.$tn {
          289  +    SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
          290  +    FROM t1 WHERE t1 MATCH $q
          291  +  } $res
          292  +}
          293  +
          294  +# Test that the xQueryPhrase() API works with synonyms.
          295  +#
          296  +proc mit {blob} {
          297  +  set scan(littleEndian) i*
          298  +  set scan(bigEndian) I*
          299  +  binary scan $blob $scan($::tcl_platform(byteOrder)) r
          300  +  return $r
          301  +}
          302  +db func mit mit
          303  +sqlite3_fts5_register_matchinfo db
          304  +
          305  +foreach {tn q res} {
          306  +  1 {one} {
          307  +      1 {1 11 7 2 12 6}     2 {2 11 7 0 12 6} 
          308  +      3 {2 11 7 1 12 6}     4 {1 11 7 2 12 6} 
          309  +      5 {3 11 7 0 12 6}     6 {0 11 7 2 12 6} 
          310  +      7 {0 11 7 3 12 6}     8 {1 11 7 0 12 6} 
          311  +      9 {1 11 7 2 12 6}
          312  +  }
          313  +} {
          314  +  do_execsql_test 5.2.$tn {
          315  +    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
          316  +  } $res
          317  +}
          318  +
          319  +
          320  +#-------------------------------------------------------------------------
          321  +# Test terms with more than 4 synonyms.
          322  +#
          323  +reset_db
          324  +sqlite3_fts5_create_tokenizer db tcl tcl_create
          325  +proc tcl_tokenize {tflags text} {
          326  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
          327  +    sqlite3_fts5_token $w $iStart $iEnd
          328  +    if {$tflags=="query" && [string length $w]==1} {
          329  +      for {set i 2} {$i<=10} {incr i} {
          330  +        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
          331  +      }
          332  +    }
          333  +  }
          334  +}
          335  +
          336  +do_execsql_test 6.0.1 {
          337  +  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl);
          338  +  INSERT INTO t1 VALUES('yy xx qq');
          339  +  INSERT INTO t1 VALUES('yy xx xx');
          340  +}
          341  +do_execsql_test 6.0.2 {
          342  +  SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)';
          343  +} {{yy xx qq}}
          344  +
          345  +do_test 6.0.3 {
          346  +  execsql { 
          347  +    CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl)
          348  +  }
          349  +  foreach {rowid a b} {
          350  +    1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa}
          351  +    2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
          352  +    3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj}
          353  +    4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii}
          354  +    5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n}
          355  +    6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq}
          356  +    7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu}
          357  +    8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss}
          358  +    9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
          359  +  } {
          360  +    execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) }
          361  +  }
          362  +} {}
          363  +
          364  +foreach {tn q res} {
          365  +  1 {a} {
          366  +    1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]}
          367  +    3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj}
          368  +    4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii}
          369  +    6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq}
          370  +  }
          371  +
          372  +  2 {a AND q} {
          373  +    1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
          374  +    6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]}
          375  +  }
          376  +
          377  +  3 {o OR (q AND a)} {
          378  +    1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
          379  +    2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
          380  +    5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n}
          381  +    6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]}
          382  +    9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
          383  +  }
          384  +
          385  +  4 {NEAR(q y, 20)} {
          386  +    1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa}
          387  +    2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]}
          388  +  }
          389  +} {
          390  +  do_execsql_test 6.1.$tn.asc {
          391  +    SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
          392  +    FROM t2 WHERE t2 MATCH $q
          393  +  } $res
          394  +
          395  +  set res2 [list]
          396  +  foreach {rowid a b} $res {
          397  +    set res2 [concat [list $rowid $a $b] $res2]
          398  +  }
          399  +
          400  +  do_execsql_test 6.1.$tn.desc {
          401  +    SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
          402  +    FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC
          403  +  } $res2
          404  +}
          405  +
          406  +do_execsql_test 6.2.1 {
          407  +  INSERT INTO t2(rowid, a, b) VALUES(13,
          408  +      'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy'
          409  +  );
          410  +  SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')')
          411  +  FROM t2 WHERE t2 MATCH 'x OR y'
          412  +} {
          413  +  1 {<yyyy> vvvvv qq oo <yyyyyy> vvvv eee} {ffff uu r qq aaaa}
          414  +  2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq}
          415  +  4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii}
          416  +  13 {<x> <xx> <xxx> <xxxx> <xxxxx> <xxxxxx> <xxxxxxx>}
          417  +     {(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)}
          418  +}
          419  +
          420  +#-------------------------------------------------------------------------
          421  +# Test that the xColumnSize() API is not confused by colocated tokens.
          422  +#
          423  +reset_db
          424  +sqlite3_fts5_create_tokenizer db tcl tcl_create
          425  +fts5_aux_test_functions db
          426  +proc tcl_tokenize {tflags text} {
          427  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
          428  +    sqlite3_fts5_token $w $iStart $iEnd
          429  +    if {[string length $w]==1} {
          430  +      for {set i 2} {$i<=10} {incr i} {
          431  +        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
          432  +      }
          433  +    }
          434  +  }
          435  +}
          436  +
          437  +do_execsql_test 7.0.1 {
          438  +  CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl);
          439  +  INSERT INTO t1 VALUES('0 2 3', '4 5 6 7');
          440  +  INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
          441  +  SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0';
          442  +} {{3 4} {2 10}}
          443  +
          444  +do_execsql_test 7.0.2 {
          445  +  INSERT INTO t1(t1) VALUES('integrity-check');
          446  +}
          447  +
          448  +do_execsql_test 7.1.1 {
          449  +  CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl);
          450  +  INSERT INTO t2 VALUES('0 2 3', '4 5 6 7');
          451  +  INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
          452  +  SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0';
          453  +} {{3 4} {2 10}}
          454  +
          455  +do_execsql_test 7.1.2 {
          456  +  INSERT INTO t2(t2) VALUES('integrity-check');
          457  +}
          458  +
          459  +finish_test
          460  +

Changes to main.mk.

    43     43   ################################################################################
    44     44   
    45     45   # This is how we compile
    46     46   #
    47     47   TCCX =  $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) 
    48     48   TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3
    49     49   TCCX += -I$(TOP)/ext/async -I$(TOP)/ext/userauth
           50  +TCCX += -I$(TOP)/ext/fts5
    50     51   
    51     52   # Object files for the SQLite library.
    52     53   #
    53     54   LIBOBJ+= vdbe.o parse.o \
    54     55            alter.o analyze.o attach.o auth.o \
    55     56            backup.o bitvec.o btmutex.o btree.o build.o \
    56     57            callback.o complete.o ctime.o date.o dbstat.o delete.o expr.o fault.o fkey.o \
................................................................................
   225    226     $(TOP)/ext/userauth/userauth.c \
   226    227     $(TOP)/ext/userauth/sqlite3userauth.h 
   227    228   
   228    229   SRC += \
   229    230     $(TOP)/ext/rbu/sqlite3rbu.c \
   230    231     $(TOP)/ext/rbu/sqlite3rbu.h
   231    232   
          233  +
          234  +# FTS5 things
          235  +#
          236  +FTS5_HDR = \
          237  +   $(TOP)/ext/fts5/fts5.h \
          238  +   $(TOP)/ext/fts5/fts5Int.h \
          239  +   fts5parse.h
          240  +	   
          241  +FTS5_SRC = \
          242  +   $(TOP)/ext/fts5/fts5_aux.c \
          243  +   $(TOP)/ext/fts5/fts5_buffer.c \
          244  +   $(TOP)/ext/fts5/fts5_main.c \
          245  +   $(TOP)/ext/fts5/fts5_config.c \
          246  +   $(TOP)/ext/fts5/fts5_expr.c \
          247  +   $(TOP)/ext/fts5/fts5_hash.c \
          248  +   $(TOP)/ext/fts5/fts5_index.c \
          249  +   fts5parse.c \
          250  +   $(TOP)/ext/fts5/fts5_storage.c \
          251  +   $(TOP)/ext/fts5/fts5_tokenize.c \
          252  +   $(TOP)/ext/fts5/fts5_unicode2.c \
          253  +   $(TOP)/ext/fts5/fts5_varint.c \
          254  +   $(TOP)/ext/fts5/fts5_vocab.c  \
          255  +
   232    256   
   233    257   # Generated source code files
   234    258   #
   235    259   SRC += \
   236    260     keywordhash.h \
   237    261     opcodes.c \
   238    262     opcodes.h \
................................................................................
   632    656   
   633    657   fts3_write.o:	$(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
   634    658   	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c
   635    659   
   636    660   rtree.o:	$(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
   637    661   	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c
   638    662   
   639         -# FTS5 things
   640         -#
   641         -FTS5_SRC = \
   642         -   $(TOP)/ext/fts5/fts5.h \
   643         -   $(TOP)/ext/fts5/fts5Int.h \
   644         -   $(TOP)/ext/fts5/fts5_aux.c \
   645         -   $(TOP)/ext/fts5/fts5_buffer.c \
   646         -   $(TOP)/ext/fts5/fts5_main.c \
   647         -   $(TOP)/ext/fts5/fts5_config.c \
   648         -   $(TOP)/ext/fts5/fts5_expr.c \
   649         -   $(TOP)/ext/fts5/fts5_hash.c \
   650         -   $(TOP)/ext/fts5/fts5_index.c \
   651         -   fts5parse.c fts5parse.h \
   652         -   $(TOP)/ext/fts5/fts5_storage.c \
   653         -   $(TOP)/ext/fts5/fts5_tokenize.c \
   654         -   $(TOP)/ext/fts5/fts5_unicode2.c \
   655         -   $(TOP)/ext/fts5/fts5_varint.c \
   656         -   $(TOP)/ext/fts5/fts5_vocab.c  \
   657         -
   658    663   fts5parse.c:	$(TOP)/ext/fts5/fts5parse.y lemon 
   659    664   	cp $(TOP)/ext/fts5/fts5parse.y .
   660    665   	rm -f fts5parse.h
   661    666   	./lemon $(OPTS) fts5parse.y
   662    667   
   663    668   fts5parse.h: fts5parse.c
   664    669   
   665         -fts5.c: $(FTS5_SRC)
          670  +fts5.c: $(FTS5_SRC) $(FTS5_HDR)
   666    671   	tclsh $(TOP)/ext/fts5/tool/mkfts5c.tcl
   667    672   	cp $(TOP)/ext/fts5/fts5.h .
   668         -
   669    673   
   670    674   userauth.o:	$(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR)
   671    675   	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c
   672    676   
   673    677   sqlite3rbu.o:	$(TOP)/ext/rbu/sqlite3rbu.c $(HDR) $(EXTHDR)
   674    678   	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rbu/sqlite3rbu.c
   675    679