SQLite

Check-in [0b7e4ab8ab]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Modify the fts5 custom tokenizer interface to permit synonym support. The fts5_api.iVersion value is now set to 2. Existing fts5 custom tokenizers (if there are such things) will need to be updated to use the new api version.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 0b7e4ab8abde3ae32459233df115c433dd58d2c1
User & Date: dan 2015-09-04 10:31:51.624
Context
2015-09-04
11:13
Enhance showfts5.tcl so that it can optionally display the number of terms in each segment. (check-in: d648ddd93d user: dan tags: trunk)
10:31
Modify the fts5 custom tokenizer interface to permit synonym support. The fts5_api.iVersion value is now set to 2. Existing fts5 custom tokenizers (if there are such things) will need to be updated to use the new api version. (check-in: 0b7e4ab8ab user: dan tags: trunk)
10:24
Merge latest trunk changes. (Closed-Leaf check-in: 443a5eb8e1 user: dan tags: fts5-incompatible)
04:31
Simplification of the LRU list handling in pcache1. (check-in: 05a3a2cd14 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/extract_api_docs.tcl.
104
105
106
107
108
109
110



111
112
113
114
115
116
117
118
119
120
121
122
123
124
  set res "<dl>\n"
  foreach line [split [string trim $docs] "\n"] {
    regexp {[*][*](.*)} $line -> line
    if {[regexp {^ ?x.*:} $line]} {
      append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n"
      continue
    }



    if {[string trim $line] == ""} {
      append res "<p>\n"
    } else {
      append res "$line\n"
    }
  }
  append res "</dl>\n"

  set res
}

proc get_api_docs {data} {
  # Initialize global array M as a map from Fts5StructureApi member name
  # to member definition. i.e.







>
>
>






<







104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119

120
121
122
123
124
125
126
  set res "<dl>\n"
  foreach line [split [string trim $docs] "\n"] {
    regexp {[*][*](.*)} $line -> line
    if {[regexp {^ ?x.*:} $line]} {
      append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n"
      continue
    }
    if {[regexp {SYNONYM SUPPORT} $line]} {
      set line "</dl><h3>Synonym Support</h3>"
    }
    if {[string trim $line] == ""} {
      append res "<p>\n"
    } else {
      append res "$line\n"
    }
  }


  set res
}

proc get_api_docs {data} {
  # Initialize global array M as a map from Fts5StructureApi member name
  # to member definition. i.e.
204
205
206
207
208
209
210




211
212
213
214
215
216
217
  switch $::extract_api_docs_mode {
    fts5_api {
      output [get_fts5_struct $data "typedef struct fts5_api" "^\};"]
    }

    fts5_tokenizer {
      output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"]




    }

    fts5_extension {
      output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"]
    }

    Fts5ExtensionApi {







>
>
>
>







206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
  switch $::extract_api_docs_mode {
    fts5_api {
      output [get_fts5_struct $data "typedef struct fts5_api" "^\};"]
    }

    fts5_tokenizer {
      output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"]
      output [get_fts5_struct $data \
        "Flags that may be passed as the third argument to xTokenize()" \
        "#define FTS5_TOKEN_COLOCATED"
      ]
    }

    fts5_extension {
      output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"]
    }

    Fts5ExtensionApi {
Changes to ext/fts5/fts5.h.
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
  int (*xColumnCount)(Fts5Context*);
  int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
  int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);

  int (*xTokenize)(Fts5Context*, 
    const char *pText, int nText, /* Text to tokenize */
    void *pCtx,                   /* Context passed to xToken() */
    int (*xToken)(void*, const char*, int, int, int)       /* Callback */
  );

  int (*xPhraseCount)(Fts5Context*);
  int (*xPhraseSize)(Fts5Context*, int iPhrase);

  int (*xInstCount)(Fts5Context*, int *pnInst);
  int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);







|







213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
  int (*xColumnCount)(Fts5Context*);
  int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
  int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);

  int (*xTokenize)(Fts5Context*, 
    const char *pText, int nText, /* Text to tokenize */
    void *pCtx,                   /* Context passed to xToken() */
    int (*xToken)(void*, int, const char*, int, int, int)       /* Callback */
  );

  int (*xPhraseCount)(Fts5Context*);
  int (*xPhraseSize)(Fts5Context*, int iPhrase);

  int (*xInstCount)(Fts5Context*, int *pnInst);
  int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
274
275
276
277
278
279
280
281
282
283
284
























285
286
287
288
289
290
291




292
293
294
295
296
297
298
299
300
301
302
303










































































































304
305
306
307
308
309
310
311

312
313
314

315
316
317
318
319
320
321
322










323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
** xDelete:
**   This function is invoked to delete a tokenizer handle previously
**   allocated using xCreate(). Fts5 guarantees that this function will
**   be invoked exactly once for each successful call to xCreate().
**
** xTokenize:
**   This function is expected to tokenize the nText byte string indicated 
**   by argument pText. pText may not be nul-terminated. The first argument
**   passed to this function is a pointer to an Fts5Tokenizer object returned 
**   by an earlier call to xCreate().
**
























**   For each token in the input string, the supplied callback xToken() must
**   be invoked. The first argument to it should be a copy of the pointer
**   passed as the second argument to xTokenize(). The next two arguments
**   are a pointer to a buffer containing the token text, and the size of
**   the token in bytes. The 4th and 5th arguments are the byte offsets of
**   the first byte of and first byte immediately following the text from 
**   which the token is derived within the input.




**
**   FTS5 assumes the xToken() callback is invoked for each token in the 
**   order that they occur within the input text.
**
**   If an xToken() callback returns any value other than SQLITE_OK, then
**   the tokenization should be abandoned and the xTokenize() method should
**   immediately return a copy of the xToken() return value. Or, if the
**   input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
**   if an error occurs with the xTokenize() implementation itself, it
**   may abandon the tokenization and return any error code other than
**   SQLITE_OK or SQLITE_DONE.
**










































































































*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;
struct fts5_tokenizer {
  int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
  void (*xDelete)(Fts5Tokenizer*);
  int (*xTokenize)(Fts5Tokenizer*, 
      void *pCtx,

      const char *pText, int nText, 
      int (*xToken)(
        void *pCtx,         /* Copy of 2nd argument to xTokenize() */

        const char *pToken, /* Pointer to buffer containing token */
        int nToken,         /* Size of token in bytes */
        int iStart,         /* Byte offset of token within input text */
        int iEnd            /* Byte offset of end of token within input text */
      )
  );
};











/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/

/*************************************************************************
** FTS5 EXTENSION REGISTRATION API
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
  int iVersion;                   /* Currently always set to 1 */

  /* Create a new tokenizer */
  int (*xCreateTokenizer)(
    fts5_api *pApi,
    const char *zName,
    void *pContext,
    fts5_tokenizer *pTokenizer,







|
|
|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


|
|
|
|

>
>
>
>












>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>








>



>








>
>
>
>
>
>
>
>
>
>









|







274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
** xDelete:
**   This function is invoked to delete a tokenizer handle previously
**   allocated using xCreate(). Fts5 guarantees that this function will
**   be invoked exactly once for each successful call to xCreate().
**
** xTokenize:
**   This function is expected to tokenize the nText byte string indicated 
**   by argument pText. pText may or may not be nul-terminated. The first
**   argument passed to this function is a pointer to an Fts5Tokenizer object
**   returned by an earlier call to xCreate().
**
**   The second argument indicates the reason that FTS5 is requesting
**   tokenization of the supplied text. This is always one of the following
**   four values:
**
**   <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
**            or removed from the FTS table. The tokenizer is being invoked to
**            determine the set of tokens to add to (or delete from) the
**            FTS index.
**
**       <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed 
**            against the FTS index. The tokenizer is being called to tokenize 
**            a bareword or quoted string specified as part of the query.
**
**       <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
**            FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
**            followed by a "*" character, indicating that the last token
**            returned by the tokenizer will be treated as a token prefix.
**
**       <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to 
**            satisfy an fts5_api.xTokenize() request made by an auxiliary
**            function. Or an fts5_api.xColumnSize() request made by the same
**            on a columnsize=0 database.  
**   </ul>
**
**   For each token in the input string, the supplied callback xToken() must
**   be invoked. The first argument to it should be a copy of the pointer
**   passed as the second argument to xTokenize(). The third and fourth
**   arguments are a pointer to a buffer containing the token text, and the
**   size of the token in bytes. The 4th and 5th arguments are the byte offsets
**   of the first byte of and first byte immediately following the text from
**   which the token is derived within the input.
**
**   The second argument passed to the xToken() callback ("tflags") should
**   normally be set to 0. The exception is if the tokenizer supports 
**   synonyms. In this case see the discussion below for details.
**
**   FTS5 assumes the xToken() callback is invoked for each token in the 
**   order that they occur within the input text.
**
**   If an xToken() callback returns any value other than SQLITE_OK, then
**   the tokenization should be abandoned and the xTokenize() method should
**   immediately return a copy of the xToken() return value. Or, if the
**   input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
**   if an error occurs with the xTokenize() implementation itself, it
**   may abandon the tokenization and return any error code other than
**   SQLITE_OK or SQLITE_DONE.
**
** SYNONYM SUPPORT
**
**   Custom tokenizers may also support synonyms. Consider a case in which a
**   user wishes to query for a phrase such as "first place". Using the 
**   built-in tokenizers, the FTS5 query 'first + place' will match instances
**   of "first place" within the document set, but not alternative forms
**   such as "1st place". In some applications, it would be better to match
**   all instances of "first place" or "1st place" regardless of which form
**   the user specified in the MATCH query text.
**
**   There are several ways to approach this in FTS5:
**
**   <ol><li> By mapping all synonyms to a single token. In this case, the 
**            In the above example, this means that the tokenizer returns the
**            same token for inputs "first" and "1st". Say that token is in
**            fact "first", so that when the user inserts the document "I won
**            1st place" entries are added to the index for tokens "i", "won",
**            "first" and "place". If the user then queries for '1st + place',
**            the tokenizer substitutes "first" for "1st" and the query works
**            as expected.
**
**       <li> By adding multiple synonyms for a single term to the FTS index.
**            In this case, when tokenizing query text, the tokenizer may 
**            provide multiple synonyms for a single term within the document.
**            FTS5 then queries the index for each synonym individually. For
**            example, faced with the query:
**
**   <codeblock>
**     ... MATCH 'first place'</codeblock>
**
**            the tokenizer offers both "1st" and "first" as synonyms for the
**            first token in the MATCH query and FTS5 effectively runs a query 
**            similar to:
**
**   <codeblock>
**     ... MATCH '(first OR 1st) place'</codeblock>
**
**            except that, for the purposes of auxiliary functions, the query
**            still appears to contain just two phrases - "(first OR 1st)" 
**            being treated as a single phrase.
**
**       <li> By adding multiple synonyms for a single term to the FTS index.
**            Using this method, when tokenizing document text, the tokenizer
**            provides multiple synonyms for each token. So that when a 
**            document such as "I won first place" is tokenized, entries are
**            added to the FTS index for "i", "won", "first", "1st" and
**            "place".
**
**            This way, even if the tokenizer does not provide synonyms
**            when tokenizing query text (it should not - to do would be
**            inefficient), it doesn't matter if the user queries for 
**            'first + place' or '1st + place', as there are entires in the
**            FTS index corresponding to both forms of the first token.
**   </ol>
**
**   Whether is is parsing document or query text, any call to xToken that
**   specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
**   is considered to supply a synonym for the previous token. For example,
**   when parsing the document "I won first place", a tokenizer that supports
**   synonyms would call xToken() 5 times, as follows:
**
**   <codeblock>
**       xToken(pCtx, 0, "i",                      1,  0,  1);
**       xToken(pCtx, 0, "won",                    3,  2,  5);
**       xToken(pCtx, 0, "first",                  5,  6, 11);
**       xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3,  6, 11);
**       xToken(pCtx, 0, "place",                  5, 12, 17);
**</codeblock>
**
**   It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
**   xToken() is called. Multiple synonyms may be specified for a single token
**   by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. 
**   There is no limit to the number of synonyms that may be provided for a
**   single token.
**
**   In many cases, method (1) above is the best approach. It does not add 
**   extra data to the FTS index or require FTS5 to query for multiple terms,
**   so it is efficient in terms of disk space and query speed. However, it
**   does not support prefix queries very well. If, as suggested above, the
**   token "first" is subsituted for "1st" by the tokenizer, then the query:
**
**   <codeblock>
**     ... MATCH '1s*'</codeblock>
**
**   will not match documents that contain the token "1st" (as the tokenizer
**   will probably not map "1s" to any prefix of "first").
**
**   For full prefix support, method (3) may be preferred. In this case, 
**   because the index contains entries for both "first" and "1st", prefix
**   queries such as 'fi*' or '1s*' will match correctly. However, because
**   extra entries are added to the FTS index, this method uses more space
**   within the database.
**
**   Method (2) offers a midpoint between (1) and (3). Using this method,
**   a query such as '1s*' will match documents that contain the literal 
**   token "1st", but not "first" (assuming the tokenizer is not able to
**   provide synonyms for prefixes). However, a non-prefix query like '1st'
**   will match against "1st" and "first". This method does not require
**   extra disk space, as no extra entries are added to the FTS index. 
**   On the other hand, it may require more CPU cycles to run MATCH queries,
**   as separate queries of the FTS index are required for each synonym.
**
**   When using methods (2) or (3), it is important that the tokenizer only
**   provide synonyms when tokenizing document text (method (2)) or query
**   text (method (3)), not both. Doing so will not cause any errors, but is
**   inefficient.
*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;
struct fts5_tokenizer {
  int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
  void (*xDelete)(Fts5Tokenizer*);
  int (*xTokenize)(Fts5Tokenizer*, 
      void *pCtx,
      int flags,            /* Mask of FTS5_TOKENIZE_* flags */
      const char *pText, int nText, 
      int (*xToken)(
        void *pCtx,         /* Copy of 2nd argument to xTokenize() */
        int tflags,         /* Mask of FTS5_TOKEN_* flags */
        const char *pToken, /* Pointer to buffer containing token */
        int nToken,         /* Size of token in bytes */
        int iStart,         /* Byte offset of token within input text */
        int iEnd            /* Byte offset of end of token within input text */
      )
  );
};

/* Flags that may be passed as the third argument to xTokenize() */
#define FTS5_TOKENIZE_QUERY     0x0001
#define FTS5_TOKENIZE_PREFIX    0x0002
#define FTS5_TOKENIZE_DOCUMENT  0x0004
#define FTS5_TOKENIZE_AUX       0x0008

/* Flags that may be passed by the tokenizer implementation back to FTS5
** as the third argument to the supplied xToken callback. */
#define FTS5_TOKEN_COLOCATED    0x0001      /* Same position as prev. token */

/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/

/*************************************************************************
** FTS5 EXTENSION REGISTRATION API
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
  int iVersion;                   /* Currently always set to 2 */

  /* Create a new tokenizer */
  int (*xCreateTokenizer)(
    fts5_api *pApi,
    const char *zName,
    void *pContext,
    fts5_tokenizer *pTokenizer,
Changes to ext/fts5/fts5Int.h.
162
163
164
165
166
167
168

169
170
171
172
173
174
175
176
177
178
);
void sqlite3Fts5ConfigFree(Fts5Config*);

int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);

int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */

  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, const char*, int, int, int)    /* Callback */
);

void sqlite3Fts5Dequote(char *z);

/* Load the contents of the %_config table */
int sqlite3Fts5ConfigLoad(Fts5Config*, int);








>


|







162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
);
void sqlite3Fts5ConfigFree(Fts5Config*);

int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);

int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  int flags,                      /* FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
);

void sqlite3Fts5Dequote(char *z);

/* Load the contents of the %_config table */
int sqlite3Fts5ConfigLoad(Fts5Config*, int);

230
231
232
233
234
235
236


237
238
239
240
241
242
243
244
245
struct Fts5PoslistReader {
  /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
  int iCol;                       /* If (iCol>=0), this column only */
  const u8 *a;                    /* Position list to iterate through */
  int n;                          /* Size of buffer at a[] in bytes */
  int i;                          /* Current offset in a[] */



  /* Output variables */
  int bEof;                       /* Set to true at EOF */
  i64 iPos;                       /* (iCol<<32) + iPos */
};
int sqlite3Fts5PoslistReaderInit(
  int iCol,                       /* If (iCol>=0), this column only */
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
);







>
>

|







231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
struct Fts5PoslistReader {
  /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
  int iCol;                       /* If (iCol>=0), this column only */
  const u8 *a;                    /* Position list to iterate through */
  int n;                          /* Size of buffer at a[] in bytes */
  int i;                          /* Current offset in a[] */

  u8 bFlag;                       /* For client use (any custom purpose) */

  /* Output variables */
  u8 bEof;                        /* Set to true at EOF */
  i64 iPos;                       /* (iCol<<32) + iPos */
};
int sqlite3Fts5PoslistReaderInit(
  int iCol,                       /* If (iCol>=0), this column only */
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
);
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
/*
** Retrieve and clear the current error code, respectively.
*/
int sqlite3Fts5IndexErrcode(Fts5Index*);
void sqlite3Fts5IndexReset(Fts5Index*);

/*
** Get or set the "averages" record.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf);
int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);

/*
** Functions called by the storage module as part of integrity-check.
*/
u64 sqlite3Fts5IndexCksum(Fts5Config*,i64,int,int,const char*,int);
int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum);







|

|







380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
/*
** Retrieve and clear the current error code, respectively.
*/
int sqlite3Fts5IndexErrcode(Fts5Index*);
void sqlite3Fts5IndexReset(Fts5Index*);

/*
** Get or set the "averages" values.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);

/*
** Functions called by the storage module as part of integrity-check.
*/
u64 sqlite3Fts5IndexCksum(Fts5Config*,i64,int,int,const char*,int);
int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum);
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
/* Called during startup to register a UDF with SQLite */
int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);

int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);

int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**);

/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by
** the parser code in fts5parse.y.  */

void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);







|







595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
/* Called during startup to register a UDF with SQLite */
int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);

int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);

int sqlite3Fts5ExprClonePhrase(Fts5Config*, Fts5Expr*, int, Fts5Expr**);

/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by
** the parser code in fts5parse.y.  */

void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);
Changes to ext/fts5/fts5_aux.c.
144
145
146
147
148
149
150

151
152
153
154
155
156
157



158
159
160
161
162
163
164
165
}

/*
** Tokenizer callback used by implementation of highlight() function.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */

  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start offset of token */
  int iEndOff                     /* End offset of token */
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;



  int iPos = p->iPos++;

  if( p->iRangeEnd>0 ){
    if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
    if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
  }

  if( iPos==p->iter.iStart ){







>







>
>
>
|







144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
}

/*
** Tokenizer callback used by implementation of highlight() function.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start offset of token */
  int iEndOff                     /* End offset of token */
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;
  int iPos;

  if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
  iPos = p->iPos++;

  if( p->iRangeEnd>0 ){
    if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
    if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
  }

  if( iPos==p->iter.iStart ){
Changes to ext/fts5/fts5_config.c.
641
642
643
644
645
646
647

648
649
650
651
652
653


654
655
656
657
658
659
660
** the callback returned SQLITE_DONE, this is not an error and this function
** still returns SQLITE_OK. Or, if the tokenization was abandoned early
** because the callback returned another non-zero value, it is assumed
** to be an SQLite error code and returned to the caller.
*/
int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */

  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, const char*, int, int, int)    /* Callback */
){
  if( pText==0 ) return SQLITE_OK;
  return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken);


}

/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of SQL literals followed by a ')' character.
** If it actually is this, return a pointer to the ')'. Otherwise, return
** NULL to indicate a parse error.







>


|


|
>
>







641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
** the callback returned SQLITE_DONE, this is not an error and this function
** still returns SQLITE_OK. Or, if the tokenization was abandoned early
** because the callback returned another non-zero value, it is assumed
** to be an SQLite error code and returned to the caller.
*/
int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  int flags,                      /* FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
){
  if( pText==0 ) return SQLITE_OK;
  return pConfig->pTokApi->xTokenize(
      pConfig->pTok, pCtx, flags, pText, nText, xToken
  );
}

/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of SQL literals followed by a ')' character.
** If it actually is this, return a pointer to the ')'. Otherwise, return
** NULL to indicate a parse error.
Changes to ext/fts5/fts5_expr.c.
18
19
20
21
22
23
24


25
26
27
28
29
30
31
#include "fts5parse.h"

/*
** All token types in the generated fts5parse.h file are greater than 0.
*/
#define FTS5_EOF 0



typedef struct Fts5ExprTerm Fts5ExprTerm;

/*
** Functions generated by lemon from fts5parse.y.
*/
void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));







>
>







18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#include "fts5parse.h"

/*
** All token types in the generated fts5parse.h file are greater than 0.
*/
#define FTS5_EOF 0

#define FTS5_LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))

typedef struct Fts5ExprTerm Fts5ExprTerm;

/*
** Functions generated by lemon from fts5parse.y.
*/
void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
69
70
71
72
73
74
75

76
77
78
79
80
81
82
** An instance of the following structure represents a single search term
** or term prefix.
*/
struct Fts5ExprTerm {
  int bPrefix;                    /* True for a prefix term */
  char *zTerm;                    /* nul-terminated term */
  Fts5IndexIter *pIter;           /* Iterator for this term */

};

/*
** A phrase. One or more terms that must appear in a contiguous sequence
** within a document for it to match.
*/
struct Fts5ExprPhrase {







>







71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
** An instance of the following structure represents a single search term
** or term prefix.
*/
struct Fts5ExprTerm {
  int bPrefix;                    /* True for a prefix term */
  char *zTerm;                    /* nul-terminated term */
  Fts5IndexIter *pIter;           /* Iterator for this term */
  Fts5ExprTerm *pSynonym;         /* Pointer to first in list of synonyms */
};

/*
** A phrase. One or more terms that must appear in a contiguous sequence
** within a document for it to match.
*/
struct Fts5ExprPhrase {
177
178
179
180
181
182
183




184
185
186
187
188
189
190
      }
      pToken->n = (z2 - z);
      break;
    }

    default: {
      const char *z2;




      tok = FTS5_STRING;
      for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
      pToken->n = (z2 - z);
      if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 )  tok = FTS5_OR;
      if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
      if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
      break;







>
>
>
>







180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
      }
      pToken->n = (z2 - z);
      break;
    }

    default: {
      const char *z2;
      if( sqlite3Fts5IsBareword(z[0])==0 ){
        sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
        return FTS5_EOF;
      }
      tok = FTS5_STRING;
      for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
      pToken->n = (z2 - z);
      if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 )  tok = FTS5_OR;
      if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
      if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
      break;
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
  }

  sqlite3_free(sParse.apPhrase);
  *pzErr = sParse.zErr;
  return sParse.rc;
}

/*
** Create a new FTS5 expression by cloning phrase iPhrase of the
** expression passed as the second argument.
*/
int sqlite3Fts5ExprPhraseExpr(
  Fts5Config *pConfig,
  Fts5Expr *pExpr, 
  int iPhrase, 
  Fts5Expr **ppNew
){
  int rc = SQLITE_OK;             /* Return code */
  Fts5ExprPhrase *pOrig;          /* The phrase extracted from pExpr */
  Fts5ExprPhrase *pCopy;          /* Copy of pOrig */
  Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */

  pOrig = pExpr->apExprPhrase[iPhrase];
  pCopy = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(&rc, 
      sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm
  );
  if( pCopy ){
    int i;                          /* Used to iterate through phrase terms */
    Fts5ExprPhrase **apPhrase;
    Fts5ExprNode *pNode;
    Fts5ExprNearset *pNear;

    pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
    apPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprPhrase*)
    );
    pNode = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode));
    pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)
    );

    for(i=0; i<pOrig->nTerm; i++){
      pCopy->aTerm[i].zTerm = sqlite3Fts5Strndup(&rc, pOrig->aTerm[i].zTerm,-1);
      pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
    }

    if( rc==SQLITE_OK ){
      /* All the allocations succeeded. Put the expression object together. */
      pNew->pIndex = pExpr->pIndex;
      pNew->pRoot = pNode;
      pNew->nPhrase = 1;
      pNew->apExprPhrase = apPhrase;
      pNew->apExprPhrase[0] = pCopy;

      pNode->eType = (pOrig->nTerm==1 ? FTS5_TERM : FTS5_STRING);
      pNode->pNear = pNear;

      pNear->nPhrase = 1;
      pNear->apPhrase[0] = pCopy;

      pCopy->nTerm = pOrig->nTerm;
      pCopy->pNode = pNode;
    }else{
      /* At least one allocation failed. Free them all. */
      for(i=0; i<pOrig->nTerm; i++){
        sqlite3_free(pCopy->aTerm[i].zTerm);
      }
      sqlite3_free(pCopy);
      sqlite3_free(pNear);
      sqlite3_free(pNode);
      sqlite3_free(apPhrase);
      sqlite3_free(pNew);
      pNew = 0;
    }
  }

  *ppNew = pNew;
  return rc;
}

/*
** Free the expression node object passed as the only argument.
*/
void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
  if( p ){
    int i;
    for(i=0; i<p->nChild; i++){







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







247
248
249
250
251
252
253









































































254
255
256
257
258
259
260
  }

  sqlite3_free(sParse.apPhrase);
  *pzErr = sParse.zErr;
  return sParse.rc;
}










































































/*
** Free the expression node object passed as the only argument.
*/
void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
  if( p ){
    int i;
    for(i=0; i<p->nChild; i++){
345
346
347
348
349
350
351













































































































352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
  int i;
  for(i=0; i<pColset->nCol; i++){
    if( pColset->aiCol[i]==iCol ) return 1;
  }
  return 0;
}














































































































/*
** All individual term iterators in pPhrase are guaranteed to be valid and
** pointing to the same rowid when this function is called. This function 
** checks if the current rowid really is a match, and if so populates
** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
** is set to true if this is really a match, or false otherwise.
**
** SQLITE_OK is returned if an error occurs, or an SQLite error code 
** otherwise. It is not considered an error code if the current rowid is 
** not a match.
*/
static int fts5ExprPhraseIsMatch(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprColset *pColset,        /* Restrict matches to these columns */
  Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
  int *pbMatch                    /* OUT: Set to true if really a match */
){
  Fts5PoslistWriter writer = {0};
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>













|







279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
  int i;
  for(i=0; i<pColset->nCol; i++){
    if( pColset->aiCol[i]==iCol ) return 1;
  }
  return 0;
}

/*
** Argument pTerm must be a synonym iterator. Return the current rowid
** that it points to.
*/
static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
  i64 iRet = 0;
  int bRetValid = 0;
  Fts5ExprTerm *p;

  assert( pTerm->pSynonym );
  assert( bDesc==0 || bDesc==1 );
  for(p=pTerm; p; p=p->pSynonym){
    if( 0==sqlite3Fts5IterEof(p->pIter) ){
      i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
      if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
        iRet = iRowid;
        bRetValid = 1;
      }
    }
  }

  if( pbEof && bRetValid==0 ) *pbEof = 1;
  return iRet;
}

/*
** Argument pTerm must be a synonym iterator.
*/
static int fts5ExprSynonymPoslist(
  Fts5ExprTerm *pTerm, 
  i64 iRowid,
  int *pbDel,                     /* OUT: Caller should sqlite3_free(*pa) */
  u8 **pa, int *pn
){
  Fts5PoslistWriter writer = {0};
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
  int nIter = 0;
  int nAlloc = 4;
  int rc = SQLITE_OK;
  Fts5ExprTerm *p;

  assert( pTerm->pSynonym );
  for(p=pTerm; p; p=p->pSynonym){
    Fts5IndexIter *pIter = p->pIter;
    if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
      const u8 *a;
      int n;
      i64 dummy;
      rc = sqlite3Fts5IterPoslist(pIter, &a, &n, &dummy);
      if( rc!=SQLITE_OK ) goto synonym_poslist_out;
      if( nIter==nAlloc ){
        int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
        Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
        if( aNew==0 ){
          rc = SQLITE_NOMEM;
          goto synonym_poslist_out;
        }
        memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
        nAlloc = nAlloc*2;
        if( aIter!=aStatic ) sqlite3_free(aIter);
        aIter = aNew;
      }
      sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[nIter]);
      assert( aIter[nIter].bEof==0 );
      nIter++;
    }
  }

  assert( *pbDel==0 );
  if( nIter==1 ){
    *pa = (u8*)aIter[0].a;
    *pn = aIter[0].n;
  }else{
    Fts5PoslistWriter writer = {0};
    Fts5Buffer buf = {0,0,0};
    i64 iPrev = -1;
    while( 1 ){
      int i;
      i64 iMin = FTS5_LARGEST_INT64;
      for(i=0; i<nIter; i++){
        if( aIter[i].bEof==0 ){
          if( aIter[i].iPos==iPrev ){
            if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
          }
          if( aIter[i].iPos<iMin ){
            iMin = aIter[i].iPos;
          }
        }
      }
      if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
      rc = sqlite3Fts5PoslistWriterAppend(&buf, &writer, iMin);
      iPrev = iMin;
    }
    if( rc ){
      sqlite3_free(buf.p);
    }else{
      *pa = buf.p;
      *pn = buf.n;
      *pbDel = 1;
    }
  }

 synonym_poslist_out:
  if( aIter!=aStatic ) sqlite3_free(aIter);
  return rc;
}


/*
** All individual term iterators in pPhrase are guaranteed to be valid and
** pointing to the same rowid when this function is called. This function 
** checks if the current rowid really is a match, and if so populates
** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
** is set to true if this is really a match, or false otherwise.
**
** SQLITE_OK is returned if an error occurs, or an SQLite error code 
** otherwise. It is not considered an error code if the current rowid is 
** not a match.
*/
static int fts5ExprPhraseIsMatch(
  Fts5ExprNode *pNode,            /* Node pPhrase belongs to */
  Fts5ExprColset *pColset,        /* Restrict matches to these columns */
  Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
  int *pbMatch                    /* OUT: Set to true if really a match */
){
  Fts5PoslistWriter writer = {0};
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
384
385
386
387
388
389
390

391
392
393

394
395

396



397


398

399
400
401
402
403
404
405
406
407
  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    if( !aIter ) return SQLITE_NOMEM;
  }


  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){

    i64 dummy;
    int n;

    const u8 *a;



    rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy);


    if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){

      goto ismatch_out;
    }
  }

  while( 1 ){
    int bMatch;
    i64 iPos = aIter[0].iPos;
    do {
      bMatch = 1;







>



>

|
>
|
>
>
>
|
>
>
|
>
|
<







427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451

452
453
454
455
456
457
458
  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    if( !aIter ) return SQLITE_NOMEM;
  }
  memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){
    Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
    i64 dummy;
    int n = 0;
    int bFlag = 0;
    const u8 *a = 0;
    if( pTerm->pSynonym ){
      rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, &bFlag, (u8**)&a, &n);
    }else{
      rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy);
    }
    if( rc!=SQLITE_OK ) goto ismatch_out;
    sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]);
    aIter[i].bFlag = bFlag;
    if( aIter[i].bEof ) goto ismatch_out;

  }

  while( 1 ){
    int bMatch;
    i64 iPos = aIter[0].iPos;
    do {
      bMatch = 1;
427
428
429
430
431
432
433



434
435
436
437
438
439
440
    for(i=0; i<pPhrase->nTerm; i++){
      if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
    }
  }

 ismatch_out:
  *pbMatch = (pPhrase->poslist.n>0);



  if( aIter!=aStatic ) sqlite3_free(aIter);
  return rc;
}

typedef struct Fts5LookaheadReader Fts5LookaheadReader;
struct Fts5LookaheadReader {
  const u8 *a;                    /* Buffer containing position list */







>
>
>







478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
    for(i=0; i<pPhrase->nTerm; i++){
      if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
    }
  }

 ismatch_out:
  *pbMatch = (pPhrase->poslist.n>0);
  for(i=0; i<pPhrase->nTerm; i++){
    if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
  }
  if( aIter!=aStatic ) sqlite3_free(aIter);
  return rc;
}

typedef struct Fts5LookaheadReader Fts5LookaheadReader;
struct Fts5LookaheadReader {
  const u8 *a;                    /* Buffer containing position list */
594
595
596
597
598
599
600
601
602
603




































604
605
606
607
608
609
610
611


612
613
614
615
616
617
618
*/
static int fts5ExprNearAdvanceFirst(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode,            /* FTS5_STRING or FTS5_TERM node */
  int bFromValid,
  i64 iFrom 
){
  Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
  int rc;





































  assert( Fts5NodeIsString(pNode) );
  if( bFromValid ){
    rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
  }else{
    rc = sqlite3Fts5IterNext(pIter);
  }

  pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));


  return rc;
}

/*
** Advance iterator pIter until it points to a value equal to or laster
** than the initial value of *piLast. If this means the iterator points
** to a value laster than *piLast, update *piLast to the new lastest value.







|


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|

|
>
>







648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
*/
static int fts5ExprNearAdvanceFirst(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode,            /* FTS5_STRING or FTS5_TERM node */
  int bFromValid,
  i64 iFrom 
){
  Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
  int rc;

  if( pTerm->pSynonym ){
    int bEof = 1;
    Fts5ExprTerm *p;

    /* Find the firstest rowid any synonym points to. */
    i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);

    /* Advance each iterator that currently points to iRowid. Or, if iFrom
    ** is valid - each iterator that points to a rowid before iFrom.  */
    for(p=pTerm; p; p=p->pSynonym){
      if( sqlite3Fts5IterEof(p->pIter)==0 ){
        i64 ii = sqlite3Fts5IterRowid(p->pIter);
        if( ii==iRowid 
         || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) 
        ){
          if( bFromValid ){
            rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
          }else{
            rc = sqlite3Fts5IterNext(p->pIter);
          }
          if( rc!=SQLITE_OK ) break;
          if( sqlite3Fts5IterEof(p->pIter)==0 ){
            bEof = 0;
          }
        }else{
          bEof = 0;
        }
      }
    }

    /* Set the EOF flag if either all synonym iterators are at EOF or an
    ** error has occurred.  */
    pNode->bEof = (rc || bEof);
  }else{
    Fts5IndexIter *pIter = pTerm->pIter;

    assert( Fts5NodeIsString(pNode) );
    if( bFromValid ){
      rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
    }else{
      rc = sqlite3Fts5IterNext(pIter);
    }

    pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
  }

  return rc;
}

/*
** Advance iterator pIter until it points to a value equal to or laster
** than the initial value of *piLast. If this means the iterator points
** to a value laster than *piLast, update *piLast to the new lastest value.
642
643
644
645
646
647
648





























649
650
651
652
653
654
655
    iRowid = sqlite3Fts5IterRowid(pIter);
    assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) );
  }
  *piLast = iRowid;

  return 0;
}






























/*
** IN/OUT parameter (*pa) points to a position list n bytes in size. If
** the position list contains entries for column iCol, then (*pa) is set
** to point to the sub-position-list for that column and the number of
** bytes in it returned. Or, if the argument position list does not
** contain any entries for column iCol, return 0.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
    iRowid = sqlite3Fts5IterRowid(pIter);
    assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) );
  }
  *piLast = iRowid;

  return 0;
}

static int fts5ExprSynonymAdvanceto(
  Fts5ExprTerm *pTerm,            /* Term iterator to advance */
  int bDesc,                      /* True if iterator is "rowid DESC" */
  i64 *piLast,                    /* IN/OUT: Lastest rowid seen so far */
  int *pRc                        /* OUT: Error code */
){
  int rc = SQLITE_OK;
  i64 iLast = *piLast;
  Fts5ExprTerm *p;
  int bEof = 0;

  for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){
    if( sqlite3Fts5IterEof(p->pIter)==0 ){
      i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
      if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
        rc = sqlite3Fts5IterNextFrom(p->pIter, iLast);
      }
    }
  }

  if( rc!=SQLITE_OK ){
    *pRc = rc;
    bEof = 1;
  }else{
    *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
  }
  return bEof;
}

/*
** IN/OUT parameter (*pa) points to a position list n bytes in size. If
** the position list contains entries for column iCol, then (*pa) is set
** to point to the sub-position-list for that column and the number of
** bytes in it returned. Or, if the argument position list does not
** contain any entries for column iCol, return 0.
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
  int i;

  /* Check that each phrase in the nearset matches the current row.
  ** Populate the pPhrase->poslist buffers at the same time. If any
  ** phrase is not a match, break out of the loop early.  */
  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    if( pPhrase->nTerm>1 || pNear->pColset ){
      int bMatch = 0;
      rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
      if( bMatch==0 ) break;
    }else{
      rc = sqlite3Fts5IterPoslistBuffer(
          pPhrase->aTerm[0].pIter, &pPhrase->poslist
      );
    }
  }







|

|







834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
  int i;

  /* Check that each phrase in the nearset matches the current row.
  ** Populate the pPhrase->poslist buffers at the same time. If any
  ** phrase is not a match, break out of the loop early.  */
  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){
      int bMatch = 0;
      rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch);
      if( bMatch==0 ) break;
    }else{
      rc = sqlite3Fts5IterPoslistBuffer(
          pPhrase->aTerm[0].pIter, &pPhrase->poslist
      );
    }
  }
751
752
753
754
755
756
757

758
759
760
761
762
763
764
  Fts5ExprColset *pColset = pNear->pColset;
  const u8 *pPos;
  int nPos;
  int rc;

  assert( pNode->eType==FTS5_TERM );
  assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );


  rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);

  /* If the term may match any column, then this must be a match. 
  ** Return immediately in this case. Otherwise, try to find the
  ** part of the poslist that corresponds to the required column.
  ** If it can be found, return. If it cannot, the next iteration







>







872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
  Fts5ExprColset *pColset = pNear->pColset;
  const u8 *pPos;
  int nPos;
  int rc;

  assert( pNode->eType==FTS5_TERM );
  assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );
  assert( pPhrase->aTerm[0].pSynonym==0 );

  rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);

  /* If the term may match any column, then this must be a match. 
  ** Return immediately in this case. Otherwise, try to find the
  ** part of the poslist that corresponds to the required column.
  ** If it can be found, return. If it cannot, the next iteration
797
798
799
800
801
802
803

804

805



806
807
808
809
810



811

812
813
814
815
816
817












818
819

820
821
822

823
824
825
826
827
828
829

830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852




853
854
855
856
857
858
859
860
861
862
863
864





865
866
867
868
869
870
871
872
873
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
  int rc = SQLITE_OK;
  i64 iLast;                      /* Lastest rowid any iterator points to */
  int i, j;                       /* Phrase and token index, respectively */
  int bMatch;                     /* True if all terms are at the same rowid */



  assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 );




  /* Initialize iLast, the "lastest" rowid any iterator points to. If the
  ** iterator skips through rowids in the default ascending order, this means
  ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
  ** means the minimum rowid.  */



  iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);


  do {
    bMatch = 1;
    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      for(j=0; j<pPhrase->nTerm; j++){












        Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
        i64 iRowid = sqlite3Fts5IterRowid(pIter);

        if( iRowid!=iLast ) bMatch = 0;
        if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){
          return rc;

        }
      }
    }
  }while( bMatch==0 );

  pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
  pNode->iRowid = iLast;


  return rc;
}

/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, set *pbEof to true and return immediately,
** without initializing any further iterators.
*/
static int fts5ExprNearInitAll(
  Fts5Expr *pExpr,
  Fts5ExprNode *pNode
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprTerm *pTerm;
  Fts5ExprPhrase *pPhrase;
  int i, j;
  int rc = SQLITE_OK;

  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    pPhrase = pNear->apPhrase[i];
    for(j=0; j<pPhrase->nTerm; j++){
      pTerm = &pPhrase->aTerm[j];




      if( pTerm->pIter ){
        sqlite3Fts5IterClose(pTerm->pIter);
        pTerm->pIter = 0;
      }
      rc = sqlite3Fts5IndexQuery(
          pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm),
          (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
          (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
          &pTerm->pIter
      );
      assert( rc==SQLITE_OK || pTerm->pIter==0 );
      if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){





        pNode->bEof = 1;
        break;
      }
    }
  }

  return rc;
}








>

>
|
>
>
>





>
>
>
|
>






>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
|
|
|
>





<

>






|
|






<
<




|

|
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>

|







919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972

973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988


989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
  int rc = SQLITE_OK;
  i64 iLast;                      /* Lastest rowid any iterator points to */
  int i, j;                       /* Phrase and token index, respectively */
  int bMatch;                     /* True if all terms are at the same rowid */
  const int bDesc = pExpr->bDesc;

  /* Check that this node should not be FTS5_TERM */
  assert( pNear->nPhrase>1 
       || pNear->apPhrase[0]->nTerm>1 
       || pNear->apPhrase[0]->aTerm[0].pSynonym
  );

  /* Initialize iLast, the "lastest" rowid any iterator points to. If the
  ** iterator skips through rowids in the default ascending order, this means
  ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
  ** means the minimum rowid.  */
  if( pLeft->aTerm[0].pSynonym ){
    iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
  }else{
    iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);
  }

  do {
    bMatch = 1;
    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      for(j=0; j<pPhrase->nTerm; j++){
        Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
        if( pTerm->pSynonym ){
          Fts5ExprTerm *p;
          int bEof = 1;
          i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
          if( iRowid==iLast ) continue;
          bMatch = 0;
          if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
            pNode->bEof = 1;
            return rc;
          }
        }else{
          Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
          i64 iRowid = sqlite3Fts5IterRowid(pIter);
          if( iRowid==iLast ) continue;
          bMatch = 0;
          if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
            return rc;
          }
        }
      }
    }
  }while( bMatch==0 );


  pNode->iRowid = iLast;
  pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));

  return rc;
}

/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, return immediately without initializing any
** further iterators.
*/
static int fts5ExprNearInitAll(
  Fts5Expr *pExpr,
  Fts5ExprNode *pNode
){
  Fts5ExprNearset *pNear = pNode->pNear;


  int i, j;
  int rc = SQLITE_OK;

  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    for(j=0; j<pPhrase->nTerm; j++){
      Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
      Fts5ExprTerm *p;
      int bEof = 1;

      for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){
        if( p->pIter ){
          sqlite3Fts5IterClose(p->pIter);
          p->pIter = 0;
        }
        rc = sqlite3Fts5IndexQuery(
            pExpr->pIndex, p->zTerm, strlen(p->zTerm),
            (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
            (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
            &p->pIter
        );
        assert( rc==SQLITE_OK || p->pIter==0 );
        if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){
          bEof = 0;
        }
      }

      if( bEof ){
        pNode->bEof = 1;
        return rc;
      }
    }
  }

  return rc;
}

1025
1026
1027
1028
1029
1030
1031

1032




1033
1034
1035


1036
1037
1038
1039
1040
1041
1042
    switch( pNode->eType ){
      case FTS5_STRING: {
        rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);
        break;
      };

      case FTS5_TERM: {

        rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);




        if( pNode->bEof==0 ){
          assert( rc==SQLITE_OK );
          rc = fts5ExprTokenTest(pExpr, pNode);


        }
        return rc;
      };

      case FTS5_AND: {
        Fts5ExprNode *pLeft = pNode->apChild[0];
        rc = fts5ExprNodeNext(pExpr, pLeft, bFromValid, iFrom);







>
|
>
>
>
>
|


>
>







1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
    switch( pNode->eType ){
      case FTS5_STRING: {
        rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);
        break;
      };

      case FTS5_TERM: {
        Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
        if( bFromValid ){
          rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
        }else{
          rc = sqlite3Fts5IterNext(pIter);
        }
        if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){
          assert( rc==SQLITE_OK );
          rc = fts5ExprTokenTest(pExpr, pNode);
        }else{
          pNode->bEof = 1;
        }
        return rc;
      };

      case FTS5_AND: {
        Fts5ExprNode *pLeft = pNode->apChild[0];
        rc = fts5ExprNodeNext(pExpr, pLeft, bFromValid, iFrom);
1262
1263
1264
1265
1266
1267
1268


1269
1270
1271



1272

1273
1274
1275
1276
1277
1278
1279
/*
** Free the phrase object passed as the only argument.
*/
static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
  if( pPhrase ){
    int i;
    for(i=0; i<pPhrase->nTerm; i++){


      Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
      sqlite3_free(pTerm->zTerm);
      if( pTerm->pIter ){



        sqlite3Fts5IterClose(pTerm->pIter);

      }
    }
    if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
    sqlite3_free(pPhrase);
  }
}








>
>


|
>
>
>
|
>







1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
/*
** Free the phrase object passed as the only argument.
*/
static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
  if( pPhrase ){
    int i;
    for(i=0; i<pPhrase->nTerm; i++){
      Fts5ExprTerm *pSyn;
      Fts5ExprTerm *pNext;
      Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
      sqlite3_free(pTerm->zTerm);
      sqlite3Fts5IterClose(pTerm->pIter);

      for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
        pNext = pSyn->pSynonym;
        sqlite3Fts5IterClose(pSyn->pIter);
        sqlite3_free(pSyn);
      }
    }
    if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
    sqlite3_free(pPhrase);
  }
}

1327
1328
1329
1330
1331
1332
1333

1334
1335
1336
1337
1338
1339
1340

1341
1342
1343
1344
1345
1346
1347
1348
1349






1350










1351


1352
1353
1354
1355
1356
1357
1358
1359


1360
1361
1362
1363
1364


1365
1366
1367
1368



1369
1370
1371
1372
1373
1374
1375
  }
  return pRet;
}

typedef struct TokenCtx TokenCtx;
struct TokenCtx {
  Fts5ExprPhrase *pPhrase;

};

/*
** Callback for tokenizing terms used by ParseTerm().
*/
static int fts5ParseTokenize(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */

  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;






  Fts5ExprTerm *pTerm;













  if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
    Fts5ExprPhrase *pNew;
    int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

    pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
        sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
    );
    if( pNew==0 ) return SQLITE_NOMEM;


    if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
    pCtx->pPhrase = pPhrase = pNew;
    pNew->nTerm = nNew - SZALLOC;
  }



  pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
  memset(pTerm, 0, sizeof(Fts5ExprTerm));
  pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);




  return rc;
}


/*
** Free the phrase object passed as the only argument.
*/







>







>


|
|





>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
|
>
>
|
|
|

|
|
|
|
>
>
|
|
|
|
|
>
>
|
|
|
|
>
>
>







1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
  }
  return pRet;
}

typedef struct TokenCtx TokenCtx;
struct TokenCtx {
  Fts5ExprPhrase *pPhrase;
  int rc;
};

/*
** Callback for tokenizing terms used by ParseTerm().
*/
static int fts5ParseTokenize(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */
  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iUnused1,                   /* Start offset of token */
  int iUnused2                    /* End offset of token */
){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;

  /* If an error has already occurred, this is a no-op */
  if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;

  assert( pPhrase==0 || pPhrase->nTerm>0 );
  if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
    Fts5ExprTerm *pSyn;
    int nByte = sizeof(Fts5ExprTerm) + nToken+1;
    pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
    if( pSyn==0 ){
      rc = SQLITE_NOMEM;
    }else{
      memset(pSyn, 0, nByte);
      pSyn->zTerm = (char*)&pSyn[1];
      memcpy(pSyn->zTerm, pToken, nToken);
      pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
      pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
    }
  }else{
    Fts5ExprTerm *pTerm;
    if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
      Fts5ExprPhrase *pNew;
      int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

      pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
          sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
      );
      if( pNew==0 ){
        rc = SQLITE_NOMEM;
      }else{
        if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
        pCtx->pPhrase = pPhrase = pNew;
        pNew->nTerm = nNew - SZALLOC;
      }
    }

    if( rc==SQLITE_OK ){
      pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
      memset(pTerm, 0, sizeof(Fts5ExprTerm));
      pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
    }
  }

  pCtx->rc = rc;
  return rc;
}


/*
** Free the phrase object passed as the only argument.
*/
1413
1414
1415
1416
1417
1418
1419


1420

1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
  char *z = 0;

  memset(&sCtx, 0, sizeof(TokenCtx));
  sCtx.pPhrase = pAppend;

  rc = fts5ParseStringFromToken(pToken, &z);
  if( rc==SQLITE_OK ){


    sqlite3Fts5Dequote(z);

    rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize);
  }
  sqlite3_free(z);
  if( rc ){
    pParse->rc = rc;
    fts5ExprPhraseFree(sCtx.pPhrase);
    sCtx.pPhrase = 0;
  }else if( sCtx.pPhrase ){

    if( pAppend==0 ){
      if( (pParse->nPhrase % 8)==0 ){







>
>

>
|


|







1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
  char *z = 0;

  memset(&sCtx, 0, sizeof(TokenCtx));
  sCtx.pPhrase = pAppend;

  rc = fts5ParseStringFromToken(pToken, &z);
  if( rc==SQLITE_OK ){
    int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0);
    int n;
    sqlite3Fts5Dequote(z);
    n = strlen(z);
    rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
  }
  sqlite3_free(z);
  if( rc || (rc = sCtx.rc) ){
    pParse->rc = rc;
    fts5ExprPhraseFree(sCtx.pPhrase);
    sCtx.pPhrase = 0;
  }else if( sCtx.pPhrase ){

    if( pAppend==0 ){
      if( (pParse->nPhrase % 8)==0 ){
1445
1446
1447
1448
1449
1450
1451













































































1452
1453
1454
1455
1456
1457
1458
    pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
    assert( sCtx.pPhrase->nTerm>0 );
    sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
  }

  return sCtx.pPhrase;
}














































































/*
** Token pTok has appeared in a MATCH expression where the NEAR operator
** is expected. If token pTok does not contain "NEAR", store an error
** in the pParse object.
*/
void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
    pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
    assert( sCtx.pPhrase->nTerm>0 );
    sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
  }

  return sCtx.pPhrase;
}

/*
** Create a new FTS5 expression by cloning phrase iPhrase of the
** expression passed as the second argument.
*/
int sqlite3Fts5ExprClonePhrase(
  Fts5Config *pConfig,
  Fts5Expr *pExpr, 
  int iPhrase, 
  Fts5Expr **ppNew
){
  int rc = SQLITE_OK;             /* Return code */
  Fts5ExprPhrase *pOrig;          /* The phrase extracted from pExpr */
  Fts5ExprPhrase *pCopy;          /* Copy of pOrig */
  int i;                          /* Used to iterate through phrase terms */

  Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */
  Fts5ExprPhrase **apPhrase;      /* pNew->apPhrase */
  Fts5ExprNode *pNode;            /* pNew->pRoot */
  Fts5ExprNearset *pNear;         /* pNew->pRoot->pNear */

  TokenCtx sCtx = {0,0};          /* Context object for fts5ParseTokenize */


  pOrig = pExpr->apExprPhrase[iPhrase];

  pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
  if( rc==SQLITE_OK ){
    pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprPhrase*));
  }
  if( rc==SQLITE_OK ){
    pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprNode));
  }
  if( rc==SQLITE_OK ){
    pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
  }

  for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
    int tflags = 0;
    Fts5ExprTerm *p;
    for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
      const char *zTerm = p->zTerm;
      rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, strlen(zTerm), 0, 0);
      tflags = FTS5_TOKEN_COLOCATED;
    }
    if( rc==SQLITE_OK ){
      sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
    }
  }

  if( rc==SQLITE_OK ){
    /* All the allocations succeeded. Put the expression object together. */
    pNew->pIndex = pExpr->pIndex;
    pNew->nPhrase = 1;
    pNew->apExprPhrase[0] = sCtx.pPhrase;
    pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
    pNew->pRoot->pNear->nPhrase = 1;
    sCtx.pPhrase->pNode = pNew->pRoot;

    if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){
      pNew->pRoot->eType = FTS5_TERM;
    }else{
      pNew->pRoot->eType = FTS5_STRING;
    }
  }else{
    sqlite3Fts5ExprFree(pNew);
    fts5ExprPhraseFree(sCtx.pPhrase);
    pNew = 0;
  }

  *ppNew = pNew;
  return rc;
}


/*
** Token pTok has appeared in a MATCH expression where the NEAR operator
** is expected. If token pTok does not contain "NEAR", store an error
** in the pParse object.
*/
void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
1626
1627
1628
1629
1630
1631
1632
1633



1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652






1653



1654
1655

1656
1657
1658
1659
1660
1661
1662


1663
1664
1665
1666
1667
1668
1669
      pRet->eType = eType;
      pRet->pNear = pNear;
      if( eType==FTS5_STRING ){
        int iPhrase;
        for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
          pNear->apPhrase[iPhrase]->pNode = pRet;
        }
        if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){



          pRet->eType = FTS5_TERM;
        }
      }else{
        fts5ExprAddChildren(pRet, pLeft);
        fts5ExprAddChildren(pRet, pRight);
      }
    }
  }

  if( pRet==0 ){
    assert( pParse->rc!=SQLITE_OK );
    sqlite3Fts5ParseNodeFree(pLeft);
    sqlite3Fts5ParseNodeFree(pRight);
    sqlite3Fts5ParseNearsetFree(pNear);
  }
  return pRet;
}

static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){






  char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2);



  if( zQuoted ){
    int i = 0;

    char *zIn = pTerm->zTerm;
    zQuoted[i++] = '"';
    while( *zIn ){
      if( *zIn=='"' ) zQuoted[i++] = '"';
      zQuoted[i++] = *zIn++;
    }
    zQuoted[i++] = '"';


    if( pTerm->bPrefix ){
      zQuoted[i++] = ' ';
      zQuoted[i++] = '*';
    }
    zQuoted[i++] = '\0';
  }
  return zQuoted;







|
>
>
>



















>
>
>
>
>
>
|
>
>
>


>
|
|
|
|
|
|
|
>
>







1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
      pRet->eType = eType;
      pRet->pNear = pNear;
      if( eType==FTS5_STRING ){
        int iPhrase;
        for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
          pNear->apPhrase[iPhrase]->pNode = pRet;
        }
        if( pNear->nPhrase==1 
         && pNear->apPhrase[0]->nTerm==1 
         && pNear->apPhrase[0]->aTerm[0].pSynonym==0
        ){
          pRet->eType = FTS5_TERM;
        }
      }else{
        fts5ExprAddChildren(pRet, pLeft);
        fts5ExprAddChildren(pRet, pRight);
      }
    }
  }

  if( pRet==0 ){
    assert( pParse->rc!=SQLITE_OK );
    sqlite3Fts5ParseNodeFree(pLeft);
    sqlite3Fts5ParseNodeFree(pRight);
    sqlite3Fts5ParseNearsetFree(pNear);
  }
  return pRet;
}

static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
  int nByte = 0;
  Fts5ExprTerm *p;
  char *zQuoted;

  /* Determine the maximum amount of space required. */
  for(p=pTerm; p; p=p->pSynonym){
    nByte += strlen(pTerm->zTerm) * 2 + 3 + 2;
  }
  zQuoted = sqlite3_malloc(nByte);

  if( zQuoted ){
    int i = 0;
    for(p=pTerm; p; p=p->pSynonym){
      char *zIn = p->zTerm;
      zQuoted[i++] = '"';
      while( *zIn ){
        if( *zIn=='"' ) zQuoted[i++] = '"';
        zQuoted[i++] = *zIn++;
      }
      zQuoted[i++] = '"';
      if( p->pSynonym ) zQuoted[i++] = '|';
    }
    if( pTerm->bPrefix ){
      zQuoted[i++] = ' ';
      zQuoted[i++] = '*';
    }
    zQuoted[i++] = '\0';
  }
  return zQuoted;
Changes to ext/fts5/fts5_index.c.
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
#define FTS5_DATA_ZERO_PADDING 8
#define FTS5_DATA_PADDING 20

typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;







<







289
290
291
292
293
294
295

296
297
298
299
300
301
302
#define FTS5_DATA_ZERO_PADDING 8
#define FTS5_DATA_PADDING 20

typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;

typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553

  i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
  Fts5SegIter aSeg[1];            /* Array of segment iterators */
};


/*
** Object for iterating through the conents of a single internal node in 
** memory.
*/
struct Fts5NodeIter {
  /* Internal. Set and managed by fts5NodeIterXXX() functions. Except, 
  ** the EOF test for the iterator is (Fts5NodeIter.aData==0).  */
  const u8 *aData;
  int nData;
  int iOff;

  /* Output variables */
  Fts5Buffer term;
  int nEmpty;
  int iChild;
  int bDlidx;
};

/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
**   Record containing the doclist-index data.
**







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







521
522
523
524
525
526
527


















528
529
530
531
532
533
534

  i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
  Fts5SegIter aSeg[1];            /* Array of segment iterators */
};




















/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
**   Record containing the doclist-index data.
**
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
};
struct Fts5DlidxIter {
  int nLvl;
  int iSegid;
  Fts5DlidxLvl aLvl[1];
};



/*
** The first argument passed to this macro is a pointer to an Fts5Buffer
** object.
*/
#define fts5BufferSize(pBuf,n) {                \
  if( pBuf->nSpace<n ) {                        \
    u8 *pNew = sqlite3_realloc(pBuf->p, n);     \
    if( pNew==0 ){                              \
      sqlite3_free(pBuf->p);                    \
    }                                           \
    pBuf->nSpace = n;                           \
    pBuf->p = pNew;                             \
  }                                             \
}

static void fts5PutU16(u8 *aOut, u16 iVal){
  aOut[0] = (iVal>>8);
  aOut[1] = (iVal&0xFF);
}

static u16 fts5GetU16(const u8 *aIn){
  return ((u16)aIn[0] << 8) + aIn[1];







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







550
551
552
553
554
555
556

















557
558
559
560
561
562
563
};
struct Fts5DlidxIter {
  int nLvl;
  int iSegid;
  Fts5DlidxLvl aLvl[1];
};


















static void fts5PutU16(u8 *aOut, u16 iVal){
  aOut[0] = (iVal>>8);
  aOut[1] = (iVal&0xFF);
}

static u16 fts5GetU16(const u8 *aIn){
  return ((u16)aIn[0] << 8) + aIn[1];
613
614
615
616
617
618
619

620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
*/

static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = memcmp(pLeft->p, pRight, nCmp);
  return (res==0 ? (pLeft->n - nRight) : res);
}


/*
** Compare the contents of the two buffers using memcmp(). If one buffer
** is a prefix of the other, it is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:







>








|







577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = memcmp(pLeft->p, pRight, nCmp);
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif

/*
** Compare the contents of the two buffers using memcmp(). If one buffer
** is a prefix of the other, it is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
661
662
663
664
665
666
667
668




669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
  if( p->pReader ){
    sqlite3_blob *pReader = p->pReader;
    p->pReader = 0;
    sqlite3_blob_close(pReader);
  }
}

static Fts5Data *fts5DataReadOrBuffer(




  Fts5Index *p, 
  Fts5Buffer *pBuf, 
  i64 iRowid
){
  Fts5Data *pRet = 0;
  if( p->rc==SQLITE_OK ){
    int rc = SQLITE_OK;

    if( p->pReader ){
      /* This call may return SQLITE_ABORT if there has been a savepoint
      ** rollback since it was last used. In this case a new blob handle
      ** is required.  */
      sqlite3_blob *pBlob = p->pReader;
      p->pReader = 0;
      rc = sqlite3_blob_reopen(pBlob, iRowid);
      assert( p->pReader==0 );
      p->pReader = pBlob;
      if( rc!=SQLITE_OK ){
        fts5CloseReader(p);
      }
      if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
    }

    /* If the blob handle is not yet open, open and seek it. Otherwise, use
    ** the blob_reopen() API to reseek the existing blob handle.  */
    if( p->pReader==0 && rc==SQLITE_OK ){
      Fts5Config *pConfig = p->pConfig;
      rc = sqlite3_blob_open(pConfig->db, 
          pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
      );
    }

    /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
    ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
    ** All the reasons those functions might return SQLITE_ERROR - missing
    ** table, missing row, non-blob/text in block column - indicate 
    ** backing store corruption.  */
    if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;

    if( rc==SQLITE_OK ){
      u8 *aOut = 0;               /* Read blob data into this buffer */
      int nByte = sqlite3_blob_bytes(p->pReader);
      if( pBuf ){
        fts5BufferSize(pBuf, MAX(nByte, p->pConfig->pgsz) + 20);
        pBuf->n = nByte;
        aOut = pBuf->p;
        if( aOut==0 ){
          rc = SQLITE_NOMEM;
        }
      }else{
        int nSpace = nByte + FTS5_DATA_PADDING;
        pRet = (Fts5Data*)sqlite3_malloc(nSpace+sizeof(Fts5Data));
        if( pRet ){
          pRet->n = nByte;
          aOut = pRet->p = (u8*)&pRet[1];
        }else{
          rc = SQLITE_NOMEM;
        }
      }

      if( rc==SQLITE_OK ){
        rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_free(pRet);
        pRet = 0;
      }
    }
    p->rc = rc;
    p->nRead++;
  }

  return pRet;
}

/*
** Retrieve a record from the %_data table.
**
** If an error occurs, NULL is returned and an error left in the 
** Fts5Index object.
*/
static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
  Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid);
  assert( (pRet==0)==(p->rc!=SQLITE_OK) );
  return pRet;
}

/*
** Read a record from the %_data table into the buffer supplied as the
** second argument.
**
** If an error occurs, an error is left in the Fts5Index object. If an
** error has already occurred when this function is called, it is a 
** no-op.
*/
static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){
  (void)fts5DataReadOrBuffer(p, pBuf, iRowid);
}

/*
** Release a reference to data record returned by an earlier call to
** fts5DataRead().
*/
static void fts5DataRelease(Fts5Data *pData){
  sqlite3_free(pData);
}







|
>
>
>
>
|
|
|
<



















|
|

















<
<
<
<
<
<
<
<
|
|
|
|
|
|
|
<














<
<
<
<
<
<
<
<
<
<
<




<
<
<
<
<
<
<
<
<
<
<
<







626
627
628
629
630
631
632
633
634
635
636
637
638
639
640

641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678








679
680
681
682
683
684
685

686
687
688
689
690
691
692
693
694
695
696
697
698
699











700
701
702
703












704
705
706
707
708
709
710
  if( p->pReader ){
    sqlite3_blob *pReader = p->pReader;
    p->pReader = 0;
    sqlite3_blob_close(pReader);
  }
}


/*
** Retrieve a record from the %_data table.
**
** If an error occurs, NULL is returned and an error left in the 
** Fts5Index object.
*/
static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){

  Fts5Data *pRet = 0;
  if( p->rc==SQLITE_OK ){
    int rc = SQLITE_OK;

    if( p->pReader ){
      /* This call may return SQLITE_ABORT if there has been a savepoint
      ** rollback since it was last used. In this case a new blob handle
      ** is required.  */
      sqlite3_blob *pBlob = p->pReader;
      p->pReader = 0;
      rc = sqlite3_blob_reopen(pBlob, iRowid);
      assert( p->pReader==0 );
      p->pReader = pBlob;
      if( rc!=SQLITE_OK ){
        fts5CloseReader(p);
      }
      if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
    }

    /* If the blob handle is not open at this point, open it and seek 
    ** to the requested entry.  */
    if( p->pReader==0 && rc==SQLITE_OK ){
      Fts5Config *pConfig = p->pConfig;
      rc = sqlite3_blob_open(pConfig->db, 
          pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
      );
    }

    /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
    ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
    ** All the reasons those functions might return SQLITE_ERROR - missing
    ** table, missing row, non-blob/text in block column - indicate 
    ** backing store corruption.  */
    if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;

    if( rc==SQLITE_OK ){
      u8 *aOut = 0;               /* Read blob data into this buffer */
      int nByte = sqlite3_blob_bytes(p->pReader);








      int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
      pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
      if( pRet ){
        pRet->n = nByte;
        aOut = pRet->p = (u8*)&pRet[1];
      }else{
        rc = SQLITE_NOMEM;

      }

      if( rc==SQLITE_OK ){
        rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_free(pRet);
        pRet = 0;
      }
    }
    p->rc = rc;
    p->nRead++;
  }












  assert( (pRet==0)==(p->rc!=SQLITE_OK) );
  return pRet;
}













/*
** Release a reference to data record returned by an earlier call to
** fts5DataRead().
*/
static void fts5DataRelease(Fts5Data *pData){
  sqlite3_free(pData);
}
1029
1030
1031
1032
1033
1034
1035

1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
** Fts5Index handle. If an error has already occurred when this function
** is called, it is a no-op.
*/
static Fts5Structure *fts5StructureRead(Fts5Index *p){
  Fts5Config *pConfig = p->pConfig;
  Fts5Structure *pRet = 0;        /* Object to return */
  int iCookie;                    /* Configuration cookie */

  Fts5Buffer buf = {0, 0, 0};

  fts5DataBuffer(p, &buf, FTS5_STRUCTURE_ROWID);
  if( buf.p==0 ) return 0;
  assert( buf.nSpace>=(buf.n + FTS5_DATA_ZERO_PADDING) );
  memset(&buf.p[buf.n], 0, FTS5_DATA_ZERO_PADDING);
  p->rc = fts5StructureDecode(buf.p, buf.n, &iCookie, &pRet);

  if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
    p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
  }

  fts5BufferFree(&buf);
  if( p->rc!=SQLITE_OK ){
    fts5StructureRelease(pRet);
    pRet = 0;
  }
  return pRet;
}








>


|
|
<
|
|
<




|







965
966
967
968
969
970
971
972
973
974
975
976

977
978

979
980
981
982
983
984
985
986
987
988
989
990
** Fts5Index handle. If an error has already occurred when this function
** is called, it is a no-op.
*/
static Fts5Structure *fts5StructureRead(Fts5Index *p){
  Fts5Config *pConfig = p->pConfig;
  Fts5Structure *pRet = 0;        /* Object to return */
  int iCookie;                    /* Configuration cookie */
  Fts5Data *pData;
  Fts5Buffer buf = {0, 0, 0};

  pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
  if( p->rc ) return 0;

  memset(&pData->p[pData->n], 0, FTS5_DATA_PADDING);
  p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet);

  if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
    p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
  }

  fts5DataRelease(pData);
  if( p->rc!=SQLITE_OK ){
    fts5StructureRelease(pRet);
    pRet = 0;
  }
  return pRet;
}

1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
      szPromote = szSeg;
    }
    fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
  }
}


/*
** If the pIter->iOff offset currently points to an entry indicating one
** or more term-less nodes, advance past it and set pIter->nEmpty to
** the number of empty child nodes.
*/
static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){
  if( pIter->iOff<pIter->nData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){
    pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01;
    pIter->iOff++;
    pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty);
  }else{
    pIter->nEmpty = 0;
    pIter->bDlidx = 0;
  }
}

/*
** Advance to the next entry within the node.
*/
static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){
  if( pIter->iOff>=pIter->nData ){
    pIter->aData = 0;
    pIter->iChild += pIter->nEmpty;
  }else{
    int nPre, nNew;
    pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nPre);
    pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nNew);
    pIter->term.n = nPre-2;
    fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff);
    pIter->iOff += nNew;
    pIter->iChild += (1 + pIter->nEmpty);
    fts5NodeIterGobbleNEmpty(pIter);
    if( *pRc ) pIter->aData = 0;
  }
}


/*
** Initialize the iterator object pIter to iterate through the internal
** segment node in pData.
*/
static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){
  memset(pIter, 0, sizeof(*pIter));
  pIter->aData = aData;
  pIter->nData = nData;
  pIter->iOff = fts5GetVarint32(aData, pIter->iChild);
  fts5NodeIterGobbleNEmpty(pIter);
}

/*
** Free any memory allocated by the iterator object.
*/
static void fts5NodeIterFree(Fts5NodeIter *pIter){
  fts5BufferFree(&pIter->term);
}

/*
** Advance the iterator passed as the only argument. If the end of the 
** doclist-index page is reached, return non-zero.
*/
static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
  Fts5Data *pData = pLvl->pData;








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







1159
1160
1161
1162
1163
1164
1165
























































1166
1167
1168
1169
1170
1171
1172
      szPromote = szSeg;
    }
    fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
  }
}


























































/*
** Advance the iterator passed as the only argument. If the end of the 
** doclist-index page is reached, return non-zero.
*/
static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
  Fts5Data *pData = pLvl->pData;

2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
      iOff += nPos;
    }
  }

  pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}

#ifdef SQLITE_DEBUG
static void fts5AssertNodeSeekOk(
  Fts5Buffer *pNode,
  const u8 *pTerm, int nTerm,     /* Term to search for */
  int iExpectPg,
  int bExpectDlidx
){
  int bDlidx;
  int iPg;
  int rc = SQLITE_OK;
  Fts5NodeIter node;

  fts5NodeIterInit(pNode->p, pNode->n, &node);
  assert( node.term.n==0 );
  iPg = node.iChild;
  bDlidx = node.bDlidx;
  for(fts5NodeIterNext(&rc, &node);
      node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0;
      fts5NodeIterNext(&rc, &node)
  ){
    iPg = node.iChild;
    bDlidx = node.bDlidx;
  }
  fts5NodeIterFree(&node);

  assert( rc!=SQLITE_OK || iPg==iExpectPg );
  assert( rc!=SQLITE_OK || bDlidx==bExpectDlidx );
}
#else
#define fts5AssertNodeSeekOk(v,w,x,y,z)
#endif

/*
** Argument pNode is an internal b-tree node. This function searches
** within the node for the largest term that is smaller than or equal
** to (pTerm/nTerm).
**
** It returns the associated page number. Or, if (pTerm/nTerm) is smaller
** than all terms within the node, the leftmost child page number. 
**
** Before returning, (*pbDlidx) is set to true if the last term on the
** returned child page number has a doclist-index. Or left as is otherwise.
*/
static int fts5NodeSeek(
  Fts5Buffer *pNode,              /* Node to search */
  const u8 *pTerm, int nTerm,     /* Term to search for */
  int *pbDlidx                    /* OUT: True if dlidx flag is set */
){
  int iPg;
  u8 *pPtr = pNode->p;
  u8 *pEnd = &pPtr[pNode->n];
  int nMatch = 0;                 /* Number of bytes of pTerm already matched */
  
  assert( *pbDlidx==0 );

  pPtr += fts5GetVarint32(pPtr, iPg);
  while( pPtr<pEnd ){
    int nEmpty = 0;
    int nKeep;
    int nNew;

    /* If there is a "no terms" record at pPtr, read it now. Store the
    ** number of termless pages in nEmpty. If it indicates a doclist-index, 
    ** set (*pbDlidx) to true.*/
    if( *pPtr<2 ){
      *pbDlidx = (*pPtr==0x01);
      pPtr++;
      pPtr += fts5GetVarint32(pPtr, nEmpty);
      if( pPtr>=pEnd ) break;
    }

    /* Read the next "term" pointer. Set nKeep to the number of bytes to
    ** keep from the previous term, and nNew to the number of bytes of
    ** new data that will be appended to it. */
    nKeep = (int)*pPtr++;
    nNew = (int)*pPtr++;
    if( (nKeep | nNew) & 0x0080 ){
      pPtr -= 2;
      pPtr += fts5GetVarint32(pPtr, nKeep);
      pPtr += fts5GetVarint32(pPtr, nNew);
    }
    nKeep -= 2;

    /* Compare (pTerm/nTerm) to the current term on the node (the one described
    ** by nKeep/nNew). If the node term is larger, break out of the while()
    ** loop. 
    **
    ** Otherwise, if (pTerm/nTerm) is larger or the two terms are equal, 
    ** leave variable nMatch set to the size of the largest prefix common to
    ** both terms in bytes.  */
    if( nKeep==nMatch ){
      int nTst = MIN(nNew, nTerm-nMatch);
      int i;
      for(i=0; i<nTst; i++){
        if( pTerm[nKeep+i]!=pPtr[i] ) break;
      }
      nMatch += i;
      assert( nMatch<=nTerm );

      if( i<nNew && (nMatch==nTerm || pPtr[i] > pTerm[nMatch]) ) break;
    }else if( nKeep<nMatch ){
      break;
    }

    iPg += 1 + nEmpty;
    *pbDlidx = 0;
    pPtr += nNew;
  }

  fts5AssertNodeSeekOk(pNode, pTerm, nTerm, iPg, *pbDlidx);
  return iPg;
}

#define fts5IndexGetVarint32(a, iOff, nVal) {     \
  nVal = a[iOff++];                               \
  if( nVal & 0x80 ){                              \
    iOff--;                                       \
    iOff += fts5GetVarint32(&a[iOff], nVal);      \
  }                                               \
}







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







1916
1917
1918
1919
1920
1921
1922

















































































































1923
1924
1925
1926
1927
1928
1929
      iOff += nPos;
    }
  }

  pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}


















































































































#define fts5IndexGetVarint32(a, iOff, nVal) {     \
  nVal = a[iOff++];                               \
  if( nVal & 0x80 ){                              \
    iOff--;                                       \
    iOff += fts5GetVarint32(&a[iOff], nVal);      \
  }                                               \
}
2673
2674
2675
2676
2677
2678
2679
2680

2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }

  while( p->rc==SQLITE_OK ){

    if( bMove ) fts5SegIterNext(p, pIter, 0);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid>=iMatch ) break;
    if( bRev!=0 && pIter->iRowid<=iMatch ) break;
    bMove = 1;
  }
}


/*
** Free the iterator object passed as the second argument.
*/
static void fts5MultiIterFree(Fts5Index *p, Fts5IndexIter *pIter){







<
>





|







2439
2440
2441
2442
2443
2444
2445

2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }


  do{
    if( bMove ) fts5SegIterNext(p, pIter, 0);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid>=iMatch ) break;
    if( bRev!=0 && pIter->iRowid<=iMatch ) break;
    bMove = 1;
  }while( p->rc==SQLITE_OK );
}


/*
** Free the iterator object passed as the second argument.
*/
static void fts5MultiIterFree(Fts5Index *p, Fts5IndexIter *pIter){
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466

4467
4468
4469
4470
4471
4472
4473
4474
4475
/*
** The %_data table is completely empty when this function is called. This
** function populates it with the initial structure objects for each index,
** and the initial version of the "averages" record (a zero-byte blob).
*/
int sqlite3Fts5IndexReinit(Fts5Index *p){
  Fts5Structure s;

  assert( p->rc==SQLITE_OK );
  p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0);

  memset(&s, 0, sizeof(Fts5Structure));

  fts5StructureWrite(p, &s);

  return fts5IndexReturn(p);
}

/*
** Open a new Fts5Index handle. If the bCreate argument is true, create
** and initialize the underlying %_data table.
**







<
<
<
<

>

<







4221
4222
4223
4224
4225
4226
4227




4228
4229
4230

4231
4232
4233
4234
4235
4236
4237
/*
** The %_data table is completely empty when this function is called. This
** function populates it with the initial structure objects for each index,
** and the initial version of the "averages" record (a zero-byte blob).
*/
int sqlite3Fts5IndexReinit(Fts5Index *p){
  Fts5Structure s;




  memset(&s, 0, sizeof(Fts5Structure));
  fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
  fts5StructureWrite(p, &s);

  return fts5IndexReturn(p);
}

/*
** Open a new Fts5Index handle. If the bCreate argument is true, create
** and initialize the underlying %_data table.
**
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792


4793
4794






4795







4796

4797
4798
4799
4800
4801
4802
4803
    Fts5Index *pIndex = pIter->pIndex;
    fts5MultiIterFree(pIter->pIndex, pIter);
    fts5CloseReader(pIndex);
  }
}

/*
** Read the "averages" record into the buffer supplied as the second 
** argument. Return SQLITE_OK if successful, or an SQLite error code
** if an error occurs.


*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){






  assert( p->rc==SQLITE_OK );







  fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID);

  return fts5IndexReturn(p);
}

/*
** Replace the current "averages" record with the contents of the buffer 
** supplied as the second argument.
*/







|
<
|
>
>

|
>
>
>
>
>
>
|
>
>
>
>
>
>
>
|
>







4545
4546
4547
4548
4549
4550
4551
4552

4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
    Fts5Index *pIndex = pIter->pIndex;
    fts5MultiIterFree(pIter->pIndex, pIter);
    fts5CloseReader(pIndex);
  }
}

/*
** Read and decode the "averages" record from the database. 

**
** Parameter anSize must point to an array of size nCol, where nCol is
** the number of user defined columns in the FTS table.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
  int nCol = p->pConfig->nCol;
  Fts5Data *pData;

  *pnRow = 0;
  memset(anSize, 0, sizeof(i64) * nCol);
  pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
  if( p->rc==SQLITE_OK && pData->n ){
    int i = 0;
    int iCol;
    i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
    for(iCol=0; i<pData->n && iCol<nCol; iCol++){
      i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
    }
  }

  fts5DataRelease(pData);
  return fts5IndexReturn(p);
}

/*
** Replace the current "averages" record with the contents of the buffer 
** supplied as the second argument.
*/
5483
5484
5485
5486
5487
5488
5489
5490
5491





5492
5493
5494
5495
5496
5497
5498
5499
5500
5501
5502
5503
5504
5505
5506
5507
5508
5509
5510
5511
5512
5513
5514
5515
5516
5517
5518
5519
5520
5521
5522
5523
5524
5525
5526
5527
5528
5529
5530
5531
5532
5533
5534
5535
5536
5537
5538
5539
5540
5541
5542
5543
5544
5545
5546
5547
5548
5549
5550
5551
5552
5553
5554
5555
5556
5557
5558
5559
5560
5561
5562
5563
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{

    Fts5Buffer term;





    memset(&term, 0, sizeof(Fts5Buffer));

    if( iHeight==0 ){
      int iTermOff = 0;
      int iRowidOff = 0;
      int iOff;
      int nKeep = 0;

      if( n>=4 ){
        iRowidOff = fts5GetU16(&a[0]);
        iTermOff = fts5GetU16(&a[2]);
      }else{
        sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
        goto decode_out;
      }

      if( iRowidOff ){
        iOff = iRowidOff;
      }else if( iTermOff ){
        iOff = iTermOff;
      }else{
        iOff = n;
      }
      fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

      assert( iRowidOff==0 || iOff==iRowidOff );
      if( iRowidOff ){
        iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
      }

      assert( iTermOff==0 || iOff==iTermOff );
      while( iOff<n ){
        int nByte;
        iOff += fts5GetVarint32(&a[iOff], nByte);
        term.n= nKeep;
        fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
        iOff += nByte;

        sqlite3Fts5BufferAppendPrintf(
            &rc, &s, " term=%.*s", term.n, (const char*)term.p
        );
        iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
        if( iOff<n ){
          iOff += fts5GetVarint32(&a[iOff], nKeep);
        }
      }
      fts5BufferFree(&term);
    }else{
      Fts5NodeIter ss;
      for(fts5NodeIterInit(a, n, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){
        if( ss.term.n==0 ){
          sqlite3Fts5BufferAppendPrintf(&rc, &s, " left=%d", ss.iChild);
        }else{
          sqlite3Fts5BufferAppendPrintf(&rc,&s, " \"%.*s\"", 
              ss.term.n, ss.term.p
          );
        }
        if( ss.nEmpty ){
          sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d%s", ss.nEmpty,
              ss.bDlidx ? "*" : ""
          );
        }
      }
      fts5NodeIterFree(&ss);
    }
  }
  
 decode_out:
  sqlite3_free(a);
  if( rc==SQLITE_OK ){
    sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
  }else{







<

>
>
>
>
>


<
<
<
<
<
<
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|

|
|
|
|

|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







5260
5261
5262
5263
5264
5265
5266

5267
5268
5269
5270
5271
5272
5273
5274






5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313


















5314
5315
5316
5317
5318
5319
5320
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{

    Fts5Buffer term;
    int iTermOff = 0;
    int iRowidOff = 0;
    int iOff;
    int nKeep = 0;

    memset(&term, 0, sizeof(Fts5Buffer));







    if( n>=4 ){
      iRowidOff = fts5GetU16(&a[0]);
      iTermOff = fts5GetU16(&a[2]);
    }else{
      sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
      goto decode_out;
    }

    if( iRowidOff ){
      iOff = iRowidOff;
    }else if( iTermOff ){
      iOff = iTermOff;
    }else{
      iOff = n;
    }
    fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

    assert( iRowidOff==0 || iOff==iRowidOff );
    if( iRowidOff ){
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
    }

    assert( iTermOff==0 || iOff==iTermOff );
    while( iOff<n ){
      int nByte;
      iOff += fts5GetVarint32(&a[iOff], nByte);
      term.n= nKeep;
      fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
      iOff += nByte;

      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
          );
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
      if( iOff<n ){
        iOff += fts5GetVarint32(&a[iOff], nKeep);
      }
    }
    fts5BufferFree(&term);


















  }
  
 decode_out:
  sqlite3_free(a);
  if( rc==SQLITE_OK ){
    sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
  }else{
Changes to ext/fts5/fts5_main.c.
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505


1506
1507
1508
1509
1510
1511
1512
  return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
}

static int fts5ApiTokenize(
  Fts5Context *pCtx, 
  const char *pText, int nText, 
  void *pUserData,
  int (*xToken)(void*, const char*, int, int, int)
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  return sqlite3Fts5Tokenize(pTab->pConfig, pText, nText, pUserData, xToken);


}

static int fts5ApiPhraseCount(Fts5Context *pCtx){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
}








|



|
>
>







1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
  return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
}

static int fts5ApiTokenize(
  Fts5Context *pCtx, 
  const char *pText, int nText, 
  void *pUserData,
  int (*xToken)(void*, int, const char*, int, int, int)
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  return sqlite3Fts5Tokenize(
      pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
  );
}

static int fts5ApiPhraseCount(Fts5Context *pCtx){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
}

1651
1652
1653
1654
1655
1656
1657

1658
1659
1660
1661
1662
1663

1664

1665
1666
1667
1668
1669
1670
1671
    }
  }
  return rc;
}

static int fts5ColumnSizeCb(
  void *pContext,                 /* Pointer to int */

  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  int *pCnt = (int*)pContext;

  *pCnt = *pCnt + 1;

  return SQLITE_OK;
}

static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  Fts5Config *pConfig = pTab->pConfig;







>






>
|
>







1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
    }
  }
  return rc;
}

static int fts5ColumnSizeCb(
  void *pContext,                 /* Pointer to int */
  int tflags,
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  int *pCnt = (int*)pContext;
  if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
    (*pCnt)++;
  }
  return SQLITE_OK;
}

static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  Fts5Config *pConfig = pTab->pConfig;
1687
1688
1689
1690
1691
1692
1693
1694


1695
1696
1697
1698
1699
1700
1701
      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i]==0 ){
          const char *z; int n;
          void *p = (void*)(&pCsr->aColumnSize[i]);
          pCsr->aColumnSize[i] = 0;
          rc = fts5ApiColumnText(pCtx, i, &z, &n);
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5Tokenize(pConfig, z, n, p, fts5ColumnSizeCb);


          }
        }
      }
    }
    CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE);
  }
  if( iCol<0 ){







|
>
>







1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i]==0 ){
          const char *z; int n;
          void *p = (void*)(&pCsr->aColumnSize[i]);
          pCsr->aColumnSize[i] = 0;
          rc = fts5ApiColumnText(pCtx, i, &z, &n);
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5Tokenize(
                pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb
            );
          }
        }
      }
    }
    CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE);
  }
  if( iCol<0 ){
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
  rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
  if( rc==SQLITE_OK ){
    Fts5Config *pConf = pTab->pConfig;
    pNew->ePlan = FTS5_PLAN_MATCH;
    pNew->iFirstRowid = SMALLEST_INT64;
    pNew->iLastRowid = LARGEST_INT64;
    pNew->base.pVtab = (sqlite3_vtab*)pTab;
    rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
  }

  if( rc==SQLITE_OK ){
    for(rc = fts5CursorFirst(pTab, pNew, 0);
        rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0;
        rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew)
    ){







|







1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
  rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
  if( rc==SQLITE_OK ){
    Fts5Config *pConf = pTab->pConfig;
    pNew->ePlan = FTS5_PLAN_MATCH;
    pNew->iFirstRowid = SMALLEST_INT64;
    pNew->iLastRowid = LARGEST_INT64;
    pNew->base.pVtab = (sqlite3_vtab*)pTab;
    rc = sqlite3Fts5ExprClonePhrase(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
  }

  if( rc==SQLITE_OK ){
    for(rc = fts5CursorFirst(pTab, pNew, 0);
        rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0;
        rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew)
    ){
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
  pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global));
  if( pGlobal==0 ){
    rc = SQLITE_NOMEM;
  }else{
    void *p = (void*)pGlobal;
    memset(pGlobal, 0, sizeof(Fts5Global));
    pGlobal->db = db;
    pGlobal->api.iVersion = 1;
    pGlobal->api.xCreateFunction = fts5CreateAux;
    pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
    pGlobal->api.xFindTokenizer = fts5FindTokenizer;
    rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api);







|







2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
  pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global));
  if( pGlobal==0 ){
    rc = SQLITE_NOMEM;
  }else{
    void *p = (void*)pGlobal;
    memset(pGlobal, 0, sizeof(Fts5Global));
    pGlobal->db = db;
    pGlobal->api.iVersion = 2;
    pGlobal->api.xCreateFunction = fts5CreateAux;
    pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
    pGlobal->api.xFindTokenizer = fts5FindTokenizer;
    rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api);
Changes to ext/fts5/fts5_storage.c.
355
356
357
358
359
360
361

362
363
364
365
366
367
368

369

370
371
372
373
374
375
376
377
};

/*
** Tokenization callback used when inserting tokens into the FTS index.
*/
static int fts5StorageInsertCallback(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */

  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
  Fts5Index *pIdx = pCtx->pStorage->pIndex;

  int iPos = pCtx->szCol++;

  return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken);
}

/*
** If a row with rowid iDel is present in the %_content table, add the
** delete-markers to the FTS index necessary to delete it. Do not actually
** remove the %_content row at this time though.
*/







>







>
|
>
|







355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
};

/*
** Tokenization callback used when inserting tokens into the FTS index.
*/
static int fts5StorageInsertCallback(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */
  int tflags,
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
  Fts5Index *pIdx = pCtx->pStorage->pIndex;
  if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
    pCtx->szCol++;
  }
  return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
}

/*
** If a row with rowid iDel is present in the %_content table, add the
** delete-markers to the FTS index necessary to delete it. Do not actually
** remove the %_content row at this time though.
*/
390
391
392
393
394
395
396

397
398
399
400
401
402
403
      ctx.pStorage = p;
      ctx.iCol = -1;
      rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
      for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
        if( pConfig->abUnindexed[iCol-1] ) continue;
        ctx.szCol = 0;
        rc = sqlite3Fts5Tokenize(pConfig, 

            (const char*)sqlite3_column_text(pSeek, iCol),
            sqlite3_column_bytes(pSeek, iCol),
            (void*)&ctx,
            fts5StorageInsertCallback
        );
        p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
      }







>







393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
      ctx.pStorage = p;
      ctx.iCol = -1;
      rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
      for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
        if( pConfig->abUnindexed[iCol-1] ) continue;
        ctx.szCol = 0;
        rc = sqlite3Fts5Tokenize(pConfig, 
            FTS5_TOKENIZE_DOCUMENT,
            (const char*)sqlite3_column_text(pSeek, iCol),
            sqlite3_column_bytes(pSeek, iCol),
            (void*)&ctx,
            fts5StorageInsertCallback
        );
        p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
      }
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
  int rc = SQLITE_OK;
  if( p->bTotalsValid==0 ){
    int nCol = p->pConfig->nCol;
    Fts5Buffer buf;
    memset(&buf, 0, sizeof(buf));

    memset(p->aTotalSize, 0, sizeof(i64) * nCol);
    p->nTotalRow = 0;
    rc = sqlite3Fts5IndexGetAverages(p->pIndex, &buf);
    if( rc==SQLITE_OK && buf.n ){
      int i = 0;
      int iCol;
      i += fts5GetVarint(&buf.p[i], (u64*)&p->nTotalRow);
      for(iCol=0; i<buf.n && iCol<nCol; iCol++){
        i += fts5GetVarint(&buf.p[i], (u64*)&p->aTotalSize[iCol]);
      }
    }
    sqlite3_free(buf.p);
    p->bTotalsValid = bCache;
  }
  return rc;
}

/*
** Store the current contents of the p->nTotalRow and p->aTotalSize[] 







<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<







451
452
453
454
455
456
457






458









459
460
461
462
463
464
465
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
  int rc = SQLITE_OK;
  if( p->bTotalsValid==0 ){






    rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize);









    p->bTotalsValid = bCache;
  }
  return rc;
}

/*
** Store the current contents of the p->nTotalRow and p->aTotalSize[] 
561
562
563
564
565
566
567

568
569
570
571
572
573
574
    ctx.iCol = -1;

    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
    for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){
      if( pConfig->abUnindexed[iCol] ) continue;
      ctx.szCol = 0;
      rc = sqlite3Fts5Tokenize(pConfig, 

        (const char*)sqlite3_value_text(apVal[iCol]),
        sqlite3_value_bytes(apVal[iCol]),
        (void*)&ctx,
        fts5StorageInsertCallback
      );
      p->aTotalSize[iCol] -= (i64)ctx.szCol;
    }







>







550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
    ctx.iCol = -1;

    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
    for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){
      if( pConfig->abUnindexed[iCol] ) continue;
      ctx.szCol = 0;
      rc = sqlite3Fts5Tokenize(pConfig, 
        FTS5_TOKENIZE_DOCUMENT,
        (const char*)sqlite3_value_text(apVal[iCol]),
        sqlite3_value_bytes(apVal[iCol]),
        (void*)&ctx,
        fts5StorageInsertCallback
      );
      p->aTotalSize[iCol] -= (i64)ctx.szCol;
    }
650
651
652
653
654
655
656

657
658
659
660
661
662
663

    sqlite3Fts5BufferZero(&buf);
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid);
    for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
      ctx.szCol = 0;
      if( pConfig->abUnindexed[ctx.iCol]==0 ){
        rc = sqlite3Fts5Tokenize(pConfig, 

            (const char*)sqlite3_column_text(pScan, ctx.iCol+1),
            sqlite3_column_bytes(pScan, ctx.iCol+1),
            (void*)&ctx,
            fts5StorageInsertCallback
        );
      }
      sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);







>







640
641
642
643
644
645
646
647
648
649
650
651
652
653
654

    sqlite3Fts5BufferZero(&buf);
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid);
    for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
      ctx.szCol = 0;
      if( pConfig->abUnindexed[ctx.iCol]==0 ){
        rc = sqlite3Fts5Tokenize(pConfig, 
            FTS5_TOKENIZE_DOCUMENT,
            (const char*)sqlite3_column_text(pScan, ctx.iCol+1),
            sqlite3_column_bytes(pScan, ctx.iCol+1),
            (void*)&ctx,
            fts5StorageInsertCallback
        );
      }
      sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
767
768
769
770
771
772
773

774
775
776
777
778
779
780
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid);
    ctx.pStorage = p;
  }
  for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
    ctx.szCol = 0;
    if( pConfig->abUnindexed[ctx.iCol]==0 ){
      rc = sqlite3Fts5Tokenize(pConfig, 

          (const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
          sqlite3_value_bytes(apVal[ctx.iCol+2]),
          (void*)&ctx,
          fts5StorageInsertCallback
      );
    }
    sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);







>







758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid);
    ctx.pStorage = p;
  }
  for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
    ctx.szCol = 0;
    if( pConfig->abUnindexed[ctx.iCol]==0 ){
      rc = sqlite3Fts5Tokenize(pConfig, 
          FTS5_TOKENIZE_DOCUMENT,
          (const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
          sqlite3_value_bytes(apVal[ctx.iCol+2]),
          (void*)&ctx,
          fts5StorageInsertCallback
      );
    }
    sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
834
835
836
837
838
839
840

841
842
843
844
845
846

847

848
849
850
851
852
853
854
855
856
};

/*
** Tokenization callback used by integrity check.
*/
static int fts5StorageIntegrityCallback(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */

  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;

  int iPos = pCtx->szCol++;

  pCtx->cksum ^= sqlite3Fts5IndexCksum(
      pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
  );
  return SQLITE_OK;
}

/*
** Check that the contents of the FTS index match that of the %_content
** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return







>






>
|
>

|







826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
};

/*
** Tokenization callback used by integrity check.
*/
static int fts5StorageIntegrityCallback(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */
  int tflags,
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
  if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
    pCtx->szCol++;
  }
  pCtx->cksum ^= sqlite3Fts5IndexCksum(
      pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken
  );
  return SQLITE_OK;
}

/*
** Check that the contents of the FTS index match that of the %_content
** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return
877
878
879
880
881
882
883

884

885
886
887
888
889
890
891
892
893
894
895

896

897
898
899
900
901
902
903
  rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
  if( rc==SQLITE_OK ){
    int rc2;
    while( SQLITE_ROW==sqlite3_step(pScan) ){
      int i;
      ctx.iRowid = sqlite3_column_int64(pScan, 0);
      ctx.szCol = 0;

      rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);

      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i] ) continue;
        ctx.iCol = i;
        ctx.szCol = 0;
        rc = sqlite3Fts5Tokenize(
            pConfig, 
            (const char*)sqlite3_column_text(pScan, i+1),
            sqlite3_column_bytes(pScan, i+1),
            (void*)&ctx,
            fts5StorageIntegrityCallback
        );

        if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;

        aTotalSize[i] += ctx.szCol;
      }
      if( rc!=SQLITE_OK ) break;
    }
    rc2 = sqlite3_reset(pScan);
    if( rc==SQLITE_OK ) rc = rc2;
  }







>
|
>




|
|





>
|
>







872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
  rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
  if( rc==SQLITE_OK ){
    int rc2;
    while( SQLITE_ROW==sqlite3_step(pScan) ){
      int i;
      ctx.iRowid = sqlite3_column_int64(pScan, 0);
      ctx.szCol = 0;
      if( pConfig->bColumnsize ){
        rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
      }
      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i] ) continue;
        ctx.iCol = i;
        ctx.szCol = 0;
        rc = sqlite3Fts5Tokenize(pConfig, 
            FTS5_TOKENIZE_DOCUMENT,
            (const char*)sqlite3_column_text(pScan, i+1),
            sqlite3_column_bytes(pScan, i+1),
            (void*)&ctx,
            fts5StorageIntegrityCallback
        );
        if( pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
          rc = FTS5_CORRUPT;
        }
        aTotalSize[i] += ctx.szCol;
      }
      if( rc!=SQLITE_OK ) break;
    }
    rc2 = sqlite3_reset(pScan);
    if( rc==SQLITE_OK ) rc = rc2;
  }
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
  /* Check that the %_docsize and %_content tables contain the expected
  ** number of rows.  */
  if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
    i64 nRow;
    rc = fts5StorageCount(p, "content", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
  }
  if( rc==SQLITE_OK ){
    i64 nRow;
    rc = fts5StorageCount(p, "docsize", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
  }

  /* Pass the expected checksum down to the FTS index module. It will
  ** verify, amongst other things, that it matches the checksum generated by







|







913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
  /* Check that the %_docsize and %_content tables contain the expected
  ** number of rows.  */
  if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
    i64 nRow;
    rc = fts5StorageCount(p, "content", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
  }
  if( rc==SQLITE_OK && pConfig->bColumnsize ){
    i64 nRow;
    rc = fts5StorageCount(p, "docsize", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
  }

  /* Pass the expected checksum down to the FTS index module. It will
  ** verify, amongst other things, that it matches the checksum generated by
998
999
1000
1001
1002
1003
1004
1005
1006



1007
1008
1009
1010
1011
1012
1013
1014
** each table column. This function reads the %_docsize record for the
** specified rowid and populates aCol[] with the results.
**
** An SQLite error code is returned if an error occurs, or SQLITE_OK
** otherwise.
*/
int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
  int nCol = p->pConfig->nCol;
  sqlite3_stmt *pLookup = 0;



  int rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
  if( rc==SQLITE_OK ){
    int bCorrupt = 1;
    sqlite3_bind_int64(pLookup, 1, iRowid);
    if( SQLITE_ROW==sqlite3_step(pLookup) ){
      const u8 *aBlob = sqlite3_column_blob(pLookup, 0);
      int nBlob = sqlite3_column_bytes(pLookup, 0);
      if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){







|
|
>
>
>
|







997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
** each table column. This function reads the %_docsize record for the
** specified rowid and populates aCol[] with the results.
**
** An SQLite error code is returned if an error occurs, or SQLITE_OK
** otherwise.
*/
int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
  int nCol = p->pConfig->nCol;    /* Number of user columns in table */
  sqlite3_stmt *pLookup = 0;      /* Statement to query %_docsize */
  int rc;                         /* Return Code */

  assert( p->pConfig->bColumnsize );
  rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
  if( rc==SQLITE_OK ){
    int bCorrupt = 1;
    sqlite3_bind_int64(pLookup, 1, iRowid);
    if( SQLITE_ROW==sqlite3_step(pLookup) ){
      const u8 *aBlob = sqlite3_column_blob(pLookup, 0);
      int nBlob = sqlite3_column_bytes(pLookup, 0);
      if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){
Changes to ext/fts5/fts5_tcl.c.
137
138
139
140
141
142
143

144
145
146
147
148
149
150
typedef struct F5tAuxData F5tAuxData;
struct F5tAuxData {
  Tcl_Obj *pObj;
};

static int xTokenizeCb(
  void *pCtx, 

  const char *zToken, int nToken, 
  int iStart, int iEnd
){
  F5tFunction *p = (F5tFunction*)pCtx;
  Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript);
  int rc;








>







137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
typedef struct F5tAuxData F5tAuxData;
struct F5tAuxData {
  Tcl_Obj *pObj;
};

static int xTokenizeCb(
  void *pCtx, 
  int tflags,
  const char *zToken, int nToken, 
  int iStart, int iEnd
){
  F5tFunction *p = (F5tFunction*)pCtx;
  Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript);
  int rc;

580
581
582
583
584
585
586

587
588
589
590
591
592
593
  Tcl_Obj *pRet;
  int bSubst;
  const char *zInput;
};

static int xTokenizeCb2(
  void *pCtx, 

  const char *zToken, int nToken, 
  int iStart, int iEnd
){
  F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
  if( p->bSubst ){
    Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
    Tcl_ListObjAppendElement(







>







581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
  Tcl_Obj *pRet;
  int bSubst;
  const char *zInput;
};

static int xTokenizeCb2(
  void *pCtx, 
  int tflags,
  const char *zToken, int nToken, 
  int iStart, int iEnd
){
  F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
  if( p->bSubst ){
    Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
    Tcl_ListObjAppendElement(
662
663
664
665
666
667
668
669


670
671
672
673
674
675
676
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);
  ctx.bSubst = (objc==5);
  ctx.pRet = pRet;
  ctx.zInput = zText;
  rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2);


  tokenizer.xDelete(pTok);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0);
    Tcl_DecrRefCount(pRet);
    return TCL_ERROR;
  }








|
>
>







664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);
  ctx.bSubst = (objc==5);
  ctx.pRet = pRet;
  ctx.zInput = zText;
  rc = tokenizer.xTokenize(
      pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, zText, nText, xTokenizeCb2
  );
  tokenizer.xDelete(pTok);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0);
    Tcl_DecrRefCount(pRet);
    return TCL_ERROR;
  }

684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699






700
701
702
703
704
705
706
/*************************************************************************
** Start of tokenizer wrapper.
*/

typedef struct F5tTokenizerContext F5tTokenizerContext;
typedef struct F5tTokenizerCb F5tTokenizerCb;
typedef struct F5tTokenizerModule F5tTokenizerModule;
typedef struct F5tTokenizerModule F5tTokenizerInstance;

struct F5tTokenizerContext {
  void *pCtx;
  int (*xToken)(void*, const char*, int, int, int);
};

struct F5tTokenizerModule {
  Tcl_Interp *interp;






  Tcl_Obj *pScript;
  F5tTokenizerContext *pContext;
};

static int f5tTokenizerCreate(
  void *pCtx, 
  const char **azArg, 







|



|




>
>
>
>
>
>







688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
/*************************************************************************
** Start of tokenizer wrapper.
*/

typedef struct F5tTokenizerContext F5tTokenizerContext;
typedef struct F5tTokenizerCb F5tTokenizerCb;
typedef struct F5tTokenizerModule F5tTokenizerModule;
typedef struct F5tTokenizerInstance F5tTokenizerInstance;

struct F5tTokenizerContext {
  void *pCtx;
  int (*xToken)(void*, int, const char*, int, int, int);
};

struct F5tTokenizerModule {
  Tcl_Interp *interp;
  Tcl_Obj *pScript;
  F5tTokenizerContext *pContext;
};

struct F5tTokenizerInstance {
  Tcl_Interp *interp;
  Tcl_Obj *pScript;
  F5tTokenizerContext *pContext;
};

static int f5tTokenizerCreate(
  void *pCtx, 
  const char **azArg, 
744
745
746
747
748
749
750

751
752
753
754
755
756
757
758

759
760
761
762









763
764



















765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790

791
792
793





794


795


796




797
798

799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
  Tcl_DecrRefCount(pInst->pScript);
  ckfree((char *)pInst);
}

static int f5tTokenizerTokenize(
  Fts5Tokenizer *p, 
  void *pCtx,

  const char *pText, int nText, 
  int (*xToken)(void*, const char*, int, int, int)
){
  F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
  void *pOldCtx;
  int (*xOldToken)(void*, const char*, int, int, int);
  Tcl_Obj *pEval;
  int rc;


  pOldCtx = pInst->pContext->pCtx;
  xOldToken = pInst->pContext->xToken;










  pEval = Tcl_DuplicateObj(pInst->pScript);
  Tcl_IncrRefCount(pEval);



















  rc = Tcl_ListObjAppendElement(
      pInst->interp, pEval, Tcl_NewStringObj(pText, nText)
  );
  if( rc==TCL_OK ){
    rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);
  }
  Tcl_DecrRefCount(pEval);

  pInst->pContext->pCtx = pOldCtx;
  pInst->pContext->xToken = xOldToken;
  return rc;
}

/*
** sqlite3_fts5_token TEXT START END POS
*/
static int f5tTokenizerReturn(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  F5tTokenizerContext *p = (F5tTokenizerContext*)clientData;
  int iStart;
  int iEnd;
  int nToken;

  char *zToken;
  int rc;






  assert( p );


  if( objc!=4 ){


    Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END");




    return TCL_ERROR;
  }

  if( p->xToken==0 ){
    Tcl_AppendResult(interp, 
        "sqlite3_fts5_token may only be used by tokenizer callback", 0
    );
    return TCL_ERROR;
  }

  zToken = Tcl_GetStringFromObj(objv[1], &nToken);
  if( Tcl_GetIntFromObj(interp, objv[2], &iStart) 
   || Tcl_GetIntFromObj(interp, objv[3], &iEnd) 
  ){
    return TCL_ERROR;
  }

  rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd);
  Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
  return TCL_OK;
}

static void f5tDelTokenizer(void *pCtx){
  F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx;
  Tcl_DecrRefCount(pMod->pScript);
  ckfree((char *)pMod);
}







>

|



|


>




>
>
>
>
>
>
>
>
>


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
<
<
|
<








|











>



>
>
>
>
>
|
>
>
|
>
>
|
>
>
>
>


>







|
<
|
<
|
|
|
<
|
|







754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806


807

808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858

859

860
861
862

863
864
865
866
867
868
869
870
871
  Tcl_DecrRefCount(pInst->pScript);
  ckfree((char *)pInst);
}

static int f5tTokenizerTokenize(
  Fts5Tokenizer *p, 
  void *pCtx,
  int flags,
  const char *pText, int nText, 
  int (*xToken)(void*, int, const char*, int, int, int)
){
  F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
  void *pOldCtx;
  int (*xOldToken)(void*, int, const char*, int, int, int);
  Tcl_Obj *pEval;
  int rc;
  const char *zFlags;

  pOldCtx = pInst->pContext->pCtx;
  xOldToken = pInst->pContext->xToken;

  pInst->pContext->pCtx = pCtx;
  pInst->pContext->xToken = xToken;

  assert( 
      flags==FTS5_TOKENIZE_DOCUMENT
   || flags==FTS5_TOKENIZE_AUX
   || flags==FTS5_TOKENIZE_QUERY
   || flags==(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)
  );
  pEval = Tcl_DuplicateObj(pInst->pScript);
  Tcl_IncrRefCount(pEval);
  switch( flags ){
    case FTS5_TOKENIZE_DOCUMENT:
      zFlags = "document";
      break;
    case FTS5_TOKENIZE_AUX:
      zFlags = "aux";
      break;
    case FTS5_TOKENIZE_QUERY:
      zFlags = "query";
      break;
    case (FTS5_TOKENIZE_PREFIX | FTS5_TOKENIZE_QUERY):
      zFlags = "prefixquery";
      break;
    default:
      assert( 0 );
      zFlags = "invalid";
      break;
  }

  Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(zFlags, -1));
  Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(pText,nText));


  rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);

  Tcl_DecrRefCount(pEval);

  pInst->pContext->pCtx = pOldCtx;
  pInst->pContext->xToken = xOldToken;
  return rc;
}

/*
** sqlite3_fts5_token ?-colocated? TEXT START END
*/
static int f5tTokenizerReturn(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  F5tTokenizerContext *p = (F5tTokenizerContext*)clientData;
  int iStart;
  int iEnd;
  int nToken;
  int tflags = 0;
  char *zToken;
  int rc;

  if( objc==5 ){
    int nArg;
    char *zArg = Tcl_GetStringFromObj(objv[1], &nArg);
    if( nArg<=10 && nArg>=2 && memcmp("-colocated", zArg, nArg)==0 ){
      tflags |= FTS5_TOKEN_COLOCATED;
    }else{
      goto usage;
    }
  }else if( objc!=4 ){
    goto usage;
  }

  zToken = Tcl_GetStringFromObj(objv[objc-3], &nToken);
  if( Tcl_GetIntFromObj(interp, objv[objc-2], &iStart) 
   || Tcl_GetIntFromObj(interp, objv[objc-1], &iEnd) 
  ){
    return TCL_ERROR;
  }

  if( p->xToken==0 ){
    Tcl_AppendResult(interp, 
        "sqlite3_fts5_token may only be used by tokenizer callback", 0
    );
    return TCL_ERROR;
  }

  rc = p->xToken(p->pCtx, tflags, zToken, nToken, iStart, iEnd);

  Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);

  return TCL_OK;

 usage:

  Tcl_WrongNumArgs(interp, 1, objv, "?-colocated? TEXT START END");
  return TCL_ERROR;
}

static void f5tDelTokenizer(void *pCtx){
  F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx;
  Tcl_DecrRefCount(pMod->pScript);
  ckfree((char *)pMod);
}
Changes to ext/fts5/fts5_test_mi.c.
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365



366
367
368
369


370

371
372
373
374
375
376
377
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const char *zArg;
  Fts5MatchinfoCtx *p;
  int rc;

  if( nVal>0 ){
    zArg = (const char*)sqlite3_value_text(apVal[0]);
  }else{
    zArg = "pcx";
  }

  p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
  if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
    p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);



    pApi->xSetAuxdata(pFts, p, sqlite3_free);
    if( p==0 ) return;
  }



  rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);

  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }else{
    /* No errors has occured, so return a copy of the array of integers. */
    int nByte = p->nRet * sizeof(u32);
    sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
  }







|










>
>
>
|
<
|
|
>
>
|
>







348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369

370
371
372
373
374
375
376
377
378
379
380
381
382
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const char *zArg;
  Fts5MatchinfoCtx *p;
  int rc = SQLITE_OK;

  if( nVal>0 ){
    zArg = (const char*)sqlite3_value_text(apVal[0]);
  }else{
    zArg = "pcx";
  }

  p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
  if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
    p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
    if( p==0 ){
      rc = SQLITE_NOMEM;
    }else{
      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);

    }
  }

  if( rc==SQLITE_OK ){
    rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
  }
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }else{
    /* No errors has occured, so return a copy of the array of integers. */
    int nByte = p->nRet * sizeof(u32);
    sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
  }
Changes to ext/fts5/fts5_tokenize.c.
112
113
114
115
116
117
118

119
120
121
122
123
124
125
126
127

/*
** Tokenize some text using the ascii tokenizer.
*/
static int fts5AsciiTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,

  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
  AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  int ie;
  int is = 0;

  char aFold[64];







>

|







112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128

/*
** Tokenize some text using the ascii tokenizer.
*/
static int fts5AsciiTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  int flags,
  const char *pText, int nText,
  int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
  AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  int ie;
  int is = 0;

  char aFold[64];
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
        break;
      }
      nFold = nByte*2;
    }
    asciiFold(pFold, &pText[is], nByte);

    /* Invoke the token callback */
    rc = xToken(pCtx, pFold, nByte, is, ie);
    is = ie+1;
  }
  
  if( pFold!=aFold ) sqlite3_free(pFold);
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}







|







155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
        break;
      }
      nFold = nByte*2;
    }
    asciiFold(pFold, &pText[is], nByte);

    /* Invoke the token callback */
    rc = xToken(pCtx, 0, pFold, nByte, is, ie);
    is = ie+1;
  }
  
  if( pFold!=aFold ) sqlite3_free(pFold);
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}
381
382
383
384
385
386
387

388
389
390
391
392
393
394
395
396
  assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
  return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode);
}

static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,

  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
  Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  unsigned char *a = p->aTokenChar;

  unsigned char *zTerm = (unsigned char*)&pText[nText];
  unsigned char *zCsr = (unsigned char *)pText;







>

|







382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
  assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
  return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode);
}

static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  int flags,
  const char *pText, int nText,
  int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
  Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  unsigned char *a = p->aTokenChar;

  unsigned char *zTerm = (unsigned char*)&pText[nText];
  unsigned char *zCsr = (unsigned char *)pText;
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
        }
        zCsr++;
      }
      ie = zCsr - (unsigned char*)pText;
    }

    /* Invoke the token callback */
    rc = xToken(pCtx, aFold, zOut-aFold, is, ie);
  }
  
 tokenize_done:
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}








|







473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
        }
        zCsr++;
      }
      ie = zCsr - (unsigned char*)pText;
    }

    /* Invoke the token callback */
    rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie); 
  }
  
 tokenize_done:
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}

549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
  *ppOut = (Fts5Tokenizer*)pRet;
  return rc;
}

typedef struct PorterContext PorterContext;
struct PorterContext {
  void *pCtx;
  int (*xToken)(void*, const char*, int, int, int);
  char *aBuf;
};

typedef struct PorterRule PorterRule;
struct PorterRule {
  const char *zSuffix;
  int nSuffix;







|







551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
  *ppOut = (Fts5Tokenizer*)pRet;
  return rc;
}

typedef struct PorterContext PorterContext;
struct PorterContext {
  void *pCtx;
  int (*xToken)(void*, int, const char*, int, int, int);
  char *aBuf;
};

typedef struct PorterRule PorterRule;
struct PorterRule {
  const char *zSuffix;
  int nSuffix;
1114
1115
1116
1117
1118
1119
1120

1121
1122
1123
1124
1125
1126
1127
      *pnBuf = nBuf-1;
    }
  }
}

static int fts5PorterCb(
  void *pCtx, 

  const char *pToken, 
  int nToken, 
  int iStart, 
  int iEnd
){
  PorterContext *p = (PorterContext*)pCtx;








>







1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
      *pnBuf = nBuf-1;
    }
  }
}

static int fts5PorterCb(
  void *pCtx, 
  int tflags,
  const char *pToken, 
  int nToken, 
  int iStart, 
  int iEnd
){
  PorterContext *p = (PorterContext*)pCtx;

1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189

1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
  /* Step 5b. */
  if( nBuf>1 && aBuf[nBuf-1]=='l' 
   && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) 
  ){
    nBuf--;
  }

  return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd);

 pass_through:
  return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd);
}

/*
** Tokenize using the porter tokenizer.
*/
static int fts5PorterTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,

  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
  PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
  PorterContext sCtx;
  sCtx.xToken = xToken;
  sCtx.pCtx = pCtx;
  sCtx.aBuf = p->aBuf;
  return p->tokenizer.xTokenize(
      p->pTokenizer, (void*)&sCtx, pText, nText, fts5PorterCb
  );
}

/*
** Register all built-in tokenizers with FTS5.
*/
int sqlite3Fts5TokenizerInit(fts5_api *pApi){







|


|








>

|







|







1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
  /* Step 5b. */
  if( nBuf>1 && aBuf[nBuf-1]=='l' 
   && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) 
  ){
    nBuf--;
  }

  return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd);

 pass_through:
  return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
}

/*
** Tokenize using the porter tokenizer.
*/
static int fts5PorterTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  int flags,
  const char *pText, int nText,
  int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
  PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
  PorterContext sCtx;
  sCtx.xToken = xToken;
  sCtx.pCtx = pCtx;
  sCtx.aBuf = p->aBuf;
  return p->tokenizer.xTokenize(
      p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb
  );
}

/*
** Register all built-in tokenizers with FTS5.
*/
int sqlite3Fts5TokenizerInit(fts5_api *pApi){
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
        aBuiltin[i].zName,
        (void*)pApi,
        &aBuiltin[i].x,
        0
    );
  }

  return SQLITE_OK;
}









|



1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
        aBuiltin[i].zName,
        (void*)pApi,
        &aBuiltin[i].x,
        0
    );
  }

  return rc;
}


Changes to ext/fts5/test/fts5_common.tcl.
290
291
292
293
294
295
296

































297
proc OR {args} {
  sort_poslist [concat {*}$args]
}
proc NOT {a b} {
  if {[llength $b]>0} { return [list] }
  return $a
}









































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
proc OR {args} {
  sort_poslist [concat {*}$args]
}
proc NOT {a b} {
  if {[llength $b]>0} { return [list] }
  return $a
}

#-------------------------------------------------------------------------
# This command is similar to [split], except that it also provides the
# start and end offsets of each token. For example:
#
#   [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
#

proc gobble_whitespace {textvar} {
  upvar $textvar t
  regexp {([ ]*)(.*)} $t -> space t
  return [string length $space]
}

proc gobble_text {textvar wordvar} {
  upvar $textvar t
  upvar $wordvar w
  regexp {([^ ]*)(.*)} $t -> w t
  return [string length $w]
}

proc fts5_tokenize_split {text} {
  set token ""
  set ret [list]
  set iOff [gobble_whitespace text]
  while {[set nToken [gobble_text text word]]} {
    lappend ret $word $iOff [expr $iOff+$nToken]
    incr iOff $nToken
    incr iOff [gobble_whitespace text]
  }

  set ret
}

Changes to ext/fts5/test/fts5aa.test.
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
} {}

do_execsql_test 13.5 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'o';
} {1}

do_execsql_test 13.6 {
  SELECT rowid FROM t1 WHERE t1 MATCH '.';
} {}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 14.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, y);







|







339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
} {}

do_execsql_test 13.5 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'o';
} {1}

do_execsql_test 13.6 {
  SELECT rowid FROM t1 WHERE t1 MATCH '""';
} {}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 14.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, y);
501
502
503
504
505
506
507






























508
509
510
511
}
do_execsql_test 18.2 {
  SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}
do_execsql_test 18.3 {
  SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}































finish_test









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
}
do_execsql_test 18.2 {
  SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}
do_execsql_test 18.3 {
  SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}

#--------------------------------------------------------------------
# fts5 table in the temp schema.
#
reset_db
do_execsql_test 19.0 {
  CREATE VIRTUAL TABLE temp.t1 USING fts5(x);
  INSERT INTO t1 VALUES('x y z');
  INSERT INTO t1 VALUES('w x 1');
  SELECT rowid FROM t1 WHERE t1 MATCH 'x';
} {1 2}

#--------------------------------------------------------------------
# Test that 6 and 7 byte varints can be read.
#
reset_db
do_execsql_test 20.0 {
  CREATE VIRTUAL TABLE temp.tmp USING fts5(x);
}
set ::ids [list \
  0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43]
]
do_test 20.1 {
  foreach id $::ids {
    execsql { INSERT INTO tmp(rowid, x) VALUES($id, 'x y z') }
  }
  execsql { SELECT rowid FROM tmp WHERE tmp MATCH 'y' }
} $::ids



finish_test


Changes to ext/fts5/test/fts5columnsize.test.
130
131
132
133
134
135
136













137
138
}
do_execsql_test 3.2.1 {
  SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a'
} {
  1 {-1 0 -1} 2 {-1 0 -1}
}















finish_test







>
>
>
>
>
>
>
>
>
>
>
>
>


130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
}
do_execsql_test 3.2.1 {
  SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a'
} {
  1 {-1 0 -1} 2 {-1 0 -1}
}

#-------------------------------------------------------------------------
# Test the integrity-check
#
do_execsql_test 4.1.1 {
  CREATE VIRTUAL TABLE t5 USING fts5(x, columnsize=0);
  INSERT INTO t5 VALUES('1 2 3 4');
  INSERT INTO t5 VALUES('2 4 6 8');
}

breakpoint
do_execsql_test 4.1.2 {
  INSERT INTO t5(t5) VALUES('integrity-check');
}

finish_test
Changes to ext/fts5/test/fts5ea.test.
83
84
85
86
87
88
89






90
91
92
93
#-------------------------------------------------------------------------
# Experiment with a tokenizer that considers " to be a token character.
#
do_execsql_test 4.0 {
  SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"');
} {{"a" AND """"}}










finish_test







>
>
>
>
>
>




83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#-------------------------------------------------------------------------
# Experiment with a tokenizer that considers " to be a token character.
#
do_execsql_test 4.0 {
  SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"');
} {{"a" AND """"}}

#-------------------------------------------------------------------------
# Experiment with a tokenizer that considers " to be a token character.
#
do_catchsql_test 5.0 {
  SELECT fts5_expr('abc | def');
} {1 {fts5: syntax error near "|"}}



finish_test
Changes to ext/fts5/test/fts5eb.test.
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

proc do_syntax_test {tn expr res} {
  set ::se_expr $expr
  do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res]
}

foreach {tn expr res} {
  1  {abc}                           {"abc"}
  2  {abc .}                         {"abc"}
  3  {.}                             {}
  4  {abc OR .}                      {"abc"}
  5  {abc NOT .}                     {"abc"}
  6  {abc AND .}                     {"abc"}
  7  {. OR abc}                      {"abc"}
  8  {. NOT abc}                     {"abc"}
  9  {. AND abc}                     {"abc"}
  10 {abc + . + def}                 {"abc" + "def"}
  11 {abc . def}                     {"abc" AND "def"}
  12 {r+e OR w}                      {"r" + "e" OR "w"}
} {
  do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
}

do_catchsql_test 2.1 {
  SELECT fts5_expr()
} {1 {wrong number of arguments to function fts5_expr}}







|
|
|
|
|
|
|
|
|
|
|
|







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

proc do_syntax_test {tn expr res} {
  set ::se_expr $expr
  do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res]
}

foreach {tn expr res} {
  1  {abc}                            {"abc"}
  2  {abc ""}                         {"abc"}
  3  {""}                             {}
  4  {abc OR ""}                      {"abc"}
  5  {abc NOT ""}                     {"abc"}
  6  {abc AND ""}                     {"abc"}
  7  {"" OR abc}                      {"abc"}
  8  {"" NOT abc}                     {"abc"}
  9  {"" AND abc}                     {"abc"}
  10 {abc + "" + def}                 {"abc" + "def"}
  11 {abc "" def}                     {"abc" AND "def"}
  12 {r+e OR w}                       {"r" + "e" OR "w"}
} {
  do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
}

do_catchsql_test 2.1 {
  SELECT fts5_expr()
} {1 {wrong number of arguments to function fts5_expr}}
Changes to ext/fts5/test/fts5fault6.test.
17
18
19
20
21
22
23

24
25
26
27
28
29
30
set testprefix fts5fault6

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


#-------------------------------------------------------------------------
# OOM while rebuilding an FTS5 table.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE tt USING fts5(a, b);
  INSERT INTO tt VALUES('c d c g g f', 'a a a d g a');







>







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
set testprefix fts5fault6

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


#-------------------------------------------------------------------------
# OOM while rebuilding an FTS5 table.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE tt USING fts5(a, b);
  INSERT INTO tt VALUES('c d c g g f', 'a a a d g a');
144
145
146
147
148
149
150
















































































































































151
152
  db eval { 
    CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc");
  }
} -test {
  faultsim_test_result {0 {}}
}

















































































































































finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
  db eval { 
    CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc");
  }
} -test {
  faultsim_test_result {0 {}}
}

#-------------------------------------------------------------------------
#
# 5.2.* OOM while running a query that includes synonyms and matchinfo().
#
# 5.3.* OOM while running a query that returns a row containing instances
#       of more than 4 synonyms for a single term.
#
proc mit {blob} {
  set scan(littleEndian) i*
  set scan(bigEndian) I*
  binary scan $blob $scan($::tcl_platform(byteOrder)) r
  return $r
}
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="query" && [string length $w]==1} {
      for {set i 2} {$i < 7} {incr i} {
        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
      }
    }
  }
}
proc tcl_create {args} { return "tcl_tokenize" }
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
db func mit mit
sqlite3_fts5_register_matchinfo db
do_test 5.0 {
  execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl) }
  execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 32) }
  foreach {rowid text} {
    1 {aaaa cc b aaaaa cc aa} 
    2 {aa aa bb a bbb}
    3 {bb aaaaa aaaaa b aaaa aaaaa}
    4 {aa a b aaaa aa}
    5 {aa b ccc aaaaa cc}
    6 {aa aaaaa bbbb cc aaa}
    7 {aaaaa aa aa ccccc bb}
    8 {ccc bbbbb ccccc bbb c}
    9 {cccccc bbbb a aaa cccc c}

    20 {ddd f ddd eeeee fff ffff eeee ddd fff eeeee dddddd eeee}
    21 {fffff eee dddd fffff dd ee ee eeeee eee eeeeee ee dd e}
    22 {fffff d eeee dddd fffff dddddd ffff ddddd eeeee ee eee dddd ddddd}
    23 {ddddd fff ddd eeeee ffff eeee ddd ff ff ffffff eeeeee dddd ffffff}
    24 {eee dd ee dddd dddd eeeeee e eee fff ffff}
    25 {ddddd ffffff dddddd fff ddd ddddd ddd f eeee fff dddd f}
    26 {f ffff fff fff eeeeee dddd d dddddd ddddd eee ff eeeee}
    27 {eee fff dddddd eeeee eeeee dddd ddddd ffff f eeeee eee dddddd ddddd d}
    28 {dd ddddd d ddd d fff d dddd ee dddd ee ddd dddddd dddddd}
    29 {eeee dddd ee dddd eeee dddd dd fffff f ddd eeeee ddd ee}
    30 {ff ffffff eeeeee eeeee eee ffffff ff ffff f fffff eeeee}
    31 {fffff eeeeee dddd eeee eeee eeeeee eee fffff d ddddd ffffff ffff dddddd}
    32 {dddddd fffff ee eeeeee eeee ee fff dddd fff eeee ffffff eeeeee ffffff}
    33 {ddddd eeee dd ffff dddddd fff eeee ddddd ffff eeee ddd}
    34 {ee dddd ddddd dddddd eeee eeeeee f dd ee dddddd ffffff}
    35 {ee dddd dd eeeeee ddddd eee d eeeeee dddddd eee dddd fffff}
    36 {eee ffffff ffffff e fffff eeeee ff dddddd dddddd fff}
    37 {eeeee fffff dddddd dddd ffffff fff f dd ee dd dd eeeee}
    38 {eeeeee ee d ff eeeeee eeeeee eee eeeee ee ffffff dddd eeee dddddd ee}
    39 {eeeeee ddd fffff e dddd ee eee eee ffffff ee f d dddd}
    40 {ffffff dddddd eee ee ffffff eee eeee ddddd ee eeeeee f}
    41 {ddd ddd fff fffff ee fffff f fff ddddd fffff}
    42 {dddd ee ff d f ffffff fff ffffff ff dd dddddd f eeee}
    43 {d dd fff fffff d f fff e dddd ee ee}
    44 {ff ffff eee ddd d dd ffff dddd d eeee d eeeeee}
    45 {eeee f eeeee ee e ffff f ddd e fff}
    46 {ffff d ffff eeee ffff eeeee f ffff ddddd eee}
    47 {dd dd dddddd ddddd fffff dddddd ddd ddddd eeeeee ffff eeee eee ee}
    48 {ffff ffff e dddd ffffff dd dd dddd f fffff}
    49 {ffffff d dddddd ffff eeeee f ffff ffff d dd fffff eeeee}

    50 {x e}
  } {
    execsql { INSERT INTO t1(rowid, a) VALUES($rowid, $text) }
  }
} {}

set res [list {*}{
  1 {3 24 8 2 12 6}
  5 {2 24 8 2 12 6}
  6 {3 24 8 1 12 6}
  7 {3 24 8 1 12 6}
  9 {2 24 8 3 12 6}
}]
do_execsql_test 5.1.1 {
  SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
} $res
do_execsql_test 5.1.2 {
  SELECT count(*) FROM t1 WHERE t1 MATCH 'd e f'
} 29

faultsim_save_and_close
do_faultsim_test 5.2 -faults oom* -prep {
  faultsim_restore_and_reopen
  sqlite3_fts5_create_tokenizer db tcl tcl_create
  sqlite3_fts5_register_matchinfo db
  db func mit mit
} -body {
  db eval { 
    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
  }
} -test {
  faultsim_test_result [list 0 $::res]
}

do_faultsim_test 5.3 -faults oom* -prep {
  faultsim_restore_and_reopen
  sqlite3_fts5_create_tokenizer db tcl tcl_create
} -body {
  db eval { 
    SELECT count(*) FROM t1 WHERE t1 MATCH 'd AND e AND f'
  }
} -test {
  faultsim_test_result {0 29}
}

do_faultsim_test 5.4 -faults oom* -prep {
  faultsim_restore_and_reopen
  sqlite3_fts5_create_tokenizer db tcl tcl_create
} -body {
  db eval { 
    SELECT count(*) FROM t1 WHERE t1 MATCH 'x + e'
  }
} -test {
  faultsim_test_result {0 1}
}

#-------------------------------------------------------------------------
catch { db close }
breakpoint
do_faultsim_test 6 -faults oom* -prep {
  sqlite_orig db test.db
  sqlite3_db_config_lookaside db 0 0 0
} -body {
  load_static_extension db fts5
} -test {
  faultsim_test_result {0 {}} {1 {initialization of fts5 failed: }}
  if {$testrc==0} {
    db eval { CREATE VIRTUAL TABLE temp.t1 USING fts5(x) }
  }
  db close
}
finish_test

Added ext/fts5/test/fts5fault7.test.


























































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# 2015 September 3
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#

source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault2

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Test fault-injection on a query that uses xColumnSize() on columnsize=0
# table.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0);
  INSERT INTO t1 VALUES('a b c d e f g');
  INSERT INTO t1 VALUES('a b c d');
  INSERT INTO t1 VALUES('a b c d e f g h i j');
}


fts5_aux_test_functions db
do_faultsim_test 1 -faults oom* -body {
  execsql { SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH 'b' }
} -test {
  faultsim_test_result {0 {7 4 10}} {1 SQLITE_NOMEM}
}

finish_test

Changes to ext/fts5/test/fts5matchinfo.test.
351
352
353
354
355
356
357
358
359

360
361
362
363
364
365
366
367
368
   GROUP BY t10.rowid
   ORDER BY 1;
} {1 1 one 2 2 two 3 3 three}
  
#---------------------------------------------------------------------------
# Test the 'y' matchinfo flag
#
set sqlite_fts3_enable_parentheses 1
reset_db

do_execsql_test 11.0 {
  CREATE VIRTUAL TABLE tt USING fts3(x, y);
  INSERT INTO tt VALUES('c d a c d d', 'e a g b d a');   -- 1
  INSERT INTO tt VALUES('c c g a e b', 'c g d g e c');   -- 2
  INSERT INTO tt VALUES('b e f d e g', 'b a c b c g');   -- 3
  INSERT INTO tt VALUES('a c f f g d', 'd b f d e g');   -- 4
  INSERT INTO tt VALUES('g a c f c f', 'd g g b c c');   -- 5
  INSERT INTO tt VALUES('g a c e b b', 'd b f b g g');   -- 6
  INSERT INTO tt VALUES('f d a a f c', 'e e a d c f');   -- 7







<

>

|







351
352
353
354
355
356
357

358
359
360
361
362
363
364
365
366
367
368
   GROUP BY t10.rowid
   ORDER BY 1;
} {1 1 one 2 2 two 3 3 three}
  
#---------------------------------------------------------------------------
# Test the 'y' matchinfo flag
#

reset_db
sqlite3_fts5_register_matchinfo db
do_execsql_test 11.0 {
  CREATE VIRTUAL TABLE tt USING fts5(x, y);
  INSERT INTO tt VALUES('c d a c d d', 'e a g b d a');   -- 1
  INSERT INTO tt VALUES('c c g a e b', 'c g d g e c');   -- 2
  INSERT INTO tt VALUES('b e f d e g', 'b a c b c g');   -- 3
  INSERT INTO tt VALUES('a c f f g d', 'd b f d e g');   -- 4
  INSERT INTO tt VALUES('g a c f c f', 'd g g b c c');   -- 5
  INSERT INTO tt VALUES('g a c e b b', 'd b f b g g');   -- 6
  INSERT INTO tt VALUES('f d a a f c', 'e e a d c f');   -- 7
428
429
430
431
432
433
434
435
436
437
438
439
440
441

442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
  } $r2

  do_execsql_test 11.1.$tn.2  {
    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
  } $r2
}
set sqlite_fts3_enable_parentheses 0

#---------------------------------------------------------------------------
# Test the 'b' matchinfo flag
#
set sqlite_fts3_enable_parentheses 1
reset_db

db func mit mit

do_test 12.0 {
  set cols [list]
  for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" }
  execsql "CREATE VIRTUAL TABLE tt USING fts3([join $cols ,])"
} {}

do_execsql_test 12.1 {
  INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc');
  SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc';
} [list [list [expr 1<<4] [expr 1<<(45-32)]]]

set sqlite_fts3_enable_parentheses 0
finish_test








<




<

>





|







<


428
429
430
431
432
433
434

435
436
437
438

439
440
441
442
443
444
445
446
447
448
449
450
451
452
453

454
455
    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
  } $r2

  do_execsql_test 11.1.$tn.2  {
    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
  } $r2
}


#---------------------------------------------------------------------------
# Test the 'b' matchinfo flag
#

reset_db
sqlite3_fts5_register_matchinfo db
db func mit mit

do_test 12.0 {
  set cols [list]
  for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" }
  execsql "CREATE VIRTUAL TABLE tt USING fts5([join $cols ,])"
} {}

do_execsql_test 12.1 {
  INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc');
  SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc';
} [list [list [expr 1<<4] [expr 1<<(45-32)]]]


finish_test

Added ext/fts5/test/fts5synonym.test.
























































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on custom tokenizers that support synonyms.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5synonym

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

foreach S {
  {zero 0}
  {one 1 i}
  {two 2 ii}
  {three 3 iii}
  {four 4 iv}
  {five 5 v}
  {six 6 vi}
  {seven 7 vii}
  {eight 8 viii}
  {nine 9 ix}
} {
  foreach s $S {
    set o [list]
    foreach x $S {if {$x!=$s} {lappend o $x}}
    set ::syn($s) $o
  }
}

proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
  }
}

proc tcl_create {args} {
  return "tcl_tokenize"
}

sqlite3_fts5_create_tokenizer db tcl tcl_create

#-------------------------------------------------------------------------
# Warm body test for the code in fts5_tcl.c.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('abc def ghi');
  INSERT INTO ft VALUES('jkl mno pqr');
  SELECT rowid, x FROM ft WHERE ft MATCH 'def';
  SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
} {1 {abc def ghi} {jkl mno pqr} 2}

#-------------------------------------------------------------------------
# Test a tokenizer that supports synonyms by adding extra entries to the
# FTS index.
#

proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="document" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd
      }
    }
  }
}
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create

do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('one two three');
  INSERT INTO ft VALUES('four five six');
  INSERT INTO ft VALUES('eight nine ten');
} {}

foreach {tn expr res} {
  1 "3" 1
  2 "eight OR 8 OR 5" {2 3}
  3 "10" {}
  4 "1*" {1}
  5 "1 + 2" {1}
} {
  do_execsql_test 2.1.$tn {
    SELECT rowid FROM ft WHERE ft MATCH $expr
  } $res
}

#-------------------------------------------------------------------------
# Test some broken tokenizers:
#
#   3.1.*: A tokenizer that declares the very first token to be colocated.
#
#   3.2.*: A tokenizer that reports two identical tokens at the same position.
#          This is allowed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  set bColo 1
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    if {$bColo} {
      sqlite3_fts5_token -colo $w $iStart $iEnd
      set bColo 0
    } {
      sqlite3_fts5_token $w $iStart $iEnd
    }
  }
}
do_execsql_test 3.1.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('one two three');
  CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
  SELECT * FROM vv;
} {
  one 1 1   three 1 1   two 1 1
}

do_execsql_test 3.1.1 {
  INSERT INTO ft(ft) VALUES('integrity-check');
} {}

proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
  }
}

do_execsql_test 3.1.2 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two three'
} {1}

reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    sqlite3_fts5_token -colo $w $iStart $iEnd
  }
}
do_execsql_test 3.2.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('one one two three');
  CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
  SELECT * FROM vv;
} {
  one 1 4   three 1 2   two 1 2
}
do_execsql_test 3.2.1 {
  SELECT rowid FROM ft WHERE ft MATCH 'one';
} {1}
do_execsql_test 3.2.2 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two three';
} {1}
do_execsql_test 3.2.3 {
  SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
} {1}
do_execsql_test 3.2.4 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
} {1}
do_execsql_test 3.2.5 {
  SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
} {}

#-------------------------------------------------------------------------
# Check that expressions with synonyms can be parsed and executed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="query" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd
      }
    }
  }
}

foreach {tn expr res} {
  1  {abc}                           {"abc"}
  2  {one}                           {"one"|"i"|"1"}
  3  {3}                             {"3"|"iii"|"three"}
  4  {3*}                            {"3"|"iii"|"three" *}
} {
  do_execsql_test 4.1.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res]
}

do_execsql_test 4.2.1 {
  CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tcl);
  INSERT INTO xx VALUES('one two');
  INSERT INTO xx VALUES('three four');
}

do_execsql_test 4.2.2 {
  SELECT rowid FROM xx WHERE xx MATCH '2'
} {1}

do_execsql_test 4.2.3 {
  SELECT rowid FROM xx WHERE xx MATCH '3'
} {2}

do_test 5.0 {
  execsql { 
    CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tcl)
  }
  foreach {rowid a b} {
    1 {four v 4 i three} {1 3 five five 4 one}
    2 {5 1 3 4 i} {2 2 v two 4}
    3 {5 i 5 2 four 4 1} {iii ii five two 1}
    4 {ii four 4 one 5 three five} {one 5 1 iii 4 3}
    5 {three i v i four 4 1} {ii five five five iii}
    6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv}
    7 {ii ii two three 2 5} {iii i ii iii iii one one}
    8 {2 ii i two 3 three 2} {two iv v iii 3 five}
    9 {i 2 iv 3 five four v} {iii 4 three i three ii 1}
  } {
    execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) }
  }
} {}


foreach {tn q res} {
  1 {one} {
    1 {four v 4 [i] three} {[1] 3 five five 4 [one]}
    2 {5 [1] 3 4 [i]} {2 2 v two 4}
    3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]}
    4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3}
    5 {three [i] v [i] four 4 [1]} {ii five five five iii}
    6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv}
    7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]}
    8 {2 ii [i] two 3 three 2} {two iv v iii 3 five}
    9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]}
  }
  2 {five four} {
    1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one}
    2 {[5] 1 3 [4] i} {2 2 [v] two [4]}
    3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1}
    4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3}
    5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii}
    8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]}
    9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1}
  }
  3 {one OR two OR iii OR 4 OR v} {
    1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]}
    2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]}
    3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]}
    4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]}
    5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]}
    6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]}
    7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]}
    8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]}
    9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]}
  }

  4 {5 + 1} {
    2 {[5 1] 3 4 i} {2 2 v two 4} 
    3 {[5 i] 5 2 four 4 1} {iii ii five two 1} 
    4 {ii four 4 one 5 three five} {one [5 1] iii 4 3} 
    5 {three i [v i] four 4 1} {ii five five five iii}
  }

  5 {one + two + three} {
    7 {ii ii two three 2 5} {iii [i ii iii] iii one one}
    8 {2 ii [i two 3] three 2} {two iv v iii 3 five}
  }

  6 {"v v"} {
    1 {four v 4 i three} {1 3 [five five] 4 one}
    5 {three i v i four 4 1} {ii [five five five] iii}
  }
} {
  do_execsql_test 5.1.$tn {
    SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
    FROM t1 WHERE t1 MATCH $q
  } $res
}

# Test that the xQueryPhrase() API works with synonyms.
#
proc mit {blob} {
  set scan(littleEndian) i*
  set scan(bigEndian) I*
  binary scan $blob $scan($::tcl_platform(byteOrder)) r
  return $r
}
db func mit mit
sqlite3_fts5_register_matchinfo db

foreach {tn q res} {
  1 {one} {
      1 {1 11 7 2 12 6}     2 {2 11 7 0 12 6} 
      3 {2 11 7 1 12 6}     4 {1 11 7 2 12 6} 
      5 {3 11 7 0 12 6}     6 {0 11 7 2 12 6} 
      7 {0 11 7 3 12 6}     8 {1 11 7 0 12 6} 
      9 {1 11 7 2 12 6}
  }
} {
  do_execsql_test 5.2.$tn {
    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
  } $res
}


#-------------------------------------------------------------------------
# Test terms with more than 4 synonyms.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="query" && [string length $w]==1} {
      for {set i 2} {$i<=10} {incr i} {
        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
      }
    }
  }
}

do_execsql_test 6.0.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl);
  INSERT INTO t1 VALUES('yy xx qq');
  INSERT INTO t1 VALUES('yy xx xx');
}
do_execsql_test 6.0.2 {
  SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)';
} {{yy xx qq}}

do_test 6.0.3 {
  execsql { 
    CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl)
  }
  foreach {rowid a b} {
    1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa}
    2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
    3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj}
    4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii}
    5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n}
    6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq}
    7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu}
    8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss}
    9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
  } {
    execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) }
  }
} {}

foreach {tn q res} {
  1 {a} {
    1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]}
    3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj}
    4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii}
    6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq}
  }

  2 {a AND q} {
    1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
    6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]}
  }

  3 {o OR (q AND a)} {
    1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
    2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
    5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n}
    6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]}
    9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
  }

  4 {NEAR(q y, 20)} {
    1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa}
    2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]}
  }
} {
  do_execsql_test 6.1.$tn.asc {
    SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
    FROM t2 WHERE t2 MATCH $q
  } $res

  set res2 [list]
  foreach {rowid a b} $res {
    set res2 [concat [list $rowid $a $b] $res2]
  }

  do_execsql_test 6.1.$tn.desc {
    SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
    FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC
  } $res2
}

do_execsql_test 6.2.1 {
  INSERT INTO t2(rowid, a, b) VALUES(13,
      'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy'
  );
  SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')')
  FROM t2 WHERE t2 MATCH 'x OR y'
} {
  1 {<yyyy> vvvvv qq oo <yyyyyy> vvvv eee} {ffff uu r qq aaaa}
  2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq}
  4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii}
  13 {<x> <xx> <xxx> <xxxx> <xxxxx> <xxxxxx> <xxxxxxx>}
     {(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)}
}

#-------------------------------------------------------------------------
# Test that the xColumnSize() API is not confused by colocated tokens.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
fts5_aux_test_functions db
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {[string length $w]==1} {
      for {set i 2} {$i<=10} {incr i} {
        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
      }
    }
  }
}

do_execsql_test 7.0.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl);
  INSERT INTO t1 VALUES('0 2 3', '4 5 6 7');
  INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}

do_execsql_test 7.0.2 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

do_execsql_test 7.1.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl);
  INSERT INTO t2 VALUES('0 2 3', '4 5 6 7');
  INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}

do_execsql_test 7.1.2 {
  INSERT INTO t2(t2) VALUES('integrity-check');
}

finish_test

Changes to main.mk.
43
44
45
46
47
48
49

50
51
52
53
54
55
56
################################################################################

# This is how we compile
#
TCCX =  $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) 
TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3
TCCX += -I$(TOP)/ext/async -I$(TOP)/ext/userauth


# Object files for the SQLite library.
#
LIBOBJ+= vdbe.o parse.o \
         alter.o analyze.o attach.o auth.o \
         backup.o bitvec.o btmutex.o btree.o build.o \
         callback.o complete.o ctime.o date.o dbstat.o delete.o expr.o fault.o fkey.o \







>







43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
################################################################################

# This is how we compile
#
TCCX =  $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) 
TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3
TCCX += -I$(TOP)/ext/async -I$(TOP)/ext/userauth
TCCX += -I$(TOP)/ext/fts5

# Object files for the SQLite library.
#
LIBOBJ+= vdbe.o parse.o \
         alter.o analyze.o attach.o auth.o \
         backup.o bitvec.o btmutex.o btree.o build.o \
         callback.o complete.o ctime.o date.o dbstat.o delete.o expr.o fault.o fkey.o \
225
226
227
228
229
230
231























232
233
234
235
236
237
238
  $(TOP)/ext/userauth/userauth.c \
  $(TOP)/ext/userauth/sqlite3userauth.h 

SRC += \
  $(TOP)/ext/rbu/sqlite3rbu.c \
  $(TOP)/ext/rbu/sqlite3rbu.h

























# Generated source code files
#
SRC += \
  keywordhash.h \
  opcodes.c \
  opcodes.h \







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
  $(TOP)/ext/userauth/userauth.c \
  $(TOP)/ext/userauth/sqlite3userauth.h 

SRC += \
  $(TOP)/ext/rbu/sqlite3rbu.c \
  $(TOP)/ext/rbu/sqlite3rbu.h


# FTS5 things
#
FTS5_HDR = \
   $(TOP)/ext/fts5/fts5.h \
   $(TOP)/ext/fts5/fts5Int.h \
   fts5parse.h
	   
FTS5_SRC = \
   $(TOP)/ext/fts5/fts5_aux.c \
   $(TOP)/ext/fts5/fts5_buffer.c \
   $(TOP)/ext/fts5/fts5_main.c \
   $(TOP)/ext/fts5/fts5_config.c \
   $(TOP)/ext/fts5/fts5_expr.c \
   $(TOP)/ext/fts5/fts5_hash.c \
   $(TOP)/ext/fts5/fts5_index.c \
   fts5parse.c \
   $(TOP)/ext/fts5/fts5_storage.c \
   $(TOP)/ext/fts5/fts5_tokenize.c \
   $(TOP)/ext/fts5/fts5_unicode2.c \
   $(TOP)/ext/fts5/fts5_varint.c \
   $(TOP)/ext/fts5/fts5_vocab.c  \


# Generated source code files
#
SRC += \
  keywordhash.h \
  opcodes.c \
  opcodes.h \
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675

fts3_write.o:	$(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c

rtree.o:	$(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c

# FTS5 things
#
FTS5_SRC = \
   $(TOP)/ext/fts5/fts5.h \
   $(TOP)/ext/fts5/fts5Int.h \
   $(TOP)/ext/fts5/fts5_aux.c \
   $(TOP)/ext/fts5/fts5_buffer.c \
   $(TOP)/ext/fts5/fts5_main.c \
   $(TOP)/ext/fts5/fts5_config.c \
   $(TOP)/ext/fts5/fts5_expr.c \
   $(TOP)/ext/fts5/fts5_hash.c \
   $(TOP)/ext/fts5/fts5_index.c \
   fts5parse.c fts5parse.h \
   $(TOP)/ext/fts5/fts5_storage.c \
   $(TOP)/ext/fts5/fts5_tokenize.c \
   $(TOP)/ext/fts5/fts5_unicode2.c \
   $(TOP)/ext/fts5/fts5_varint.c \
   $(TOP)/ext/fts5/fts5_vocab.c  \

fts5parse.c:	$(TOP)/ext/fts5/fts5parse.y lemon 
	cp $(TOP)/ext/fts5/fts5parse.y .
	rm -f fts5parse.h
	./lemon $(OPTS) fts5parse.y

fts5parse.h: fts5parse.c

fts5.c: $(FTS5_SRC)
	tclsh $(TOP)/ext/fts5/tool/mkfts5c.tcl
	cp $(TOP)/ext/fts5/fts5.h .


userauth.o:	$(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c

sqlite3rbu.o:	$(TOP)/ext/rbu/sqlite3rbu.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rbu/sqlite3rbu.c








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







|


<







656
657
658
659
660
661
662



















663
664
665
666
667
668
669
670
671
672

673
674
675
676
677
678
679

fts3_write.o:	$(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c

rtree.o:	$(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c




















fts5parse.c:	$(TOP)/ext/fts5/fts5parse.y lemon 
	cp $(TOP)/ext/fts5/fts5parse.y .
	rm -f fts5parse.h
	./lemon $(OPTS) fts5parse.y

fts5parse.h: fts5parse.c

fts5.c: $(FTS5_SRC) $(FTS5_HDR)
	tclsh $(TOP)/ext/fts5/tool/mkfts5c.tcl
	cp $(TOP)/ext/fts5/fts5.h .


userauth.o:	$(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c

sqlite3rbu.o:	$(TOP)/ext/rbu/sqlite3rbu.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rbu/sqlite3rbu.c