Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Modify the fts5 custom tokenizer interface to permit synonym support. The fts5_api.iVersion value is now set to 2. Existing fts5 custom tokenizers (if there are such things) will need to be updated to use the new api version. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
0b7e4ab8abde3ae32459233df115c433 |
User & Date: | dan 2015-09-04 10:31:51.624 |
Context
2015-09-04
| ||
11:13 | Enhance showfts5.tcl so that it can optionally display the number of terms in each segment. (check-in: d648ddd93d user: dan tags: trunk) | |
10:31 | Modify the fts5 custom tokenizer interface to permit synonym support. The fts5_api.iVersion value is now set to 2. Existing fts5 custom tokenizers (if there are such things) will need to be updated to use the new api version. (check-in: 0b7e4ab8ab user: dan tags: trunk) | |
10:24 | Merge latest trunk changes. (Closed-Leaf check-in: 443a5eb8e1 user: dan tags: fts5-incompatible) | |
04:31 | Simplification of the LRU list handling in pcache1. (check-in: 05a3a2cd14 user: drh tags: trunk) | |
Changes
Changes to ext/fts5/extract_api_docs.tcl.
︙ | ︙ | |||
104 105 106 107 108 109 110 111 112 113 114 115 116 | set res "<dl>\n" foreach line [split [string trim $docs] "\n"] { regexp {[*][*](.*)} $line -> line if {[regexp {^ ?x.*:} $line]} { append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n" continue } if {[string trim $line] == ""} { append res "<p>\n" } else { append res "$line\n" } } | > > > < | 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 | set res "<dl>\n" foreach line [split [string trim $docs] "\n"] { regexp {[*][*](.*)} $line -> line if {[regexp {^ ?x.*:} $line]} { append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n" continue } if {[regexp {SYNONYM SUPPORT} $line]} { set line "</dl><h3>Synonym Support</h3>" } if {[string trim $line] == ""} { append res "<p>\n" } else { append res "$line\n" } } set res } proc get_api_docs {data} { # Initialize global array M as a map from Fts5StructureApi member name # to member definition. i.e. |
︙ | ︙ | |||
204 205 206 207 208 209 210 211 212 213 214 215 216 217 | switch $::extract_api_docs_mode { fts5_api { output [get_fts5_struct $data "typedef struct fts5_api" "^\};"] } fts5_tokenizer { output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"] } fts5_extension { output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"] } Fts5ExtensionApi { | > > > > | 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 | switch $::extract_api_docs_mode { fts5_api { output [get_fts5_struct $data "typedef struct fts5_api" "^\};"] } fts5_tokenizer { output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"] output [get_fts5_struct $data \ "Flags that may be passed as the third argument to xTokenize()" \ "#define FTS5_TOKEN_COLOCATED" ] } fts5_extension { output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"] } Fts5ExtensionApi { |
︙ | ︙ |
Changes to ext/fts5/fts5.h.
︙ | ︙ | |||
213 214 215 216 217 218 219 | int (*xColumnCount)(Fts5Context*); int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ | | | 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 | int (*xColumnCount)(Fts5Context*); int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ ); int (*xPhraseCount)(Fts5Context*); int (*xPhraseSize)(Fts5Context*, int iPhrase); int (*xInstCount)(Fts5Context*, int *pnInst); int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); |
︙ | ︙ | |||
274 275 276 277 278 279 280 | ** xDelete: ** This function is invoked to delete a tokenizer handle previously ** allocated using xCreate(). Fts5 guarantees that this function will ** be invoked exactly once for each successful call to xCreate(). ** ** xTokenize: ** This function is expected to tokenize the nText byte string indicated | | | | > > > > > > > > > > > > > > > > > > > > > > > > | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 | ** xDelete: ** This function is invoked to delete a tokenizer handle previously ** allocated using xCreate(). Fts5 guarantees that this function will ** be invoked exactly once for each successful call to xCreate(). ** ** xTokenize: ** This function is expected to tokenize the nText byte string indicated ** by argument pText. pText may or may not be nul-terminated. The first ** argument passed to this function is a pointer to an Fts5Tokenizer object ** returned by an earlier call to xCreate(). ** ** The second argument indicates the reason that FTS5 is requesting ** tokenization of the supplied text. This is always one of the following ** four values: ** ** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into ** or removed from the FTS table. The tokenizer is being invoked to ** determine the set of tokens to add to (or delete from) the ** FTS index. ** ** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed ** against the FTS index. The tokenizer is being called to tokenize ** a bareword or quoted string specified as part of the query. ** ** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as ** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is ** followed by a "*" character, indicating that the last token ** returned by the tokenizer will be treated as a token prefix. ** ** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to ** satisfy an fts5_api.xTokenize() request made by an auxiliary ** function. Or an fts5_api.xColumnSize() request made by the same ** on a columnsize=0 database. ** </ul> ** ** For each token in the input string, the supplied callback xToken() must ** be invoked. The first argument to it should be a copy of the pointer ** passed as the second argument to xTokenize(). The third and fourth ** arguments are a pointer to a buffer containing the token text, and the ** size of the token in bytes. The 4th and 5th arguments are the byte offsets ** of the first byte of and first byte immediately following the text from ** which the token is derived within the input. ** ** The second argument passed to the xToken() callback ("tflags") should ** normally be set to 0. The exception is if the tokenizer supports ** synonyms. In this case see the discussion below for details. ** ** FTS5 assumes the xToken() callback is invoked for each token in the ** order that they occur within the input text. ** ** If an xToken() callback returns any value other than SQLITE_OK, then ** the tokenization should be abandoned and the xTokenize() method should ** immediately return a copy of the xToken() return value. Or, if the ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, ** if an error occurs with the xTokenize() implementation itself, it ** may abandon the tokenization and return any error code other than ** SQLITE_OK or SQLITE_DONE. ** ** SYNONYM SUPPORT ** ** Custom tokenizers may also support synonyms. Consider a case in which a ** user wishes to query for a phrase such as "first place". Using the ** built-in tokenizers, the FTS5 query 'first + place' will match instances ** of "first place" within the document set, but not alternative forms ** such as "1st place". In some applications, it would be better to match ** all instances of "first place" or "1st place" regardless of which form ** the user specified in the MATCH query text. ** ** There are several ways to approach this in FTS5: ** ** <ol><li> By mapping all synonyms to a single token. In this case, the ** In the above example, this means that the tokenizer returns the ** same token for inputs "first" and "1st". Say that token is in ** fact "first", so that when the user inserts the document "I won ** 1st place" entries are added to the index for tokens "i", "won", ** "first" and "place". If the user then queries for '1st + place', ** the tokenizer substitutes "first" for "1st" and the query works ** as expected. ** ** <li> By adding multiple synonyms for a single term to the FTS index. ** In this case, when tokenizing query text, the tokenizer may ** provide multiple synonyms for a single term within the document. ** FTS5 then queries the index for each synonym individually. For ** example, faced with the query: ** ** <codeblock> ** ... MATCH 'first place'</codeblock> ** ** the tokenizer offers both "1st" and "first" as synonyms for the ** first token in the MATCH query and FTS5 effectively runs a query ** similar to: ** ** <codeblock> ** ... MATCH '(first OR 1st) place'</codeblock> ** ** except that, for the purposes of auxiliary functions, the query ** still appears to contain just two phrases - "(first OR 1st)" ** being treated as a single phrase. ** ** <li> By adding multiple synonyms for a single term to the FTS index. ** Using this method, when tokenizing document text, the tokenizer ** provides multiple synonyms for each token. So that when a ** document such as "I won first place" is tokenized, entries are ** added to the FTS index for "i", "won", "first", "1st" and ** "place". ** ** This way, even if the tokenizer does not provide synonyms ** when tokenizing query text (it should not - to do would be ** inefficient), it doesn't matter if the user queries for ** 'first + place' or '1st + place', as there are entires in the ** FTS index corresponding to both forms of the first token. ** </ol> ** ** Whether is is parsing document or query text, any call to xToken that ** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit ** is considered to supply a synonym for the previous token. For example, ** when parsing the document "I won first place", a tokenizer that supports ** synonyms would call xToken() 5 times, as follows: ** ** <codeblock> ** xToken(pCtx, 0, "i", 1, 0, 1); ** xToken(pCtx, 0, "won", 3, 2, 5); ** xToken(pCtx, 0, "first", 5, 6, 11); ** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11); ** xToken(pCtx, 0, "place", 5, 12, 17); **</codeblock> ** ** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time ** xToken() is called. Multiple synonyms may be specified for a single token ** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. ** There is no limit to the number of synonyms that may be provided for a ** single token. ** ** In many cases, method (1) above is the best approach. It does not add ** extra data to the FTS index or require FTS5 to query for multiple terms, ** so it is efficient in terms of disk space and query speed. However, it ** does not support prefix queries very well. If, as suggested above, the ** token "first" is subsituted for "1st" by the tokenizer, then the query: ** ** <codeblock> ** ... MATCH '1s*'</codeblock> ** ** will not match documents that contain the token "1st" (as the tokenizer ** will probably not map "1s" to any prefix of "first"). ** ** For full prefix support, method (3) may be preferred. In this case, ** because the index contains entries for both "first" and "1st", prefix ** queries such as 'fi*' or '1s*' will match correctly. However, because ** extra entries are added to the FTS index, this method uses more space ** within the database. ** ** Method (2) offers a midpoint between (1) and (3). Using this method, ** a query such as '1s*' will match documents that contain the literal ** token "1st", but not "first" (assuming the tokenizer is not able to ** provide synonyms for prefixes). However, a non-prefix query like '1st' ** will match against "1st" and "first". This method does not require ** extra disk space, as no extra entries are added to the FTS index. ** On the other hand, it may require more CPU cycles to run MATCH queries, ** as separate queries of the FTS index are required for each synonym. ** ** When using methods (2) or (3), it is important that the tokenizer only ** provide synonyms when tokenizing document text (method (2)) or query ** text (method (3)), not both. Doing so will not cause any errors, but is ** inefficient. */ typedef struct Fts5Tokenizer Fts5Tokenizer; typedef struct fts5_tokenizer fts5_tokenizer; struct fts5_tokenizer { int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); void (*xDelete)(Fts5Tokenizer*); int (*xTokenize)(Fts5Tokenizer*, void *pCtx, int flags, /* Mask of FTS5_TOKENIZE_* flags */ const char *pText, int nText, int (*xToken)( void *pCtx, /* Copy of 2nd argument to xTokenize() */ int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ int iEnd /* Byte offset of end of token within input text */ ) ); }; /* Flags that may be passed as the third argument to xTokenize() */ #define FTS5_TOKENIZE_QUERY 0x0001 #define FTS5_TOKENIZE_PREFIX 0x0002 #define FTS5_TOKENIZE_DOCUMENT 0x0004 #define FTS5_TOKENIZE_AUX 0x0008 /* Flags that may be passed by the tokenizer implementation back to FTS5 ** as the third argument to the supplied xToken callback. */ #define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */ /* ** END OF CUSTOM TOKENIZERS *************************************************************************/ /************************************************************************* ** FTS5 EXTENSION REGISTRATION API */ typedef struct fts5_api fts5_api; struct fts5_api { int iVersion; /* Currently always set to 2 */ /* Create a new tokenizer */ int (*xCreateTokenizer)( fts5_api *pApi, const char *zName, void *pContext, fts5_tokenizer *pTokenizer, |
︙ | ︙ |
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
162 163 164 165 166 167 168 169 170 | ); void sqlite3Fts5ConfigFree(Fts5Config*); int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ | > | | 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 | ); void sqlite3Fts5ConfigFree(Fts5Config*); int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ int flags, /* FTS5_TOKENIZE_* flags */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ ); void sqlite3Fts5Dequote(char *z); /* Load the contents of the %_config table */ int sqlite3Fts5ConfigLoad(Fts5Config*, int); |
︙ | ︙ | |||
230 231 232 233 234 235 236 237 | struct Fts5PoslistReader { /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */ int iCol; /* If (iCol>=0), this column only */ const u8 *a; /* Position list to iterate through */ int n; /* Size of buffer at a[] in bytes */ int i; /* Current offset in a[] */ /* Output variables */ | > > | | 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 | struct Fts5PoslistReader { /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */ int iCol; /* If (iCol>=0), this column only */ const u8 *a; /* Position list to iterate through */ int n; /* Size of buffer at a[] in bytes */ int i; /* Current offset in a[] */ u8 bFlag; /* For client use (any custom purpose) */ /* Output variables */ u8 bEof; /* Set to true at EOF */ i64 iPos; /* (iCol<<32) + iPos */ }; int sqlite3Fts5PoslistReaderInit( int iCol, /* If (iCol>=0), this column only */ const u8 *a, int n, /* Poslist buffer to iterate through */ Fts5PoslistReader *pIter /* Iterator object to initialize */ ); |
︙ | ︙ | |||
377 378 379 380 381 382 383 | /* ** Retrieve and clear the current error code, respectively. */ int sqlite3Fts5IndexErrcode(Fts5Index*); void sqlite3Fts5IndexReset(Fts5Index*); /* | | | | 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 | /* ** Retrieve and clear the current error code, respectively. */ int sqlite3Fts5IndexErrcode(Fts5Index*); void sqlite3Fts5IndexReset(Fts5Index*); /* ** Get or set the "averages" values. */ int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize); int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); /* ** Functions called by the storage module as part of integrity-check. */ u64 sqlite3Fts5IndexCksum(Fts5Config*,i64,int,int,const char*,int); int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum); |
︙ | ︙ | |||
592 593 594 595 596 597 598 | /* Called during startup to register a UDF with SQLite */ int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*); int sqlite3Fts5ExprPhraseCount(Fts5Expr*); int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); | | | 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 | /* Called during startup to register a UDF with SQLite */ int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*); int sqlite3Fts5ExprPhraseCount(Fts5Expr*); int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); int sqlite3Fts5ExprClonePhrase(Fts5Config*, Fts5Expr*, int, Fts5Expr**); /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written ** C code in this module. The interfaces below this point are called by ** the parser code in fts5parse.y. */ void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...); |
︙ | ︙ |
Changes to ext/fts5/fts5_aux.c.
︙ | ︙ | |||
144 145 146 147 148 149 150 151 152 153 154 155 156 157 | } /* ** Tokenizer callback used by implementation of highlight() function. */ static int fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start offset of token */ int iEndOff /* End offset of token */ ){ HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; | > > > > | | 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | } /* ** Tokenizer callback used by implementation of highlight() function. */ static int fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start offset of token */ int iEndOff /* End offset of token */ ){ HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; int iPos; if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK; iPos = p->iPos++; if( p->iRangeEnd>0 ){ if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; } if( iPos==p->iter.iStart ){ |
︙ | ︙ |
Changes to ext/fts5/fts5_config.c.
︙ | ︙ | |||
641 642 643 644 645 646 647 648 649 | ** the callback returned SQLITE_DONE, this is not an error and this function ** still returns SQLITE_OK. Or, if the tokenization was abandoned early ** because the callback returned another non-zero value, it is assumed ** to be an SQLite error code and returned to the caller. */ int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ | > | | > > | 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 | ** the callback returned SQLITE_DONE, this is not an error and this function ** still returns SQLITE_OK. Or, if the tokenization was abandoned early ** because the callback returned another non-zero value, it is assumed ** to be an SQLite error code and returned to the caller. */ int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ int flags, /* FTS5_TOKENIZE_* flags */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ ){ if( pText==0 ) return SQLITE_OK; return pConfig->pTokApi->xTokenize( pConfig->pTok, pCtx, flags, pText, nText, xToken ); } /* ** Argument pIn points to the first character in what is expected to be ** a comma-separated list of SQL literals followed by a ')' character. ** If it actually is this, return a pointer to the ')'. Otherwise, return ** NULL to indicate a parse error. |
︙ | ︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
18 19 20 21 22 23 24 25 26 27 28 29 30 31 | #include "fts5parse.h" /* ** All token types in the generated fts5parse.h file are greater than 0. */ #define FTS5_EOF 0 typedef struct Fts5ExprTerm Fts5ExprTerm; /* ** Functions generated by lemon from fts5parse.y. */ void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64)); void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); | > > | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | #include "fts5parse.h" /* ** All token types in the generated fts5parse.h file are greater than 0. */ #define FTS5_EOF 0 #define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) typedef struct Fts5ExprTerm Fts5ExprTerm; /* ** Functions generated by lemon from fts5parse.y. */ void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64)); void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); |
︙ | ︙ | |||
69 70 71 72 73 74 75 76 77 78 79 80 81 82 | ** An instance of the following structure represents a single search term ** or term prefix. */ struct Fts5ExprTerm { int bPrefix; /* True for a prefix term */ char *zTerm; /* nul-terminated term */ Fts5IndexIter *pIter; /* Iterator for this term */ }; /* ** A phrase. One or more terms that must appear in a contiguous sequence ** within a document for it to match. */ struct Fts5ExprPhrase { | > | 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 | ** An instance of the following structure represents a single search term ** or term prefix. */ struct Fts5ExprTerm { int bPrefix; /* True for a prefix term */ char *zTerm; /* nul-terminated term */ Fts5IndexIter *pIter; /* Iterator for this term */ Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ }; /* ** A phrase. One or more terms that must appear in a contiguous sequence ** within a document for it to match. */ struct Fts5ExprPhrase { |
︙ | ︙ | |||
177 178 179 180 181 182 183 184 185 186 187 188 189 190 | } pToken->n = (z2 - z); break; } default: { const char *z2; tok = FTS5_STRING; for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); pToken->n = (z2 - z); if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND; break; | > > > > | 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 | } pToken->n = (z2 - z); break; } default: { const char *z2; if( sqlite3Fts5IsBareword(z[0])==0 ){ sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z); return FTS5_EOF; } tok = FTS5_STRING; for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); pToken->n = (z2 - z); if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND; break; |
︙ | ︙ | |||
240 241 242 243 244 245 246 | } sqlite3_free(sParse.apPhrase); *pzErr = sParse.zErr; return sParse.rc; } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 247 248 249 250 251 252 253 254 255 256 257 258 259 260 | } sqlite3_free(sParse.apPhrase); *pzErr = sParse.zErr; return sParse.rc; } /* ** Free the expression node object passed as the only argument. */ void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ if( p ){ int i; for(i=0; i<p->nChild; i++){ |
︙ | ︙ | |||
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 | static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){ int i; for(i=0; i<pColset->nCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } /* ** All individual term iterators in pPhrase are guaranteed to be valid and ** pointing to the same rowid when this function is called. This function ** checks if the current rowid really is a match, and if so populates ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch ** is set to true if this is really a match, or false otherwise. ** ** SQLITE_OK is returned if an error occurs, or an SQLite error code ** otherwise. It is not considered an error code if the current rowid is ** not a match. */ static int fts5ExprPhraseIsMatch( | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 | static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){ int i; for(i=0; i<pColset->nCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } /* ** Argument pTerm must be a synonym iterator. Return the current rowid ** that it points to. */ static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){ i64 iRet = 0; int bRetValid = 0; Fts5ExprTerm *p; assert( pTerm->pSynonym ); assert( bDesc==0 || bDesc==1 ); for(p=pTerm; p; p=p->pSynonym){ if( 0==sqlite3Fts5IterEof(p->pIter) ){ i64 iRowid = sqlite3Fts5IterRowid(p->pIter); if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){ iRet = iRowid; bRetValid = 1; } } } if( pbEof && bRetValid==0 ) *pbEof = 1; return iRet; } /* ** Argument pTerm must be a synonym iterator. */ static int fts5ExprSynonymPoslist( Fts5ExprTerm *pTerm, i64 iRowid, int *pbDel, /* OUT: Caller should sqlite3_free(*pa) */ u8 **pa, int *pn ){ Fts5PoslistWriter writer = {0}; Fts5PoslistReader aStatic[4]; Fts5PoslistReader *aIter = aStatic; int nIter = 0; int nAlloc = 4; int rc = SQLITE_OK; Fts5ExprTerm *p; assert( pTerm->pSynonym ); for(p=pTerm; p; p=p->pSynonym){ Fts5IndexIter *pIter = p->pIter; if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){ const u8 *a; int n; i64 dummy; rc = sqlite3Fts5IterPoslist(pIter, &a, &n, &dummy); if( rc!=SQLITE_OK ) goto synonym_poslist_out; if( nIter==nAlloc ){ int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( aNew==0 ){ rc = SQLITE_NOMEM; goto synonym_poslist_out; } memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); nAlloc = nAlloc*2; if( aIter!=aStatic ) sqlite3_free(aIter); aIter = aNew; } sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[nIter]); assert( aIter[nIter].bEof==0 ); nIter++; } } assert( *pbDel==0 ); if( nIter==1 ){ *pa = (u8*)aIter[0].a; *pn = aIter[0].n; }else{ Fts5PoslistWriter writer = {0}; Fts5Buffer buf = {0,0,0}; i64 iPrev = -1; while( 1 ){ int i; i64 iMin = FTS5_LARGEST_INT64; for(i=0; i<nIter; i++){ if( aIter[i].bEof==0 ){ if( aIter[i].iPos==iPrev ){ if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue; } if( aIter[i].iPos<iMin ){ iMin = aIter[i].iPos; } } } if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break; rc = sqlite3Fts5PoslistWriterAppend(&buf, &writer, iMin); iPrev = iMin; } if( rc ){ sqlite3_free(buf.p); }else{ *pa = buf.p; *pn = buf.n; *pbDel = 1; } } synonym_poslist_out: if( aIter!=aStatic ) sqlite3_free(aIter); return rc; } /* ** All individual term iterators in pPhrase are guaranteed to be valid and ** pointing to the same rowid when this function is called. This function ** checks if the current rowid really is a match, and if so populates ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch ** is set to true if this is really a match, or false otherwise. ** ** SQLITE_OK is returned if an error occurs, or an SQLite error code ** otherwise. It is not considered an error code if the current rowid is ** not a match. */ static int fts5ExprPhraseIsMatch( Fts5ExprNode *pNode, /* Node pPhrase belongs to */ Fts5ExprColset *pColset, /* Restrict matches to these columns */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */ ){ Fts5PoslistWriter writer = {0}; Fts5PoslistReader aStatic[4]; Fts5PoslistReader *aIter = aStatic; |
︙ | ︙ | |||
384 385 386 387 388 389 390 391 392 393 394 | /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( !aIter ) return SQLITE_NOMEM; } /* Initialize a term iterator for each term in the phrase */ for(i=0; i<pPhrase->nTerm; i++){ i64 dummy; | > > | > | > > > | > > | > | < | 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 | /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( !aIter ) return SQLITE_NOMEM; } memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm); /* Initialize a term iterator for each term in the phrase */ for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; i64 dummy; int n = 0; int bFlag = 0; const u8 *a = 0; if( pTerm->pSynonym ){ rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, &bFlag, (u8**)&a, &n); }else{ rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy); } if( rc!=SQLITE_OK ) goto ismatch_out; sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]); aIter[i].bFlag = bFlag; if( aIter[i].bEof ) goto ismatch_out; } while( 1 ){ int bMatch; i64 iPos = aIter[0].iPos; do { bMatch = 1; |
︙ | ︙ | |||
427 428 429 430 431 432 433 434 435 436 437 438 439 440 | for(i=0; i<pPhrase->nTerm; i++){ if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; } } ismatch_out: *pbMatch = (pPhrase->poslist.n>0); if( aIter!=aStatic ) sqlite3_free(aIter); return rc; } typedef struct Fts5LookaheadReader Fts5LookaheadReader; struct Fts5LookaheadReader { const u8 *a; /* Buffer containing position list */ | > > > | 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 | for(i=0; i<pPhrase->nTerm; i++){ if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; } } ismatch_out: *pbMatch = (pPhrase->poslist.n>0); for(i=0; i<pPhrase->nTerm; i++){ if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a); } if( aIter!=aStatic ) sqlite3_free(aIter); return rc; } typedef struct Fts5LookaheadReader Fts5LookaheadReader; struct Fts5LookaheadReader { const u8 *a; /* Buffer containing position list */ |
︙ | ︙ | |||
594 595 596 597 598 599 600 | */ static int fts5ExprNearAdvanceFirst( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ int bFromValid, i64 iFrom ){ | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | | | | > > | 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 | */ static int fts5ExprNearAdvanceFirst( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ int bFromValid, i64 iFrom ){ Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; int rc; if( pTerm->pSynonym ){ int bEof = 1; Fts5ExprTerm *p; /* Find the firstest rowid any synonym points to. */ i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); /* Advance each iterator that currently points to iRowid. Or, if iFrom ** is valid - each iterator that points to a rowid before iFrom. */ for(p=pTerm; p; p=p->pSynonym){ if( sqlite3Fts5IterEof(p->pIter)==0 ){ i64 ii = sqlite3Fts5IterRowid(p->pIter); if( ii==iRowid || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) ){ if( bFromValid ){ rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); }else{ rc = sqlite3Fts5IterNext(p->pIter); } if( rc!=SQLITE_OK ) break; if( sqlite3Fts5IterEof(p->pIter)==0 ){ bEof = 0; } }else{ bEof = 0; } } } /* Set the EOF flag if either all synonym iterators are at EOF or an ** error has occurred. */ pNode->bEof = (rc || bEof); }else{ Fts5IndexIter *pIter = pTerm->pIter; assert( Fts5NodeIsString(pNode) ); if( bFromValid ){ rc = sqlite3Fts5IterNextFrom(pIter, iFrom); }else{ rc = sqlite3Fts5IterNext(pIter); } pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); } return rc; } /* ** Advance iterator pIter until it points to a value equal to or laster ** than the initial value of *piLast. If this means the iterator points ** to a value laster than *piLast, update *piLast to the new lastest value. |
︙ | ︙ | |||
642 643 644 645 646 647 648 649 650 651 652 653 654 655 | iRowid = sqlite3Fts5IterRowid(pIter); assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) ); } *piLast = iRowid; return 0; } /* ** IN/OUT parameter (*pa) points to a position list n bytes in size. If ** the position list contains entries for column iCol, then (*pa) is set ** to point to the sub-position-list for that column and the number of ** bytes in it returned. Or, if the argument position list does not ** contain any entries for column iCol, return 0. | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 | iRowid = sqlite3Fts5IterRowid(pIter); assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) ); } *piLast = iRowid; return 0; } static int fts5ExprSynonymAdvanceto( Fts5ExprTerm *pTerm, /* Term iterator to advance */ int bDesc, /* True if iterator is "rowid DESC" */ i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ int *pRc /* OUT: Error code */ ){ int rc = SQLITE_OK; i64 iLast = *piLast; Fts5ExprTerm *p; int bEof = 0; for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){ if( sqlite3Fts5IterEof(p->pIter)==0 ){ i64 iRowid = sqlite3Fts5IterRowid(p->pIter); if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ rc = sqlite3Fts5IterNextFrom(p->pIter, iLast); } } } if( rc!=SQLITE_OK ){ *pRc = rc; bEof = 1; }else{ *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof); } return bEof; } /* ** IN/OUT parameter (*pa) points to a position list n bytes in size. If ** the position list contains entries for column iCol, then (*pa) is set ** to point to the sub-position-list for that column and the number of ** bytes in it returned. Or, if the argument position list does not ** contain any entries for column iCol, return 0. |
︙ | ︙ | |||
713 714 715 716 717 718 719 | int i; /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | | | | 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 | int i; /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){ int bMatch = 0; rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ rc = sqlite3Fts5IterPoslistBuffer( pPhrase->aTerm[0].pIter, &pPhrase->poslist ); } } |
︙ | ︙ | |||
751 752 753 754 755 756 757 758 759 760 761 762 763 764 | Fts5ExprColset *pColset = pNear->pColset; const u8 *pPos; int nPos; int rc; assert( pNode->eType==FTS5_TERM ); assert( pNear->nPhrase==1 && pPhrase->nTerm==1 ); rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); /* If the term may match any column, then this must be a match. ** Return immediately in this case. Otherwise, try to find the ** part of the poslist that corresponds to the required column. ** If it can be found, return. If it cannot, the next iteration | > | 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 | Fts5ExprColset *pColset = pNear->pColset; const u8 *pPos; int nPos; int rc; assert( pNode->eType==FTS5_TERM ); assert( pNear->nPhrase==1 && pPhrase->nTerm==1 ); assert( pPhrase->aTerm[0].pSynonym==0 ); rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); /* If the term may match any column, then this must be a match. ** Return immediately in this case. Otherwise, try to find the ** part of the poslist that corresponds to the required column. ** If it can be found, return. If it cannot, the next iteration |
︙ | ︙ | |||
797 798 799 800 801 802 803 804 | ){ Fts5ExprNearset *pNear = pNode->pNear; Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; int rc = SQLITE_OK; i64 iLast; /* Lastest rowid any iterator points to */ int i, j; /* Phrase and token index, respectively */ int bMatch; /* True if all terms are at the same rowid */ | > > | > > > > > > | > > > > > > > > > > > > > | | > | | | > < > | | < < | | > > > > | | | | | | | | | | | | > > > > > | | 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 | ){ Fts5ExprNearset *pNear = pNode->pNear; Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; int rc = SQLITE_OK; i64 iLast; /* Lastest rowid any iterator points to */ int i, j; /* Phrase and token index, respectively */ int bMatch; /* True if all terms are at the same rowid */ const int bDesc = pExpr->bDesc; /* Check that this node should not be FTS5_TERM */ assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 || pNear->apPhrase[0]->aTerm[0].pSynonym ); /* Initialize iLast, the "lastest" rowid any iterator points to. If the ** iterator skips through rowids in the default ascending order, this means ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it ** means the minimum rowid. */ if( pLeft->aTerm[0].pSynonym ){ iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0); }else{ iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter); } do { bMatch = 1; for(i=0; i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; for(j=0; j<pPhrase->nTerm; j++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; if( pTerm->pSynonym ){ Fts5ExprTerm *p; int bEof = 1; i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0); if( iRowid==iLast ) continue; bMatch = 0; if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){ pNode->bEof = 1; return rc; } }else{ Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; i64 iRowid = sqlite3Fts5IterRowid(pIter); if( iRowid==iLast ) continue; bMatch = 0; if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ return rc; } } } } }while( bMatch==0 ); pNode->iRowid = iLast; pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode)); return rc; } /* ** Initialize all term iterators in the pNear object. If any term is found ** to match no documents at all, return immediately without initializing any ** further iterators. */ static int fts5ExprNearInitAll( Fts5Expr *pExpr, Fts5ExprNode *pNode ){ Fts5ExprNearset *pNear = pNode->pNear; int i, j; int rc = SQLITE_OK; for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; for(j=0; j<pPhrase->nTerm; j++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; Fts5ExprTerm *p; int bEof = 1; for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){ if( p->pIter ){ sqlite3Fts5IterClose(p->pIter); p->pIter = 0; } rc = sqlite3Fts5IndexQuery( pExpr->pIndex, p->zTerm, strlen(p->zTerm), (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), &p->pIter ); assert( rc==SQLITE_OK || p->pIter==0 ); if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){ bEof = 0; } } if( bEof ){ pNode->bEof = 1; return rc; } } } return rc; } |
︙ | ︙ | |||
1025 1026 1027 1028 1029 1030 1031 | switch( pNode->eType ){ case FTS5_STRING: { rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom); break; }; case FTS5_TERM: { | > | > > > > | > > | 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 | switch( pNode->eType ){ case FTS5_STRING: { rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom); break; }; case FTS5_TERM: { Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; if( bFromValid ){ rc = sqlite3Fts5IterNextFrom(pIter, iFrom); }else{ rc = sqlite3Fts5IterNext(pIter); } if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){ assert( rc==SQLITE_OK ); rc = fts5ExprTokenTest(pExpr, pNode); }else{ pNode->bEof = 1; } return rc; }; case FTS5_AND: { Fts5ExprNode *pLeft = pNode->apChild[0]; rc = fts5ExprNodeNext(pExpr, pLeft, bFromValid, iFrom); |
︙ | ︙ | |||
1262 1263 1264 1265 1266 1267 1268 1269 1270 | /* ** Free the phrase object passed as the only argument. */ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ if( pPhrase ){ int i; for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); | > > | > > > | > | 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 | /* ** Free the phrase object passed as the only argument. */ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ if( pPhrase ){ int i; for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pSyn; Fts5ExprTerm *pNext; Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); sqlite3Fts5IterClose(pTerm->pIter); for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ pNext = pSyn->pSynonym; sqlite3Fts5IterClose(pSyn->pIter); sqlite3_free(pSyn); } } if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); } } |
︙ | ︙ | |||
1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 | } return pRet; } typedef struct TokenCtx TokenCtx; struct TokenCtx { Fts5ExprPhrase *pPhrase; }; /* ** Callback for tokenizing terms used by ParseTerm(). */ static int fts5ParseTokenize( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ | > > | | > > > > > > | > > > > > > > > > > | > > | | | | | | | > > | | | | | > > | | | | > > > | 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 | } return pRet; } typedef struct TokenCtx TokenCtx; struct TokenCtx { Fts5ExprPhrase *pPhrase; int rc; }; /* ** Callback for tokenizing terms used by ParseTerm(). */ static int fts5ParseTokenize( void *pContext, /* Pointer to Fts5InsertCtx object */ int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iUnused1, /* Start offset of token */ int iUnused2 /* End offset of token */ ){ int rc = SQLITE_OK; const int SZALLOC = 8; TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; /* If an error has already occurred, this is a no-op */ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; assert( pPhrase==0 || pPhrase->nTerm>0 ); if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){ Fts5ExprTerm *pSyn; int nByte = sizeof(Fts5ExprTerm) + nToken+1; pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); if( pSyn==0 ){ rc = SQLITE_NOMEM; }else{ memset(pSyn, 0, nByte); pSyn->zTerm = (char*)&pSyn[1]; memcpy(pSyn->zTerm, pToken, nToken); pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; } }else{ Fts5ExprTerm *pTerm; if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ Fts5ExprPhrase *pNew; int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew ); if( pNew==0 ){ rc = SQLITE_NOMEM; }else{ if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); pCtx->pPhrase = pPhrase = pNew; pNew->nTerm = nNew - SZALLOC; } } if( rc==SQLITE_OK ){ pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); } } pCtx->rc = rc; return rc; } /* ** Free the phrase object passed as the only argument. */ |
︙ | ︙ | |||
1413 1414 1415 1416 1417 1418 1419 1420 | char *z = 0; memset(&sCtx, 0, sizeof(TokenCtx)); sCtx.pPhrase = pAppend; rc = fts5ParseStringFromToken(pToken, &z); if( rc==SQLITE_OK ){ sqlite3Fts5Dequote(z); | > > > | | | 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 | char *z = 0; memset(&sCtx, 0, sizeof(TokenCtx)); sCtx.pPhrase = pAppend; rc = fts5ParseStringFromToken(pToken, &z); if( rc==SQLITE_OK ){ int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0); int n; sqlite3Fts5Dequote(z); n = strlen(z); rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize); } sqlite3_free(z); if( rc || (rc = sCtx.rc) ){ pParse->rc = rc; fts5ExprPhraseFree(sCtx.pPhrase); sCtx.pPhrase = 0; }else if( sCtx.pPhrase ){ if( pAppend==0 ){ if( (pParse->nPhrase % 8)==0 ){ |
︙ | ︙ | |||
1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 | pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; assert( sCtx.pPhrase->nTerm>0 ); sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; } return sCtx.pPhrase; } /* ** Token pTok has appeared in a MATCH expression where the NEAR operator ** is expected. If token pTok does not contain "NEAR", store an error ** in the pParse object. */ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 | pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; assert( sCtx.pPhrase->nTerm>0 ); sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; } return sCtx.pPhrase; } /* ** Create a new FTS5 expression by cloning phrase iPhrase of the ** expression passed as the second argument. */ int sqlite3Fts5ExprClonePhrase( Fts5Config *pConfig, Fts5Expr *pExpr, int iPhrase, Fts5Expr **ppNew ){ int rc = SQLITE_OK; /* Return code */ Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ Fts5ExprPhrase *pCopy; /* Copy of pOrig */ int i; /* Used to iterate through phrase terms */ Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ Fts5ExprPhrase **apPhrase; /* pNew->apPhrase */ Fts5ExprNode *pNode; /* pNew->pRoot */ Fts5ExprNearset *pNear; /* pNew->pRoot->pNear */ TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */ pOrig = pExpr->apExprPhrase[iPhrase]; pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); if( rc==SQLITE_OK ){ pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase*)); } if( rc==SQLITE_OK ){ pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode)); } if( rc==SQLITE_OK ){ pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)); } for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){ int tflags = 0; Fts5ExprTerm *p; for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ const char *zTerm = p->zTerm; rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, strlen(zTerm), 0, 0); tflags = FTS5_TOKEN_COLOCATED; } if( rc==SQLITE_OK ){ sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; } } if( rc==SQLITE_OK ){ /* All the allocations succeeded. Put the expression object together. */ pNew->pIndex = pExpr->pIndex; pNew->nPhrase = 1; pNew->apExprPhrase[0] = sCtx.pPhrase; pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; pNew->pRoot->pNear->nPhrase = 1; sCtx.pPhrase->pNode = pNew->pRoot; if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){ pNew->pRoot->eType = FTS5_TERM; }else{ pNew->pRoot->eType = FTS5_STRING; } }else{ sqlite3Fts5ExprFree(pNew); fts5ExprPhraseFree(sCtx.pPhrase); pNew = 0; } *ppNew = pNew; return rc; } /* ** Token pTok has appeared in a MATCH expression where the NEAR operator ** is expected. If token pTok does not contain "NEAR", store an error ** in the pParse object. */ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ |
︙ | ︙ | |||
1626 1627 1628 1629 1630 1631 1632 | pRet->eType = eType; pRet->pNear = pNear; if( eType==FTS5_STRING ){ int iPhrase; for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; } | | > > > > > > > > > | > > > > | | | | | | | > > | 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 | pRet->eType = eType; pRet->pNear = pNear; if( eType==FTS5_STRING ){ int iPhrase; for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; } if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 && pNear->apPhrase[0]->aTerm[0].pSynonym==0 ){ pRet->eType = FTS5_TERM; } }else{ fts5ExprAddChildren(pRet, pLeft); fts5ExprAddChildren(pRet, pRight); } } } if( pRet==0 ){ assert( pParse->rc!=SQLITE_OK ); sqlite3Fts5ParseNodeFree(pLeft); sqlite3Fts5ParseNodeFree(pRight); sqlite3Fts5ParseNearsetFree(pNear); } return pRet; } static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ int nByte = 0; Fts5ExprTerm *p; char *zQuoted; /* Determine the maximum amount of space required. */ for(p=pTerm; p; p=p->pSynonym){ nByte += strlen(pTerm->zTerm) * 2 + 3 + 2; } zQuoted = sqlite3_malloc(nByte); if( zQuoted ){ int i = 0; for(p=pTerm; p; p=p->pSynonym){ char *zIn = p->zTerm; zQuoted[i++] = '"'; while( *zIn ){ if( *zIn=='"' ) zQuoted[i++] = '"'; zQuoted[i++] = *zIn++; } zQuoted[i++] = '"'; if( p->pSynonym ) zQuoted[i++] = '|'; } if( pTerm->bPrefix ){ zQuoted[i++] = ' '; zQuoted[i++] = '*'; } zQuoted[i++] = '\0'; } return zQuoted; |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
289 290 291 292 293 294 295 | #define FTS5_DATA_ZERO_PADDING 8 #define FTS5_DATA_PADDING 20 typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5DlidxLvl Fts5DlidxLvl; typedef struct Fts5DlidxWriter Fts5DlidxWriter; | < | 289 290 291 292 293 294 295 296 297 298 299 300 301 302 | #define FTS5_DATA_ZERO_PADDING 8 #define FTS5_DATA_PADDING 20 typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5DlidxLvl Fts5DlidxLvl; typedef struct Fts5DlidxWriter Fts5DlidxWriter; typedef struct Fts5PageWriter Fts5PageWriter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; |
︙ | ︙ | |||
522 523 524 525 526 527 528 | i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ Fts5CResult *aFirst; /* Current merge state (see above) */ Fts5SegIter aSeg[1]; /* Array of segment iterators */ }; | < < < < < < < < < < < < < < < < < < | 521 522 523 524 525 526 527 528 529 530 531 532 533 534 | i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ Fts5CResult *aFirst; /* Current merge state (see above) */ Fts5SegIter aSeg[1]; /* Array of segment iterators */ }; /* ** An instance of the following type is used to iterate through the contents ** of a doclist-index record. ** ** pData: ** Record containing the doclist-index data. ** |
︙ | ︙ | |||
569 570 571 572 573 574 575 | }; struct Fts5DlidxIter { int nLvl; int iSegid; Fts5DlidxLvl aLvl[1]; }; | < < < < < < < < < < < < < < < < < | 550 551 552 553 554 555 556 557 558 559 560 561 562 563 | }; struct Fts5DlidxIter { int nLvl; int iSegid; Fts5DlidxLvl aLvl[1]; }; static void fts5PutU16(u8 *aOut, u16 iVal){ aOut[0] = (iVal>>8); aOut[1] = (iVal&0xFF); } static u16 fts5GetU16(const u8 *aIn){ return ((u16)aIn[0] << 8) + aIn[1]; |
︙ | ︙ | |||
613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 | ** Compare the contents of the pLeft buffer with the pRight/nRight blob. ** ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or ** +ve if pRight is smaller than pLeft. In other words: ** ** res = *pLeft - *pRight */ static int fts5BufferCompareBlob( Fts5Buffer *pLeft, /* Left hand side of comparison */ const u8 *pRight, int nRight /* Right hand side of comparison */ ){ int nCmp = MIN(pLeft->n, nRight); int res = memcmp(pLeft->p, pRight, nCmp); return (res==0 ? (pLeft->n - nRight) : res); } | > | | 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 | ** Compare the contents of the pLeft buffer with the pRight/nRight blob. ** ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or ** +ve if pRight is smaller than pLeft. In other words: ** ** res = *pLeft - *pRight */ #ifdef SQLITE_DEBUG static int fts5BufferCompareBlob( Fts5Buffer *pLeft, /* Left hand side of comparison */ const u8 *pRight, int nRight /* Right hand side of comparison */ ){ int nCmp = MIN(pLeft->n, nRight); int res = memcmp(pLeft->p, pRight, nCmp); return (res==0 ? (pLeft->n - nRight) : res); } #endif /* ** Compare the contents of the two buffers using memcmp(). If one buffer ** is a prefix of the other, it is considered the lesser. ** ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or ** +ve if pRight is smaller than pLeft. In other words: |
︙ | ︙ | |||
661 662 663 664 665 666 667 | if( p->pReader ){ sqlite3_blob *pReader = p->pReader; p->pReader = 0; sqlite3_blob_close(pReader); } } | | > > > > | | | < | | < < < < < < < < | | | | | | | < < < < < < < < < < < < < < < < < < < < < < < < | 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 | if( p->pReader ){ sqlite3_blob *pReader = p->pReader; p->pReader = 0; sqlite3_blob_close(pReader); } } /* ** Retrieve a record from the %_data table. ** ** If an error occurs, NULL is returned and an error left in the ** Fts5Index object. */ static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ Fts5Data *pRet = 0; if( p->rc==SQLITE_OK ){ int rc = SQLITE_OK; if( p->pReader ){ /* This call may return SQLITE_ABORT if there has been a savepoint ** rollback since it was last used. In this case a new blob handle ** is required. */ sqlite3_blob *pBlob = p->pReader; p->pReader = 0; rc = sqlite3_blob_reopen(pBlob, iRowid); assert( p->pReader==0 ); p->pReader = pBlob; if( rc!=SQLITE_OK ){ fts5CloseReader(p); } if( rc==SQLITE_ABORT ) rc = SQLITE_OK; } /* If the blob handle is not open at this point, open it and seek ** to the requested entry. */ if( p->pReader==0 && rc==SQLITE_OK ){ Fts5Config *pConfig = p->pConfig; rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader ); } /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. ** All the reasons those functions might return SQLITE_ERROR - missing ** table, missing row, non-blob/text in block column - indicate ** backing store corruption. */ if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT; if( rc==SQLITE_OK ){ u8 *aOut = 0; /* Read blob data into this buffer */ int nByte = sqlite3_blob_bytes(p->pReader); int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING; pRet = (Fts5Data*)sqlite3_malloc(nAlloc); if( pRet ){ pRet->n = nByte; aOut = pRet->p = (u8*)&pRet[1]; }else{ rc = SQLITE_NOMEM; } if( rc==SQLITE_OK ){ rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); } if( rc!=SQLITE_OK ){ sqlite3_free(pRet); pRet = 0; } } p->rc = rc; p->nRead++; } assert( (pRet==0)==(p->rc!=SQLITE_OK) ); return pRet; } /* ** Release a reference to data record returned by an earlier call to ** fts5DataRead(). */ static void fts5DataRelease(Fts5Data *pData){ sqlite3_free(pData); } |
︙ | ︙ | |||
1029 1030 1031 1032 1033 1034 1035 1036 1037 | ** Fts5Index handle. If an error has already occurred when this function ** is called, it is a no-op. */ static Fts5Structure *fts5StructureRead(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; Fts5Structure *pRet = 0; /* Object to return */ int iCookie; /* Configuration cookie */ Fts5Buffer buf = {0, 0, 0}; | > | | < | | < | | 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 | ** Fts5Index handle. If an error has already occurred when this function ** is called, it is a no-op. */ static Fts5Structure *fts5StructureRead(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; Fts5Structure *pRet = 0; /* Object to return */ int iCookie; /* Configuration cookie */ Fts5Data *pData; Fts5Buffer buf = {0, 0, 0}; pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); if( p->rc ) return 0; memset(&pData->p[pData->n], 0, FTS5_DATA_PADDING); p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet); if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); } fts5DataRelease(pData); if( p->rc!=SQLITE_OK ){ fts5StructureRelease(pRet); pRet = 0; } return pRet; } |
︙ | ︙ | |||
1224 1225 1226 1227 1228 1229 1230 | szPromote = szSeg; } fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); } } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 | szPromote = szSeg; } fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); } } /* ** Advance the iterator passed as the only argument. If the end of the ** doclist-index page is reached, return non-zero. */ static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ Fts5Data *pData = pLvl->pData; |
︙ | ︙ | |||
2037 2038 2039 2040 2041 2042 2043 | iOff += nPos; } } pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 | iOff += nPos; } } pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); } #define fts5IndexGetVarint32(a, iOff, nVal) { \ nVal = a[iOff++]; \ if( nVal & 0x80 ){ \ iOff--; \ iOff += fts5GetVarint32(&a[iOff], nVal); \ } \ } |
︙ | ︙ | |||
2673 2674 2675 2676 2677 2678 2679 | if( iLeafPgno<pIter->iLeafPgno ){ pIter->iLeafPgno = iLeafPgno+1; fts5SegIterReverseNewPage(p, pIter); bMove = 0; } } | < > | | 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 | if( iLeafPgno<pIter->iLeafPgno ){ pIter->iLeafPgno = iLeafPgno+1; fts5SegIterReverseNewPage(p, pIter); bMove = 0; } } do{ if( bMove ) fts5SegIterNext(p, pIter, 0); if( pIter->pLeaf==0 ) break; if( bRev==0 && pIter->iRowid>=iMatch ) break; if( bRev!=0 && pIter->iRowid<=iMatch ) break; bMove = 1; }while( p->rc==SQLITE_OK ); } /* ** Free the iterator object passed as the second argument. */ static void fts5MultiIterFree(Fts5Index *p, Fts5IndexIter *pIter){ |
︙ | ︙ | |||
4455 4456 4457 4458 4459 4460 4461 | /* ** The %_data table is completely empty when this function is called. This ** function populates it with the initial structure objects for each index, ** and the initial version of the "averages" record (a zero-byte blob). */ int sqlite3Fts5IndexReinit(Fts5Index *p){ Fts5Structure s; | < < < < > < | 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 | /* ** The %_data table is completely empty when this function is called. This ** function populates it with the initial structure objects for each index, ** and the initial version of the "averages" record (a zero-byte blob). */ int sqlite3Fts5IndexReinit(Fts5Index *p){ Fts5Structure s; memset(&s, 0, sizeof(Fts5Structure)); fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0); fts5StructureWrite(p, &s); return fts5IndexReturn(p); } /* ** Open a new Fts5Index handle. If the bCreate argument is true, create ** and initialize the underlying %_data table. ** |
︙ | ︙ | |||
4783 4784 4785 4786 4787 4788 4789 | Fts5Index *pIndex = pIter->pIndex; fts5MultiIterFree(pIter->pIndex, pIter); fts5CloseReader(pIndex); } } /* | | < | > > | > > > > > > | > > > > > > > | > | 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 | Fts5Index *pIndex = pIter->pIndex; fts5MultiIterFree(pIter->pIndex, pIter); fts5CloseReader(pIndex); } } /* ** Read and decode the "averages" record from the database. ** ** Parameter anSize must point to an array of size nCol, where nCol is ** the number of user defined columns in the FTS table. */ int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ int nCol = p->pConfig->nCol; Fts5Data *pData; *pnRow = 0; memset(anSize, 0, sizeof(i64) * nCol); pData = fts5DataRead(p, FTS5_AVERAGES_ROWID); if( p->rc==SQLITE_OK && pData->n ){ int i = 0; int iCol; i += fts5GetVarint(&pData->p[i], (u64*)pnRow); for(iCol=0; i<pData->n && iCol<nCol; iCol++){ i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]); } } fts5DataRelease(pData); return fts5IndexReturn(p); } /* ** Replace the current "averages" record with the contents of the buffer ** supplied as the second argument. */ |
︙ | ︙ | |||
5483 5484 5485 5486 5487 5488 5489 | }else if( iSegid==0 ){ if( iRowid==FTS5_AVERAGES_ROWID ){ /* todo */ }else{ fts5DecodeStructure(&rc, &s, a, n); } }else{ | < > > > > > < < < < < < | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | < < < < < < < < < < < < < < < < < < | 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 | }else if( iSegid==0 ){ if( iRowid==FTS5_AVERAGES_ROWID ){ /* todo */ }else{ fts5DecodeStructure(&rc, &s, a, n); } }else{ Fts5Buffer term; int iTermOff = 0; int iRowidOff = 0; int iOff; int nKeep = 0; memset(&term, 0, sizeof(Fts5Buffer)); if( n>=4 ){ iRowidOff = fts5GetU16(&a[0]); iTermOff = fts5GetU16(&a[2]); }else{ sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt"); goto decode_out; } if( iRowidOff ){ iOff = iRowidOff; }else if( iTermOff ){ iOff = iTermOff; }else{ iOff = n; } fts5DecodePoslist(&rc, &s, &a[4], iOff-4); assert( iRowidOff==0 || iOff==iRowidOff ); if( iRowidOff ){ iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); } assert( iTermOff==0 || iOff==iTermOff ); while( iOff<n ){ int nByte; iOff += fts5GetVarint32(&a[iOff], nByte); term.n= nKeep; fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); iOff += nByte; sqlite3Fts5BufferAppendPrintf( &rc, &s, " term=%.*s", term.n, (const char*)term.p ); iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); if( iOff<n ){ iOff += fts5GetVarint32(&a[iOff], nKeep); } } fts5BufferFree(&term); } decode_out: sqlite3_free(a); if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT); }else{ |
︙ | ︙ |
Changes to ext/fts5/fts5_main.c.
︙ | ︙ | |||
1494 1495 1496 1497 1498 1499 1500 | return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); } static int fts5ApiTokenize( Fts5Context *pCtx, const char *pText, int nText, void *pUserData, | | | > > | 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 | return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); } static int fts5ApiTokenize( Fts5Context *pCtx, const char *pText, int nText, void *pUserData, int (*xToken)(void*, int, const char*, int, int, int) ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); return sqlite3Fts5Tokenize( pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken ); } static int fts5ApiPhraseCount(Fts5Context *pCtx){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); } |
︙ | ︙ | |||
1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 | } } return rc; } static int fts5ColumnSizeCb( void *pContext, /* Pointer to int */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ int *pCnt = (int*)pContext; | > > | > | 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 | } } return rc; } static int fts5ColumnSizeCb( void *pContext, /* Pointer to int */ int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ int *pCnt = (int*)pContext; if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){ (*pCnt)++; } return SQLITE_OK; } static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); Fts5Config *pConfig = pTab->pConfig; |
︙ | ︙ | |||
1687 1688 1689 1690 1691 1692 1693 | for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i]==0 ){ const char *z; int n; void *p = (void*)(&pCsr->aColumnSize[i]); pCsr->aColumnSize[i] = 0; rc = fts5ApiColumnText(pCtx, i, &z, &n); if( rc==SQLITE_OK ){ | | > > | 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 | for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i]==0 ){ const char *z; int n; void *p = (void*)(&pCsr->aColumnSize[i]); pCsr->aColumnSize[i] = 0; rc = fts5ApiColumnText(pCtx, i, &z, &n); if( rc==SQLITE_OK ){ rc = sqlite3Fts5Tokenize( pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb ); } } } } CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE); } if( iCol<0 ){ |
︙ | ︙ | |||
1849 1850 1851 1852 1853 1854 1855 | rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); if( rc==SQLITE_OK ){ Fts5Config *pConf = pTab->pConfig; pNew->ePlan = FTS5_PLAN_MATCH; pNew->iFirstRowid = SMALLEST_INT64; pNew->iLastRowid = LARGEST_INT64; pNew->base.pVtab = (sqlite3_vtab*)pTab; | | | 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 | rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); if( rc==SQLITE_OK ){ Fts5Config *pConf = pTab->pConfig; pNew->ePlan = FTS5_PLAN_MATCH; pNew->iFirstRowid = SMALLEST_INT64; pNew->iLastRowid = LARGEST_INT64; pNew->base.pVtab = (sqlite3_vtab*)pTab; rc = sqlite3Fts5ExprClonePhrase(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr); } if( rc==SQLITE_OK ){ for(rc = fts5CursorFirst(pTab, pNew, 0); rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0; rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew) ){ |
︙ | ︙ | |||
2340 2341 2342 2343 2344 2345 2346 | pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global)); if( pGlobal==0 ){ rc = SQLITE_NOMEM; }else{ void *p = (void*)pGlobal; memset(pGlobal, 0, sizeof(Fts5Global)); pGlobal->db = db; | | | 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 | pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global)); if( pGlobal==0 ){ rc = SQLITE_NOMEM; }else{ void *p = (void*)pGlobal; memset(pGlobal, 0, sizeof(Fts5Global)); pGlobal->db = db; pGlobal->api.iVersion = 2; pGlobal->api.xCreateFunction = fts5CreateAux; pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; pGlobal->api.xFindTokenizer = fts5FindTokenizer; rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api); |
︙ | ︙ |
Changes to ext/fts5/fts5_storage.c.
︙ | ︙ | |||
355 356 357 358 359 360 361 362 363 364 365 366 367 368 | }; /* ** Tokenization callback used when inserting tokens into the FTS index. */ static int fts5StorageInsertCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; | > > | > | | 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 | }; /* ** Tokenization callback used when inserting tokens into the FTS index. */ static int fts5StorageInsertCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); } /* ** If a row with rowid iDel is present in the %_content table, add the ** delete-markers to the FTS index necessary to delete it. Do not actually ** remove the %_content row at this time though. */ |
︙ | ︙ | |||
390 391 392 393 394 395 396 397 398 399 400 401 402 403 | ctx.pStorage = p; ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol-1] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pSeek, iCol), sqlite3_column_bytes(pSeek, iCol), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol-1] -= (i64)ctx.szCol; } | > | 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 | ctx.pStorage = p; ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol-1] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pSeek, iCol), sqlite3_column_bytes(pSeek, iCol), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol-1] -= (i64)ctx.szCol; } |
︙ | ︙ | |||
447 448 449 450 451 452 453 | ** ** Return SQLITE_OK if successful, or an SQLite error code if an error ** occurs. */ static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ int rc = SQLITE_OK; if( p->bTotalsValid==0 ){ | < < < < < < | < < < < < < < < < | 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 | ** ** Return SQLITE_OK if successful, or an SQLite error code if an error ** occurs. */ static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ int rc = SQLITE_OK; if( p->bTotalsValid==0 ){ rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize); p->bTotalsValid = bCache; } return rc; } /* ** Store the current contents of the p->nTotalRow and p->aTotalSize[] |
︙ | ︙ | |||
561 562 563 564 565 566 567 568 569 570 571 572 573 574 | ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_value_text(apVal[iCol]), sqlite3_value_bytes(apVal[iCol]), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol] -= (i64)ctx.szCol; } | > | 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 | ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_value_text(apVal[iCol]), sqlite3_value_bytes(apVal[iCol]), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol] -= (i64)ctx.szCol; } |
︙ | ︙ | |||
650 651 652 653 654 655 656 657 658 659 660 661 662 663 | sqlite3Fts5BufferZero(&buf); rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid); for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pScan, ctx.iCol+1), sqlite3_column_bytes(pScan, ctx.iCol+1), (void*)&ctx, fts5StorageInsertCallback ); } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); | > | 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 | sqlite3Fts5BufferZero(&buf); rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid); for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pScan, ctx.iCol+1), sqlite3_column_bytes(pScan, ctx.iCol+1), (void*)&ctx, fts5StorageInsertCallback ); } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); |
︙ | ︙ | |||
767 768 769 770 771 772 773 774 775 776 777 778 779 780 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid); ctx.pStorage = p; } for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), sqlite3_value_bytes(apVal[ctx.iCol+2]), (void*)&ctx, fts5StorageInsertCallback ); } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); | > | 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid); ctx.pStorage = p; } for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), sqlite3_value_bytes(apVal[ctx.iCol+2]), (void*)&ctx, fts5StorageInsertCallback ); } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); |
︙ | ︙ | |||
834 835 836 837 838 839 840 841 842 843 844 845 846 | }; /* ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; | > > | > | | 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 | }; /* ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } pCtx->cksum ^= sqlite3Fts5IndexCksum( pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken ); return SQLITE_OK; } /* ** Check that the contents of the FTS index match that of the %_content ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return |
︙ | ︙ | |||
877 878 879 880 881 882 883 | rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0); if( rc==SQLITE_OK ){ int rc2; while( SQLITE_ROW==sqlite3_step(pScan) ){ int i; ctx.iRowid = sqlite3_column_int64(pScan, 0); ctx.szCol = 0; | > | > | | > | > | 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 | rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0); if( rc==SQLITE_OK ){ int rc2; while( SQLITE_ROW==sqlite3_step(pScan) ){ int i; ctx.iRowid = sqlite3_column_int64(pScan, 0); ctx.szCol = 0; if( pConfig->bColumnsize ){ rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); } for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i] ) continue; ctx.iCol = i; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pScan, i+1), sqlite3_column_bytes(pScan, i+1), (void*)&ctx, fts5StorageIntegrityCallback ); if( pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){ rc = FTS5_CORRUPT; } aTotalSize[i] += ctx.szCol; } if( rc!=SQLITE_OK ) break; } rc2 = sqlite3_reset(pScan); if( rc==SQLITE_OK ) rc = rc2; } |
︙ | ︙ | |||
914 915 916 917 918 919 920 | /* Check that the %_docsize and %_content tables contain the expected ** number of rows. */ if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ i64 nRow; rc = fts5StorageCount(p, "content", &nRow); if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } | | | 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 | /* Check that the %_docsize and %_content tables contain the expected ** number of rows. */ if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ i64 nRow; rc = fts5StorageCount(p, "content", &nRow); if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } if( rc==SQLITE_OK && pConfig->bColumnsize ){ i64 nRow; rc = fts5StorageCount(p, "docsize", &nRow); if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } /* Pass the expected checksum down to the FTS index module. It will ** verify, amongst other things, that it matches the checksum generated by |
︙ | ︙ | |||
998 999 1000 1001 1002 1003 1004 | ** each table column. This function reads the %_docsize record for the ** specified rowid and populates aCol[] with the results. ** ** An SQLite error code is returned if an error occurs, or SQLITE_OK ** otherwise. */ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ | | | > > > | | 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 | ** each table column. This function reads the %_docsize record for the ** specified rowid and populates aCol[] with the results. ** ** An SQLite error code is returned if an error occurs, or SQLITE_OK ** otherwise. */ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ int nCol = p->pConfig->nCol; /* Number of user columns in table */ sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */ int rc; /* Return Code */ assert( p->pConfig->bColumnsize ); rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0); if( rc==SQLITE_OK ){ int bCorrupt = 1; sqlite3_bind_int64(pLookup, 1, iRowid); if( SQLITE_ROW==sqlite3_step(pLookup) ){ const u8 *aBlob = sqlite3_column_blob(pLookup, 0); int nBlob = sqlite3_column_bytes(pLookup, 0); if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){ |
︙ | ︙ |
Changes to ext/fts5/fts5_tcl.c.
︙ | ︙ | |||
137 138 139 140 141 142 143 144 145 146 147 148 149 150 | typedef struct F5tAuxData F5tAuxData; struct F5tAuxData { Tcl_Obj *pObj; }; static int xTokenizeCb( void *pCtx, const char *zToken, int nToken, int iStart, int iEnd ){ F5tFunction *p = (F5tFunction*)pCtx; Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript); int rc; | > | 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | typedef struct F5tAuxData F5tAuxData; struct F5tAuxData { Tcl_Obj *pObj; }; static int xTokenizeCb( void *pCtx, int tflags, const char *zToken, int nToken, int iStart, int iEnd ){ F5tFunction *p = (F5tFunction*)pCtx; Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript); int rc; |
︙ | ︙ | |||
580 581 582 583 584 585 586 587 588 589 590 591 592 593 | Tcl_Obj *pRet; int bSubst; const char *zInput; }; static int xTokenizeCb2( void *pCtx, const char *zToken, int nToken, int iStart, int iEnd ){ F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx; if( p->bSubst ){ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement( | > | 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 | Tcl_Obj *pRet; int bSubst; const char *zInput; }; static int xTokenizeCb2( void *pCtx, int tflags, const char *zToken, int nToken, int iStart, int iEnd ){ F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx; if( p->bSubst ){ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement( |
︙ | ︙ | |||
662 663 664 665 666 667 668 | } pRet = Tcl_NewObj(); Tcl_IncrRefCount(pRet); ctx.bSubst = (objc==5); ctx.pRet = pRet; ctx.zInput = zText; | | > > | 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 | } pRet = Tcl_NewObj(); Tcl_IncrRefCount(pRet); ctx.bSubst = (objc==5); ctx.pRet = pRet; ctx.zInput = zText; rc = tokenizer.xTokenize( pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, zText, nText, xTokenizeCb2 ); tokenizer.xDelete(pTok); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0); Tcl_DecrRefCount(pRet); return TCL_ERROR; } |
︙ | ︙ | |||
684 685 686 687 688 689 690 | /************************************************************************* ** Start of tokenizer wrapper. */ typedef struct F5tTokenizerContext F5tTokenizerContext; typedef struct F5tTokenizerCb F5tTokenizerCb; typedef struct F5tTokenizerModule F5tTokenizerModule; | | | > > > > > > | 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 | /************************************************************************* ** Start of tokenizer wrapper. */ typedef struct F5tTokenizerContext F5tTokenizerContext; typedef struct F5tTokenizerCb F5tTokenizerCb; typedef struct F5tTokenizerModule F5tTokenizerModule; typedef struct F5tTokenizerInstance F5tTokenizerInstance; struct F5tTokenizerContext { void *pCtx; int (*xToken)(void*, int, const char*, int, int, int); }; struct F5tTokenizerModule { Tcl_Interp *interp; Tcl_Obj *pScript; F5tTokenizerContext *pContext; }; struct F5tTokenizerInstance { Tcl_Interp *interp; Tcl_Obj *pScript; F5tTokenizerContext *pContext; }; static int f5tTokenizerCreate( void *pCtx, const char **azArg, |
︙ | ︙ | |||
744 745 746 747 748 749 750 751 | Tcl_DecrRefCount(pInst->pScript); ckfree((char *)pInst); } static int f5tTokenizerTokenize( Fts5Tokenizer *p, void *pCtx, const char *pText, int nText, | > | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | < < | < | > > > > > > | > > | > > | > > > > > | < | < | | | < | | | 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 | Tcl_DecrRefCount(pInst->pScript); ckfree((char *)pInst); } static int f5tTokenizerTokenize( Fts5Tokenizer *p, void *pCtx, int flags, const char *pText, int nText, int (*xToken)(void*, int, const char*, int, int, int) ){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; void *pOldCtx; int (*xOldToken)(void*, int, const char*, int, int, int); Tcl_Obj *pEval; int rc; const char *zFlags; pOldCtx = pInst->pContext->pCtx; xOldToken = pInst->pContext->xToken; pInst->pContext->pCtx = pCtx; pInst->pContext->xToken = xToken; assert( flags==FTS5_TOKENIZE_DOCUMENT || flags==FTS5_TOKENIZE_AUX || flags==FTS5_TOKENIZE_QUERY || flags==(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX) ); pEval = Tcl_DuplicateObj(pInst->pScript); Tcl_IncrRefCount(pEval); switch( flags ){ case FTS5_TOKENIZE_DOCUMENT: zFlags = "document"; break; case FTS5_TOKENIZE_AUX: zFlags = "aux"; break; case FTS5_TOKENIZE_QUERY: zFlags = "query"; break; case (FTS5_TOKENIZE_PREFIX | FTS5_TOKENIZE_QUERY): zFlags = "prefixquery"; break; default: assert( 0 ); zFlags = "invalid"; break; } Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(zFlags, -1)); Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(pText,nText)); rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY); Tcl_DecrRefCount(pEval); pInst->pContext->pCtx = pOldCtx; pInst->pContext->xToken = xOldToken; return rc; } /* ** sqlite3_fts5_token ?-colocated? TEXT START END */ static int f5tTokenizerReturn( void * clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ F5tTokenizerContext *p = (F5tTokenizerContext*)clientData; int iStart; int iEnd; int nToken; int tflags = 0; char *zToken; int rc; if( objc==5 ){ int nArg; char *zArg = Tcl_GetStringFromObj(objv[1], &nArg); if( nArg<=10 && nArg>=2 && memcmp("-colocated", zArg, nArg)==0 ){ tflags |= FTS5_TOKEN_COLOCATED; }else{ goto usage; } }else if( objc!=4 ){ goto usage; } zToken = Tcl_GetStringFromObj(objv[objc-3], &nToken); if( Tcl_GetIntFromObj(interp, objv[objc-2], &iStart) || Tcl_GetIntFromObj(interp, objv[objc-1], &iEnd) ){ return TCL_ERROR; } if( p->xToken==0 ){ Tcl_AppendResult(interp, "sqlite3_fts5_token may only be used by tokenizer callback", 0 ); return TCL_ERROR; } rc = p->xToken(p->pCtx, tflags, zToken, nToken, iStart, iEnd); Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE); return TCL_OK; usage: Tcl_WrongNumArgs(interp, 1, objv, "?-colocated? TEXT START END"); return TCL_ERROR; } static void f5tDelTokenizer(void *pCtx){ F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx; Tcl_DecrRefCount(pMod->pScript); ckfree((char *)pMod); } |
︙ | ︙ |
Changes to ext/fts5/fts5_test_mi.c.
︙ | ︙ | |||
348 349 350 351 352 353 354 | Fts5Context *pFts, /* First arg to pass to pApi functions */ sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ const char *zArg; Fts5MatchinfoCtx *p; | | > > > | < | | > > | > | 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 | Fts5Context *pFts, /* First arg to pass to pApi functions */ sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ const char *zArg; Fts5MatchinfoCtx *p; int rc = SQLITE_OK; if( nVal>0 ){ zArg = (const char*)sqlite3_value_text(apVal[0]); }else{ zArg = "pcx"; } p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0); if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){ p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg); if( p==0 ){ rc = SQLITE_NOMEM; }else{ rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); } } if( rc==SQLITE_OK ){ rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb); } if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); }else{ /* No errors has occured, so return a copy of the array of integers. */ int nByte = p->nRet * sizeof(u32); sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT); } |
︙ | ︙ |
Changes to ext/fts5/fts5_tokenize.c.
︙ | ︙ | |||
112 113 114 115 116 117 118 119 | /* ** Tokenize some text using the ascii tokenizer. */ static int fts5AsciiTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, const char *pText, int nText, | > | | 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | /* ** Tokenize some text using the ascii tokenizer. */ static int fts5AsciiTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, int flags, const char *pText, int nText, int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) ){ AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer; int rc = SQLITE_OK; int ie; int is = 0; char aFold[64]; |
︙ | ︙ | |||
154 155 156 157 158 159 160 | break; } nFold = nByte*2; } asciiFold(pFold, &pText[is], nByte); /* Invoke the token callback */ | | | 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | break; } nFold = nByte*2; } asciiFold(pFold, &pText[is], nByte); /* Invoke the token callback */ rc = xToken(pCtx, 0, pFold, nByte, is, ie); is = ie+1; } if( pFold!=aFold ) sqlite3_free(pFold); if( rc==SQLITE_DONE ) rc = SQLITE_OK; return rc; } |
︙ | ︙ | |||
381 382 383 384 385 386 387 388 | assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode); } static int fts5UnicodeTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, const char *pText, int nText, | > | | 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 | assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode); } static int fts5UnicodeTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, int flags, const char *pText, int nText, int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) ){ Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; int rc = SQLITE_OK; unsigned char *a = p->aTokenChar; unsigned char *zTerm = (unsigned char*)&pText[nText]; unsigned char *zCsr = (unsigned char *)pText; |
︙ | ︙ | |||
471 472 473 474 475 476 477 | } zCsr++; } ie = zCsr - (unsigned char*)pText; } /* Invoke the token callback */ | | | 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 | } zCsr++; } ie = zCsr - (unsigned char*)pText; } /* Invoke the token callback */ rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie); } tokenize_done: if( rc==SQLITE_DONE ) rc = SQLITE_OK; return rc; } |
︙ | ︙ | |||
549 550 551 552 553 554 555 | *ppOut = (Fts5Tokenizer*)pRet; return rc; } typedef struct PorterContext PorterContext; struct PorterContext { void *pCtx; | | | 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 | *ppOut = (Fts5Tokenizer*)pRet; return rc; } typedef struct PorterContext PorterContext; struct PorterContext { void *pCtx; int (*xToken)(void*, int, const char*, int, int, int); char *aBuf; }; typedef struct PorterRule PorterRule; struct PorterRule { const char *zSuffix; int nSuffix; |
︙ | ︙ | |||
1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 | *pnBuf = nBuf-1; } } } static int fts5PorterCb( void *pCtx, const char *pToken, int nToken, int iStart, int iEnd ){ PorterContext *p = (PorterContext*)pCtx; | > | 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 | *pnBuf = nBuf-1; } } } static int fts5PorterCb( void *pCtx, int tflags, const char *pToken, int nToken, int iStart, int iEnd ){ PorterContext *p = (PorterContext*)pCtx; |
︙ | ︙ | |||
1171 1172 1173 1174 1175 1176 1177 | /* Step 5b. */ if( nBuf>1 && aBuf[nBuf-1]=='l' && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) ){ nBuf--; } | | | > | | | 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 | /* Step 5b. */ if( nBuf>1 && aBuf[nBuf-1]=='l' && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) ){ nBuf--; } return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd); pass_through: return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd); } /* ** Tokenize using the porter tokenizer. */ static int fts5PorterTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, int flags, const char *pText, int nText, int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) ){ PorterTokenizer *p = (PorterTokenizer*)pTokenizer; PorterContext sCtx; sCtx.xToken = xToken; sCtx.pCtx = pCtx; sCtx.aBuf = p->aBuf; return p->tokenizer.xTokenize( p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb ); } /* ** Register all built-in tokenizers with FTS5. */ int sqlite3Fts5TokenizerInit(fts5_api *pApi){ |
︙ | ︙ | |||
1221 1222 1223 1224 1225 1226 1227 | aBuiltin[i].zName, (void*)pApi, &aBuiltin[i].x, 0 ); } | | | 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 | aBuiltin[i].zName, (void*)pApi, &aBuiltin[i].x, 0 ); } return rc; } |
Changes to ext/fts5/test/fts5_common.tcl.
︙ | ︙ | |||
290 291 292 293 294 295 296 297 | proc OR {args} { sort_poslist [concat {*}$args] } proc NOT {a b} { if {[llength $b]>0} { return [list] } return $a } | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 | proc OR {args} { sort_poslist [concat {*}$args] } proc NOT {a b} { if {[llength $b]>0} { return [list] } return $a } #------------------------------------------------------------------------- # This command is similar to [split], except that it also provides the # start and end offsets of each token. For example: # # [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8} # proc gobble_whitespace {textvar} { upvar $textvar t regexp {([ ]*)(.*)} $t -> space t return [string length $space] } proc gobble_text {textvar wordvar} { upvar $textvar t upvar $wordvar w regexp {([^ ]*)(.*)} $t -> w t return [string length $w] } proc fts5_tokenize_split {text} { set token "" set ret [list] set iOff [gobble_whitespace text] while {[set nToken [gobble_text text word]]} { lappend ret $word $iOff [expr $iOff+$nToken] incr iOff $nToken incr iOff [gobble_whitespace text] } set ret } |
Changes to ext/fts5/test/fts5aa.test.
︙ | ︙ | |||
339 340 341 342 343 344 345 | } {} do_execsql_test 13.5 { SELECT rowid FROM t1 WHERE t1 MATCH 'o'; } {1} do_execsql_test 13.6 { | | | 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 | } {} do_execsql_test 13.5 { SELECT rowid FROM t1 WHERE t1 MATCH 'o'; } {1} do_execsql_test 13.6 { SELECT rowid FROM t1 WHERE t1 MATCH '""'; } {} #------------------------------------------------------------------------- # reset_db do_execsql_test 14.1 { CREATE VIRTUAL TABLE t1 USING fts5(x, y); |
︙ | ︙ | |||
501 502 503 504 505 506 507 508 509 510 511 | } do_execsql_test 18.2 { SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c } {1 1} do_execsql_test 18.3 { SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c } {1 1} finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 | } do_execsql_test 18.2 { SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c } {1 1} do_execsql_test 18.3 { SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c } {1 1} #-------------------------------------------------------------------- # fts5 table in the temp schema. # reset_db do_execsql_test 19.0 { CREATE VIRTUAL TABLE temp.t1 USING fts5(x); INSERT INTO t1 VALUES('x y z'); INSERT INTO t1 VALUES('w x 1'); SELECT rowid FROM t1 WHERE t1 MATCH 'x'; } {1 2} #-------------------------------------------------------------------- # Test that 6 and 7 byte varints can be read. # reset_db do_execsql_test 20.0 { CREATE VIRTUAL TABLE temp.tmp USING fts5(x); } set ::ids [list \ 0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43] ] do_test 20.1 { foreach id $::ids { execsql { INSERT INTO tmp(rowid, x) VALUES($id, 'x y z') } } execsql { SELECT rowid FROM tmp WHERE tmp MATCH 'y' } } $::ids finish_test |
Changes to ext/fts5/test/fts5columnsize.test.
︙ | ︙ | |||
130 131 132 133 134 135 136 137 138 | } do_execsql_test 3.2.1 { SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a' } { 1 {-1 0 -1} 2 {-1 0 -1} } finish_test | > > > > > > > > > > > > > | 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | } do_execsql_test 3.2.1 { SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a' } { 1 {-1 0 -1} 2 {-1 0 -1} } #------------------------------------------------------------------------- # Test the integrity-check # do_execsql_test 4.1.1 { CREATE VIRTUAL TABLE t5 USING fts5(x, columnsize=0); INSERT INTO t5 VALUES('1 2 3 4'); INSERT INTO t5 VALUES('2 4 6 8'); } breakpoint do_execsql_test 4.1.2 { INSERT INTO t5(t5) VALUES('integrity-check'); } finish_test |
Changes to ext/fts5/test/fts5ea.test.
︙ | ︙ | |||
83 84 85 86 87 88 89 90 91 92 93 | #------------------------------------------------------------------------- # Experiment with a tokenizer that considers " to be a token character. # do_execsql_test 4.0 { SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"'); } {{"a" AND """"}} finish_test | > > > > > > | 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 | #------------------------------------------------------------------------- # Experiment with a tokenizer that considers " to be a token character. # do_execsql_test 4.0 { SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"'); } {{"a" AND """"}} #------------------------------------------------------------------------- # Experiment with a tokenizer that considers " to be a token character. # do_catchsql_test 5.0 { SELECT fts5_expr('abc | def'); } {1 {fts5: syntax error near "|"}} finish_test |
Changes to ext/fts5/test/fts5eb.test.
︙ | ︙ | |||
26 27 28 29 30 31 32 | proc do_syntax_test {tn expr res} { set ::se_expr $expr do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res] } foreach {tn expr res} { | | | | | | | | | | | | | | 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | proc do_syntax_test {tn expr res} { set ::se_expr $expr do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res] } foreach {tn expr res} { 1 {abc} {"abc"} 2 {abc ""} {"abc"} 3 {""} {} 4 {abc OR ""} {"abc"} 5 {abc NOT ""} {"abc"} 6 {abc AND ""} {"abc"} 7 {"" OR abc} {"abc"} 8 {"" NOT abc} {"abc"} 9 {"" AND abc} {"abc"} 10 {abc + "" + def} {"abc" + "def"} 11 {abc "" def} {"abc" AND "def"} 12 {r+e OR w} {"r" + "e" OR "w"} } { do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] } do_catchsql_test 2.1 { SELECT fts5_expr() } {1 {wrong number of arguments to function fts5_expr}} |
︙ | ︙ |
Changes to ext/fts5/test/fts5fault6.test.
︙ | ︙ | |||
17 18 19 20 21 22 23 24 25 26 27 28 29 30 | set testprefix fts5fault6 # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } #------------------------------------------------------------------------- # OOM while rebuilding an FTS5 table. # do_execsql_test 1.0 { CREATE VIRTUAL TABLE tt USING fts5(a, b); INSERT INTO tt VALUES('c d c g g f', 'a a a d g a'); | > | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | set testprefix fts5fault6 # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } #------------------------------------------------------------------------- # OOM while rebuilding an FTS5 table. # do_execsql_test 1.0 { CREATE VIRTUAL TABLE tt USING fts5(a, b); INSERT INTO tt VALUES('c d c g g f', 'a a a d g a'); |
︙ | ︙ | |||
144 145 146 147 148 149 150 151 152 | db eval { CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc"); } } -test { faultsim_test_result {0 {}} } finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 | db eval { CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc"); } } -test { faultsim_test_result {0 {}} } #------------------------------------------------------------------------- # # 5.2.* OOM while running a query that includes synonyms and matchinfo(). # # 5.3.* OOM while running a query that returns a row containing instances # of more than 4 synonyms for a single term. # proc mit {blob} { set scan(littleEndian) i* set scan(bigEndian) I* binary scan $blob $scan($::tcl_platform(byteOrder)) r return $r } proc tcl_tokenize {tflags text} { foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd if {$tflags=="query" && [string length $w]==1} { for {set i 2} {$i < 7} {incr i} { sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd } } } } proc tcl_create {args} { return "tcl_tokenize" } reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create db func mit mit sqlite3_fts5_register_matchinfo db do_test 5.0 { execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl) } execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 32) } foreach {rowid text} { 1 {aaaa cc b aaaaa cc aa} 2 {aa aa bb a bbb} 3 {bb aaaaa aaaaa b aaaa aaaaa} 4 {aa a b aaaa aa} 5 {aa b ccc aaaaa cc} 6 {aa aaaaa bbbb cc aaa} 7 {aaaaa aa aa ccccc bb} 8 {ccc bbbbb ccccc bbb c} 9 {cccccc bbbb a aaa cccc c} 20 {ddd f ddd eeeee fff ffff eeee ddd fff eeeee dddddd eeee} 21 {fffff eee dddd fffff dd ee ee eeeee eee eeeeee ee dd e} 22 {fffff d eeee dddd fffff dddddd ffff ddddd eeeee ee eee dddd ddddd} 23 {ddddd fff ddd eeeee ffff eeee ddd ff ff ffffff eeeeee dddd ffffff} 24 {eee dd ee dddd dddd eeeeee e eee fff ffff} 25 {ddddd ffffff dddddd fff ddd ddddd ddd f eeee fff dddd f} 26 {f ffff fff fff eeeeee dddd d dddddd ddddd eee ff eeeee} 27 {eee fff dddddd eeeee eeeee dddd ddddd ffff f eeeee eee dddddd ddddd d} 28 {dd ddddd d ddd d fff d dddd ee dddd ee ddd dddddd dddddd} 29 {eeee dddd ee dddd eeee dddd dd fffff f ddd eeeee ddd ee} 30 {ff ffffff eeeeee eeeee eee ffffff ff ffff f fffff eeeee} 31 {fffff eeeeee dddd eeee eeee eeeeee eee fffff d ddddd ffffff ffff dddddd} 32 {dddddd fffff ee eeeeee eeee ee fff dddd fff eeee ffffff eeeeee ffffff} 33 {ddddd eeee dd ffff dddddd fff eeee ddddd ffff eeee ddd} 34 {ee dddd ddddd dddddd eeee eeeeee f dd ee dddddd ffffff} 35 {ee dddd dd eeeeee ddddd eee d eeeeee dddddd eee dddd fffff} 36 {eee ffffff ffffff e fffff eeeee ff dddddd dddddd fff} 37 {eeeee fffff dddddd dddd ffffff fff f dd ee dd dd eeeee} 38 {eeeeee ee d ff eeeeee eeeeee eee eeeee ee ffffff dddd eeee dddddd ee} 39 {eeeeee ddd fffff e dddd ee eee eee ffffff ee f d dddd} 40 {ffffff dddddd eee ee ffffff eee eeee ddddd ee eeeeee f} 41 {ddd ddd fff fffff ee fffff f fff ddddd fffff} 42 {dddd ee ff d f ffffff fff ffffff ff dd dddddd f eeee} 43 {d dd fff fffff d f fff e dddd ee ee} 44 {ff ffff eee ddd d dd ffff dddd d eeee d eeeeee} 45 {eeee f eeeee ee e ffff f ddd e fff} 46 {ffff d ffff eeee ffff eeeee f ffff ddddd eee} 47 {dd dd dddddd ddddd fffff dddddd ddd ddddd eeeeee ffff eeee eee ee} 48 {ffff ffff e dddd ffffff dd dd dddd f fffff} 49 {ffffff d dddddd ffff eeeee f ffff ffff d dd fffff eeeee} 50 {x e} } { execsql { INSERT INTO t1(rowid, a) VALUES($rowid, $text) } } } {} set res [list {*}{ 1 {3 24 8 2 12 6} 5 {2 24 8 2 12 6} 6 {3 24 8 1 12 6} 7 {3 24 8 1 12 6} 9 {2 24 8 3 12 6} }] do_execsql_test 5.1.1 { SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c' } $res do_execsql_test 5.1.2 { SELECT count(*) FROM t1 WHERE t1 MATCH 'd e f' } 29 faultsim_save_and_close do_faultsim_test 5.2 -faults oom* -prep { faultsim_restore_and_reopen sqlite3_fts5_create_tokenizer db tcl tcl_create sqlite3_fts5_register_matchinfo db db func mit mit } -body { db eval { SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c' } } -test { faultsim_test_result [list 0 $::res] } do_faultsim_test 5.3 -faults oom* -prep { faultsim_restore_and_reopen sqlite3_fts5_create_tokenizer db tcl tcl_create } -body { db eval { SELECT count(*) FROM t1 WHERE t1 MATCH 'd AND e AND f' } } -test { faultsim_test_result {0 29} } do_faultsim_test 5.4 -faults oom* -prep { faultsim_restore_and_reopen sqlite3_fts5_create_tokenizer db tcl tcl_create } -body { db eval { SELECT count(*) FROM t1 WHERE t1 MATCH 'x + e' } } -test { faultsim_test_result {0 1} } #------------------------------------------------------------------------- catch { db close } breakpoint do_faultsim_test 6 -faults oom* -prep { sqlite_orig db test.db sqlite3_db_config_lookaside db 0 0 0 } -body { load_static_extension db fts5 } -test { faultsim_test_result {0 {}} {1 {initialization of fts5 failed: }} if {$testrc==0} { db eval { CREATE VIRTUAL TABLE temp.t1 USING fts5(x) } } db close } finish_test |
Added ext/fts5/test/fts5fault7.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | # 2015 September 3 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # # This file is focused on OOM errors. # source [file join [file dirname [info script]] fts5_common.tcl] source $testdir/malloc_common.tcl set testprefix fts5fault2 # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts5 { finish_test return } #------------------------------------------------------------------------- # Test fault-injection on a query that uses xColumnSize() on columnsize=0 # table. # do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0); INSERT INTO t1 VALUES('a b c d e f g'); INSERT INTO t1 VALUES('a b c d'); INSERT INTO t1 VALUES('a b c d e f g h i j'); } fts5_aux_test_functions db do_faultsim_test 1 -faults oom* -body { execsql { SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH 'b' } } -test { faultsim_test_result {0 {7 4 10}} {1 SQLITE_NOMEM} } finish_test |
Changes to ext/fts5/test/fts5matchinfo.test.
︙ | ︙ | |||
351 352 353 354 355 356 357 | GROUP BY t10.rowid ORDER BY 1; } {1 1 one 2 2 two 3 3 three} #--------------------------------------------------------------------------- # Test the 'y' matchinfo flag # | < > | | 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 | GROUP BY t10.rowid ORDER BY 1; } {1 1 one 2 2 two 3 3 three} #--------------------------------------------------------------------------- # Test the 'y' matchinfo flag # reset_db sqlite3_fts5_register_matchinfo db do_execsql_test 11.0 { CREATE VIRTUAL TABLE tt USING fts5(x, y); INSERT INTO tt VALUES('c d a c d d', 'e a g b d a'); -- 1 INSERT INTO tt VALUES('c c g a e b', 'c g d g e c'); -- 2 INSERT INTO tt VALUES('b e f d e g', 'b a c b c g'); -- 3 INSERT INTO tt VALUES('a c f f g d', 'd b f d e g'); -- 4 INSERT INTO tt VALUES('g a c f c f', 'd g g b c c'); -- 5 INSERT INTO tt VALUES('g a c e b b', 'd b f b g g'); -- 6 INSERT INTO tt VALUES('f d a a f c', 'e e a d c f'); -- 7 |
︙ | ︙ | |||
428 429 430 431 432 433 434 | SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 do_execsql_test 11.1.$tn.2 { SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 } | < < > | < | 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 | SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 do_execsql_test 11.1.$tn.2 { SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 } #--------------------------------------------------------------------------- # Test the 'b' matchinfo flag # reset_db sqlite3_fts5_register_matchinfo db db func mit mit do_test 12.0 { set cols [list] for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" } execsql "CREATE VIRTUAL TABLE tt USING fts5([join $cols ,])" } {} do_execsql_test 12.1 { INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc'); SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc'; } [list [list [expr 1<<4] [expr 1<<(45-32)]]] finish_test |
Added ext/fts5/test/fts5synonym.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focusing on custom tokenizers that support synonyms. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5synonym # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } foreach S { {zero 0} {one 1 i} {two 2 ii} {three 3 iii} {four 4 iv} {five 5 v} {six 6 vi} {seven 7 vii} {eight 8 viii} {nine 9 ix} } { foreach s $S { set o [list] foreach x $S {if {$x!=$s} {lappend o $x}} set ::syn($s) $o } } proc tcl_tokenize {tflags text} { foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd } } proc tcl_create {args} { return "tcl_tokenize" } sqlite3_fts5_create_tokenizer db tcl tcl_create #------------------------------------------------------------------------- # Warm body test for the code in fts5_tcl.c. # do_execsql_test 1.0 { CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); INSERT INTO ft VALUES('abc def ghi'); INSERT INTO ft VALUES('jkl mno pqr'); SELECT rowid, x FROM ft WHERE ft MATCH 'def'; SELECT x, rowid FROM ft WHERE ft MATCH 'pqr'; } {1 {abc def ghi} {jkl mno pqr} 2} #------------------------------------------------------------------------- # Test a tokenizer that supports synonyms by adding extra entries to the # FTS index. # proc tcl_tokenize {tflags text} { foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd if {$tflags=="document" && [info exists ::syn($w)]} { foreach s $::syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd } } } } reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create do_execsql_test 2.0 { CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); INSERT INTO ft VALUES('one two three'); INSERT INTO ft VALUES('four five six'); INSERT INTO ft VALUES('eight nine ten'); } {} foreach {tn expr res} { 1 "3" 1 2 "eight OR 8 OR 5" {2 3} 3 "10" {} 4 "1*" {1} 5 "1 + 2" {1} } { do_execsql_test 2.1.$tn { SELECT rowid FROM ft WHERE ft MATCH $expr } $res } #------------------------------------------------------------------------- # Test some broken tokenizers: # # 3.1.*: A tokenizer that declares the very first token to be colocated. # # 3.2.*: A tokenizer that reports two identical tokens at the same position. # This is allowed. # reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create proc tcl_tokenize {tflags text} { set bColo 1 foreach {w iStart iEnd} [fts5_tokenize_split $text] { if {$bColo} { sqlite3_fts5_token -colo $w $iStart $iEnd set bColo 0 } { sqlite3_fts5_token $w $iStart $iEnd } } } do_execsql_test 3.1.0 { CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); INSERT INTO ft VALUES('one two three'); CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row); SELECT * FROM vv; } { one 1 1 three 1 1 two 1 1 } do_execsql_test 3.1.1 { INSERT INTO ft(ft) VALUES('integrity-check'); } {} proc tcl_tokenize {tflags text} { foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd } } do_execsql_test 3.1.2 { SELECT rowid FROM ft WHERE ft MATCH 'one two three' } {1} reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create proc tcl_tokenize {tflags text} { foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd sqlite3_fts5_token -colo $w $iStart $iEnd } } do_execsql_test 3.2.0 { CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); INSERT INTO ft VALUES('one one two three'); CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row); SELECT * FROM vv; } { one 1 4 three 1 2 two 1 2 } do_execsql_test 3.2.1 { SELECT rowid FROM ft WHERE ft MATCH 'one'; } {1} do_execsql_test 3.2.2 { SELECT rowid FROM ft WHERE ft MATCH 'one two three'; } {1} do_execsql_test 3.2.3 { SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three'; } {1} do_execsql_test 3.2.4 { SELECT rowid FROM ft WHERE ft MATCH 'one two two three'; } {1} do_execsql_test 3.2.5 { SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three'; } {} #------------------------------------------------------------------------- # Check that expressions with synonyms can be parsed and executed. # reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create proc tcl_tokenize {tflags text} { foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd if {$tflags=="query" && [info exists ::syn($w)]} { foreach s $::syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd } } } } foreach {tn expr res} { 1 {abc} {"abc"} 2 {one} {"one"|"i"|"1"} 3 {3} {"3"|"iii"|"three"} 4 {3*} {"3"|"iii"|"three" *} } { do_execsql_test 4.1.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res] } do_execsql_test 4.2.1 { CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tcl); INSERT INTO xx VALUES('one two'); INSERT INTO xx VALUES('three four'); } do_execsql_test 4.2.2 { SELECT rowid FROM xx WHERE xx MATCH '2' } {1} do_execsql_test 4.2.3 { SELECT rowid FROM xx WHERE xx MATCH '3' } {2} do_test 5.0 { execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tcl) } foreach {rowid a b} { 1 {four v 4 i three} {1 3 five five 4 one} 2 {5 1 3 4 i} {2 2 v two 4} 3 {5 i 5 2 four 4 1} {iii ii five two 1} 4 {ii four 4 one 5 three five} {one 5 1 iii 4 3} 5 {three i v i four 4 1} {ii five five five iii} 6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv} 7 {ii ii two three 2 5} {iii i ii iii iii one one} 8 {2 ii i two 3 three 2} {two iv v iii 3 five} 9 {i 2 iv 3 five four v} {iii 4 three i three ii 1} } { execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) } } } {} foreach {tn q res} { 1 {one} { 1 {four v 4 [i] three} {[1] 3 five five 4 [one]} 2 {5 [1] 3 4 [i]} {2 2 v two 4} 3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]} 4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3} 5 {three [i] v [i] four 4 [1]} {ii five five five iii} 6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv} 7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]} 8 {2 ii [i] two 3 three 2} {two iv v iii 3 five} 9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]} } 2 {five four} { 1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one} 2 {[5] 1 3 [4] i} {2 2 [v] two [4]} 3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1} 4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3} 5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii} 8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]} 9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1} } 3 {one OR two OR iii OR 4 OR v} { 1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]} 2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]} 3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]} 4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]} 5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]} 6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]} 7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]} 8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]} 9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]} } 4 {5 + 1} { 2 {[5 1] 3 4 i} {2 2 v two 4} 3 {[5 i] 5 2 four 4 1} {iii ii five two 1} 4 {ii four 4 one 5 three five} {one [5 1] iii 4 3} 5 {three i [v i] four 4 1} {ii five five five iii} } 5 {one + two + three} { 7 {ii ii two three 2 5} {iii [i ii iii] iii one one} 8 {2 ii [i two 3] three 2} {two iv v iii 3 five} } 6 {"v v"} { 1 {four v 4 i three} {1 3 [five five] 4 one} 5 {three i v i four 4 1} {ii [five five five] iii} } } { do_execsql_test 5.1.$tn { SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']') FROM t1 WHERE t1 MATCH $q } $res } # Test that the xQueryPhrase() API works with synonyms. # proc mit {blob} { set scan(littleEndian) i* set scan(bigEndian) I* binary scan $blob $scan($::tcl_platform(byteOrder)) r return $r } db func mit mit sqlite3_fts5_register_matchinfo db foreach {tn q res} { 1 {one} { 1 {1 11 7 2 12 6} 2 {2 11 7 0 12 6} 3 {2 11 7 1 12 6} 4 {1 11 7 2 12 6} 5 {3 11 7 0 12 6} 6 {0 11 7 2 12 6} 7 {0 11 7 3 12 6} 8 {1 11 7 0 12 6} 9 {1 11 7 2 12 6} } } { do_execsql_test 5.2.$tn { SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q } $res } #------------------------------------------------------------------------- # Test terms with more than 4 synonyms. # reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create proc tcl_tokenize {tflags text} { foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd if {$tflags=="query" && [string length $w]==1} { for {set i 2} {$i<=10} {incr i} { sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd } } } } do_execsql_test 6.0.1 { CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl); INSERT INTO t1 VALUES('yy xx qq'); INSERT INTO t1 VALUES('yy xx xx'); } do_execsql_test 6.0.2 { SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)'; } {{yy xx qq}} do_test 6.0.3 { execsql { CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl) } foreach {rowid a b} { 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa} 2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq} 3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj} 4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii} 5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n} 6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq} 7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu} 8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss} 9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg} } { execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) } } } {} foreach {tn q res} { 1 {a} { 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]} 3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj} 4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii} 6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq} } 2 {a AND q} { 1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]} 6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]} } 3 {o OR (q AND a)} { 1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]} 2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq} 5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n} 6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]} 9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg} } 4 {NEAR(q y, 20)} { 1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa} 2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]} } } { do_execsql_test 6.1.$tn.asc { SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']') FROM t2 WHERE t2 MATCH $q } $res set res2 [list] foreach {rowid a b} $res { set res2 [concat [list $rowid $a $b] $res2] } do_execsql_test 6.1.$tn.desc { SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']') FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC } $res2 } do_execsql_test 6.2.1 { INSERT INTO t2(rowid, a, b) VALUES(13, 'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy' ); SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')') FROM t2 WHERE t2 MATCH 'x OR y' } { 1 {<yyyy> vvvvv qq oo <yyyyyy> vvvv eee} {ffff uu r qq aaaa} 2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq} 4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii} 13 {<x> <xx> <xxx> <xxxx> <xxxxx> <xxxxxx> <xxxxxxx>} {(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)} } #------------------------------------------------------------------------- # Test that the xColumnSize() API is not confused by colocated tokens. # reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create fts5_aux_test_functions db proc tcl_tokenize {tflags text} { foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd if {[string length $w]==1} { for {set i 2} {$i<=10} {incr i} { sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd } } } } do_execsql_test 7.0.1 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl); INSERT INTO t1 VALUES('0 2 3', '4 5 6 7'); INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0'); SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0'; } {{3 4} {2 10}} do_execsql_test 7.0.2 { INSERT INTO t1(t1) VALUES('integrity-check'); } do_execsql_test 7.1.1 { CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl); INSERT INTO t2 VALUES('0 2 3', '4 5 6 7'); INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0'); SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0'; } {{3 4} {2 10}} do_execsql_test 7.1.2 { INSERT INTO t2(t2) VALUES('integrity-check'); } finish_test |
Changes to main.mk.
︙ | ︙ | |||
43 44 45 46 47 48 49 50 51 52 53 54 55 56 | ################################################################################ # This is how we compile # TCCX = $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3 TCCX += -I$(TOP)/ext/async -I$(TOP)/ext/userauth # Object files for the SQLite library. # LIBOBJ+= vdbe.o parse.o \ alter.o analyze.o attach.o auth.o \ backup.o bitvec.o btmutex.o btree.o build.o \ callback.o complete.o ctime.o date.o dbstat.o delete.o expr.o fault.o fkey.o \ | > | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | ################################################################################ # This is how we compile # TCCX = $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3 TCCX += -I$(TOP)/ext/async -I$(TOP)/ext/userauth TCCX += -I$(TOP)/ext/fts5 # Object files for the SQLite library. # LIBOBJ+= vdbe.o parse.o \ alter.o analyze.o attach.o auth.o \ backup.o bitvec.o btmutex.o btree.o build.o \ callback.o complete.o ctime.o date.o dbstat.o delete.o expr.o fault.o fkey.o \ |
︙ | ︙ | |||
225 226 227 228 229 230 231 232 233 234 235 236 237 238 | $(TOP)/ext/userauth/userauth.c \ $(TOP)/ext/userauth/sqlite3userauth.h SRC += \ $(TOP)/ext/rbu/sqlite3rbu.c \ $(TOP)/ext/rbu/sqlite3rbu.h # Generated source code files # SRC += \ keywordhash.h \ opcodes.c \ opcodes.h \ | > > > > > > > > > > > > > > > > > > > > > > > | 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 | $(TOP)/ext/userauth/userauth.c \ $(TOP)/ext/userauth/sqlite3userauth.h SRC += \ $(TOP)/ext/rbu/sqlite3rbu.c \ $(TOP)/ext/rbu/sqlite3rbu.h # FTS5 things # FTS5_HDR = \ $(TOP)/ext/fts5/fts5.h \ $(TOP)/ext/fts5/fts5Int.h \ fts5parse.h FTS5_SRC = \ $(TOP)/ext/fts5/fts5_aux.c \ $(TOP)/ext/fts5/fts5_buffer.c \ $(TOP)/ext/fts5/fts5_main.c \ $(TOP)/ext/fts5/fts5_config.c \ $(TOP)/ext/fts5/fts5_expr.c \ $(TOP)/ext/fts5/fts5_hash.c \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c \ $(TOP)/ext/fts5/fts5_storage.c \ $(TOP)/ext/fts5/fts5_tokenize.c \ $(TOP)/ext/fts5/fts5_unicode2.c \ $(TOP)/ext/fts5/fts5_varint.c \ $(TOP)/ext/fts5/fts5_vocab.c \ # Generated source code files # SRC += \ keywordhash.h \ opcodes.c \ opcodes.h \ |
︙ | ︙ | |||
632 633 634 635 636 637 638 | fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c | < < < < < < < < < < < < < < < < < < < | < | 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 | fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . rm -f fts5parse.h ./lemon $(OPTS) fts5parse.y fts5parse.h: fts5parse.c fts5.c: $(FTS5_SRC) $(FTS5_HDR) tclsh $(TOP)/ext/fts5/tool/mkfts5c.tcl cp $(TOP)/ext/fts5/fts5.h . userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c sqlite3rbu.o: $(TOP)/ext/rbu/sqlite3rbu.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rbu/sqlite3rbu.c |
︙ | ︙ |