Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add support for phrase queries to fts5. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
2e5652e6526b8fb3f5c163168d95bc0b |
User & Date: | dan 2014-07-02 20:18:49.027 |
Context
2014-07-03
| ||
20:39 | Add support for NEAR expressions to fts5. (check-in: 250ae8d401 user: dan tags: fts5) | |
2014-07-02
| ||
20:18 | Add support for phrase queries to fts5. (check-in: 2e5652e652 user: dan tags: fts5) | |
2014-07-01
| ||
20:45 | Change the position list format so that its size in bytes is stored at the start of the list itself. (check-in: 62f2ff2041 user: dan tags: fts5) | |
Changes
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
62 63 64 65 66 67 68 69 70 71 72 73 74 75 | ); void sqlite3Fts5Dequote(char *z); /* ** End of interface to code in fts5_config.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_index.c. fts5_index.c contains code ** to access the data stored in the %_data table. */ typedef struct Fts5Index Fts5Index; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 | ); void sqlite3Fts5Dequote(char *z); /* ** End of interface to code in fts5_config.c. **************************************************************************/ /************************************************************************** */ /* ** Buffer object for the incremental building of string data. */ typedef struct Fts5Buffer Fts5Buffer; struct Fts5Buffer { u8 *p; int n; int nSpace; }; int sqlite3Fts5BufferGrow(int*, Fts5Buffer*, int); void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64); void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, int, const u8*); void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*); void sqlite3Fts5BufferFree(Fts5Buffer*); void sqlite3Fts5BufferZero(Fts5Buffer*); void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); #define fts5BufferZero(x) sqlite3Fts5BufferZero(x) #define fts5BufferGrow(a,b,c) sqlite3Fts5BufferGrow(a,b,c) #define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,c) #define fts5BufferFree(a) sqlite3Fts5BufferFree(a) #define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d) #define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d) /* ** End of interface to code in fts5_buffer.c. 
**************************************************************************/ /************************************************************************** ** Interface to code in fts5_index.c. fts5_index.c contains code ** to access the data stored in the %_data table. */ typedef struct Fts5Index Fts5Index; |
︙ | ︙ |
Added ext/fts5/fts5_buffer.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** */ #include "fts5Int.h" int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){ /* A no-op if an error has already occurred */ if( *pRc ) return 1; if( (pBuf->n + nByte) > pBuf->nSpace ){ u8 *pNew; int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64; while( nNew<(pBuf->n + nByte) ){ nNew = nNew * 2; } pNew = sqlite3_realloc(pBuf->p, nNew); if( pNew==0 ){ *pRc = SQLITE_NOMEM; return 1; }else{ pBuf->nSpace = nNew; pBuf->p = pNew; } } return 0; } /* ** Encode value iVal as an SQLite varint and append it to the buffer object ** pBuf. If an OOM error occurs, set the error code in p. */ void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ if( sqlite3Fts5BufferGrow(pRc, pBuf, 9) ) return; pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iVal); } /* ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set ** the error code in p. 
If an error has already occurred when this function ** is called, it is a no-op. */ void sqlite3Fts5BufferAppendBlob( int *pRc, Fts5Buffer *pBuf, int nData, const u8 *pData ){ if( sqlite3Fts5BufferGrow(pRc, pBuf, nData) ) return; memcpy(&pBuf->p[pBuf->n], pData, nData); pBuf->n += nData; } /* ** Append the nul-terminated string zStr to the buffer pBuf. This function ** ensures that the byte following the buffer data is set to 0x00, even ** though this byte is not included in the pBuf->n count. */ void sqlite3Fts5BufferAppendString( int *pRc, Fts5Buffer *pBuf, const char *zStr ){ int nStr = strlen(zStr); if( sqlite3Fts5BufferGrow(pRc, pBuf, nStr+1) ) return; sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr, (const u8*)zStr); if( *pRc==SQLITE_OK ) pBuf->p[pBuf->n] = 0x00; } /* ** Argument zFmt is a printf() style format string. This function performs ** the printf() style processing, then appends the results to buffer pBuf. ** ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte ** following the buffer data is set to 0x00, even though this byte is not ** included in the pBuf->n count. */ void sqlite3Fts5BufferAppendPrintf( int *pRc, Fts5Buffer *pBuf, char *zFmt, ... ){ if( *pRc==SQLITE_OK ){ char *zTmp; va_list ap; va_start(ap, zFmt); zTmp = sqlite3_vmprintf(zFmt, ap); va_end(ap); if( zTmp==0 ){ *pRc = SQLITE_NOMEM; }else{ sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp); sqlite3_free(zTmp); } } } /* ** Free any buffer allocated by pBuf. Zero the structure before returning. */ void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){ sqlite3_free(pBuf->p); memset(pBuf, 0, sizeof(Fts5Buffer)); } /* ** Zero the contents of the buffer object. But do not free the associated ** memory allocation. */ void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){ pBuf->n = 0; } /* ** Set the buffer to contain nData/pData. If an OOM error occurs, leave ** an error code in *pRc. If an error has already occurred when this function ** is called, it is a no-op. 
*/ void sqlite3Fts5BufferSet( int *pRc, Fts5Buffer *pBuf, int nData, const u8 *pData ){ pBuf->n = 0; sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData); } |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
64 65 66 67 68 69 70 71 72 73 74 75 76 77 | }; /* ** A phrase. One or more terms that must appear in a contiguous sequence ** within a document for it to match. */ struct Fts5ExprPhrase { int nTerm; /* Number of entries in aTerm[] */ Fts5ExprTerm aTerm[0]; /* Terms that make up this phrase */ }; /* ** One or more phrases that must appear within a certain token distance of ** each other within each matching document. | > > | 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | }; /* ** A phrase. One or more terms that must appear in a contiguous sequence ** within a document for it to match. */ struct Fts5ExprPhrase { Fts5Buffer poslist; /* Current position list */ i64 iRowid; /* Current rowid */ int nTerm; /* Number of entries in aTerm[] */ Fts5ExprTerm aTerm[0]; /* Terms that make up this phrase */ }; /* ** One or more phrases that must appear within a certain token distance of ** each other within each matching document. |
︙ | ︙ | |||
89 90 91 92 93 94 95 96 97 98 99 100 101 102 | */ struct Fts5Parse { Fts5Config *pConfig; char *zErr; int rc; Fts5ExprNode *pExpr; /* Result of a successful parse */ }; void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ if( pParse->rc==SQLITE_OK ){ va_list ap; va_start(ap, zFmt); pParse->zErr = sqlite3_vmprintf(zFmt, ap); va_end(ap); | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | */ struct Fts5Parse { Fts5Config *pConfig; char *zErr; int rc; Fts5ExprNode *pExpr; /* Result of a successful parse */ }; /************************************************************************* */ typedef struct Fts5PoslistIter Fts5PoslistIter; struct Fts5PoslistIter { const u8 *a; /* Position list to iterate through */ int n; /* Size of buffer at a[] in bytes */ int i; /* Current offset in a[] */ /* Output variables */ int bEof; /* Set to true at EOF */ i64 iPos; /* (iCol<<32) + iPos */ }; static void fts5PoslistIterNext(Fts5PoslistIter *pIter){ if( pIter->i>=pIter->n ){ pIter->bEof = 1; }else{ int iVal; pIter->i += getVarint32(&pIter->a[pIter->i], iVal); if( iVal==1 ){ pIter->i += getVarint32(&pIter->a[pIter->i], iVal); pIter->iPos = ((u64)iVal << 32); pIter->i += getVarint32(&pIter->a[pIter->i], iVal); } pIter->iPos += (iVal-2); } } static void fts5PoslistIterInit(const u8 *a, int n, Fts5PoslistIter *pIter){ memset(pIter, 0, sizeof(*pIter)); pIter->a = a; pIter->n = n; fts5PoslistIterNext(pIter); } /* *************************************************************************/ void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ if( pParse->rc==SQLITE_OK ){ va_list ap; va_start(ap, zFmt); pParse->zErr = sqlite3_vmprintf(zFmt, ap); va_end(ap); |
︙ | ︙ | |||
242 243 244 245 246 247 248 249 250 251 252 253 254 255 | /* ** */ static int fts5ExprNodeTest(Fts5Expr *pExpr, Fts5ExprNode *pNode){ assert( 0 ); return SQLITE_OK; } static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK; pNode->bEof = 0; if( pNode->eType==FTS5_STRING ){ Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < | | < < < < < < | < | < < < | < < | < | < | 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 
553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 | /* ** */ static int fts5ExprNodeTest(Fts5Expr *pExpr, Fts5ExprNode *pNode){ assert( 0 ); return SQLITE_OK; } /* ** All individual term iterators in pPhrase are guaranteed to be valid and ** pointing to the same rowid when this function is called. This function ** checks if the current rowid really is a match, and if so populates ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch ** is set to true if this is really a match, or false otherwise. ** ** SQLITE_OK is returned if no error occurs, or an SQLite error code ** otherwise. It is not considered an error if the current rowid is ** not a match. */ static int fts5ExprPhraseIsMatch( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */ ){ Fts5PoslistIter aStatic[4]; Fts5PoslistIter *aIter = aStatic; int i; int rc = SQLITE_OK; if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ int nByte = sizeof(Fts5PoslistIter) * pPhrase->nTerm; aIter = (Fts5PoslistIter*)sqlite3_malloc(nByte); if( !aIter ) return SQLITE_NOMEM; } /* Initialize a term iterator for each term in the phrase */ for(i=0; i<pPhrase->nTerm; i++){ int n; const u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &n); fts5PoslistIterInit(a, n, &aIter[i]); } *pbMatch = 0; while( 1 ){ int bMatch = 1; i64 iPos = aIter[0].iPos; for(i=1; i<pPhrase->nTerm; i++){ Fts5PoslistIter *pPos = &aIter[i]; i64 iAdj = pPos->iPos-i; if( (pPos->iPos-i)!=iPos ){ bMatch = 0; if( iAdj>iPos ) iPos = iAdj; } } if( bMatch ){ *pbMatch = 1; break; } for(i=0; i<pPhrase->nTerm; i++){ Fts5PoslistIter *pPos = &aIter[i]; while( (pPos->iPos-i) < iPos ){ fts5PoslistIterNext(pPos); if( pPos->bEof ) goto ismatch_out; } } } ismatch_out: if( aIter!=aStatic ) sqlite3_free(aIter); return rc; } /* ** All individual term iterators in pPhrase are 
guaranteed to be valid when ** this function is called. This function checks if all term iterators ** point to the same rowid, and if not, advances them until they do. ** If an EOF is reached before this happens, *pbEof is set to true before ** returning. ** ** SQLITE_OK is returned if no error occurs, or an SQLite error code ** otherwise. It is not considered an error if an iterator reaches ** EOF. */ static int fts5ExprPhraseNextRowidMatch( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbEof /* OUT: Set to true if phrase at EOF */ ){ assert( *pbEof==0 ); while( 1 ){ int i; int bMatch = 1; i64 iMin = sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter); for(i=1; i<pPhrase->nTerm; i++){ i64 iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[i].pIter); if( iRowid!=iMin ){ bMatch = 0; if( iRowid<iMin ) iMin = iRowid; } } if( bMatch ) break; for(i=0; i<pPhrase->nTerm; i++){ Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; while( sqlite3Fts5IterRowid(pIter)>iMin ){ sqlite3Fts5IterNext(pIter, 0); if( sqlite3Fts5IterEof(pIter) ){ *pbEof = 1; return SQLITE_OK; } } } } return SQLITE_OK; } static int fts5ExprPhraseAdvanceAll( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbEof /* OUT: Set to true if phrase at EOF */ ){ int i; int rc = SQLITE_OK; for(i=0; i<pPhrase->nTerm; i++){ Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; sqlite3Fts5IterNext(pIter, 0); if( sqlite3Fts5IterEof(pIter) ){ *pbEof = 1; break; } } return rc; } /* ** Argument pPhrase points to a multi-term phrase object. All individual ** term iterators point to valid entries (not EOF). ** ** This function tests if the term iterators currently all point to the ** same rowid, and if so, if the rowid matches the phrase constraint. If ** so, the pPhrase->poslist buffer is populated and the pPhrase->iRowid ** variable set before returning. 
Or, if the current combination of ** iterators is not a match, they are advanced until they are. If one of ** the iterators reaches EOF before a match is found, *pbEof is set to ** true before returning. The final values of the pPhrase->poslist and ** iRowid fields are undefined in this case. ** ** SQLITE_OK is returned if no error occurs, or an SQLite error code ** otherwise. It is not considered an error if an iterator reaches ** EOF. */ static int fts5ExprPhraseNextMatch( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbEof /* OUT: Set to true if phrase at EOF */ ){ int i; /* Used to iterate through terms */ int rc = SQLITE_OK; /* Return code */ int bMatch = 0; assert( *pbEof==0 ); while( 1 ){ rc = fts5ExprPhraseNextRowidMatch(pExpr, pPhrase, pbEof); if( rc!=SQLITE_OK || *pbEof ) break; /* At this point, all term iterators are valid and point to the same rowid. ** The following assert() statements verify this. */ #ifdef SQLITE_DEBUG for(i=0; i<pPhrase->nTerm; i++){ Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; Fts5IndexIter *pOne = pPhrase->aTerm[0].pIter; assert( 0==sqlite3Fts5IterEof(pIter) ); assert( sqlite3Fts5IterRowid(pOne)==sqlite3Fts5IterRowid(pIter) ); } #endif rc = fts5ExprPhraseIsMatch(pExpr, pPhrase, &bMatch); if( rc!=SQLITE_OK || bMatch ) break; rc = fts5ExprPhraseAdvanceAll(pExpr, pPhrase, pbEof); if( rc!=SQLITE_OK || *pbEof ) break; } pPhrase->iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter); return rc; } /* ** Advance the phrase iterator pPhrase to the next match. 
*/ static int fts5ExprPhraseNext( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbEof /* OUT: Set to true if phrase at EOF */ ){ int i; for(i=0; i<pPhrase->nTerm; i++){ Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; sqlite3Fts5IterNext(pIter, 0); if( sqlite3Fts5IterEof(pIter) ){ *pbEof = 1; return SQLITE_OK; } } if( pPhrase->nTerm==1 ){ pPhrase->iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter); }else{ fts5ExprPhraseNextMatch(pExpr, pPhrase, pbEof); } return SQLITE_OK; } /* ** Point phrase object pPhrase at the first matching document. Or, if there ** are no matching documents at all, move pPhrase to EOF and set *pbEof to ** true before returning. ** ** If no error occurs, SQLITE_OK is returned. Otherwise, an SQLite error ** code. */ static int fts5ExprPhraseFirst( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbEof /* OUT: Set to true if phrase at EOF */ ){ int i; /* Used to iterate through terms */ int rc = SQLITE_OK; for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; pTerm->pIter = sqlite3Fts5IndexQuery( pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | (pExpr->bAsc ? 
FTS5INDEX_QUERY_ASC : 0) ); if( sqlite3Fts5IterEof(pTerm->pIter) ){ *pbEof = 1; return SQLITE_OK; } } if( pPhrase->nTerm==1 ){ const u8 *a; int n; Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; pPhrase->iRowid = sqlite3Fts5IterRowid(pIter); a = sqlite3Fts5IterPoslist(pIter, &n); if( a ){ sqlite3Fts5BufferSet(&rc, &pPhrase->poslist, n, a); } }else{ rc = fts5ExprPhraseNextMatch(pExpr, pPhrase, pbEof); } return rc; } static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK; pNode->bEof = 0; if( pNode->eType==FTS5_STRING ){ Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; assert( pNode->pNear->nPhrase==1 ); assert( pNode->bEof==0 ); rc = fts5ExprPhraseFirst(pExpr, pPhrase, &pNode->bEof); pNode->iRowid = pPhrase->iRowid; }else{ rc = fts5ExprNodeFirst(pExpr, pNode->pLeft); if( rc==SQLITE_OK ){ rc = fts5ExprNodeFirst(pExpr, pNode->pRight); } if( rc==SQLITE_OK ){ rc = fts5ExprNodeTest(pExpr, pNode); } } return rc; } static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK; if( pNode->eType==FTS5_STRING ){ Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; assert( pNode->pNear->nPhrase==1 ); rc = fts5ExprPhraseNext(pExpr, pPhrase, &pNode->bEof); pNode->iRowid = pPhrase->iRowid; }else{ assert( 0 ); } return rc; } |
︙ | ︙ | |||
369 370 371 372 373 374 375 376 377 378 379 380 381 382 | for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); if( pTerm->pIter ){ sqlite3Fts5IterClose(pTerm->pIter); } } sqlite3_free(pPhrase); } } /* ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is | > | 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 | for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); if( pTerm->pIter ){ sqlite3Fts5IterClose(pTerm->pIter); } } fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); } } /* ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is |
︙ | ︙ | |||
451 452 453 454 455 456 457 458 459 460 461 462 | Fts5ExprPhrase *pNew; int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew ); if( pNew==0 ) return SQLITE_NOMEM; pCtx->pPhrase = pPhrase = pNew; pNew->nTerm = nNew - SZALLOC; } pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; | > | < | 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 | Fts5ExprPhrase *pNew; int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew ); if( pNew==0 ) return SQLITE_NOMEM; if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); pCtx->pPhrase = pPhrase = pNew; pNew->nTerm = nNew - SZALLOC; } pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); pTerm->zTerm = fts5Strdup(pToken, nToken); return pTerm->zTerm ? SQLITE_OK : SQLITE_NOMEM; } /* |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
250 251 252 253 254 255 256 | #else # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB #endif typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; | < | 250 251 252 253 254 255 256 257 258 259 260 261 262 263 | #else # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB #endif typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5ChunkIter Fts5ChunkIter; typedef struct Fts5Data Fts5Data; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; typedef struct Fts5PageWriter Fts5PageWriter; typedef struct Fts5PendingDoclist Fts5PendingDoclist; typedef struct Fts5PendingPoslist Fts5PendingPoslist; |
︙ | ︙ | |||
294 295 296 297 298 299 300 | /* State used by the fts5DataXXX() functions. */ sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ }; | < < < < < < < < < | 293 294 295 296 297 298 299 300 301 302 303 304 305 306 | /* State used by the fts5DataXXX() functions. */ sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ }; struct Fts5IndexIter { Fts5Index *pIndex; Fts5Structure *pStruct; Fts5MultiSegIter *pMulti; Fts5Buffer poslist; /* Buffer containing current poslist */ }; |
︙ | ︙ | |||
555 556 557 558 559 560 561 | }else{ memset(pRet, 0, nByte); } return pRet; } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 545 546 547 548 549 550 551 552 553 554 555 556 557 558 | }else{ memset(pRet, 0, nByte); } return pRet; } /* ** Compare the contents of the pLeft buffer with the pRight/nRight blob. ** ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or ** +ve if pRight is smaller than pLeft. In other words: ** ** res = *pLeft - *pRight |
︙ | ︙ | |||
1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 | pIter->n = MIN(pLeaf->n - iOff, pIter->nRem); pIter->p = pLeaf->p + iOff; if( pIter->n==0 ){ fts5ChunkIterNext(p, pIter); } } /* ** Read and return the next 32-bit varint from the position-list iterator ** passed as the second argument. ** ** If an error occurs, zero is returned and an error code left in ** Fts5Index.rc. If an error has already occurred when this function is | > > > > > | 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 | pIter->n = MIN(pLeaf->n - iOff, pIter->nRem); pIter->p = pLeaf->p + iOff; if( pIter->n==0 ){ fts5ChunkIterNext(p, pIter); } } static void fts5ChunkIterRelease(Fts5ChunkIter *pIter){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; } /* ** Read and return the next 32-bit varint from the position-list iterator ** passed as the second argument. ** ** If an error occurs, zero is returned and an error code left in ** Fts5Index.rc. If an error has already occurred when this function is |
︙ | ︙ | |||
2931 2932 2933 2934 2935 2936 2937 | if( rc!=SQLITE_OK ){ *pRc = rc; return; } for(iLvl=0; iLvl<p->nLevel; iLvl++){ Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; | > | > | | | 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 | if( rc!=SQLITE_OK ){ *pRc = rc; return; } for(iLvl=0; iLvl<p->nLevel; iLvl++){ Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge ); for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, pSeg->pgnoFirst, pSeg->pgnoLast ); } sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); } fts5StructureRelease(p); } /* ** Decode a segment-data rowid from the %_data table. This function is |
︙ | ︙ | |||
2980 2981 2982 2983 2984 2985 2986 | ** The return value is the number of bytes read from the input buffer. */ static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ int iOff = 0; while( iOff<n ){ int iVal; iOff += getVarint32(&a[iOff], iVal); | | | | | 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 | ** The return value is the number of bytes read from the input buffer. */ static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ int iOff = 0; while( iOff<n ){ int iVal; iOff += getVarint32(&a[iOff], iVal); sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal); } return iOff; } /* ** The start of buffer (a/n) contains the start of a doclist. The doclist ** may or may not finish within the buffer. This function appends a text ** representation of the part of the doclist that is present to buffer ** pBuf. ** ** The return value is the number of bytes read from the input buffer. */ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ i64 iDocid; int iOff = 0; if( iOff<n ){ iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDocid); sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid); } while( iOff<n ){ int nPos; iOff += getVarint32(&a[iOff], nPos); iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)); if( iOff<n ){ i64 iDelta; iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDelta); if( iDelta==0 ) return iOff; iDocid -= iDelta; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid); } } return iOff; } /* |
︙ | ︙ | |||
3040 3041 3042 3043 3044 3045 3046 | iRowid = sqlite3_value_int64(apVal[0]); n = sqlite3_value_bytes(apVal[1]); a = sqlite3_value_blob(apVal[1]); fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno); if( iSegid==0 ){ if( iRowid==FTS5_AVERAGES_ROWID ){ | | > | > | | 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 | iRowid = sqlite3_value_int64(apVal[0]); n = sqlite3_value_bytes(apVal[1]); a = sqlite3_value_blob(apVal[1]); fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno); if( iSegid==0 ){ if( iRowid==FTS5_AVERAGES_ROWID ){ sqlite3Fts5BufferAppendPrintf(&rc, &s, "{averages} "); }else{ sqlite3Fts5BufferAppendPrintf(&rc, &s, "{structure idx=%d}", (int)(iRowid-10) ); fts5DecodeStructure(&rc, &s, a, n); } }else{ Fts5Buffer term; memset(&term, 0, sizeof(Fts5Buffer)); sqlite3Fts5BufferAppendPrintf(&rc, &s, "(idx=%d segid=%d h=%d pgno=%d) ", iIdx, iSegid, iHeight, iPgno ); if( iHeight==0 ){ int iTermOff = 0; int iRowidOff = 0; int iOff; |
︙ | ︙ | |||
3085 3086 3087 3088 3089 3090 3091 | while( iOff<n ){ int nByte; iOff += getVarint32(&a[iOff], nByte); term.n= nKeep; fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); iOff += nByte; | | | | > > | | 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 | while( iOff<n ){ int nByte; iOff += getVarint32(&a[iOff], nByte); term.n= nKeep; fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); iOff += nByte; sqlite3Fts5BufferAppendPrintf( &rc, &s, " term=%.*s", term.n, (const char*)term.p ); iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); if( iOff<n ){ iOff += getVarint32(&a[iOff], nKeep); } } fts5BufferFree(&term); }else{ Fts5NodeIter ss; for(fts5NodeIterInit(a, n, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){ if( ss.term.n==0 ){ sqlite3Fts5BufferAppendPrintf(&rc, &s, " left=%d", ss.iChild); }else{ sqlite3Fts5BufferAppendPrintf(&rc,&s, " \"%.*s\"", ss.term.n, ss.term.p ); } if( ss.nEmpty ){ sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d", ss.nEmpty); } } fts5NodeIterFree(&ss); } } if( rc==SQLITE_OK ){ |
︙ | ︙ | |||
3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 | /* No matching prefix index. todo: deal with this. */ assert( 0 ); } } pRet = (Fts5IndexIter*)sqlite3_malloc(sizeof(Fts5IndexIter)); if( pRet ){ pRet->pStruct = fts5StructureRead(p, 0); if( pRet->pStruct ){ fts5MultiIterNew(p, pRet->pStruct, iIdx, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti ); } pRet->pIndex = p; | > | 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 | /* No matching prefix index. todo: deal with this. */ assert( 0 ); } } pRet = (Fts5IndexIter*)sqlite3_malloc(sizeof(Fts5IndexIter)); if( pRet ){ memset(pRet, 0, sizeof(Fts5IndexIter)); pRet->pStruct = fts5StructureRead(p, 0); if( pRet->pStruct ){ fts5MultiIterNew(p, pRet->pStruct, iIdx, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti ); } pRet->pIndex = p; |
︙ | ︙ | |||
3212 3213 3214 3215 3216 3217 3218 3219 | ** the current entry. Output variable *pn is set to the size of the buffer ** in bytes before returning. ** ** The returned buffer does not include the 0x00 terminator byte stored on ** disk. */ const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, int *pn){ assert( sqlite3Fts5IterEof(pIter)==0 ); | > > > > > > > > > > > | > > > > > | 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 | ** the current entry. Output variable *pn is set to the size of the buffer ** in bytes before returning. ** ** The returned buffer does not include the 0x00 terminator byte stored on ** disk. */ const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, int *pn){ Fts5ChunkIter iter; Fts5Index *p = pIter->pIndex; Fts5SegIter *pSeg = &pIter->pMulti->aSeg[ pIter->pMulti->aFirst[1] ]; assert( sqlite3Fts5IterEof(pIter)==0 ); fts5ChunkIterInit(p, pSeg, &iter); if( fts5ChunkIterEof(p, &iter)==0 ){ fts5BufferZero(&pIter->poslist); fts5BufferGrow(&p->rc, &pIter->poslist, iter.nRem); while( fts5ChunkIterEof(p, &iter)==0 ){ fts5BufferAppendBlob(&p->rc, &pIter->poslist, iter.n, iter.p); fts5ChunkIterNext(p, &iter); } } fts5ChunkIterRelease(&iter); if( p->rc ) return 0; *pn = pIter->poslist.n; return pIter->poslist.p; } /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). */ void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ if( pIter ){ fts5MultiIterFree(pIter->pIndex, pIter->pMulti); fts5StructureRelease(pIter->pStruct); fts5CloseReader(pIter->pIndex); fts5BufferFree(&pIter->poslist); sqlite3_free(pIter); } } |
Changes to main.mk.
︙ | ︙ | |||
69 70 71 72 73 74 75 76 77 78 79 80 81 82 | random.o resolve.o rowset.o rtree.o select.o status.o \ table.o tokenize.o trigger.o \ update.o util.o vacuum.o \ vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \ vdbetrace.o wal.o walker.o where.o utf.o vtab.o LIBOBJ += fts5.o LIBOBJ += fts5_config.o LIBOBJ += fts5_expr.o LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o LIBOBJ += fts5parse.o | > | 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | random.o resolve.o rowset.o rtree.o select.o status.o \ table.o tokenize.o trigger.o \ update.o util.o vacuum.o \ vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \ vdbetrace.o wal.o walker.o where.o utf.o vtab.o LIBOBJ += fts5.o LIBOBJ += fts5_buffer.o LIBOBJ += fts5_config.o LIBOBJ += fts5_expr.o LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o LIBOBJ += fts5parse.o |
︙ | ︙ | |||
568 569 570 571 572 573 574 575 576 577 578 579 580 581 | rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c # FTS5 things # fts5_config.o: $(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_config.c fts5_expr.o: $(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR) | > > > | 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 | rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c # FTS5 things # fts5_buffer.o: $(TOP)/ext/fts5/fts5_buffer.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_buffer.c fts5_config.o: $(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_config.c fts5_expr.o: $(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR) |
︙ | ︙ |
Changes to test/fts5ab.test.
︙ | ︙ | |||
90 91 92 93 94 95 96 | 8 x {6} 9 y {6} 10 z {6} } { do_execsql_test 2.7.$tn { SELECT rowid FROM t1 WHERE t1 MATCH $expr } $res } | > > | > > > > | > > > > > > > > > > > | > > > | > > > > > > > > | > > > | < > | 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | 8 x {6} 9 y {6} 10 z {6} } { do_execsql_test 2.7.$tn { SELECT rowid FROM t1 WHERE t1 MATCH $expr } $res } #------------------------------------------------------------------------- # reset_db do_execsql_test 2.0 { CREATE VIRTUAL TABLE t1 USING fts5(a,b); INSERT INTO t1(t1) VALUES('pgsz=32'); } foreach {tn a b} { 1 {abashed abandons abase abash abaft} {abases abased} 2 {abasing abases abaft abated abandons} {abases abandoned} 3 {abatement abash abash abated abase} {abasements abashing} 4 {abaft abasements abase abasement abasing} {abasement abases} 5 {abaft abashing abatement abash abasements} {abandons abandoning} 6 {aback abate abasements abashes abandoned} {abasement abased} 7 {abandons abated abased aback abandoning} {abases abandoned} 8 {abashing abases abasement abaft abashing} {abashed abate} 9 {abash abase abate abashing abashed} {abandon abandoned} 10 {abate abandoning abandons abasement aback} {abandon abandoning} } { do_execsql_test 2.1.$tn.1 { INSERT INTO t1 VALUES($a, $b) } do_execsql_test 2.1.$tn.2 { INSERT INTO t1(t1) VALUES('integrity-check') } } foreach {tn expr res} { 1 {abash} {9 5 3 1} 2 {abase} {9 4 3 1} 3 {abase + abash} {1} 4 {abash + abase} {9} 5 {abaft + abashing} {8 5} 6 {abandon + abandoning} {10} 7 {"abashing abases abasement abaft abashing"} {8} } { do_execsql_test 2.2.$tn { SELECT rowid FROM t1 WHERE t1 MATCH $expr } $res } finish_test |