Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fixes to problems in FTS3 snippet() function found by th3 tests. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
3b5ccd2682176929f4da8a3f39a7e8f5 |
User & Date: | dan 2010-01-07 10:54:29.000 |
Context
2010-01-07
| ||
11:27 | Changes to test code so that testfixture compiles when OMIT_SHARED_CACHE and OMIT_UTF16 are defined. (check-in: d6ee5ff6c8 user: dan tags: trunk) | |
10:54 | Fixes to problems in FTS3 snippet() function found by th3 tests. (check-in: 3b5ccd2682 user: dan tags: trunk) | |
03:53 | Another attempt at fixing the table generator in lemon. Again, this does not affect the SQLite grammar. (check-in: e22c090f35 user: drh tags: trunk) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
2172 2173 2174 2175 2176 2177 2178 | sqlite3_value **apVal /* Array of arguments */ ){ Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ const char *zStart = "<b>"; const char *zEnd = "</b>"; const char *zEllipsis = "<b>...</b>"; int iCol = -1; | | | 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 | sqlite3_value **apVal /* Array of arguments */ ){ Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ const char *zStart = "<b>"; const char *zEnd = "</b>"; const char *zEllipsis = "<b>...</b>"; int iCol = -1; int nToken = 15; /* Default number of tokens in snippet */ /* There must be at least one argument passed to this function (otherwise ** the non-overloaded version would have been called instead of this one). */ assert( nVal>=1 ); if( nVal>6 ){ |
︙ | ︙ |
Changes to ext/fts3/fts3_snippet.c.
︙ | ︙ | |||
41 42 43 44 45 46 47 | static int fts3ExprIterate( Fts3Expr *pExpr, /* Expression to iterate phrases of */ int (*x)(Fts3Expr *, void *), /* Callback function to invoke for phrases */ void *pCtx /* Second argument to pass to callback */ ){ int rc; int eType = pExpr->eType; | < < | | | 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | static int fts3ExprIterate( Fts3Expr *pExpr, /* Expression to iterate phrases of */ int (*x)(Fts3Expr *, void *), /* Callback function to invoke for phrases */ void *pCtx /* Second argument to pass to callback */ ){ int rc; int eType = pExpr->eType; if( eType!=FTSQUERY_PHRASE ){ assert( pExpr->pLeft && pExpr->pRight ); rc = fts3ExprIterate(pExpr->pLeft, x, pCtx); if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ rc = fts3ExprIterate(pExpr->pRight, x, pCtx); } }else{ rc = x(pExpr, pCtx); } return rc; } |
︙ | ︙ | |||
104 105 106 107 108 109 110 | p->nPhrase++; p->nToken += pExpr->pPhrase->nToken; if( pExpr->isLoaded==0 ){ rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr); pExpr->isLoaded = 1; if( rc==SQLITE_OK ){ | | | 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | p->nPhrase++; p->nToken += pExpr->pPhrase->nToken; if( pExpr->isLoaded==0 ){ rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr); pExpr->isLoaded = 1; if( rc==SQLITE_OK ){ rc = fts3ExprNearTrim(pExpr); } } return rc; } static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, void *ctx){ |
︙ | ︙ | |||
455 456 457 458 459 460 461 | } pC->pTokenizer = pTab->pTokenizer; while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); } pMod->xClose(pC); | | < | | > > > < | | < < < | > | > | | < | < | > > | < | < | < | < < < < < < < < < < | > > > | | > > > > > > > | < | < | < < < < > > > > > > > > > > > | 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 | } pC->pTokenizer = pTab->pTokenizer; while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); } pMod->xClose(pC); if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; assert( nShift<=nDesired ); if( nShift>0 ){ *piPos += nShift; *pHlmask = hlmask >> nShift; } } } return SQLITE_OK; } static int fts3SnippetText( Fts3Cursor *pCsr, /* FTS3 Cursor */ SnippetFragment *pFragment, /* Snippet to extract */ int iFragment, /* Fragment number */ int isLast, /* True for final fragment in snippet */ int nSnippet, /* Number of tokens in extracted snippet */ const char *zOpen, /* String inserted before highlighted term */ const char *zClose, /* String inserted after highlighted term */ const char *zEllipsis, StrBuffer *pOut ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc; /* Return code */ const char *zDoc; /* Document text to extract snippet from */ int nDoc; /* Size of zDoc in bytes */ int iCurrent = 0; /* Current token number of 
document */ int iEnd = 0; /* Byte offset of end of current token */ int isShiftDone = 0; int iPos = pFragment->iPos; u64 hlmask = pFragment->hlmask; sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ const char *ZDUMMY; /* Dummy arguments used with tokenizer */ int DUMMY1; /* Dummy arguments used with tokenizer */ zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1); if( zDoc==0 ){ if( sqlite3_column_type(pCsr->pStmt, pFragment->iCol+1)!=SQLITE_NULL ){ return SQLITE_NOMEM; } return SQLITE_OK; } nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1); /* Open a token cursor on the document. */ pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); if( rc!=SQLITE_OK ){ return rc; } pC->pTokenizer = pTab->pTokenizer; while( rc==SQLITE_OK ){ int iBegin; /* Offset in zDoc of start of token */ int iFin; /* Offset in zDoc of end of token */ int isHighlight; rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); if( rc!=SQLITE_OK ){ if( rc==SQLITE_DONE ){ /* Special case - the last token of the snippet is also the last token ** of the column. Append any punctuation that occurred between the end ** of the previous token and the end of the document to the output. ** Then break out of the loop. */ rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); } break; } if( iCurrent<iPos ){ continue; } if( !isShiftDone ){ int n = nDoc - iBegin; rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask); isShiftDone = 1; /* Now that the shift has been done, check if the initial "..." are ** required. They are required if (a) this is not the first fragment, ** or (b) this fragment does not begin at position 0 of its column. 
*/ if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){ rc = fts3StringAppend(pOut, zEllipsis, -1); } if( rc!=SQLITE_OK || iCurrent<iPos ) continue; } if( iCurrent>=(iPos+nSnippet) ){ if( isLast ){ rc = fts3StringAppend(pOut, zEllipsis, -1); } break; } /* Set isHighlight to true if this term should be highlighted. */ isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); iEnd = iFin; } pMod->xClose(pC); return rc; } |
︙ | ︙ | |||
799 800 801 802 803 804 805 | assert( (mCovered&mSeen)==mCovered ); if( mSeen==mCovered ) break; }while( nSnippet<SizeofArray(aSnippet) ); assert( nFToken>0 ); for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ | | | | < < < < | 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 | assert( (mCovered&mSeen)==mCovered ); if( mSeen==mCovered ) break; }while( nSnippet<SizeofArray(aSnippet) ); assert( nFToken>0 ); for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ rc = fts3SnippetText(pCsr, &aSnippet[i], i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res ); } snippet_out: if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); sqlite3_free(res.z); }else{ |
︙ | ︙ | |||
947 948 949 950 951 952 953 | rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); } if( rc==SQLITE_OK ){ char aBuffer[64]; sqlite3_snprintf(sizeof(aBuffer), aBuffer, "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart ); | | | 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 | rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); } if( rc==SQLITE_OK ){ char aBuffer[64]; sqlite3_snprintf(sizeof(aBuffer), aBuffer, "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart ); rc = fts3StringAppend(&res, aBuffer, -1); } } } if( rc==SQLITE_DONE ){ rc = SQLITE_ERROR; } |
︙ | ︙ |
Changes to test/fts3snippet.test.
1 2 3 4 | set testdir [file dirname $argv0] source $testdir/tester.tcl | > > > > > > > > > > > | > > > > > > > > > < < < < < < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | # 2010 January 07 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # set testdir [file dirname $argv0] source $testdir/tester.tcl # If SQLITE_ENABLE_FTS3 is not defined, omit this file. ifcapable !fts3 { finish_test ; return } # Transform the list $L to its "normal" form. So that it can be compared to # another list with the same set of elements using [string compare]. # proc normalize {L} { set ret [list] foreach l $L {lappend ret $l} return $ret } do_test fts3snippet-1.1 { execsql { CREATE VIRTUAL TABLE ft USING fts3; INSERT INTO ft VALUES('xxx xxx xxx xxx'); } } {} do_test fts3snippet-1.2 { execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx' } } {{0 0 0 3 0 0 4 3 0 0 8 3 0 0 12 3}} do_test fts3snippet-1.3 { execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH '"xxx xxx"' } } [list [normalize { |
︙ | ︙ | |||
59 60 61 62 63 64 65 66 67 68 | 0 2 4 3 0 0 8 3 0 1 8 3 0 2 8 3 0 0 12 3 0 2 12 3 }]] finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > | 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 | 0 2 4 3 0 0 8 3 0 1 8 3 0 2 8 3 0 0 12 3 0 2 12 3 }]] do_test fts3snippet-2.1 { execsql { DROP TABLE IF EXISTS ft; CREATE VIRTUAL TABLE ft USING fts3; INSERT INTO ft VALUES('one two three four five six seven eight nine ten'); } } {} foreach {tn expr res} { 1 one "[one] two three four five..." 2 two "one [two] three four five..." 3 three "one two [three] four five..." 4 four "...two three [four] five six..." 5 five "...three four [five] six seven..." 6 six "...four five [six] seven eight..." 7 seven "...five six [seven] eight nine..." 8 eight "...six seven [eight] nine ten" 9 nine "...six seven eight [nine] ten" 10 ten "...six seven eight nine [ten]" } { do_test fts3snippet-2.2.$tn { execsql { SELECT snippet(ft, '[', ']', '...', 0, 5) FROM ft WHERE ft MATCH $expr } } [list $res] } finish_test |