Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Change the fts5 tokenizer API to allow more than one token to occupy a single position within a document. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5-incompatible |
Files: | files | file ages | folders |
SHA1: |
90b85b42f2b2dd3e939b129b7df2b822 |
User & Date: | dan 2015-08-28 19:56:47.300 |
Context
2015-08-29
| ||
15:44 | Another change to the fts5 tokenizer API. (check-in: fc71868496 user: dan tags: fts5-incompatible) | |
2015-08-28
| ||
19:56 | Change the fts5 tokenizer API to allow more than one token to occupy a single position within a document. (check-in: 90b85b42f2 user: dan tags: fts5-incompatible) | |
16:41 | Fix compiler warnings in rbu code. (check-in: 0fdc36fe35 user: dan tags: trunk) | |
Changes
Changes to ext/fts5/fts5.h.
︙ | ︙ | |||
213 214 215 216 217 218 219 | int (*xColumnCount)(Fts5Context*); int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ | | | 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 | int (*xColumnCount)(Fts5Context*); int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ ); int (*xPhraseCount)(Fts5Context*); int (*xPhraseSize)(Fts5Context*, int iPhrase); int (*xInstCount)(Fts5Context*, int *pnInst); int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); |
︙ | ︙ | |||
305 306 307 308 309 310 311 312 313 314 315 316 317 | typedef struct Fts5Tokenizer Fts5Tokenizer; typedef struct fts5_tokenizer fts5_tokenizer; struct fts5_tokenizer { int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); void (*xDelete)(Fts5Tokenizer*); int (*xTokenize)(Fts5Tokenizer*, void *pCtx, const char *pText, int nText, int (*xToken)( void *pCtx, /* Copy of 2nd argument to xTokenize() */ const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ | > | > > > > > > | | 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | typedef struct Fts5Tokenizer Fts5Tokenizer; typedef struct fts5_tokenizer fts5_tokenizer; struct fts5_tokenizer { int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); void (*xDelete)(Fts5Tokenizer*); int (*xTokenize)(Fts5Tokenizer*, void *pCtx, int flags, const char *pText, int nText, int (*xToken)( void *pCtx, /* Copy of 2nd argument to xTokenize() */ const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ int iEnd, /* Byte offset of end of token within input text */ int iPos /* Number of tokens before this one in input text */ ) ); }; #define FTS5_TOKENIZE_QUERY 0x0001 #define FTS5_TOKENIZE_PREFIX 0x0002 #define FTS5_TOKENIZE_DOCUMENT 0x0004 #define FTS5_TOKENIZE_AUX 0x0008 /* ** END OF CUSTOM TOKENIZERS *************************************************************************/ /************************************************************************* ** FTS5 EXTENSION REGISTRATION API */ typedef struct fts5_api fts5_api; struct fts5_api { int iVersion; /* Currently always set to 2 */ /* Create a new tokenizer */ int (*xCreateTokenizer)( fts5_api *pApi, const char *zName, void *pContext, fts5_tokenizer *pTokenizer, |
︙ | ︙ |
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
162 163 164 165 166 167 168 169 170 | ); void sqlite3Fts5ConfigFree(Fts5Config*); int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ | > | | 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 | ); void sqlite3Fts5ConfigFree(Fts5Config*); int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ int flags, /* FTS5_TOKENIZE_* flags */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ ); void sqlite3Fts5Dequote(char *z); /* Load the contents of the %_config table */ int sqlite3Fts5ConfigLoad(Fts5Config*, int); |
︙ | ︙ |
Changes to ext/fts5/fts5_aux.c.
︙ | ︙ | |||
147 148 149 150 151 152 153 | ** Tokenizer callback used by implementation of highlight() function. */ static int fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start offset of token */ | | > > > | | 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | ** Tokenizer callback used by implementation of highlight() function. */ static int fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start offset of token */ int iEndOff, /* End offset of token */ int iPos ){ HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; if( iPos<p->iPos ) return SQLITE_OK; p->iPos = iPos+1; if( p->iRangeEnd>0 ){ if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; } if( iPos==p->iter.iStart ){ |
︙ | ︙ |
Changes to ext/fts5/fts5_config.c.
︙ | ︙ | |||
641 642 643 644 645 646 647 648 649 | ** the callback returned SQLITE_DONE, this is not an error and this function ** still returns SQLITE_OK. Or, if the tokenization was abandoned early ** because the callback returned another non-zero value, it is assumed ** to be an SQLite error code and returned to the caller. */ int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ | > | | > > | 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 | ** the callback returned SQLITE_DONE, this is not an error and this function ** still returns SQLITE_OK. Or, if the tokenization was abandoned early ** because the callback returned another non-zero value, it is assumed ** to be an SQLite error code and returned to the caller. */ int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ int flags, /* FTS5_TOKENIZE_* flags */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ ){ if( pText==0 ) return SQLITE_OK; return pConfig->pTokApi->xTokenize( pConfig->pTok, pCtx, flags, pText, nText, xToken ); } /* ** Argument pIn points to the first character in what is expected to be ** a comma-separated list of SQL literals followed by a ')' character. ** If it actually is this, return a pointer to the ')'. Otherwise, return ** NULL to indicate a parse error. |
︙ | ︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
1337 1338 1339 1340 1341 1342 1343 | ** Callback for tokenizing terms used by ParseTerm(). */ static int fts5ParseTokenize( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ | | > | 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 | ** Callback for tokenizing terms used by ParseTerm(). */ static int fts5ParseTokenize( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd, /* End offset of token */ int iPos ){ int rc = SQLITE_OK; const int SZALLOC = 8; TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; Fts5ExprTerm *pTerm; |
︙ | ︙ | |||
1413 1414 1415 1416 1417 1418 1419 1420 | char *z = 0; memset(&sCtx, 0, sizeof(TokenCtx)); sCtx.pPhrase = pAppend; rc = fts5ParseStringFromToken(pToken, &z); if( rc==SQLITE_OK ){ sqlite3Fts5Dequote(z); | > > > | | 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 | char *z = 0; memset(&sCtx, 0, sizeof(TokenCtx)); sCtx.pPhrase = pAppend; rc = fts5ParseStringFromToken(pToken, &z); if( rc==SQLITE_OK ){ int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0); int n; sqlite3Fts5Dequote(z); n = strlen(z); rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize); } sqlite3_free(z); if( rc ){ pParse->rc = rc; fts5ExprPhraseFree(sCtx.pPhrase); sCtx.pPhrase = 0; }else if( sCtx.pPhrase ){ |
︙ | ︙ |
Changes to ext/fts5/fts5_main.c.
︙ | ︙ | |||
1494 1495 1496 1497 1498 1499 1500 | return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); } static int fts5ApiTokenize( Fts5Context *pCtx, const char *pText, int nText, void *pUserData, | | | > > | 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 | return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); } static int fts5ApiTokenize( Fts5Context *pCtx, const char *pText, int nText, void *pUserData, int (*xToken)(void*, const char*, int, int, int, int) ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); return sqlite3Fts5Tokenize( pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken ); } static int fts5ApiPhraseCount(Fts5Context *pCtx){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); } |
︙ | ︙ | |||
1654 1655 1656 1657 1658 1659 1660 | } static int fts5ColumnSizeCb( void *pContext, /* Pointer to int */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ | | > | | 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 | } static int fts5ColumnSizeCb( void *pContext, /* Pointer to int */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd, /* End offset of token */ int iPos ){ int *pCnt = (int*)pContext; *pCnt = iPos+1; return SQLITE_OK; } static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); Fts5Config *pConfig = pTab->pConfig; |
︙ | ︙ | |||
1687 1688 1689 1690 1691 1692 1693 | for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i]==0 ){ const char *z; int n; void *p = (void*)(&pCsr->aColumnSize[i]); pCsr->aColumnSize[i] = 0; rc = fts5ApiColumnText(pCtx, i, &z, &n); if( rc==SQLITE_OK ){ | | > > | 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 | for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i]==0 ){ const char *z; int n; void *p = (void*)(&pCsr->aColumnSize[i]); pCsr->aColumnSize[i] = 0; rc = fts5ApiColumnText(pCtx, i, &z, &n); if( rc==SQLITE_OK ){ rc = sqlite3Fts5Tokenize( pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb ); } } } } CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE); } if( iCol<0 ){ |
︙ | ︙ | |||
2340 2341 2342 2343 2344 2345 2346 | pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global)); if( pGlobal==0 ){ rc = SQLITE_NOMEM; }else{ void *p = (void*)pGlobal; memset(pGlobal, 0, sizeof(Fts5Global)); pGlobal->db = db; | | | 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 | pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global)); if( pGlobal==0 ){ rc = SQLITE_NOMEM; }else{ void *p = (void*)pGlobal; memset(pGlobal, 0, sizeof(Fts5Global)); pGlobal->db = db; pGlobal->api.iVersion = 2; pGlobal->api.xCreateFunction = fts5CreateAux; pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; pGlobal->api.xFindTokenizer = fts5FindTokenizer; rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api); |
︙ | ︙ |
Changes to ext/fts5/fts5_storage.c.
︙ | ︙ | |||
358 359 360 361 362 363 364 | ** Tokenization callback used when inserting tokens into the FTS index. */ static int fts5StorageInsertCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ | | > > | | 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 | ** Tokenization callback used when inserting tokens into the FTS index. */ static int fts5StorageInsertCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd, /* End offset of token */ int iPos ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; assert( iPos+1>=pCtx->szCol ); pCtx->szCol = iPos+1; return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken); } /* ** If a row with rowid iDel is present in the %_content table, add the ** delete-markers to the FTS index necessary to delete it. Do not actually ** remove the %_content row at this time though. |
︙ | ︙ | |||
390 391 392 393 394 395 396 397 398 399 400 401 402 403 | ctx.pStorage = p; ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol-1] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pSeek, iCol), sqlite3_column_bytes(pSeek, iCol), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol-1] -= (i64)ctx.szCol; } | > | 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 | ctx.pStorage = p; ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol-1] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pSeek, iCol), sqlite3_column_bytes(pSeek, iCol), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol-1] -= (i64)ctx.szCol; } |
︙ | ︙ | |||
561 562 563 564 565 566 567 568 569 570 571 572 573 574 | ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_value_text(apVal[iCol]), sqlite3_value_bytes(apVal[iCol]), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol] -= (i64)ctx.szCol; } | > | 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 | ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_value_text(apVal[iCol]), sqlite3_value_bytes(apVal[iCol]), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol] -= (i64)ctx.szCol; } |
︙ | ︙ | |||
650 651 652 653 654 655 656 657 658 659 660 661 662 663 | sqlite3Fts5BufferZero(&buf); rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid); for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pScan, ctx.iCol+1), sqlite3_column_bytes(pScan, ctx.iCol+1), (void*)&ctx, fts5StorageInsertCallback ); } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); | > | 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 | sqlite3Fts5BufferZero(&buf); rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid); for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pScan, ctx.iCol+1), sqlite3_column_bytes(pScan, ctx.iCol+1), (void*)&ctx, fts5StorageInsertCallback ); } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); |
︙ | ︙ | |||
767 768 769 770 771 772 773 774 775 776 777 778 779 780 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid); ctx.pStorage = p; } for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), sqlite3_value_bytes(apVal[ctx.iCol+2]), (void*)&ctx, fts5StorageInsertCallback ); } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); | > | 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid); ctx.pStorage = p; } for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), sqlite3_value_bytes(apVal[ctx.iCol+2]), (void*)&ctx, fts5StorageInsertCallback ); } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); |
︙ | ︙ | |||
837 838 839 840 841 842 843 | ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ | | > > | | 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 | ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd, /* End offset of token */ int iPos ){ Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; assert( iPos+1>=pCtx->szCol ); pCtx->szCol = iPos+1; pCtx->cksum ^= sqlite3Fts5IndexCksum( pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken ); return SQLITE_OK; } /* |
︙ | ︙ | |||
882 883 884 885 886 887 888 | ctx.iRowid = sqlite3_column_int64(pScan, 0); ctx.szCol = 0; rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i] ) continue; ctx.iCol = i; ctx.szCol = 0; | | | | 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 | ctx.iRowid = sqlite3_column_int64(pScan, 0); ctx.szCol = 0; rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i] ) continue; ctx.iCol = i; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pScan, i+1), sqlite3_column_bytes(pScan, i+1), (void*)&ctx, fts5StorageIntegrityCallback ); if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT; aTotalSize[i] += ctx.szCol; |
︙ | ︙ |
Changes to ext/fts5/fts5_tcl.c.
︙ | ︙ | |||
138 139 140 141 142 143 144 | struct F5tAuxData { Tcl_Obj *pObj; }; static int xTokenizeCb( void *pCtx, const char *zToken, int nToken, | | | 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 | struct F5tAuxData { Tcl_Obj *pObj; }; static int xTokenizeCb( void *pCtx, const char *zToken, int nToken, int iStart, int iEnd, int iPos ){ F5tFunction *p = (F5tFunction*)pCtx; Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript); int rc; Tcl_IncrRefCount(pEval); Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zToken, nToken)); |
︙ | ︙ | |||
581 582 583 584 585 586 587 | int bSubst; const char *zInput; }; static int xTokenizeCb2( void *pCtx, const char *zToken, int nToken, | | | 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 | int bSubst; const char *zInput; }; static int xTokenizeCb2( void *pCtx, const char *zToken, int nToken, int iStart, int iEnd, int iPos ){ F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx; if( p->bSubst ){ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement( 0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart) ); |
︙ | ︙ | |||
662 663 664 665 666 667 668 | } pRet = Tcl_NewObj(); Tcl_IncrRefCount(pRet); ctx.bSubst = (objc==5); ctx.pRet = pRet; ctx.zInput = zText; | | > > | 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 | } pRet = Tcl_NewObj(); Tcl_IncrRefCount(pRet); ctx.bSubst = (objc==5); ctx.pRet = pRet; ctx.zInput = zText; rc = tokenizer.xTokenize( pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, zText, nText, xTokenizeCb2 ); tokenizer.xDelete(pTok); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0); Tcl_DecrRefCount(pRet); return TCL_ERROR; } |
︙ | ︙ | |||
744 745 746 747 748 749 750 751 | Tcl_DecrRefCount(pInst->pScript); ckfree((char *)pInst); } static int f5tTokenizerTokenize( Fts5Tokenizer *p, void *pCtx, const char *pText, int nText, | > | | 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 | Tcl_DecrRefCount(pInst->pScript); ckfree((char *)pInst); } static int f5tTokenizerTokenize( Fts5Tokenizer *p, void *pCtx, int flags, const char *pText, int nText, int (*xToken)(void*, const char*, int, int, int, int) ){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; void *pOldCtx; int (*xOldToken)(void*, const char*, int, int, int); Tcl_Obj *pEval; int rc; |
︙ | ︙ |
Changes to ext/fts5/fts5_tokenize.c.
︙ | ︙ | |||
112 113 114 115 116 117 118 119 | /* ** Tokenize some text using the ascii tokenizer. */ static int fts5AsciiTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, const char *pText, int nText, | > | > | 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | /* ** Tokenize some text using the ascii tokenizer. */ static int fts5AsciiTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, int flags, const char *pText, int nText, int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) ){ AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer; int rc = SQLITE_OK; int ie; int is = 0; int iPos = 0; char aFold[64]; int nFold = sizeof(aFold); char *pFold = aFold; unsigned char *a = p->aTokenChar; while( is<nText && rc==SQLITE_OK ){ |
︙ | ︙ | |||
154 155 156 157 158 159 160 | break; } nFold = nByte*2; } asciiFold(pFold, &pText[is], nByte); /* Invoke the token callback */ | | | 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 | break; } nFold = nByte*2; } asciiFold(pFold, &pText[is], nByte); /* Invoke the token callback */ rc = xToken(pCtx, pFold, nByte, is, ie, iPos++); is = ie+1; } if( pFold!=aFold ) sqlite3_free(pFold); if( rc==SQLITE_DONE ) rc = SQLITE_OK; return rc; } |
︙ | ︙ | |||
381 382 383 384 385 386 387 388 | assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode); } static int fts5UnicodeTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, const char *pText, int nText, | > | > | 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 | assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode); } static int fts5UnicodeTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, int flags, const char *pText, int nText, int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) ){ Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; int rc = SQLITE_OK; unsigned char *a = p->aTokenChar; int iPos = 0; unsigned char *zTerm = (unsigned char*)&pText[nText]; unsigned char *zCsr = (unsigned char *)pText; /* Output buffer */ char *aFold = p->aFold; int nFold = p->nFold; |
︙ | ︙ | |||
471 472 473 474 475 476 477 | } zCsr++; } ie = zCsr - (unsigned char*)pText; } /* Invoke the token callback */ | | | 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 | } zCsr++; } ie = zCsr - (unsigned char*)pText; } /* Invoke the token callback */ rc = xToken(pCtx, aFold, zOut-aFold, is, ie, iPos++); } tokenize_done: if( rc==SQLITE_DONE ) rc = SQLITE_OK; return rc; } |
︙ | ︙ | |||
549 550 551 552 553 554 555 | *ppOut = (Fts5Tokenizer*)pRet; return rc; } typedef struct PorterContext PorterContext; struct PorterContext { void *pCtx; | | | 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 | *ppOut = (Fts5Tokenizer*)pRet; return rc; } typedef struct PorterContext PorterContext; struct PorterContext { void *pCtx; int (*xToken)(void*, const char*, int, int, int, int); char *aBuf; }; typedef struct PorterRule PorterRule; struct PorterRule { const char *zSuffix; int nSuffix; |
︙ | ︙ | |||
1117 1118 1119 1120 1121 1122 1123 | } static int fts5PorterCb( void *pCtx, const char *pToken, int nToken, int iStart, | | > | 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 | } static int fts5PorterCb( void *pCtx, const char *pToken, int nToken, int iStart, int iEnd, int iPos ){ PorterContext *p = (PorterContext*)pCtx; char *aBuf; int nBuf; if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through; |
︙ | ︙ | |||
1171 1172 1173 1174 1175 1176 1177 | /* Step 5b. */ if( nBuf>1 && aBuf[nBuf-1]=='l' && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) ){ nBuf--; } | | | > | | | 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 | /* Step 5b. */ if( nBuf>1 && aBuf[nBuf-1]=='l' && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) ){ nBuf--; } return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd, iPos); pass_through: return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd, iPos); } /* ** Tokenize using the porter tokenizer. */ static int fts5PorterTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, int flags, const char *pText, int nText, int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) ){ PorterTokenizer *p = (PorterTokenizer*)pTokenizer; PorterContext sCtx; sCtx.xToken = xToken; sCtx.pCtx = pCtx; sCtx.aBuf = p->aBuf; return p->tokenizer.xTokenize( p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb ); } /* ** Register all built-in tokenizers with FTS5. */ int sqlite3Fts5TokenizerInit(fts5_api *pApi){ |
︙ | ︙ |
Changes to ext/fts5/test/fts5matchinfo.test.
︙ | ︙ | |||
351 352 353 354 355 356 357 | GROUP BY t10.rowid ORDER BY 1; } {1 1 one 2 2 two 3 3 three} #--------------------------------------------------------------------------- # Test the 'y' matchinfo flag # | < > | | 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 | GROUP BY t10.rowid ORDER BY 1; } {1 1 one 2 2 two 3 3 three} #--------------------------------------------------------------------------- # Test the 'y' matchinfo flag # reset_db sqlite3_fts5_register_matchinfo db do_execsql_test 11.0 { CREATE VIRTUAL TABLE tt USING fts5(x, y); INSERT INTO tt VALUES('c d a c d d', 'e a g b d a'); -- 1 INSERT INTO tt VALUES('c c g a e b', 'c g d g e c'); -- 2 INSERT INTO tt VALUES('b e f d e g', 'b a c b c g'); -- 3 INSERT INTO tt VALUES('a c f f g d', 'd b f d e g'); -- 4 INSERT INTO tt VALUES('g a c f c f', 'd g g b c c'); -- 5 INSERT INTO tt VALUES('g a c e b b', 'd b f b g g'); -- 6 INSERT INTO tt VALUES('f d a a f c', 'e e a d c f'); -- 7 |
︙ | ︙ | |||
428 429 430 431 432 433 434 | SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 do_execsql_test 11.1.$tn.2 { SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 } | < < > | < | 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 | SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 do_execsql_test 11.1.$tn.2 { SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 } #--------------------------------------------------------------------------- # Test the 'b' matchinfo flag # reset_db sqlite3_fts5_register_matchinfo db db func mit mit do_test 12.0 { set cols [list] for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" } execsql "CREATE VIRTUAL TABLE tt USING fts5([join $cols ,])" } {} do_execsql_test 12.1 { INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc'); SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc'; } [list [list [expr 1<<4] [expr 1<<(45-32)]]] finish_test |