Overview

Comment:     Remove the iPos parameter from the tokenizer callback. Fix the "tokenchars" and "separators" options on the simple tokenizer.
Downloads:   Tarball | ZIP archive
Timelines:   family | ancestors | descendants | both | fts5
Files:       files | file ages | folders
SHA1:        65f0262fb82dbfd9f80233ac7c3108e2
User & Date: dan 2015-01-06 19:08:26.571
Context

2015-01-07
  17:11  Add the 'rebuild' and 'delete-all' commands. (check-in: 0cb2fed525 user: dan tags: fts5)
2015-01-06
  19:08  Remove the iPos parameter from the tokenizer callback. Fix the "tokenchars" and "separators" options on the simple tokenizer. (check-in: 65f0262fb8 user: dan tags: fts5)
  14:38  Further fixes and test cases related to external content tables. (check-in: ce6a899baf user: dan tags: fts5)
Changes
Changes to ext/fts5/fts5.c.
︙ | ︙ | |||
1126 1127 1128 1129 1130 1131 1132 | }else{ i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel); } }else if( nArg>1 ){ sqlite3_value *pCmd = apVal[2 + pConfig->nCol]; if( SQLITE_NULL!=sqlite3_value_type(pCmd) ){ | | | 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 | }else{ i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel); } }else if( nArg>1 ){ sqlite3_value *pCmd = apVal[2 + pConfig->nCol]; if( SQLITE_NULL!=sqlite3_value_type(pCmd) ){ const char *z = (const char*)sqlite3_value_text(pCmd); if( pConfig->eContent!=FTS5_CONTENT_NORMAL && 0==sqlite3_stricmp("delete", z) ){ return fts5SpecialDelete(pTab, apVal, pRowid); }else{ return fts5SpecialInsert(pTab, pCmd, apVal[2 + pConfig->nCol + 1]); } |
︙ | ︙ | |||
1216 1217 1218 1219 1220 1221 1222 | return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); } static int fts5ApiTokenize( Fts5Context *pCtx, const char *pText, int nText, void *pUserData, | | | 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 | return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); } static int fts5ApiTokenize( Fts5Context *pCtx, const char *pText, int nText, void *pUserData, int (*xToken)(void*, const char*, int, int, int) ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); return sqlite3Fts5Tokenize(pTab->pConfig, pText, nText, pUserData, xToken); } static int fts5ApiPhraseCount(Fts5Context *pCtx){ |
︙ | ︙ |
Changes to ext/fts5/fts5.h.
︙ | ︙ | |||
191 192 193 194 195 196 197 | int (*xColumnCount)(Fts5Context*); int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ | | | 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 | int (*xColumnCount)(Fts5Context*); int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int) /* Callback */ ); int (*xPhraseCount)(Fts5Context*); int (*xPhraseSize)(Fts5Context*, int iPhrase); int (*xInstCount)(Fts5Context*, int *pnInst); int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); |
︙ | ︙ | |||
287 288 289 290 291 292 293 | void *pCtx, const char *pText, int nText, int (*xToken)( void *pCtx, /* Copy of 2nd argument to xTokenize() */ const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ | | < | 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 | void *pCtx, const char *pText, int nText, int (*xToken)( void *pCtx, /* Copy of 2nd argument to xTokenize() */ const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ int iEnd /* Byte offset of end of token within input text */ ) ); }; /* ** END OF CUSTOM TOKENIZERS *************************************************************************/ |
︙ | ︙ |
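The fts5.h changes above drop the trailing position argument from the xToken() callback in both the extension API and the custom-tokenizer interface, leaving five arguments: the context pointer, the token text and its length, and the start and end byte offsets of the token within the input. As a point of reference, a callback written against the revised signature might look like the sketch below; the tokenDumpCb name and the stdio output are illustrative only and are not part of the FTS5 sources.

#include <stdio.h>
#include "sqlite3.h"

/* Illustrative callback matching the revised xToken() signature. Note that
** no token position is supplied; only the byte offsets iStart and iEnd. */
static int tokenDumpCb(
  void *pCtx,                     /* Copy of 2nd argument to xTokenize() */
  const char *pToken,             /* Pointer to buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Byte offset of token within input text */
  int iEnd                        /* Byte offset of end of token within input text */
){
  fprintf((FILE*)pCtx, "%.*s [%d..%d]\n", nToken, pToken, iStart, iEnd);
  return SQLITE_OK;               /* A non-zero return is treated as an error code */
}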
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
103 104 105 106 107 108 109 | int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ | | | 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 | int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int) /* Callback */ ); void sqlite3Fts5Dequote(char *z); /* Load the contents of the %_config table */ int sqlite3Fts5ConfigLoad(Fts5Config*, int); |
︙ | ︙ |
Changes to ext/fts5/fts5_aux.c.
︙ | ︙ | |||
42 43 44 45 46 47 48 | int nInst; /* Total number of phrase instances */ /* Output variables */ int iStart; /* First token in coalesced phrase instance */ int iEnd; /* Last token in coalesced phrase instance */ }; | < < < < < < < | 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | int nInst; /* Total number of phrase instances */ /* Output variables */ int iStart; /* First token in coalesced phrase instance */ int iEnd; /* Last token in coalesced phrase instance */ }; /* ** Advance the iterator to the next coalesced phrase instance. Return ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. */ static int fts5CInstIterNext(CInstIter *pIter){ int rc = SQLITE_OK; pIter->iStart = -1; |
︙ | ︙ | |||
113 114 115 116 117 118 119 120 121 122 123 124 125 126 | /************************************************************************* ** Start of highlight() implementation. */ typedef struct HighlightContext HighlightContext; struct HighlightContext { CInstIter iter; /* Coalesced Instance Iterator */ int iRangeStart; /* First token to include */ int iRangeEnd; /* If non-zero, last token to include */ const char *zOpen; /* Opening highlight */ const char *zClose; /* Closing highlight */ const char *zIn; /* Input text */ int nIn; /* Size of input text in bytes */ int iOff; /* Current offset within zIn[] */ | > | 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | /************************************************************************* ** Start of highlight() implementation. */ typedef struct HighlightContext HighlightContext; struct HighlightContext { CInstIter iter; /* Coalesced Instance Iterator */ int iPos; /* Current token offset in zIn[] */ int iRangeStart; /* First token to include */ int iRangeEnd; /* If non-zero, last token to include */ const char *zOpen; /* Opening highlight */ const char *zClose; /* Closing highlight */ const char *zIn; /* Input text */ int nIn; /* Size of input text in bytes */ int iOff; /* Current offset within zIn[] */ |
︙ | ︙ | |||
152 153 154 155 156 157 158 | ** Tokenizer callback used by implementation of highlight() function. */ static int fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start offset of token */ | | < > | 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | ** Tokenizer callback used by implementation of highlight() function. */ static int fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start offset of token */ int iEndOff /* End offset of token */ ){ HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; int iPos = p->iPos++; if( p->iRangeEnd>0 ){ if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; } if( iPos==p->iter.iStart ){ |
︙ | ︙ |
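Because the callback no longer reports a token position, code that needs one now derives it by counting tokens as they arrive, which is why HighlightContext gains the iPos field above and fts5HighlightCb executes "int iPos = p->iPos++". The same pattern applies when tokenizing through the extension API's xTokenize() method. The sketch below is a hypothetical illustration only: the CountCtx and countTokens names are invented here, and it assumes the extension API struct is named Fts5ExtensionApi as declared in fts5.h.

#include "sqlite3.h"
#include "fts5.h"                 /* For Fts5ExtensionApi and Fts5Context */

/* Hypothetical callback context that counts tokens. */
typedef struct CountCtx CountCtx;
struct CountCtx { int nToken; };

static int countCb(void *pCtx, const char *pTok, int nTok, int iStart, int iEnd){
  ((CountCtx*)pCtx)->nToken++;    /* The running count doubles as the next token's position */
  return SQLITE_OK;
}

/* Count the tokens in buffer zText/nText using the extension API. */
static int countTokens(
  const Fts5ExtensionApi *pApi,   /* Extension API object */
  Fts5Context *pFts,              /* First argument passed to the aux. function */
  const char *zText, int nText,   /* Text to tokenize */
  int *pnToken                    /* OUT: number of tokens in zText */
){
  CountCtx ctx = {0};
  int rc = pApi->xTokenize(pFts, zText, nText, (void*)&ctx, countCb);
  *pnToken = ctx.nToken;
  return rc;
}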
Changes to ext/fts5/fts5_buffer.c.
︙ | ︙ | |||
54 55 56 57 58 59 60 | } int sqlite3Fts5Get32(const u8 *aBuf){ return (aBuf[0] << 24) + (aBuf[1] << 16) + (aBuf[2] << 8) + aBuf[3]; } void sqlite3Fts5BufferAppend32(int *pRc, Fts5Buffer *pBuf, int iVal){ | < | 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | } int sqlite3Fts5Get32(const u8 *aBuf){ return (aBuf[0] << 24) + (aBuf[1] << 16) + (aBuf[2] << 8) + aBuf[3]; } void sqlite3Fts5BufferAppend32(int *pRc, Fts5Buffer *pBuf, int iVal){ if( sqlite3Fts5BufferGrow(pRc, pBuf, 4) ) return; sqlite3Fts5Put32(&pBuf->p[pBuf->n], iVal); pBuf->n += 4; } /* ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set |
︙ | ︙ |
Changes to ext/fts5/fts5_config.c.
︙ | ︙ | |||
360 361 362 363 364 365 366 | rc = SQLITE_ERROR; }else{ pConfig->zContentRowid = fts5EscapeName(&rc, zArg); } return rc; } | | | 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 | rc = SQLITE_ERROR; }else{ pConfig->zContentRowid = fts5EscapeName(&rc, zArg); } return rc; } *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); return SQLITE_ERROR; } /* ** Allocate an instance of the default tokenizer ("simple") at ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error ** code if an error occurs. |
︙ | ︙ | |||
584 585 586 587 588 589 590 | ** because the callback returned another non-zero value, it is assumed ** to be an SQLite error code and returned to the caller. */ int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ | | | 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 | ** because the callback returned another non-zero value, it is assumed ** to be an SQLite error code and returned to the caller. */ int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int) /* Callback */ ){ return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken); } /* ** Argument pIn points to the first character in what is expected to be ** a comma-separated list of SQL literals followed by a ')' character. |
︙ | ︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 | memset(p, 0, sizeof(Fts5LookaheadReader)); p->a = a; p->n = n; fts5LookaheadReaderNext(p); return fts5LookaheadReaderNext(p); } static int fts5LookaheadReaderEof(Fts5LookaheadReader *p){ return (p->iPos==FTS5_LOOKAHEAD_EOF); } typedef struct Fts5NearTrimmer Fts5NearTrimmer; struct Fts5NearTrimmer { Fts5LookaheadReader reader; /* Input iterator */ Fts5PoslistWriter writer; /* Writer context */ Fts5Buffer *pOut; /* Output poslist */ }; | > > | 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 | memset(p, 0, sizeof(Fts5LookaheadReader)); p->a = a; p->n = n; fts5LookaheadReaderNext(p); return fts5LookaheadReaderNext(p); } #if 0 static int fts5LookaheadReaderEof(Fts5LookaheadReader *p){ return (p->iPos==FTS5_LOOKAHEAD_EOF); } #endif typedef struct Fts5NearTrimmer Fts5NearTrimmer; struct Fts5NearTrimmer { Fts5LookaheadReader reader; /* Input iterator */ Fts5PoslistWriter writer; /* Writer context */ Fts5Buffer *pOut; /* Output poslist */ }; |
︙ | ︙ | |||
1137 1138 1139 1140 1141 1142 1143 | ** Callback for tokenizing terms used by ParseTerm(). */ static int fts5ParseTokenize( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ | | < | 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 | ** Callback for tokenizing terms used by ParseTerm(). */ static int fts5ParseTokenize( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ int rc = SQLITE_OK; const int SZALLOC = 8; TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; Fts5ExprTerm *pTerm; |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 | } /* ** Close the sqlite3_blob handle used to read records from the %_data table. ** And discard any cached reads. This function is called at the end of ** a read transaction or when any sub-transaction is rolled back. */ static void fts5DataReset(Fts5Index *p){ if( p->pReader ){ sqlite3_blob_close(p->pReader); p->pReader = 0; } } /* ** Remove all records associated with segment iSegid in index iIdx. */ static void fts5DataRemoveSegment(Fts5Index *p, int iIdx, int iSegid){ i64 iFirst = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, 0); i64 iLast = FTS5_SEGMENT_ROWID(iIdx, iSegid+1, 0, 0)-1; | > > | 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 | } /* ** Close the sqlite3_blob handle used to read records from the %_data table. ** And discard any cached reads. This function is called at the end of ** a read transaction or when any sub-transaction is rolled back. */ #if 0 static void fts5DataReset(Fts5Index *p){ if( p->pReader ){ sqlite3_blob_close(p->pReader); p->pReader = 0; } } #endif /* ** Remove all records associated with segment iSegid in index iIdx. */ static void fts5DataRemoveSegment(Fts5Index *p, int iIdx, int iSegid){ i64 iFirst = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, 0); i64 iLast = FTS5_SEGMENT_ROWID(iIdx, iSegid+1, 0, 0)-1; |
︙ | ︙ |
Changes to ext/fts5/fts5_storage.c.
︙ | ︙ | |||
278 279 280 281 282 283 284 | ** Tokenization callback used when inserting tokens into the FTS index. */ static int fts5StorageInsertCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ | | < | | 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 | ** Tokenization callback used when inserting tokens into the FTS index. */ static int fts5StorageInsertCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; int iPos = pCtx->szCol++; return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken); } /* ** If a row with rowid iDel is present in the %_content table, add the ** delete-markers to the FTS index necessary to delete it. Do not actually ** remove the %_content row at this time though. |
︙ | ︙ | |||
308 309 310 311 312 313 314 315 316 317 318 319 320 321 | if( sqlite3_step(pSeek)==SQLITE_ROW ){ int iCol; Fts5InsertCtx ctx; ctx.pStorage = p; ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pSeek, iCol), sqlite3_column_bytes(pSeek, iCol), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol-1] -= (i64)ctx.szCol; | > | 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 | if( sqlite3_step(pSeek)==SQLITE_ROW ){ int iCol; Fts5InsertCtx ctx; ctx.pStorage = p; ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pSeek, iCol), sqlite3_column_bytes(pSeek, iCol), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol-1] -= (i64)ctx.szCol; |
︙ | ︙ | |||
470 471 472 473 474 475 476 477 478 479 480 481 482 483 | int iCol; Fts5InsertCtx ctx; ctx.pStorage = p; ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){ rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_value_text(apVal[iCol]), sqlite3_value_bytes(apVal[iCol]), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol] -= (i64)ctx.szCol; | > | 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 | int iCol; Fts5InsertCtx ctx; ctx.pStorage = p; ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){ ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_value_text(apVal[iCol]), sqlite3_value_bytes(apVal[iCol]), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol] -= (i64)ctx.szCol; |
︙ | ︙ | |||
647 648 649 650 651 652 653 | ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ | | < > < | 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 | ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; int iPos = pCtx->szCol++; pCtx->cksum ^= sqlite3Fts5IndexCksum( pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken ); return SQLITE_OK; } /* ** Check that the contents of the FTS index match that of the %_content ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return ** some other SQLite error code if an error occurs while attempting to |
︙ | ︙ | |||
691 692 693 694 695 696 697 698 699 700 701 702 703 704 | while( SQLITE_ROW==sqlite3_step(pScan) ){ int i; ctx.iRowid = sqlite3_column_int64(pScan, 0); ctx.szCol = 0; rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ ctx.iCol = i; rc = sqlite3Fts5Tokenize( pConfig, (const char*)sqlite3_column_text(pScan, i+1), sqlite3_column_bytes(pScan, i+1), (void*)&ctx, fts5StorageIntegrityCallback ); | > | 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 | while( SQLITE_ROW==sqlite3_step(pScan) ){ int i; ctx.iRowid = sqlite3_column_int64(pScan, 0); ctx.szCol = 0; rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ ctx.iCol = i; ctx.szCol = 0; rc = sqlite3Fts5Tokenize( pConfig, (const char*)sqlite3_column_text(pScan, i+1), sqlite3_column_bytes(pScan, i+1), (void*)&ctx, fts5StorageIntegrityCallback ); |
︙ | ︙ |
Changes to ext/fts5/fts5_tcl.c.
︙ | ︙ | |||
108 109 110 111 112 113 114 | struct F5tAuxData { Tcl_Obj *pObj; }; static int xTokenizeCb( void *pCtx, const char *zToken, int nToken, | | < | 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | struct F5tAuxData { Tcl_Obj *pObj; }; static int xTokenizeCb( void *pCtx, const char *zToken, int nToken, int iStart, int iEnd ){ F5tFunction *p = (F5tFunction*)pCtx; Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript); int rc; Tcl_IncrRefCount(pEval); Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iStart)); Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iEnd)); rc = Tcl_EvalObjEx(p->interp, pEval, 0); Tcl_DecrRefCount(pEval); return rc; } |
︙ | ︙ | |||
524 525 526 527 528 529 530 | int bSubst; const char *zInput; }; static int xTokenizeCb2( void *pCtx, const char *zToken, int nToken, | | < < | 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 | int bSubst; const char *zInput; }; static int xTokenizeCb2( void *pCtx, const char *zToken, int nToken, int iStart, int iEnd ){ F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx; if( p->bSubst ){ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement( 0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart) ); }else{ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iStart)); Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iEnd)); } return SQLITE_OK; } /* ** sqlite3_fts5_tokenize DB TOKENIZER TEXT |
︙ | ︙ | |||
633 634 635 636 637 638 639 | typedef struct F5tTokenizerContext F5tTokenizerContext; typedef struct F5tTokenizerCb F5tTokenizerCb; typedef struct F5tTokenizerModule F5tTokenizerModule; typedef struct F5tTokenizerModule F5tTokenizerInstance; struct F5tTokenizerContext { void *pCtx; | | | 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 | typedef struct F5tTokenizerContext F5tTokenizerContext; typedef struct F5tTokenizerCb F5tTokenizerCb; typedef struct F5tTokenizerModule F5tTokenizerModule; typedef struct F5tTokenizerModule F5tTokenizerInstance; struct F5tTokenizerContext { void *pCtx; int (*xToken)(void*, const char*, int, int, int); }; struct F5tTokenizerModule { Tcl_Interp *interp; Tcl_Obj *pScript; F5tTokenizerContext *pContext; }; |
︙ | ︙ | |||
689 690 691 692 693 694 695 | ckfree(pInst); } static int f5tTokenizerTokenize( Fts5Tokenizer *p, void *pCtx, const char *pText, int nText, | | | | 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 | ckfree(pInst); } static int f5tTokenizerTokenize( Fts5Tokenizer *p, void *pCtx, const char *pText, int nText, int (*xToken)(void*, const char*, int, int, int) ){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; void *pOldCtx; int (*xOldToken)(void*, const char*, int, int, int); Tcl_Obj *pEval; int rc; pOldCtx = pInst->pContext->pCtx; xOldToken = pInst->pContext->xToken; pEval = Tcl_DuplicateObj(pInst->pScript); |
︙ | ︙ | |||
729 730 731 732 733 734 735 | Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ F5tTokenizerContext *p = (F5tTokenizerContext*)clientData; int iStart; int iEnd; | < | | < | | 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 | Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ F5tTokenizerContext *p = (F5tTokenizerContext*)clientData; int iStart; int iEnd; int nToken; char *zToken; int rc; assert( p ); if( objc!=4 ){ Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END"); return TCL_ERROR; } if( p->xToken==0 ){ Tcl_AppendResult(interp, "sqlite3_fts5_token may only be used by tokenizer callback", 0 ); return TCL_ERROR; } zToken = Tcl_GetStringFromObj(objv[1], &nToken); if( Tcl_GetIntFromObj(interp, objv[2], &iStart) || Tcl_GetIntFromObj(interp, objv[3], &iEnd) ){ return TCL_ERROR; } rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd); Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE); return TCL_OK; } static void f5tDelTokenizer(void *pCtx){ F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx; Tcl_DecrRefCount(pMod->pScript); |
︙ | ︙ |
Changes to ext/fts5/fts5_tokenize.c.
︙ | ︙ | |||
12 13 14 15 16 17 18 | */ #include "fts5.h" #include <string.h> #include <assert.h> /************************************************************************** | | < < < < < < < < < < < < < < < < < < < > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > < > | | | < | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | */ #include "fts5.h" #include <string.h> #include <assert.h> /************************************************************************** ** Start of simple tokenizer implementation. */ /* ** For tokenizers with no "unicode" modifier, the set of token characters ** is the same as the set of ASCII range alphanumeric characters. */ static unsigned char aSimpleTokenChar[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00..0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10..0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20..0x2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30..0x3F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40..0x4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x50..0x5F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60..0x6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70..0x7F */ }; typedef struct SimpleTokenizer SimpleTokenizer; struct SimpleTokenizer { unsigned char aTokenChar[128]; }; static void fts5SimpleAddExceptions( SimpleTokenizer *p, const char *zArg, int bTokenChars ){ int i; for(i=0; zArg[i]; i++){ if( (zArg[i] & 0x80)==0 ){ p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars; } } } /* ** Create a "simple" tokenizer. */ static int fts5SimpleCreate( void *pCtx, const char **azArg, int nArg, Fts5Tokenizer **ppOut ){ int rc = SQLITE_OK; SimpleTokenizer *p = 0; if( nArg%2 ){ rc = SQLITE_ERROR; }else{ p = sqlite3_malloc(sizeof(SimpleTokenizer)); if( p==0 ){ rc = SQLITE_NOMEM; }else{ int i; memset(p, 0, sizeof(SimpleTokenizer)); memcpy(p->aTokenChar, aSimpleTokenChar, sizeof(aSimpleTokenChar)); for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ const char *zArg = azArg[i+1]; if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){ fts5SimpleAddExceptions(p, zArg, 1); }else if( 0==sqlite3_stricmp(azArg[i], "separators") ){ fts5SimpleAddExceptions(p, zArg, 0); }else{ rc = SQLITE_ERROR; } } } } *ppOut = (Fts5Tokenizer*)p; return rc; } /* ** Delete a "simple" tokenizer. */ static void fts5SimpleDelete(Fts5Tokenizer *p){ sqlite3_free(p); } static void simpleFold(char *aOut, const char *aIn, int nByte){ int i; for(i=0; i<nByte; i++){ char c = aIn[i]; if( c>='A' && c<='Z' ) c += 32; aOut[i] = c; } } /* ** Tokenize some text using the simple tokenizer. 
*/ static int fts5SimpleTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, const char *pText, int nText, int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) ){ SimpleTokenizer *p = (SimpleTokenizer*)pTokenizer; int rc = SQLITE_OK; int ie; int is = 0; char aFold[64]; int nFold = sizeof(aFold); char *pFold = aFold; unsigned char *a = p->aTokenChar; while( is<nText && rc==SQLITE_OK ){ int nByte; /* Skip any leading divider characters. */ while( is<nText && ((pText[is]&0x80) || a[(int)pText[is]]==0) ){ is++; } if( is==nText ) break; /* Count the token characters */ ie = is+1; while( ie<nText && ((pText[ie]&0x80)==0 && a[(int)pText[ie]] ) ){ ie++; } /* Fold to lower case */ nByte = ie-is; if( nByte>nFold ){ if( pFold!=aFold ) sqlite3_free(pFold); pFold = sqlite3_malloc(nByte*2); if( pFold==0 ){ rc = SQLITE_NOMEM; break; } nFold = nByte*2; } simpleFold(pFold, &pText[is], nByte); /* Invoke the token callback */ rc = xToken(pCtx, pFold, nByte, is, ie); is = ie+1; } if( pFold!=aFold ) sqlite3_free(pFold); if( rc==SQLITE_DONE ) rc = SQLITE_OK; return rc; } |
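With the option handling in fts5SimpleCreate() fixed, "tokenchars" and "separators" arguments passed to the simple tokenizer now take effect; the new cases in fts5tokenizer.test below exercise exactly this. As a minimal sketch of the same configuration from C (the table name t1 is arbitrary, and db is assumed to be an open handle on a build with FTS5 enabled):

#include "sqlite3.h"

/* Sketch: treat ',', '.' and ':' as token characters and 'x', 'y', 'z' as
** separators, mirroring the new tests in fts5tokenizer.test. */
static int createWithSimpleOptions(sqlite3 *db){
  const char *zSql =
    "CREATE VIRTUAL TABLE t1 USING fts5(x,"
    " tokenize = \"simple tokenchars ',.:' separators 'xyz'\")";
  return sqlite3_exec(db, zSql, 0, 0, 0);
}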
︙ | ︙ | |||
324 325 326 327 328 329 330 | /* ** Tokenize some text using a unicode61 tokenizer. */ static int fts5UnicodeTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, const char *pText, int nText, | | < | 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 | /* ** Tokenize some text using a unicode61 tokenizer. */ static int fts5UnicodeTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, const char *pText, int nText, int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) ){ Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; const unsigned char *zInput = (const unsigned char*)pText; const unsigned char *zTerm = &zInput[nText]; const unsigned char *z = zInput; int rc = SQLITE_OK; int nBuf = 0; unsigned char *zBuf = 0; unsigned char *zOut = 0; while( rc==SQLITE_OK && z<zTerm ){ int iCode; int bAlnum; const unsigned char *zStart; const unsigned char *zCode; |
︙ | ︙ | |||
374 375 376 377 378 379 380 | /* Write the new character to it */ iOut = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic); if( iOut ) WRITE_UTF8(zOut, iOut); } if( zOut>zBuf && (bAlnum==0 || z>=zTerm) ){ int ie = (bAlnum ? z : zCode) - zInput; | | < | | 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 | /* Write the new character to it */ iOut = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic); if( iOut ) WRITE_UTF8(zOut, iOut); } if( zOut>zBuf && (bAlnum==0 || z>=zTerm) ){ int ie = (bAlnum ? z : zCode) - zInput; rc = xToken(pCtx, (const char*)zBuf, zOut-zBuf, zStart-zInput, ie); zOut = zBuf; } } tokenize_finished: sqlite3_free(zBuf); return rc; } /************************************************************************** ** Start of porter stemmer implementation. */ /* Any tokens larger than this (in bytes) are passed through without ** stemming. */ #define FTS5_PORTER_MAX_TOKEN 64 typedef struct PorterTokenizer PorterTokenizer; |
︙ | ︙ | |||
448 449 450 451 452 453 454 | *ppOut = (Fts5Tokenizer*)pRet; return rc; } typedef struct PorterContext PorterContext; struct PorterContext { void *pCtx; | | < | 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 | *ppOut = (Fts5Tokenizer*)pRet; return rc; } typedef struct PorterContext PorterContext; struct PorterContext { void *pCtx; int (*xToken)(void*, const char*, int, int, int); char *aBuf; }; typedef struct PorterRule PorterRule; struct PorterRule { const char *zSuffix; int nSuffix; int (*xCond)(char *zStem, int nStem); const char *zOutput; int nOutput; }; static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){ int ret = -1; int nBuf = *pnBuf; PorterRule *p; for(p=aRule; p->zSuffix; p++){ assert( strlen(p->zSuffix)==p->nSuffix ); assert( strlen(p->zOutput)==p->nOutput ); if( nBuf<p->nSuffix ) continue; if( 0==memcmp(&aBuf[nBuf - p->nSuffix], p->zSuffix, p->nSuffix) ) break; } |
︙ | ︙ | |||
573 574 575 576 577 578 579 | } static int fts5PorterCb( void *pCtx, const char *pToken, int nToken, int iStart, | | < | 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 | } static int fts5PorterCb( void *pCtx, const char *pToken, int nToken, int iStart, int iEnd ){ PorterContext *p = (PorterContext*)pCtx; PorterRule aStep1A[] = { { "sses", 4, 0, "ss", 2 }, { "ies", 3, 0, "i", 1 }, { "ss", 2, 0, "ss", 2 }, |
︙ | ︙ | |||
712 713 714 715 716 717 718 | /* Step 5b. */ if( nBuf>1 && aBuf[nBuf-1]=='l' && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) ){ nBuf--; } | | | | | 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 | /* Step 5b. */ if( nBuf>1 && aBuf[nBuf-1]=='l' && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) ){ nBuf--; } return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd); pass_through: return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd); } /* ** Tokenize using the porter tokenizer. */ static int fts5PorterTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, const char *pText, int nText, int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) ){ PorterTokenizer *p = (PorterTokenizer*)pTokenizer; PorterContext sCtx; sCtx.xToken = xToken; sCtx.pCtx = pCtx; sCtx.aBuf = p->aBuf; return p->tokenizer.xTokenize( |
︙ | ︙ |
Name change from test/fts5_common.tcl to ext/fts5/test/fts5_common.tcl.
1 2 3 4 5 6 7 8 9 10 11 12 | # 2014 Dec 19 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # | > > | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | # 2014 Dec 19 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # if {![info exists testdir]} { set testdir [file join [file dirname [info script]] .. .. .. test] } source $testdir/tester.tcl proc fts5_test_poslist {cmd} { set res [list] for {set i 0} {$i < [$cmd xInstCount]} {incr i} { lappend res [string map {{ } .} [$cmd xInst $i]] } |
︙ | ︙ | |||
41 42 43 44 45 46 47 | set res [list] for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { lappend res [$cmd xColumnTotalSize $i] } set res } | | | 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | set res [list] for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { lappend res [$cmd xColumnTotalSize $i] } set res } proc test_append_token {varname token iStart iEnd} { upvar $varname var lappend var $token } proc fts5_test_tokenize {cmd} { set res [list] for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { set tokens [list] |
︙ | ︙ |
Changes to ext/fts5/test/fts5aa.test.
︙ | ︙ | |||
8 9 10 11 12 13 14 | # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # | < | < < | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5aa # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5ab.test.
︙ | ︙ | |||
9 10 11 12 13 14 15 | # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # | < | < < | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ab # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5ac.test.
︙ | ︙ | |||
9 10 11 12 13 14 15 | # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # | < | < < | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ac # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5ad.test.
︙ | ︙ | |||
9 10 11 12 13 14 15 | # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # | < | < < | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ad # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5ae.test.
︙ | ︙ | |||
9 10 11 12 13 14 15 | # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # | < | < < | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ae # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5af.test.
︙ | ︙ | |||
11 12 13 14 15 16 17 | # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # More specifically, the tests in this file focus on the built-in # snippet() function. # | < | < < | 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 | # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # More specifically, the tests in this file focus on the built-in # snippet() function. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5af # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5ag.test.
︙ | ︙ | |||
8 9 10 11 12 13 14 | # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # | < | < < | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ag # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5ah.test.
︙ | ︙ | |||
8 9 10 11 12 13 14 | # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # | < | < < | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ah # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5ai.test.
︙ | ︙ | |||
10 11 12 13 14 15 16 | #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # Specifically, it tests transactions and savepoints # | < | < < | 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # Specifically, it tests transactions and savepoints # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ai # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5aj.test.
︙ | ︙ | |||
12 13 14 15 16 17 18 | # focus of this script is testing the FTS5 module. # # Specifically, this tests that, provided the amount of data remains # constant, the FTS index does not grow indefinitely as rows are inserted # and deleted, # | < | < < | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | # focus of this script is testing the FTS5 module. # # Specifically, this tests that, provided the amount of data remains # constant, the FTS index does not grow indefinitely as rows are inserted # and deleted, # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5aj # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5ak.test.
︙ | ︙ | |||
10 11 12 13 14 15 16 | #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # Specifically, the auxiliary function "highlight". # | < | < < | 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # Specifically, the auxiliary function "highlight". # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ak # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5al.test.
︙ | ︙ | |||
10 11 12 13 14 15 16 | #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # Specifically, this function tests the %_config table. # | < | < < | 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # Specifically, this function tests the %_config table. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5al # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5auxdata.test.
︙ | ︙ | |||
8 9 10 11 12 13 14 | # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focusing on the fts5 xSetAuxdata() and xGetAuxdata() APIs. # | < | < < | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focusing on the fts5 xSetAuxdata() and xGetAuxdata() APIs. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5auxdata do_execsql_test 1.0 { CREATE VIRTUAL TABLE f1 USING fts5(a, b); INSERT INTO f1(rowid, a, b) VALUES(1, 'a', 'b1'); INSERT INTO f1(rowid, a, b) VALUES(2, 'a', 'b2'); INSERT INTO f1(rowid, a, b) VALUES(3, 'a', 'b3'); |
︙ | ︙ |
Changes to ext/fts5/test/fts5content.test.
1 2 3 4 5 6 7 8 9 10 11 12 13 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # | < | < < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5content #------------------------------------------------------------------------- # Contentless tables # do_execsql_test 1.1 { CREATE VIRTUAL TABLE f1 USING fts5(a, b, content=''); |
︙ | ︙ |
Changes to ext/fts5/test/fts5ea.test.
1 2 3 4 5 6 7 8 9 10 11 12 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # | < | < < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ea # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } |
︙ | ︙ |
Changes to ext/fts5/test/fts5fault1.test.
︙ | ︙ | |||
8 9 10 11 12 13 14 | # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # | < | < < | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # source [file join [file dirname [info script]] fts5_common.tcl] source $testdir/malloc_common.tcl set testprefix fts5fault1 # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts5 { finish_test return |
︙ | ︙ |
Changes to ext/fts5/test/fts5porter.test.
︙ | ︙ | |||
10 11 12 13 14 15 16 | #*********************************************************************** # # Tests focusing on the fts5 porter stemmer implementation. # # http://tartarus.org/martin/PorterStemmer/ # | < | < < | 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | #*********************************************************************** # # Tests focusing on the fts5 porter stemmer implementation. # # http://tartarus.org/martin/PorterStemmer/ # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5porter set test_vocab { a a aaron aaron abaissiez abaissiez abandon abandon abandoned abandon abase abas abash abash abate abat |
︙ | ︙ |
Changes to ext/fts5/test/fts5tokenizer.test.
︙ | ︙ | |||
8 9 10 11 12 13 14 | # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focusing on the fts5 tokenizers # | < | < < < | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focusing on the fts5 tokenizers # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5tokenizer do_execsql_test 1.0 { CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter); DROP TABLE ft1; } do_execsql_test 1.1 { |
︙ | ︙ | |||
47 48 49 50 51 52 53 | } do_execsql_test 2.1 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'embedding' } 1 do_execsql_test 2.2 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'database' } 1 do_execsql_test 2.3 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'database embedding' } 1 | < < > > > > > > > > > > > > > > > > > > > > > | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | } do_execsql_test 2.1 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'embedding' } 1 do_execsql_test 2.2 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'database' } 1 do_execsql_test 2.3 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'database embedding' } 1 proc tcl_create {args} { set ::targs $args error "failed" } sqlite3_fts5_create_tokenizer db tcl tcl_create foreach {tn directive expected} { 1 {tokenize='tcl a b c'} {a b c} 2 {tokenize='tcl ''d'' ''e'' ''f'''} {d e f} 3 {tokenize="tcl 'g' 'h' 'i'"} {g h i} 4 {tokenize = tcl} {} } { do_catchsql_test 3.$tn.1 " CREATE VIRTUAL TABLE ft2 USING fts5(x, $directive) " {1 {error in tokenizer constructor}} do_test 3.$tn.2 { set ::targs } $expected } do_catchsql_test 4.1 { CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize = tcl abc); } {1 {parse error in "tokenize = tcl abc"}} do_catchsql_test 4.2 { CREATE VIRTUAL TABLE ft2 USING fts5(x y) } {1 {parse error in "x y"}} #------------------------------------------------------------------------- # Test the "separators" and "tokenchars" options a bit. # foreach {tn tokenizer} {1 simple 2 unicode61} { reset_db set T "$tokenizer tokenchars ',.:' separators 'xyz'" execsql "CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = \"$T\")" do_execsql_test 5.$tn.1 { INSERT INTO t1 VALUES('abcxdefyghizjkl.mno,pqr:stu/vwx+yz'); } foreach {tn2 token res} { 1 abc 1 2 def 1 3 ghi 1 4 jkl {} 5 mno {} 6 pqr {} 7 stu {} 8 jkl.mno,pqr:stu 1 9 vw 1 } { do_execsql_test 5.$tn.2.$tn2 " SELECT rowid FROM t1 WHERE t1 MATCH '\"$token\"' " $res } } finish_test |
Changes to ext/fts5/test/fts5unicode.test.
︙ | ︙ | |||
8 9 10 11 12 13 14 | # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focusing on the fts5 tokenizers # | < | < < | | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focusing on the fts5 tokenizers # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5unicode proc tokenize_test {tn tokenizer input output} { uplevel [list do_test $tn [subst -nocommands { set ret {} foreach {z s e} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] { lappend ret [set z] } set ret }] [list {*}$output]] } foreach {tn t} {1 simple 2 unicode61} { |
︙ | ︙ |
Changes to ext/fts5/test/fts5unicode2.test.
︙ | ︙ | |||
10 11 12 13 14 15 16 | #************************************************************************* # # The tests in this file focus on testing the "unicode" FTS tokenizer. # # This is a modified copy of FTS4 test file "fts4_unicode.test". # | < | < < | 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | #************************************************************************* # # The tests in this file focus on testing the "unicode" FTS tokenizer. # # This is a modified copy of FTS4 test file "fts4_unicode.test". # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5unicode2 proc do_unicode_token_test {tn input res} { uplevel [list do_test $tn [list \ sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input ] [list {*}$res]] } |
︙ | ︙ | |||
37 38 39 40 41 42 43 | set input [lindex $args end-1] set res [lindex $args end] uplevel [list do_test $tn [list \ sqlite3_fts5_tokenize -subst db $tokenizer $input ] [list {*}$res]] } | | | | | | | | | | | | | | 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | set input [lindex $args end-1] set res [lindex $args end] uplevel [list do_test $tn [list \ sqlite3_fts5_tokenize -subst db $tokenizer $input ] [list {*}$res]] } do_unicode_token_test 1.0 {a B c D} {a a b B c c d D} do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \ "\uE4 \uC4 \uF6 \uD6 \uFC \uDC" do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \ "x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx" # 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s. do_unicode_token_test 1.3 "\uDF" "\uDF \uDF" do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E" do_unicode_token_test 1.5 "The quick brown fox" { the The quick quick brown brown fox fox } do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" { the The quick quick brown brown fox fox } do_unicode_token_test2 1.7 {a B c D} {a a b B c c d D} do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC" do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \ "xax x\uC4x xox x\uD6x xux x\uDCx" # Check that diacritics are removed if remove_diacritics=1 is specified. # And that they do not break tokens. do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx" # Title-case mappings work do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5" #------------------------------------------------------------------------- # set docs [list { Enhance the INSERT syntax to allow multiple rows to be inserted via the VALUES clause. } { |
︙ | ︙ | |||
259 260 261 262 263 264 265 | #------------------------------------------------------------------------- breakpoint do_unicode_token_test3 5.1 {tokenchars {}} { sqlite3_reset sqlite3_column_int } { | | | | | | | | | | | | | | | | | | | | | | | | | < | | 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | #------------------------------------------------------------------------- breakpoint do_unicode_token_test3 5.1 {tokenchars {}} { sqlite3_reset sqlite3_column_int } { sqlite3 sqlite3 reset reset sqlite3 sqlite3 column column int int } do_unicode_token_test3 5.2 {tokenchars _} { sqlite3_reset sqlite3_column_int } { sqlite3_reset sqlite3_reset sqlite3_column_int sqlite3_column_int } do_unicode_token_test3 5.3 {separators xyz} { Laotianxhorseyrunszfast } { laotian Laotian horse horse runs runs fast fast } do_unicode_token_test3 5.4 {tokenchars xyz} { Laotianxhorseyrunszfast } { laotianxhorseyrunszfast Laotianxhorseyrunszfast } do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} { sqlite3_resetxsqlite3_column_intyhonda_phantom } { sqlite3_reset sqlite3_reset sqlite3_column_int sqlite3_column_int honda_phantom honda_phantom } do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" { abc abc def def } do_unicode_token_test3 5.7 \ "tokenchars \u2444\u2445" \ "separators \u05D0\u05D1\u05D2" \ "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \ [list \ \u2444fre\u2445sh \u2444fre\u2445sh \ water water \ fish fish \ \u2445timer \u2445timer \ ] # Check that it is not possible to add a standalone diacritic codepoint # to either separators or tokenchars. do_unicode_token_test3 5.8 "separators \u0301" \ "hello\u0301world \u0301helloworld" \ "helloworld hello\u0301world helloworld helloworld" do_unicode_token_test3 5.9 "tokenchars \u0301" \ "hello\u0301world \u0301helloworld" \ "helloworld hello\u0301world helloworld helloworld" do_unicode_token_test3 5.10 "separators \u0301" \ "remove_diacritics 0" \ "hello\u0301world \u0301helloworld" \ "hello\u0301world hello\u0301world helloworld helloworld" do_unicode_token_test3 5.11 "tokenchars \u0301" \ "remove_diacritics 0" \ "hello\u0301world \u0301helloworld" \ "hello\u0301world hello\u0301world helloworld helloworld" #------------------------------------------------------------------------- proc do_tokenize {tokenizer txt} { set res [list] foreach {b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] { lappend res $b } set res } # Argument $lCodepoint must be a list of codepoints (integers) that # correspond to whitespace characters. This command creates a string |
︙ | ︙ | |||
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 | do_isspace_test 6.$T.19 $T {32 160 5760 6158} do_isspace_test 6.$T.20 $T {8192 8193 8194 8195} do_isspace_test 6.$T.21 $T {8196 8197 8198 8199} do_isspace_test 6.$T.22 $T {8200 8201 8202 8239} do_isspace_test 6.$T.23 $T {8287 12288} } #------------------------------------------------------------------------- # Test that the private use ranges are treated as alphanumeric. # foreach {tn1 c} { 1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff } { foreach {tn2 config res} { | > | | | 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 | do_isspace_test 6.$T.19 $T {32 160 5760 6158} do_isspace_test 6.$T.20 $T {8192 8193 8194 8195} do_isspace_test 6.$T.21 $T {8196 8197 8198 8199} do_isspace_test 6.$T.22 $T {8200 8201 8202 8239} do_isspace_test 6.$T.23 $T {8287 12288} } #------------------------------------------------------------------------- # Test that the private use ranges are treated as alphanumeric. # foreach {tn1 c} { 1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff } { foreach {tn2 config res} { 1 "" "hello*world hello*world" 2 "separators *" "hello hello world world" } { set config [string map [list * $c] $config] set input [string map [list * $c] "hello*world"] set output [string map [list * $c] $res] do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output } } |
︙ | ︙ |
Changes to test/tester.tcl.
︙ | ︙ | |||
1917 1918 1919 1920 1921 1922 1923 | # few test cases that deliberately corrupt database files should rescind # this setting by invoking "database_can_be_corrupt" # database_never_corrupt source $testdir/thread_common.tcl source $testdir/malloc_common.tcl | < | 1917 1918 1919 1920 1921 1922 1923 | # few test cases that deliberately corrupt database files should rescind # this setting by invoking "database_can_be_corrupt" # database_never_corrupt source $testdir/thread_common.tcl source $testdir/malloc_common.tcl |