/* ** This file contains the implementation of a custom FTS5 tokenizer. This ** tokenizer implements the following special features: ** ** * For all tokens that match the pattern "SQLITE_XXX" (case sensitive), ** "XXX" is added as a synonym for SQLITE_XXX. ** ** * For all tokens that match the pattern "sqlite3_xxx" (case sensitive), ** "xxx" is added as a synonym for sqlite3_xxx. ** ** By default, this file builds a TCL extension. But if the -DSQLITE_EXT ** compile-time option is used, then an SQLite extension is built instead. */ #ifdef SQLITE_EXT #include "sqlite3ext.h" SQLITE_EXTENSION_INIT1 #else #include #include #endif #include #include /************************************************************************* ** This is generic code copied from the FTS5 documentation. ** ** Return a pointer to the fts5_api pointer for database connection db. ** If an error occurs, return NULL and leave an error in the database ** handle (accessible using sqlite3_errcode()/errmsg()). */ fts5_api *fts5_api_from_db(sqlite3 *db){ fts5_api *pRet = 0; sqlite3_stmt *pStmt = 0; if( SQLITE_OK==sqlite3_prepare(db, "SELECT fts5(?1)", -1, &pStmt, 0) ){ sqlite3_bind_pointer(pStmt, 1, (void*)&pRet, "fts5_api_ptr", 0); sqlite3_step(pStmt); } sqlite3_finalize(pStmt); return pRet; } /************************************************************************/ /* ** Simple ranking function used by search script. Assumes the queried ** table has the following 5 indexed columns: ** ** apis, -- C APIs ** keywords, -- Keywords ** title1, -- Document title ** title2, -- Heading title, if any ** content, -- Document text ** ** This function returns the following integer values: ** ** 10000 - all phrases present in "keywords". ** 1000 - all phrases present in "keywords", "title1" or "title2". ** 100 - all phrases present in "keywords", "title1" or "title2" or "apis". ** ** It adds a bonus of 10 if either of the above and the condition ** (xRowid()>1000 && (xRowid() % 1000)==1) is true. ** */ void srankFunc( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ int nPhrase; /* Number of phrases in query */ int i; /* Used to iterate through phrases */ int rc; /* Return code */ int n1 = 0; int n2 = 0; int n3 = 0; int iScore = 0; /* Returned value */ sqlite3_int64 iRowid; /* Rowid for current row */ iRowid = pApi->xRowid(pFts); #if 0 if( iRowid<1000 ) return; #endif nPhrase = pApi->xPhraseCount(pFts); for(i=0; ixPhraseFirst(pFts, i, &iter, &ic, &io); if( rc!=SQLITE_OK ){ sqlite3_result_error(pCtx, "Error in xPhraseFirst/xPhraseNext", -1); return; } if( ic==0 ){ while( ic==0 ) pApi->xPhraseNext(pFts, &iter, &ic, &io); if( ic<0 ) ic = 0; } if( ic==1 ) n1++; if( ic==2 || ic==3 ) n2++; if( ic==0 ) n3++; } if( n1==nPhrase ){ iScore = 10000; } else if( n1+n2==nPhrase ){ iScore = 1000; } else if( n1+n2+n3==nPhrase ){ iScore = 100; } if( iScore && iRowid>1000 && (iRowid % 1000)==1 ){ iScore += 10; } sqlite3_result_int(pCtx, iScore); } typedef struct STokenizer STokenizer; typedef struct STokenCtx STokenCtx; /* ** Tokenizer type. Casts to Fts5Tokenizer. */ struct STokenizer { fts5_tokenizer porter; Fts5Tokenizer *pPorter; }; /* ** Context passed through underlying tokenizer to wrapper callback. */ struct STokenCtx { void *pCtx; int (*xToken)(void*, int, const char*, int, int, int); }; static int stokenCreate( void *pCtx, const char **azArg, int nArg, Fts5Tokenizer **ppOut ){ fts5_api *pApi = (fts5_api*)pCtx; STokenizer *p; void *pPorterCtx; int rc; /* Allocate the Fts5Tokenizer object for this tokenizer. */ p = sqlite3_malloc(sizeof(STokenizer)); if( p ){ memset(p, 0, sizeof(STokenizer)); }else{ return SQLITE_NOMEM; } /* Locate and allocate the porter tokenizer */ rc = pApi->xFindTokenizer(pApi, "porter", &pPorterCtx, &p->porter); if( rc==SQLITE_OK ){ rc = p->porter.xCreate(pPorterCtx, azArg, nArg, &p->pPorter); } /* Return the new tokenizer to the caller */ if( rc!=SQLITE_OK ){ sqlite3_free(p); p = 0; } *ppOut = (Fts5Tokenizer*)p; return rc; } static void stokenDelete(Fts5Tokenizer *pTokenizer){ STokenizer *p = (STokenizer*)pTokenizer; p->porter.xDelete(p->pPorter); sqlite3_free(p); } static int stokenTokenizeCb( void *pCtx, /* Copy of 2nd argument to xTokenize() */ int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ int iEnd /* Byte offset of end of token within input text */ ){ STokenCtx *p = (STokenCtx*)pCtx; int rc = p->xToken(p->pCtx, 0, pToken, nToken, iStart, iEnd); if( rc==SQLITE_OK && nToken>7 && 0==memcmp("sqlite_", pToken, 7) ){ rc = p->xToken( p->pCtx, FTS5_TOKEN_COLOCATED, pToken+7, nToken-7, iStart, iEnd); } if( rc==SQLITE_OK && nToken>8 && 0==memcmp("sqlite3_", pToken, 8) ){ rc = p->xToken( p->pCtx, FTS5_TOKEN_COLOCATED, pToken+8, nToken-8, iStart, iEnd); } return rc; } /* ** Tokenizer type for "html" tokenizer. Casts to Fts5Tokenizer. */ typedef struct HtmlTokenizer HtmlTokenizer; struct HtmlTokenizer { fts5_tokenizer tokenizer; Fts5Tokenizer *pTokenizer; }; static int htmlCreate( void *pCtx, const char **azArg, int nArg, Fts5Tokenizer **ppOut ){ fts5_api *pApi = (fts5_api*)pCtx; HtmlTokenizer *p = 0; int rc = SQLITE_OK; if( nArg==0 ){ rc = SQLITE_ERROR; }else{ /* Allocate the Fts5Tokenizer object for this tokenizer. */ p = sqlite3_malloc(sizeof(HtmlTokenizer)); if( p ){ memset(p, 0, sizeof(HtmlTokenizer)); }else{ return SQLITE_NOMEM; } } if( rc==SQLITE_OK ){ /* Locate and allocate the next tokenizer */ void *pNextCtx = 0; rc = pApi->xFindTokenizer(pApi, azArg[0], &pNextCtx, &p->tokenizer); if( rc==SQLITE_OK ){ rc = p->tokenizer.xCreate(pNextCtx, &azArg[1], nArg-1, &p->pTokenizer); } } /* Return the new tokenizer to the caller */ if( rc!=SQLITE_OK ){ sqlite3_free(p); p = 0; } *ppOut = (Fts5Tokenizer*)p; return rc; } static void htmlDelete(Fts5Tokenizer *pTokenizer){ HtmlTokenizer *p = (HtmlTokenizer*)pTokenizer; p->tokenizer.xDelete(p->pTokenizer); sqlite3_free(p); } static int htmlTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, int flags, /* Mask of FTS5_TOKENIZE_* flags */ const char *pText, int nText, int (*xToken)( void *pCtx, /* Copy of 2nd argument to xTokenize() */ int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ int iEnd /* Byte offset of end of token within input text */ ) ){ HtmlTokenizer *p = (HtmlTokenizer*)pTokenizer; char *zOut; int i; int bTag=0; int rc; zOut = sqlite3_malloc(nText+1); if( zOut==0 ){ return SQLITE_NOMEM; } for(i=0; i' ) bTag = 0; } rc = p->tokenizer.xTokenize(p->pTokenizer, pCtx, flags, zOut, nText, xToken); sqlite3_free(zOut); return rc; } static int stokenTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, int flags, /* Mask of FTS5_TOKENIZE_* flags */ const char *pText, int nText, int (*xToken)( void *pCtx, /* Copy of 2nd argument to xTokenize() */ int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ int iEnd /* Byte offset of end of token within input text */ ) ){ STokenizer *p = (STokenizer*)pTokenizer; int rc; if( flags==FTS5_TOKENIZE_DOCUMENT ){ STokenCtx ctx; ctx.xToken = xToken; ctx.pCtx = pCtx; rc = p->porter.xTokenize( p->pPorter, (void*)&ctx, flags, pText, nText, stokenTokenizeCb ); }else{ rc = p->porter.xTokenize(p->pPorter, pCtx, flags, pText, nText, xToken); } return rc; } static int register_tokenizer(sqlite3 *db, char **pzErr, void *p){ fts5_api *pApi; fts5_tokenizer t; int rc; pApi = fts5_api_from_db(db); if( pApi==0 ){ *pzErr = sqlite3_mprintf("fts5_api_from_db: %s", sqlite3_errmsg(db)); return SQLITE_ERROR; } t.xCreate = stokenCreate; t.xDelete = stokenDelete; t.xTokenize = stokenTokenize; rc = pApi->xCreateTokenizer(pApi, "stoken", (void*)pApi, &t, 0); if( rc==SQLITE_OK ){ t.xCreate = htmlCreate; t.xDelete = htmlDelete; t.xTokenize = htmlTokenize; rc = pApi->xCreateTokenizer(pApi, "html", (void*)pApi, &t, 0); } if( rc==SQLITE_OK ){ rc = pApi->xCreateFunction(pApi, "srank", 0, srankFunc, 0); } return rc; } #ifdef SQLITE_EXT #ifdef _WIN32 __declspec(dllexport) #endif int sqlite3_ftsext_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi ){ int rc = SQLITE_OK; SQLITE_EXTENSION_INIT2(pApi); (void)pzErrMsg; /* Unused parameter */ rc = register_tokenizer(db, 0, 0); return rc; } #else int Fts5ext_Init(Tcl_Interp *interp){ #ifdef USE_TCL_STUBS if (Tcl_InitStubs(interp, "8.4", 0) == 0) { return TCL_ERROR; } #endif sqlite3_auto_extension((void (*)(void))register_tokenizer); return TCL_OK; } #endif