Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add an implementation of snippet() and its associated mi apis to fts5. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | matchinfo |
Files: | files | file ages | folders |
SHA1: |
8d94102cd33b75d9c1f84bf5b8be5065 |
User & Date: | dan 2013-01-07 19:52:49.064 |
Context
2013-01-08
11:45 | Fix an fts5 problem to do with initializing the global size record. Also have the checksum routine ignore size records when calculating the index checksum. check-in: e7b52edf68 user: dan tags: matchinfo |
2013-01-07
19:52 | Add an implementation of snippet() and its associated mi apis to fts5. check-in: 8d94102cd3 user: dan tags: matchinfo |
2013-01-04
18:37 | Allow an fts5 tokenizer to split a single document into multiple streams (i.e. sub-fields within a single column value). Modify the matchinfo APIs so that a ranking function may handle streams and/or columns separately or otherwise. check-in: f3ac136843 user: dan tags: matchinfo |
Changes
Changes to src/fts5.c.
︙ | ︙ | |||
141 142 143 144 145 146 147 148 149 150 151 152 153 154 | /* ** Structure types used by this module. */ typedef struct Fts5Expr Fts5Expr; typedef struct Fts5ExprNode Fts5ExprNode; typedef struct Fts5List Fts5List; typedef struct Fts5Parser Fts5Parser; typedef struct Fts5ParserToken Fts5ParserToken; typedef struct Fts5Phrase Fts5Phrase; typedef struct Fts5Prefix Fts5Prefix; typedef struct Fts5Size Fts5Size; typedef struct Fts5Str Fts5Str; typedef struct Fts5Token Fts5Token; | > | 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | /* ** Structure types used by this module. */ typedef struct Fts5Expr Fts5Expr; typedef struct Fts5ExprNode Fts5ExprNode; typedef struct Fts5List Fts5List; typedef struct Fts5MatchIter Fts5MatchIter; typedef struct Fts5Parser Fts5Parser; typedef struct Fts5ParserToken Fts5ParserToken; typedef struct Fts5Phrase Fts5Phrase; typedef struct Fts5Prefix Fts5Prefix; typedef struct Fts5Size Fts5Size; typedef struct Fts5Str Fts5Str; typedef struct Fts5Token Fts5Token; |
︙ | ︙ | |||
229 230 231 232 233 234 235 | Fts5ExprNode *pLeft; Fts5ExprNode *pRight; const u8 *aPk; /* Primary key of current entry (or null) */ int nPk; /* Size of aPk[] in bytes */ }; struct Fts5Expr { | | < > < < | < < < < | 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 | Fts5ExprNode *pLeft; Fts5ExprNode *pRight; const u8 *aPk; /* Primary key of current entry (or null) */ int nPk; /* Size of aPk[] in bytes */ }; struct Fts5Expr { Fts5ExprNode *pRoot; /* Root node of expression */ int nPhrase; /* Number of Fts5Str objects in query */ Fts5Str **apPhrase; /* All Fts5Str objects */ }; /* ** FTS5 specific cursor data. */ struct Fts5Cursor { sqlite4 *db; Fts5Info *pInfo; Fts5Expr *pExpr; /* MATCH expression for this cursor */ char *zExpr; /* Full text of MATCH expression */ KVByteArray *aKey; /* Buffer for primary key */ int nKeyAlloc; /* Bytes allocated at aKey[] */ KVCursor *pCsr; /* Cursor used to retrive values */ Mem *aMem; /* Array of column values */ int bMemValid; /* True if contents of aMem[] are valid */ Fts5Size *pSz; /* Local size data */ Fts5Size *pGlobal; /* Global size data */ i64 nGlobal; /* Total number of rows in table */ int *anRow; Fts5MatchIter *pIter; /* Used by mi_match_detail() */ }; /* ** A deserialized 'size record' (see above). */ struct Fts5Size { int nCol; /* Number of columns in indexed table */ |
︙ | ︙ | |||
286 287 288 289 290 291 292 293 294 295 296 297 298 299 | int iList; /* The current entry */ int iCol; int iStream; int iOff; }; /* ** Return true for EOF, or false if the next entry is valid. */ static int fts5InstanceListNext(InstanceList *p){ int i = p->iList; int bRet = 1; | > > > > > > > > > > > | 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 | int iList; /* The current entry */ int iCol; int iStream; int iOff; }; /* ** An instance of this structure is used by the sqlite4_mi_match_detail() ** API to iterate through matches. */ struct Fts5MatchIter { int bValid; /* True if aList[] is current row */ int iCurrent; /* Current index in aList[] (or -1) */ int iMatch; /* Current iMatch value */ InstanceList *aList; /* One iterator for each phrase in expr */ }; /* ** Return true for EOF, or false if the next entry is valid. */ static int fts5InstanceListNext(InstanceList *p){ int i = p->iList; int bRet = 1; |
︙ | ︙ | |||
777 778 779 780 781 782 783 784 785 786 787 788 789 790 | sqlite4DbFree(db, pNode); } } static void fts5ExpressionFree(sqlite4 *db, Fts5Expr *pExpr){ if( pExpr ){ fts5FreeExprNode(db, pExpr->pRoot); sqlite4DbFree(db, pExpr); } } typedef struct ExprHier ExprHier; struct ExprHier { Fts5ExprNode **ppNode; | > | 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 | sqlite4DbFree(db, pNode); } } static void fts5ExpressionFree(sqlite4 *db, Fts5Expr *pExpr){ if( pExpr ){ fts5FreeExprNode(db, pExpr->pRoot); sqlite4DbFree(db, pExpr->apPhrase); sqlite4DbFree(db, pExpr); } } typedef struct ExprHier ExprHier; struct ExprHier { Fts5ExprNode **ppNode; |
︙ | ︙ | |||
833 834 835 836 837 838 839 840 841 842 843 844 845 846 | *pp = pNode; (*paHier)[*pnHier].ppNode = &pNode->pRight; (*paHier)[*pnHier].nOpen = 0; (*pnHier)++; return SQLITE4_OK; } static int fts5ParseExpression( sqlite4 *db, /* Database handle */ Fts5Tokenizer *pTokenizer, /* Tokenizer module */ sqlite4_tokenizer *p, /* Tokenizer instance */ int iRoot, /* Root page number of FTS index */ char **azCol, /* Array of column names (nul-term'd) */ | > > > > > > > > > > > > > > > | 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 | *pp = pNode; (*paHier)[*pnHier].ppNode = &pNode->pRight; (*paHier)[*pnHier].nOpen = 0; (*pnHier)++; return SQLITE4_OK; } static void fts5FindStrings(Fts5ExprNode *p, Fts5Str ***papStr){ if( p ){ if( p->eType==TOKEN_PRIMITIVE ){ int i; Fts5Str *aStr = p->pPhrase->aStr; for(i=0; i<p->pPhrase->nStr; i++){ **papStr = &aStr[i]; (*papStr)++; } } fts5FindStrings(p->pLeft, papStr); fts5FindStrings(p->pRight, papStr); } } static int fts5ParseExpression( sqlite4 *db, /* Database handle */ Fts5Tokenizer *pTokenizer, /* Tokenizer module */ sqlite4_tokenizer *p, /* Tokenizer instance */ int iRoot, /* Root page number of FTS index */ char **azCol, /* Array of column names (nul-term'd) */ |
︙ | ︙ | |||
972 973 974 975 976 977 978 979 980 981 982 | if( rc==SQLITE4_OK && *aHier[nHier-1].ppNode==0 ){ rc = SQLITE4_ERROR; } for(i=0; rc==SQLITE4_OK && i<nHier; i++){ if( aHier[i].nOpen>0 ) rc = SQLITE4_ERROR; } if( rc!=SQLITE4_OK ){ fts5ExpressionFree(db, pExpr); *pzErr = sParse.zErr; | > > > > > > > > > > > < < | > | 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 | if( rc==SQLITE4_OK && *aHier[nHier-1].ppNode==0 ){ rc = SQLITE4_ERROR; } for(i=0; rc==SQLITE4_OK && i<nHier; i++){ if( aHier[i].nOpen>0 ) rc = SQLITE4_ERROR; } if( rc==SQLITE4_OK ){ pExpr->nPhrase = nStr; pExpr->apPhrase = (Fts5Str**)sqlite4DbMallocZero(db, sizeof(Fts5Str*)*nStr); if( pExpr->apPhrase==0 ){ rc = SQLITE4_NOMEM; }else{ Fts5Str **a = pExpr->apPhrase; fts5FindStrings(pExpr->pRoot, &a); } } if( rc!=SQLITE4_OK ){ fts5ExpressionFree(db, pExpr); *pzErr = sParse.zErr; pExpr = 0; } *ppExpr = pExpr; sqlite4DbFree(db, aHier); return rc; } /* ** Search for the Fts5Tokenizer object named zName. Return a pointer to it ** if it exists, or NULL otherwise. |
︙ | ︙ | |||
1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 | nByte += nCol * sizeof(char *); } pInfo = sqlite4DbMallocZero(db, nByte); if( pInfo ){ pInfo->iDb = sqlite4SchemaToIndex(db, pIdx->pSchema); pInfo->iRoot = pIdx->tnum; pInfo->nCol = pIdx->pTable->nCol; fts5TokenizerCreate(pParse, pIdx->pFts, &pInfo->pTokenizer, &pInfo->p); if( pInfo->p==0 ){ assert( pParse->nErr ); sqlite4DbFree(db, pInfo); pInfo = 0; | > | 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 | nByte += nCol * sizeof(char *); } pInfo = sqlite4DbMallocZero(db, nByte); if( pInfo ){ pInfo->iDb = sqlite4SchemaToIndex(db, pIdx->pSchema); pInfo->iRoot = pIdx->tnum; sqlite4FindPrimaryKey(pIdx->pTable, &pInfo->iTbl); pInfo->nCol = pIdx->pTable->nCol; fts5TokenizerCreate(pParse, pIdx->pFts, &pInfo->pTokenizer, &pInfo->p); if( pInfo->p==0 ){ assert( pParse->nErr ); sqlite4DbFree(db, pInfo); pInfo = 0; |
︙ | ︙ | |||
2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 | static int fts5OpenCursors(sqlite4 *db, Fts5Info *pInfo, Fts5Cursor *pCsr){ return fts5OpenExprCursors(db, pInfo, pCsr->pExpr->pRoot); } void sqlite4Fts5Close(sqlite4 *db, Fts5Cursor *pCsr){ if( pCsr ){ fts5ExpressionFree(db, pCsr->pExpr); sqlite4DbFree(db, pCsr->aKey); sqlite4DbFree(db, pCsr->anRow); sqlite4DbFree(db, pCsr); } } static int fts5TokenAdvanceToMatch( | > | 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 | static int fts5OpenCursors(sqlite4 *db, Fts5Info *pInfo, Fts5Cursor *pCsr){ return fts5OpenExprCursors(db, pInfo, pCsr->pExpr->pRoot); } void sqlite4Fts5Close(sqlite4 *db, Fts5Cursor *pCsr){ if( pCsr ){ fts5ExpressionFree(db, pCsr->pExpr); sqlite4DbFree(db, pCsr->pIter); sqlite4DbFree(db, pCsr->aKey); sqlite4DbFree(db, pCsr->anRow); sqlite4DbFree(db, pCsr); } } static int fts5TokenAdvanceToMatch( |
︙ | ︙ | |||
2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 | assert( rc!=SQLITE4_NOTFOUND ); return rc; } int sqlite4Fts5Next(Fts5Cursor *pCsr){ sqlite4DbFree(pCsr->db, pCsr->pSz); pCsr->pSz = 0; return fts5ExprAdvance(pCsr->db, pCsr->pExpr->pRoot, 0); } int sqlite4Fts5Open( sqlite4 *db, /* Database handle */ Fts5Info *pInfo, /* Index description */ const char *zMatch, /* Match expression */ | > | 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 | assert( rc!=SQLITE4_NOTFOUND ); return rc; } int sqlite4Fts5Next(Fts5Cursor *pCsr){ sqlite4DbFree(pCsr->db, pCsr->pSz); pCsr->pSz = 0; pCsr->bMemValid = 0; return fts5ExprAdvance(pCsr->db, pCsr->pExpr->pRoot, 0); } int sqlite4Fts5Open( sqlite4 *db, /* Database handle */ Fts5Info *pInfo, /* Index description */ const char *zMatch, /* Match expression */ |
︙ | ︙ | |||
2573 2574 2575 2576 2577 2578 2579 | */ int sqlite4Fts5Valid(Fts5Cursor *pCsr){ return( pCsr->pExpr->pRoot->aPk!=0 ); } int sqlite4Fts5Pk( Fts5Cursor *pCsr, | | | | 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 | */ int sqlite4Fts5Valid(Fts5Cursor *pCsr){ return( pCsr->pExpr->pRoot->aPk!=0 ); } int sqlite4Fts5Pk( Fts5Cursor *pCsr, int iTbl, KVByteArray **paKey, KVSize *pnKey ){ int i; int nReq; const u8 *aPk; int nPk; |
︙ | ︙ | |||
2692 2693 2694 2695 2696 2697 2698 | }else{ rc = fts5CsrLoadGlobal(pCsr); if( rc==SQLITE4_OK ) *pn = pCsr->nGlobal; } return rc; } | < | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 | }else{ rc = fts5CsrLoadGlobal(pCsr); if( rc==SQLITE4_OK ) *pn = pCsr->nGlobal; } return rc; } int sqlite4_mi_column_value( sqlite4_context *pCtx, int iCol, sqlite4_value **ppVal ){ int rc = SQLITE4_OK; Fts5Cursor *pCsr = pCtx->pFts; if( pCsr==0 ){ rc = SQLITE4_MISUSE; }else{ if( pCsr->bMemValid==0 ){ sqlite4 *db = pCsr->db; Fts5Info *pInfo = pCsr->pInfo; if( pCsr->aMem==0 ){ int nByte = sizeof(Mem) * pInfo->nCol; pCsr->aMem = (Mem *)sqlite4DbMallocZero(db, nByte); if( pCsr->aMem==0 ){ rc = SQLITE4_NOMEM; }else{ int i; for(i=0; i<pInfo->nCol; i++){ pCsr->aMem[i].db = db; } } } if( pCsr->pCsr==0 && rc==SQLITE4_OK ){ KVStore *pStore = db->aDb[pInfo->iDb].pKV; rc = sqlite4KVStoreOpenCursor(pStore, &pCsr->pCsr); } if( rc==SQLITE4_OK ){ u8 *aKey = 0; int nKey; /* Primary key for current row */ const u8 *aData; int nData; /* Data record for current row */ rc = sqlite4Fts5Pk(pCsr, pInfo->iTbl, &aKey, &nKey); if( rc==SQLITE4_OK ){ rc = sqlite4KVCursorSeek(pCsr->pCsr, aKey, nKey, 0); if( rc==SQLITE4_NOTFOUND ){ rc = SQLITE4_CORRUPT_BKPT; } } if( rc==SQLITE4_OK ){ rc = sqlite4KVCursorData(pCsr->pCsr, 0, -1, &aData, &nData); } if( rc==SQLITE4_OK ){ int i; ValueDecoder *pCodec; /* The 
decoder object */ rc = sqlite4VdbeCreateDecoder(db, aData, nData, pInfo->nCol, &pCodec); for(i=0; rc==SQLITE4_OK && i<pInfo->nCol; i++){ rc = sqlite4VdbeDecodeValue(pCodec, i, 0, &pCsr->aMem[i]); } sqlite4VdbeDestroyDecoder(pCodec); } if( rc==SQLITE4_OK ) pCsr->bMemValid = 1; } } if( rc==SQLITE4_OK ){ assert( pCsr->bMemValid ); *ppVal = &pCsr->aMem[iCol]; } } return rc; } int sqlite4_mi_tokenize( sqlite4_context *pCtx, const char *zText, int nText, void *p, int(*x)(void *, int, int, const char *, int, int, int) ){ int rc = SQLITE4_OK; Fts5Cursor *pCsr = pCtx->pFts; if( pCsr==0 ){ rc = SQLITE4_MISUSE; }else{ Fts5Info *pInfo = pCsr->pInfo; rc = pInfo->pTokenizer->xTokenize(p, pInfo->p, zText, nText, x); } return rc; } static Fts5Str *fts5FindStr(Fts5ExprNode *p, int *piStr){ Fts5Str *pRet = 0; if( p->eType==TOKEN_PRIMITIVE ){ |
︙ | ︙ | |||
2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 | for(i=0; i<nStream; i++) nRow += aRow[nStream*iC + iS]; }else if( iC<nCol && iS<nStream ){ nRow = aRow[iC * nStream + iS]; } *pn = nRow; } } return rc; } /************************************************************************** *************************************************************************** ** Below this point is test code. | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 | for(i=0; i<nStream; i++) nRow += aRow[nStream*iC + iS]; }else if( iC<nCol && iS<nStream ){ nRow = aRow[iC * nStream + iS]; } *pn = nRow; } } return rc; } static void fts5IterSetCurrent(Fts5MatchIter *pIter, int nList){ InstanceList *pBest = 0; int i; for(i=0; i<nList; i++){ InstanceList *p = &pIter->aList[i]; if( fts5InstanceListEof(p)==0 ){ if( (pBest==0) || (p->iCol<pBest->iCol) || (p->iCol==pBest->iCol && p->iOff<pBest->iOff) ){ pBest = p; } } } if( pBest==0 ){ pIter->iCurrent = -1; }else{ pIter->iCurrent = pBest - pIter->aList; } } int sqlite4_mi_match_detail( sqlite4_context *pCtx, /* Context object passed to mi function */ int iMatch, /* Index of match */ int *piOff, /* OUT: Token offset of match */ int *piC, /* OUT: Column number of match iMatch */ int *piS, /* OUT: Stream number of match iMatch */ int *piP /* OUT: Phrase number of match iMatch */ ){ int rc = SQLITE4_OK; Fts5Cursor *pCsr = pCtx->pFts; if( pCsr==0 ){ rc = 
SQLITE4_MISUSE; }else{ int nPhrase = pCsr->pExpr->nPhrase; Fts5MatchIter *pIter = pCsr->pIter; if( pIter==0 ){ pCsr->pIter = pIter = (Fts5MatchIter *)sqlite4DbMallocZero( pCsr->db, sizeof(Fts5MatchIter) + sizeof(InstanceList)*nPhrase ); if( pIter ){ pIter->aList = (InstanceList *)&pIter[1]; }else{ rc = SQLITE4_NOMEM; } } if( rc==SQLITE4_OK && (pIter->bValid==0 || iMatch<pIter->iMatch) ){ int i; for(i=0; i<pCsr->pExpr->nPhrase; i++){ Fts5Str *pStr = pCsr->pExpr->apPhrase[i]; fts5InstanceListInit(pStr->aList, pStr->nList, &pIter->aList[i]); fts5InstanceListNext(&pIter->aList[i]); } pIter->iMatch = 0; fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase); } if( rc==SQLITE4_OK ){ assert( pIter->iMatch<=iMatch ); while( pIter->iCurrent>=0 && pIter->iMatch<iMatch ){ fts5InstanceListNext(&pIter->aList[pIter->iCurrent]); fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase); } if( pIter->iCurrent<0 ){ rc = SQLITE4_NOTFOUND; }else{ InstanceList *p = &pIter->aList[pIter->iCurrent]; *piOff = p->iOff; *piC = p->iCol; *piS = p->iStream; *piP = pIter->iCurrent; } } } return rc; } /************************************************************************** *************************************************************************** ** Below this point is test code. |
︙ | ︙ |
Changes to src/fts5func.c.
︙ | ︙ | |||
147 148 149 150 151 152 153 154 155 156 157 158 | if( rc==SQLITE4_OK ){ sqlite4_result_double(pCtx, rank); }else{ sqlite4_result_error_code(pCtx, rc); } } static void fts5Snippet(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){ } static int fts5SimpleTokenize( | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < | | 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 | if( rc==SQLITE4_OK ){ sqlite4_result_double(pCtx, rank); }else{ sqlite4_result_error_code(pCtx, rc); } } typedef struct SnippetCtx SnippetCtx; struct SnippetCtx { sqlite4 *db; /* Database handle */ int nToken; /* Number of tokens in snippet */ int iOff; /* First token in snippet */ u64 mask; /* Snippet mask. 
Highlight these terms */ char *zOut; /* Pointer to snippet text */ int nOut; /* Size of zOut in bytes */ int nAlloc; /* Bytes of space allocated at zOut */ int iFrom; const char *zText; /* Document to extract snippet from */ int rc; /* Set to NOMEM if OOM is encountered */ }; static void fts5SnippetAppend(SnippetCtx *p, const char *z, int n){ if( p->rc==SQLITE4_OK ){ if( (p->nOut + n) > p->nAlloc ){ int nNew = (p->nOut+n) * 2; p->zOut = sqlite4DbReallocOrFree(p->db, p->zOut, nNew); if( p->zOut==0 ){ p->rc = SQLITE4_NOMEM; return; } p->nAlloc = sqlite4DbMallocSize(p->db, p->zOut); } memcpy(&p->zOut[p->nOut], z, n); p->nOut += n; } } static int fts5SnippetCb( void *pCtx, int iStream, int iOff, const char *z, int n, int iSrc, int nSrc ){ SnippetCtx *p = (SnippetCtx *)pCtx; if( iOff<p->iOff ){ return 0; }else if( iOff>(p->iOff + p->nToken) ){ fts5SnippetAppend(p, &p->zText[p->iFrom], iSrc - p->iFrom); fts5SnippetAppend(p, "...", 3); p->iFrom = -1; return 1; }else{ int bHighlight; /* True to highlight term */ bHighlight = (p->mask & (1 << (p->iOff+p->nToken - iOff - 1))) ? 
1 : 0; if( p->iFrom==0 && p->iOff!=0 ){ p->iFrom = iSrc; fts5SnippetAppend(p, "...", 3); } if( bHighlight ){ fts5SnippetAppend(p, &p->zText[p->iFrom], iSrc - p->iFrom); fts5SnippetAppend(p, "[", 1); fts5SnippetAppend(p, &p->zText[iSrc], nSrc); fts5SnippetAppend(p, "]", 1); p->iFrom = iSrc+nSrc; } } return 0; } static int fts5SnippetText( sqlite4_context *pCtx, int iCol, int iOff, int nToken, u64 mask ){ int rc; sqlite4_value *pVal = 0; rc = sqlite4_mi_column_value(pCtx, iCol, &pVal); if( rc==SQLITE4_OK ){ SnippetCtx sCtx; int nText; nText = sqlite4_value_bytes(pVal); memset(&sCtx, 0, sizeof(sCtx)); sCtx.zText = (const char *)sqlite4_value_text(pVal); sCtx.db = sqlite4_context_db_handle(pCtx); sCtx.nToken = nToken; sCtx.iOff = iOff; sCtx.mask = mask; sqlite4_mi_tokenize(pCtx, sCtx.zText, nText, &sCtx, fts5SnippetCb); if( sCtx.rc==SQLITE4_OK && sCtx.iFrom>0 ){ fts5SnippetAppend(&sCtx, &sCtx.zText[sCtx.iFrom], nText - sCtx.iFrom); } rc = sCtx.rc; sqlite4_result_text(pCtx, sCtx.zOut, sCtx.nOut, SQLITE4_TRANSIENT); sqlite4DbFree(sCtx.db, sCtx.zOut); } return rc; } static int fts5BestSnippet( sqlite4_context *pCtx, u64 mask, /* Mask of high-priority phrases */ int nToken, int *piOff, int *piCol, u64 *pMask ){ sqlite4 *db = sqlite4_context_db_handle(pCtx); int nPhrase; int rc = SQLITE4_OK; int i; int iPrev = 0; int iPrevCol = 0; u64 *aMask; u64 lmask = (((u64)1) << nToken) - 1; int iBestOff = 0; int iBestCol = 0; int nBest = 0; u64 bmask = 0; sqlite4_mi_phrase_count(pCtx, &nPhrase); aMask = sqlite4DbMallocZero(db, sizeof(u64) * nPhrase); if( !aMask ) return SQLITE4_NOMEM; /* Iterate through all matches for all phrases */ for(i=0; rc==SQLITE4_OK; i++){ int iOff; int iCol; int iStream; int iPhrase; u64 tmask = 0; rc = sqlite4_mi_match_detail(pCtx, i, &iOff, &iCol, &iStream, &iPhrase); if( rc==SQLITE4_OK ){ int iMask; int nShift; int nScore = 0; nShift = ((iPrevCol==iCol) ? 
(iOff-iPrev) : 100); for(iMask=0; iMask<nPhrase; iMask++){ if( nShift<64){ aMask[iMask] = aMask[iMask] << nShift; }else{ aMask[iMask] = 0; } } aMask[iPhrase] = aMask[iMask] | 0x0001; for(iMask=0; iMask<nPhrase; iMask++){ if( (aMask[iMask] & lmask) ){ nScore += ((aMask[iMask] & mask) ? 100 : 1); } tmask = tmask | aMask[iMask]; } if( nScore>nBest ){ bmask = (tmask & lmask); nBest = nScore; iBestOff = iOff; iBestCol = iCol; } iPrev = iOff; iPrevCol = iCol; } } *piOff = iBestOff; *piCol = iBestCol; *pMask = bmask; sqlite4DbFree(db, aMask); return rc; } static void fts5Snippet(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){ int nToken = 15; u64 hlmask = 0; u64 mask = 0; int iOff = 0; int iCol = 0; int rc; rc = fts5BestSnippet(pCtx, mask, nToken, &iOff, &iCol, &hlmask); if( rc==SQLITE4_OK ){ rc = fts5SnippetText(pCtx, iCol, iOff, nToken, hlmask); } if( rc!=SQLITE4_OK ){ sqlite4_result_error_code(pCtx, rc); } } static int fts5SimpleTokenize( void *pCtx, sqlite4_tokenizer *p, const char *zDoc, int nDoc, int(*x)(void*, int, int, const char*, int, int, int) ){ sqlite4_env *pEnv = (sqlite4_env *)p; char *aBuf; int nBuf; |
︙ | ︙ |
Changes to src/sqlite.h.in.
︙ | ︙ | |||
4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 | int enc, void *p, void (*xFunc)(sqlite4_context*,int,sqlite4_value **), void (*xDestroy)(void *) ); /* ** Special functions that may be called from within matchinfo UDFs. All ** return an SQLite error code - SQLITE4_OK if successful, or some other ** error code otherwise. ** ** sqlite4_mi_column_count(): ** Set *pn to the number of columns in the queried table. ** | > > | 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 | int enc, void *p, void (*xFunc)(sqlite4_context*,int,sqlite4_value **), void (*xDestroy)(void *) ); /* ** CAPIREF: Matchinfo APIs. ** ** Special functions that may be called from within matchinfo UDFs. All ** return an SQLite error code - SQLITE4_OK if successful, or some other ** error code otherwise. ** ** sqlite4_mi_column_count(): ** Set *pn to the number of columns in the queried table. ** |
︙ | ︙ | |||
4460 4461 4462 4463 4464 4465 4466 | ** identified by parameters iC and iS. ** ** Parameter iP may also be negative. In this case, the output value is ** set to the total number of occurrences of all query phrases in the ** current row, subject to the constraints imposed by iC and iS. ** ** sqlite4_mi_match_detail(): | > > > > > > > > | | > > | > > > > | 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 | ** identified by parameters iC and iS. ** ** Parameter iP may also be negative. In this case, the output value is ** set to the total number of occurrences of all query phrases in the ** current row, subject to the constraints imposed by iC and iS. ** ** sqlite4_mi_match_detail(): ** This function is used to access the details of the iMatch'th match ** (of any phrase) in the current row. Matches are sorted in order of ** occurrence. If parameter iMatch is equal to or greater than the number ** of matches in the current row, SQLITE_NOTFOUND is returned. Otherwise, ** unless an error occurs, SQLITE4_OK is returned and the *piOff, *piC, *piS, ** and *piP output parameters are set to the token offset, column number, ** stream number and phrase number respectively. ** ** It is anticipated that this function be used to iterate through matches ** in order of occurrence. It is optimized so that it is fastest when ** called with the iMatch parameter set to 0, P or P+1, where P is the ** iMatch value passed to the previous call. ** ** sqlite4_mi_column_value(): ** Set *ppVal to point to an sqlite4_value object containing the value ** read from column iCol of the current row. This object is valid until ** the function callback returns. 
*/ int sqlite4_mi_column_count(sqlite4_context *, int *pn); int sqlite4_mi_phrase_count(sqlite4_context *, int *pn); int sqlite4_mi_stream_count(sqlite4_context *, int *pn); int sqlite4_mi_total_size(sqlite4_context *, int iC, int iS, int *pn); int sqlite4_mi_total_rows(sqlite4_context *, int *pn); int sqlite4_mi_row_count(sqlite4_context *, int iC, int iS, int iP, int *pn); int sqlite4_mi_size(sqlite4_context *, int iC, int iS, int *pn); int sqlite4_mi_match_count(sqlite4_context *, int iC, int iS, int iP, int *pn); int sqlite4_mi_match_detail( sqlite4_context *, int iMatch, int *piOff, int *piC, int *piS, int *piP ); int sqlite4_mi_column_value(sqlite4_context *, int iC, sqlite4_value **ppVal); int sqlite4_mi_tokenize(sqlite4_context *, const char *, int, void *, int(*x)(void *, int, int, const char *, int, int, int) ); /* ** Undo the hack that converts floating point types to integer for ** builds on processors without floating point support. */ |
︙ | ︙ |
Changes to src/sqliteInt.h.
︙ | ︙ | |||
2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 | /* ** An instance of this structure is used as the p4 argument to some fts5 ** related vdbe opcodes. */ struct Fts5Info { int iDb; /* Database containing this index */ int iRoot; /* Root page number of index */ int nCol; /* Number of columns in indexed table */ char **azCol; /* Column names for table */ Fts5Tokenizer *pTokenizer; /* Tokenizer module */ sqlite4_tokenizer *p; /* Tokenizer instance */ }; int sqlite4WalkExpr(Walker*, Expr*); | > | 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 | /* ** An instance of this structure is used as the p4 argument to some fts5 ** related vdbe opcodes. */ struct Fts5Info { int iDb; /* Database containing this index */ int iRoot; /* Root page number of index */ int iTbl; /* Root page number of indexed table */ int nCol; /* Number of columns in indexed table */ char **azCol; /* Column names for table */ Fts5Tokenizer *pTokenizer; /* Tokenizer module */ sqlite4_tokenizer *p; /* Tokenizer instance */ }; int sqlite4WalkExpr(Walker*, Expr*); |
︙ | ︙ |