Index: ext/fts5/extract_api_docs.tcl ================================================================== --- ext/fts5/extract_api_docs.tcl +++ ext/fts5/extract_api_docs.tcl @@ -106,17 +106,19 @@ regexp {[*][*](.*)} $line -> line if {[regexp {^ ?x.*:} $line]} { append res "
$line

\n" continue } + if {[regexp {SYNONYM SUPPORT} $line]} { + set line "

Synonym Support

" + } if {[string trim $line] == ""} { append res "

\n" } else { append res "$line\n" } } - append res "\n" set res } proc get_api_docs {data} { @@ -206,10 +208,14 @@ output [get_fts5_struct $data "typedef struct fts5_api" "^\};"] } fts5_tokenizer { output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"] + output [get_fts5_struct $data \ + "Flags that may be passed as the third argument to xTokenize()" \ + "#define FTS5_TOKEN_COLOCATED" + ] } fts5_extension { output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"] } Index: ext/fts5/fts5.h ================================================================== --- ext/fts5/fts5.h +++ ext/fts5/fts5.h @@ -215,11 +215,11 @@ int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ - int (*xToken)(void*, const char*, int, int, int) /* Callback */ + int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ ); int (*xPhraseCount)(Fts5Context*); int (*xPhraseSize)(Fts5Context*, int iPhrase); @@ -276,21 +276,49 @@ ** allocated using xCreate(). Fts5 guarantees that this function will ** be invoked exactly once for each successful call to xCreate(). ** ** xTokenize: ** This function is expected to tokenize the nText byte string indicated -** by argument pText. pText may not be nul-terminated. The first argument -** passed to this function is a pointer to an Fts5Tokenizer object returned -** by an earlier call to xCreate(). +** by argument pText. pText may or may not be nul-terminated. The first +** argument passed to this function is a pointer to an Fts5Tokenizer object +** returned by an earlier call to xCreate(). +** +** The second argument indicates the reason that FTS5 is requesting +** tokenization of the supplied text. This is always one of the following +** four values: +** +**

** ** For each token in the input string, the supplied callback xToken() must ** be invoked. The first argument to it should be a copy of the pointer -** passed as the second argument to xTokenize(). The next two arguments -** are a pointer to a buffer containing the token text, and the size of -** the token in bytes. The 4th and 5th arguments are the byte offsets of -** the first byte of and first byte immediately following the text from +** passed as the second argument to xTokenize(). The third and fourth +** arguments are a pointer to a buffer containing the token text, and the +** size of the token in bytes. The 5th and 6th arguments are the byte offsets +** of the first byte of and first byte immediately following the text from ** which the token is derived within the input. +** +** The second argument passed to the xToken() callback ("tflags") should +** normally be set to 0. The exception is if the tokenizer supports +** synonyms. In this case see the discussion below for details. ** ** FTS5 assumes the xToken() callback is invoked for each token in the ** order that they occur within the input text. ** ** If an xToken() callback returns any value other than SQLITE_OK, then @@ -299,39 +327,157 @@ ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, ** if an error occurs with the xTokenize() implementation itself, it ** may abandon the tokenization and return any error code other than ** SQLITE_OK or SQLITE_DONE. ** +** SYNONYM SUPPORT +** +** Custom tokenizers may also support synonyms. Consider a case in which a +** user wishes to query for a phrase such as "first place". Using the +** built-in tokenizers, the FTS5 query 'first + place' will match instances +** of "first place" within the document set, but not alternative forms +** such as "1st place". In some applications, it would be better to match +** all instances of "first place" or "1st place" regardless of which form +** the user specified in the MATCH query text. 
+** +** There are several ways to approach this in FTS5: +** +**
  1. By mapping all synonyms to a single token. In this case, using the +** above example, this means that the tokenizer returns the +** same token for inputs "first" and "1st". Say that token is in +** fact "first", so that when the user inserts the document "I won +** 1st place" entries are added to the index for tokens "i", "won", +** "first" and "place". If the user then queries for '1st + place', +** the tokenizer substitutes "first" for "1st" and the query works +** as expected. +** +**
  2. By querying the index for all synonyms of each query term separately. +** In this case, when tokenizing query text, the tokenizer may +** provide multiple synonyms for a single term within the document. +** FTS5 then queries the index for each synonym individually. For +** example, faced with the query: +** +** +** ... MATCH 'first place' +** +** the tokenizer offers both "1st" and "first" as synonyms for the +** first token in the MATCH query and FTS5 effectively runs a query +** similar to: +** +** +** ... MATCH '(first OR 1st) place' +** +** except that, for the purposes of auxiliary functions, the query +** still appears to contain just two phrases - "(first OR 1st)" +** being treated as a single phrase. +** +**
  3. By adding multiple synonyms for a single term to the FTS index. +** Using this method, when tokenizing document text, the tokenizer +** provides multiple synonyms for each token. So that when a +** document such as "I won first place" is tokenized, entries are +** added to the FTS index for "i", "won", "first", "1st" and +** "place". +** +** This way, even if the tokenizer does not provide synonyms +** when tokenizing query text (it should not - to do so would be +** inefficient), it doesn't matter if the user queries for +** 'first + place' or '1st + place', as there are entries in the +** FTS index corresponding to both forms of the first token. +**
+** +** Whether it is parsing document or query text, any call to xToken that +** specifies a tflags argument with the FTS5_TOKEN_COLOCATED bit set +** is considered to supply a synonym for the previous token. For example, +** when parsing the document "I won first place", a tokenizer that supports +** synonyms would call xToken() 5 times, as follows: +** +** +** xToken(pCtx, 0, "i", 1, 0, 1); +** xToken(pCtx, 0, "won", 3, 2, 5); +** xToken(pCtx, 0, "first", 5, 6, 11); +** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11); +** xToken(pCtx, 0, "place", 5, 12, 17); +** +** +** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time +** xToken() is called. Multiple synonyms may be specified for a single token +** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. +** There is no limit to the number of synonyms that may be provided for a +** single token. +** +** In many cases, method (1) above is the best approach. It does not add +** extra data to the FTS index or require FTS5 to query for multiple terms, +** so it is efficient in terms of disk space and query speed. However, it +** does not support prefix queries very well. If, as suggested above, the +** token "first" is substituted for "1st" by the tokenizer, then the query: +** +** +** ... MATCH '1s*' +** +** will not match documents that contain the token "1st" (as the tokenizer +** will probably not map "1s" to any prefix of "first"). +** +** For full prefix support, method (3) may be preferred. In this case, +** because the index contains entries for both "first" and "1st", prefix +** queries such as 'fi*' or '1s*' will match correctly. However, because +** extra entries are added to the FTS index, this method uses more space +** within the database. +** +** Method (2) offers a midpoint between (1) and (3). 
Using this method, +** a query such as '1s*' will match documents that contain the literal +** token "1st", but not "first" (assuming the tokenizer is not able to +** provide synonyms for prefixes). However, a non-prefix query like '1st' +** will match against "1st" and "first". This method does not require +** extra disk space, as no extra entries are added to the FTS index. +** On the other hand, it may require more CPU cycles to run MATCH queries, +** as separate queries of the FTS index are required for each synonym. +** +** When using methods (2) or (3), it is important that the tokenizer only +** provide synonyms when tokenizing document text (method (3)) or query +** text (method (2)), not both. Doing both will not cause any errors, but is +** inefficient. */ typedef struct Fts5Tokenizer Fts5Tokenizer; typedef struct fts5_tokenizer fts5_tokenizer; struct fts5_tokenizer { int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); void (*xDelete)(Fts5Tokenizer*); int (*xTokenize)(Fts5Tokenizer*, void *pCtx, + int flags, /* Mask of FTS5_TOKENIZE_* flags */ const char *pText, int nText, int (*xToken)( void *pCtx, /* Copy of 2nd argument to xTokenize() */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ int iEnd /* Byte offset of end of token within input text */ ) ); }; +/* Flags that may be passed as the third argument to xTokenize() */ +#define FTS5_TOKENIZE_QUERY 0x0001 +#define FTS5_TOKENIZE_PREFIX 0x0002 +#define FTS5_TOKENIZE_DOCUMENT 0x0004 +#define FTS5_TOKENIZE_AUX 0x0008 + +/* Flags that may be passed by the tokenizer implementation back to FTS5 +** as the second argument to the supplied xToken callback. */ +#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. 
token */ + /* ** END OF CUSTOM TOKENIZERS *************************************************************************/ /************************************************************************* ** FTS5 EXTENSION REGISTRATION API */ typedef struct fts5_api fts5_api; struct fts5_api { - int iVersion; /* Currently always set to 1 */ + int iVersion; /* Currently always set to 2 */ /* Create a new tokenizer */ int (*xCreateTokenizer)( fts5_api *pApi, const char *zName, Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -164,13 +164,14 @@ int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ + int flags, /* FTS5_TOKENIZE_* flags */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ - int (*xToken)(void*, const char*, int, int, int) /* Callback */ + int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ ); void sqlite3Fts5Dequote(char *z); /* Load the contents of the %_config table */ @@ -232,12 +233,14 @@ int iCol; /* If (iCol>=0), this column only */ const u8 *a; /* Position list to iterate through */ int n; /* Size of buffer at a[] in bytes */ int i; /* Current offset in a[] */ + u8 bFlag; /* For client use (any custom purpose) */ + /* Output variables */ - int bEof; /* Set to true at EOF */ + u8 bEof; /* Set to true at EOF */ i64 iPos; /* (iCol<<32) + iPos */ }; int sqlite3Fts5PoslistReaderInit( int iCol, /* If (iCol>=0), this column only */ const u8 *a, int n, /* Poslist buffer to iterate through */ @@ -379,13 +382,13 @@ */ int sqlite3Fts5IndexErrcode(Fts5Index*); void sqlite3Fts5IndexReset(Fts5Index*); /* -** Get or set the "averages" record. +** Get or set the "averages" values. 
*/ -int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf); +int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize); int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); /* ** Functions called by the storage module as part of integrity-check. */ @@ -594,11 +597,11 @@ int sqlite3Fts5ExprPhraseCount(Fts5Expr*); int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); -int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**); +int sqlite3Fts5ExprClonePhrase(Fts5Config*, Fts5Expr*, int, Fts5Expr**); /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written ** C code in this module. The interfaces below this point are called by ** the parser code in fts5parse.y. */ Index: ext/fts5/fts5_aux.c ================================================================== --- ext/fts5/fts5_aux.c +++ ext/fts5/fts5_aux.c @@ -146,18 +146,22 @@ /* ** Tokenizer callback used by implementation of highlight() function. 
*/ static int fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start offset of token */ int iEndOff /* End offset of token */ ){ HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; - int iPos = p->iPos++; + int iPos; + + if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK; + iPos = p->iPos++; if( p->iRangeEnd>0 ){ if( iPosiRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; } Index: ext/fts5/fts5_config.c ================================================================== --- ext/fts5/fts5_config.c +++ ext/fts5/fts5_config.c @@ -643,16 +643,19 @@ ** because the callback returned another non-zero value, it is assumed ** to be an SQLite error code and returned to the caller. */ int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ + int flags, /* FTS5_TOKENIZE_* flags */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ - int (*xToken)(void*, const char*, int, int, int) /* Callback */ + int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ ){ if( pText==0 ) return SQLITE_OK; - return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken); + return pConfig->pTokApi->xTokenize( + pConfig->pTok, pCtx, flags, pText, nText, xToken + ); } /* ** Argument pIn points to the first character in what is expected to be ** a comma-separated list of SQL literals followed by a ')' character. Index: ext/fts5/fts5_expr.c ================================================================== --- ext/fts5/fts5_expr.c +++ ext/fts5/fts5_expr.c @@ -20,10 +20,12 @@ /* ** All token types in the generated fts5parse.h file are greater than 0. 
*/ #define FTS5_EOF 0 +#define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) + typedef struct Fts5ExprTerm Fts5ExprTerm; /* ** Functions generated by lemon from fts5parse.y. */ @@ -71,10 +73,11 @@ */ struct Fts5ExprTerm { int bPrefix; /* True for a prefix term */ char *zTerm; /* nul-terminated term */ Fts5IndexIter *pIter; /* Iterator for this term */ + Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ }; /* ** A phrase. One or more terms that must appear in a contiguous sequence ** within a document for it to match. @@ -179,10 +182,14 @@ break; } default: { const char *z2; + if( sqlite3Fts5IsBareword(z[0])==0 ){ + sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z); + return FTS5_EOF; + } tok = FTS5_STRING; for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); pToken->n = (z2 - z); if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; @@ -242,83 +249,10 @@ sqlite3_free(sParse.apPhrase); *pzErr = sParse.zErr; return sParse.rc; } -/* -** Create a new FTS5 expression by cloning phrase iPhrase of the -** expression passed as the second argument. 
-*/ -int sqlite3Fts5ExprPhraseExpr( - Fts5Config *pConfig, - Fts5Expr *pExpr, - int iPhrase, - Fts5Expr **ppNew -){ - int rc = SQLITE_OK; /* Return code */ - Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ - Fts5ExprPhrase *pCopy; /* Copy of pOrig */ - Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ - - pOrig = pExpr->apExprPhrase[iPhrase]; - pCopy = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(&rc, - sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm - ); - if( pCopy ){ - int i; /* Used to iterate through phrase terms */ - Fts5ExprPhrase **apPhrase; - Fts5ExprNode *pNode; - Fts5ExprNearset *pNear; - - pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); - apPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, - sizeof(Fts5ExprPhrase*) - ); - pNode = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode)); - pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, - sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*) - ); - - for(i=0; inTerm; i++){ - pCopy->aTerm[i].zTerm = sqlite3Fts5Strndup(&rc, pOrig->aTerm[i].zTerm,-1); - pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; - } - - if( rc==SQLITE_OK ){ - /* All the allocations succeeded. Put the expression object together. */ - pNew->pIndex = pExpr->pIndex; - pNew->pRoot = pNode; - pNew->nPhrase = 1; - pNew->apExprPhrase = apPhrase; - pNew->apExprPhrase[0] = pCopy; - - pNode->eType = (pOrig->nTerm==1 ? FTS5_TERM : FTS5_STRING); - pNode->pNear = pNear; - - pNear->nPhrase = 1; - pNear->apPhrase[0] = pCopy; - - pCopy->nTerm = pOrig->nTerm; - pCopy->pNode = pNode; - }else{ - /* At least one allocation failed. Free them all. */ - for(i=0; inTerm; i++){ - sqlite3_free(pCopy->aTerm[i].zTerm); - } - sqlite3_free(pCopy); - sqlite3_free(pNear); - sqlite3_free(pNode); - sqlite3_free(apPhrase); - sqlite3_free(pNew); - pNew = 0; - } - } - - *ppNew = pNew; - return rc; -} - /* ** Free the expression node object passed as the only argument. 
*/ void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ if( p ){ @@ -347,10 +281,119 @@ for(i=0; inCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } + +/* +** Argument pTerm must be a synonym iterator. Return the current rowid +** that it points to. +*/ +static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){ + i64 iRet = 0; + int bRetValid = 0; + Fts5ExprTerm *p; + + assert( pTerm->pSynonym ); + assert( bDesc==0 || bDesc==1 ); + for(p=pTerm; p; p=p->pSynonym){ + if( 0==sqlite3Fts5IterEof(p->pIter) ){ + i64 iRowid = sqlite3Fts5IterRowid(p->pIter); + if( bRetValid==0 || (bDesc!=(iRowidpSynonym ); + for(p=pTerm; p; p=p->pSynonym){ + Fts5IndexIter *pIter = p->pIter; + if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){ + const u8 *a; + int n; + i64 dummy; + rc = sqlite3Fts5IterPoslist(pIter, &a, &n, &dummy); + if( rc!=SQLITE_OK ) goto synonym_poslist_out; + if( nIter==nAlloc ){ + int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; + Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte); + if( aNew==0 ){ + rc = SQLITE_NOMEM; + goto synonym_poslist_out; + } + memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); + nAlloc = nAlloc*2; + if( aIter!=aStatic ) sqlite3_free(aIter); + aIter = aNew; + } + sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[nIter]); + assert( aIter[nIter].bEof==0 ); + nIter++; + } + } + + assert( *pbDel==0 ); + if( nIter==1 ){ + *pa = (u8*)aIter[0].a; + *pn = aIter[0].n; + }else{ + Fts5PoslistWriter writer = {0}; + Fts5Buffer buf = {0,0,0}; + i64 iPrev = -1; + while( 1 ){ + int i; + i64 iMin = FTS5_LARGEST_INT64; + for(i=0; inTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( !aIter ) return SQLITE_NOMEM; } + memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm); /* Initialize a term iterator for each term in the phrase */ for(i=0; inTerm; i++){ + Fts5ExprTerm *pTerm = 
&pPhrase->aTerm[i]; i64 dummy; - int n; - const u8 *a; - rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy); - if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){ - goto ismatch_out; + int n = 0; + int bFlag = 0; + const u8 *a = 0; + if( pTerm->pSynonym ){ + rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, &bFlag, (u8**)&a, &n); + }else{ + rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy); } + if( rc!=SQLITE_OK ) goto ismatch_out; + sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]); + aIter[i].bFlag = bFlag; + if( aIter[i].bEof ) goto ismatch_out; } while( 1 ){ int bMatch; i64 iPos = aIter[0].iPos; @@ -429,10 +480,13 @@ } } ismatch_out: *pbMatch = (pPhrase->poslist.n>0); + for(i=0; inTerm; i++){ + if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a); + } if( aIter!=aStatic ) sqlite3_free(aIter); return rc; } typedef struct Fts5LookaheadReader Fts5LookaheadReader; @@ -596,21 +650,59 @@ Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ int bFromValid, i64 iFrom ){ - Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; + Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; int rc; - assert( Fts5NodeIsString(pNode) ); - if( bFromValid ){ - rc = sqlite3Fts5IterNextFrom(pIter, iFrom); + if( pTerm->pSynonym ){ + int bEof = 1; + Fts5ExprTerm *p; + + /* Find the firstest rowid any synonym points to. */ + i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); + + /* Advance each iterator that currently points to iRowid. Or, if iFrom + ** is valid - each iterator that points to a rowid before iFrom. 
*/ + for(p=pTerm; p; p=p->pSynonym){ + if( sqlite3Fts5IterEof(p->pIter)==0 ){ + i64 ii = sqlite3Fts5IterRowid(p->pIter); + if( ii==iRowid + || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) + ){ + if( bFromValid ){ + rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); + }else{ + rc = sqlite3Fts5IterNext(p->pIter); + } + if( rc!=SQLITE_OK ) break; + if( sqlite3Fts5IterEof(p->pIter)==0 ){ + bEof = 0; + } + }else{ + bEof = 0; + } + } + } + + /* Set the EOF flag if either all synonym iterators are at EOF or an + ** error has occurred. */ + pNode->bEof = (rc || bEof); }else{ - rc = sqlite3Fts5IterNext(pIter); + Fts5IndexIter *pIter = pTerm->pIter; + + assert( Fts5NodeIsString(pNode) ); + if( bFromValid ){ + rc = sqlite3Fts5IterNextFrom(pIter, iFrom); + }else{ + rc = sqlite3Fts5IterNext(pIter); + } + + pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); } - pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); return rc; } /* ** Advance iterator pIter until it points to a value equal to or laster @@ -644,10 +736,39 @@ } *piLast = iRowid; return 0; } + +static int fts5ExprSynonymAdvanceto( + Fts5ExprTerm *pTerm, /* Term iterator to advance */ + int bDesc, /* True if iterator is "rowid DESC" */ + i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ + int *pRc /* OUT: Error code */ +){ + int rc = SQLITE_OK; + i64 iLast = *piLast; + Fts5ExprTerm *p; + int bEof = 0; + + for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){ + if( sqlite3Fts5IterEof(p->pIter)==0 ){ + i64 iRowid = sqlite3Fts5IterRowid(p->pIter); + if( (bDesc==0 && iLast>iRowid) || (bDesc && iLastpIter, iLast); + } + } + } + + if( rc!=SQLITE_OK ){ + *pRc = rc; + bEof = 1; + }else{ + *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof); + } + return bEof; +} /* ** IN/OUT parameter (*pa) points to a position list n bytes in size. 
If ** the position list contains entries for column iCol, then (*pa) is set ** to point to the sub-position-list for that column and the number of @@ -715,13 +836,13 @@ /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && inPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - if( pPhrase->nTerm>1 || pNear->pColset ){ + if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){ int bMatch = 0; - rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch); + rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ rc = sqlite3Fts5IterPoslistBuffer( pPhrase->aTerm[0].pIter, &pPhrase->poslist ); @@ -753,10 +874,11 @@ int nPos; int rc; assert( pNode->eType==FTS5_TERM ); assert( pNear->nPhrase==1 && pPhrase->nTerm==1 ); + assert( pPhrase->aTerm[0].pSynonym==0 ); rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); /* If the term may match any column, then this must be a match. ** Return immediately in this case. Otherwise, try to find the @@ -799,73 +921,103 @@ Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; int rc = SQLITE_OK; i64 iLast; /* Lastest rowid any iterator points to */ int i, j; /* Phrase and token index, respectively */ int bMatch; /* True if all terms are at the same rowid */ + const int bDesc = pExpr->bDesc; - assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 ); + /* Check that this node should not be FTS5_TERM */ + assert( pNear->nPhrase>1 + || pNear->apPhrase[0]->nTerm>1 + || pNear->apPhrase[0]->aTerm[0].pSynonym + ); /* Initialize iLast, the "lastest" rowid any iterator points to. If the ** iterator skips through rowids in the default ascending order, this means ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it ** means the minimum rowid. 
*/ - iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter); + if( pLeft->aTerm[0].pSynonym ){ + iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0); + }else{ + iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter); + } do { bMatch = 1; for(i=0; inPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; for(j=0; jnTerm; j++){ - Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; - i64 iRowid = sqlite3Fts5IterRowid(pIter); - if( iRowid!=iLast ) bMatch = 0; - if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){ - return rc; + Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; + if( pTerm->pSynonym ){ + Fts5ExprTerm *p; + int bEof = 1; + i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0); + if( iRowid==iLast ) continue; + bMatch = 0; + if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){ + pNode->bEof = 1; + return rc; + } + }else{ + Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; + i64 iRowid = sqlite3Fts5IterRowid(pIter); + if( iRowid==iLast ) continue; + bMatch = 0; + if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ + return rc; + } } } } }while( bMatch==0 ); - pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode)); pNode->iRowid = iLast; + pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode)); return rc; } /* ** Initialize all term iterators in the pNear object. If any term is found -** to match no documents at all, set *pbEof to true and return immediately, -** without initializing any further iterators. +** to match no documents at all, return immediately without initializing any +** further iterators. 
*/ static int fts5ExprNearInitAll( Fts5Expr *pExpr, Fts5ExprNode *pNode ){ Fts5ExprNearset *pNear = pNode->pNear; - Fts5ExprTerm *pTerm; - Fts5ExprPhrase *pPhrase; int i, j; int rc = SQLITE_OK; for(i=0; rc==SQLITE_OK && inPhrase; i++){ - pPhrase = pNear->apPhrase[i]; + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; for(j=0; jnTerm; j++){ - pTerm = &pPhrase->aTerm[j]; - if( pTerm->pIter ){ - sqlite3Fts5IterClose(pTerm->pIter); - pTerm->pIter = 0; - } - rc = sqlite3Fts5IndexQuery( - pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), - (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | - (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), - &pTerm->pIter - ); - assert( rc==SQLITE_OK || pTerm->pIter==0 ); - if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){ + Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; + Fts5ExprTerm *p; + int bEof = 1; + + for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){ + if( p->pIter ){ + sqlite3Fts5IterClose(p->pIter); + p->pIter = 0; + } + rc = sqlite3Fts5IndexQuery( + pExpr->pIndex, p->zTerm, strlen(p->zTerm), + (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | + (pExpr->bDesc ? 
FTS5INDEX_QUERY_DESC : 0), + &p->pIter + ); + assert( rc==SQLITE_OK || p->pIter==0 ); + if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){ + bEof = 0; + } + } + + if( bEof ){ pNode->bEof = 1; - break; + return rc; } } } return rc; @@ -1027,14 +1179,21 @@ rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom); break; }; case FTS5_TERM: { - rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom); - if( pNode->bEof==0 ){ + Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; + if( bFromValid ){ + rc = sqlite3Fts5IterNextFrom(pIter, iFrom); + }else{ + rc = sqlite3Fts5IterNext(pIter); + } + if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){ assert( rc==SQLITE_OK ); rc = fts5ExprTokenTest(pExpr, pNode); + }else{ + pNode->bEof = 1; } return rc; }; case FTS5_AND: { @@ -1264,14 +1423,20 @@ */ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ if( pPhrase ){ int i; for(i=0; inTerm; i++){ + Fts5ExprTerm *pSyn; + Fts5ExprTerm *pNext; Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); - if( pTerm->pIter ){ - sqlite3Fts5IterClose(pTerm->pIter); + sqlite3Fts5IterClose(pTerm->pIter); + + for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ + pNext = pSyn->pSynonym; + sqlite3Fts5IterClose(pSyn->pIter); + sqlite3_free(pSyn); } } if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); } @@ -1329,45 +1494,72 @@ } typedef struct TokenCtx TokenCtx; struct TokenCtx { Fts5ExprPhrase *pPhrase; + int rc; }; /* ** Callback for tokenizing terms used by ParseTerm(). 
*/ static int fts5ParseTokenize( void *pContext, /* Pointer to Fts5InsertCtx object */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ - int iStart, /* Start offset of token */ - int iEnd /* End offset of token */ + int iUnused1, /* Start offset of token */ + int iUnused2 /* End offset of token */ ){ int rc = SQLITE_OK; const int SZALLOC = 8; TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; - Fts5ExprTerm *pTerm; - - if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ - Fts5ExprPhrase *pNew; - int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); - - pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, - sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew - ); - if( pNew==0 ) return SQLITE_NOMEM; - if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); - pCtx->pPhrase = pPhrase = pNew; - pNew->nTerm = nNew - SZALLOC; - } - - pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; - memset(pTerm, 0, sizeof(Fts5ExprTerm)); - pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); - + + /* If an error has already occurred, this is a no-op */ + if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; + + assert( pPhrase==0 || pPhrase->nTerm>0 ); + if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){ + Fts5ExprTerm *pSyn; + int nByte = sizeof(Fts5ExprTerm) + nToken+1; + pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); + if( pSyn==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pSyn, 0, nByte); + pSyn->zTerm = (char*)&pSyn[1]; + memcpy(pSyn->zTerm, pToken, nToken); + pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; + pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; + } + }else{ + Fts5ExprTerm *pTerm; + if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ + Fts5ExprPhrase *pNew; + int nNew = SZALLOC + (pPhrase ? 
pPhrase->nTerm : 0); + + pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, + sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew + ); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); + pCtx->pPhrase = pPhrase = pNew; + pNew->nTerm = nNew - SZALLOC; + } + } + + if( rc==SQLITE_OK ){ + pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; + memset(pTerm, 0, sizeof(Fts5ExprTerm)); + pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); + } + } + + pCtx->rc = rc; return rc; } /* @@ -1415,15 +1607,18 @@ memset(&sCtx, 0, sizeof(TokenCtx)); sCtx.pPhrase = pAppend; rc = fts5ParseStringFromToken(pToken, &z); if( rc==SQLITE_OK ){ + int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0); + int n; sqlite3Fts5Dequote(z); - rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize); + n = strlen(z); + rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize); } sqlite3_free(z); - if( rc ){ + if( rc || (rc = sCtx.rc) ){ pParse->rc = rc; fts5ExprPhraseFree(sCtx.pPhrase); sCtx.pPhrase = 0; }else if( sCtx.pPhrase ){ @@ -1447,10 +1642,87 @@ sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; } return sCtx.pPhrase; } + +/* +** Create a new FTS5 expression by cloning phrase iPhrase of the +** expression passed as the second argument. 
+*/ +int sqlite3Fts5ExprClonePhrase( + Fts5Config *pConfig, + Fts5Expr *pExpr, + int iPhrase, + Fts5Expr **ppNew +){ + int rc = SQLITE_OK; /* Return code */ + Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ + Fts5ExprPhrase *pCopy; /* Copy of pOrig */ + int i; /* Used to iterate through phrase terms */ + + Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ + Fts5ExprPhrase **apPhrase; /* pNew->apPhrase */ + Fts5ExprNode *pNode; /* pNew->pRoot */ + Fts5ExprNearset *pNear; /* pNew->pRoot->pNear */ + + TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */ + + + pOrig = pExpr->apExprPhrase[iPhrase]; + + pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); + if( rc==SQLITE_OK ){ + pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, + sizeof(Fts5ExprPhrase*)); + } + if( rc==SQLITE_OK ){ + pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, + sizeof(Fts5ExprNode)); + } + if( rc==SQLITE_OK ){ + pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, + sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)); + } + + for(i=0; rc==SQLITE_OK && inTerm; i++){ + int tflags = 0; + Fts5ExprTerm *p; + for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ + const char *zTerm = p->zTerm; + rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, strlen(zTerm), 0, 0); + tflags = FTS5_TOKEN_COLOCATED; + } + if( rc==SQLITE_OK ){ + sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; + } + } + + if( rc==SQLITE_OK ){ + /* All the allocations succeeded. Put the expression object together. 
*/ + pNew->pIndex = pExpr->pIndex; + pNew->nPhrase = 1; + pNew->apExprPhrase[0] = sCtx.pPhrase; + pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; + pNew->pRoot->pNear->nPhrase = 1; + sCtx.pPhrase->pNode = pNew->pRoot; + + if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){ + pNew->pRoot->eType = FTS5_TERM; + }else{ + pNew->pRoot->eType = FTS5_STRING; + } + }else{ + sqlite3Fts5ExprFree(pNew); + fts5ExprPhraseFree(sCtx.pPhrase); + pNew = 0; + } + + *ppNew = pNew; + return rc; +} + /* ** Token pTok has appeared in a MATCH expression where the NEAR operator ** is expected. If token pTok does not contain "NEAR", store an error ** in the pParse object. @@ -1628,11 +1900,14 @@ if( eType==FTS5_STRING ){ int iPhrase; for(iPhrase=0; iPhrasenPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; } - if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ + if( pNear->nPhrase==1 + && pNear->apPhrase[0]->nTerm==1 + && pNear->apPhrase[0]->aTerm[0].pSynonym==0 + ){ pRet->eType = FTS5_TERM; } }else{ fts5ExprAddChildren(pRet, pLeft); fts5ExprAddChildren(pRet, pRight); @@ -1648,20 +1923,32 @@ } return pRet; } static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ - char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2); + int nByte = 0; + Fts5ExprTerm *p; + char *zQuoted; + + /* Determine the maximum amount of space required. 
*/ + for(p=pTerm; p; p=p->pSynonym){ + nByte += strlen(pTerm->zTerm) * 2 + 3 + 2; + } + zQuoted = sqlite3_malloc(nByte); + if( zQuoted ){ int i = 0; - char *zIn = pTerm->zTerm; - zQuoted[i++] = '"'; - while( *zIn ){ - if( *zIn=='"' ) zQuoted[i++] = '"'; - zQuoted[i++] = *zIn++; + for(p=pTerm; p; p=p->pSynonym){ + char *zIn = p->zTerm; + zQuoted[i++] = '"'; + while( *zIn ){ + if( *zIn=='"' ) zQuoted[i++] = '"'; + zQuoted[i++] = *zIn++; + } + zQuoted[i++] = '"'; + if( p->pSynonym ) zQuoted[i++] = '|'; } - zQuoted[i++] = '"'; if( pTerm->bPrefix ){ zQuoted[i++] = ' '; zQuoted[i++] = '*'; } zQuoted[i++] = '\0'; Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -291,11 +291,10 @@ typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5DlidxLvl Fts5DlidxLvl; typedef struct Fts5DlidxWriter Fts5DlidxWriter; -typedef struct Fts5NodeIter Fts5NodeIter; typedef struct Fts5PageWriter Fts5PageWriter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; @@ -524,28 +523,10 @@ Fts5CResult *aFirst; /* Current merge state (see above) */ Fts5SegIter aSeg[1]; /* Array of segment iterators */ }; -/* -** Object for iterating through the conents of a single internal node in -** memory. -*/ -struct Fts5NodeIter { - /* Internal. Set and managed by fts5NodeIterXXX() functions. Except, - ** the EOF test for the iterator is (Fts5NodeIter.aData==0). */ - const u8 *aData; - int nData; - int iOff; - - /* Output variables */ - Fts5Buffer term; - int nEmpty; - int iChild; - int bDlidx; -}; - /* ** An instance of the following type is used to iterate through the contents ** of a doclist-index record. 
** ** pData: @@ -571,27 +552,10 @@ int nLvl; int iSegid; Fts5DlidxLvl aLvl[1]; }; - - -/* -** The first argument passed to this macro is a pointer to an Fts5Buffer -** object. -*/ -#define fts5BufferSize(pBuf,n) { \ - if( pBuf->nSpacep, n); \ - if( pNew==0 ){ \ - sqlite3_free(pBuf->p); \ - } \ - pBuf->nSpace = n; \ - pBuf->p = pNew; \ - } \ -} - static void fts5PutU16(u8 *aOut, u16 iVal){ aOut[0] = (iVal>>8); aOut[1] = (iVal&0xFF); } @@ -615,19 +579,20 @@ ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or ** +ve if pRight is smaller than pLeft. In other words: ** ** res = *pLeft - *pRight */ +#ifdef SQLITE_DEBUG static int fts5BufferCompareBlob( Fts5Buffer *pLeft, /* Left hand side of comparison */ const u8 *pRight, int nRight /* Right hand side of comparison */ ){ int nCmp = MIN(pLeft->n, nRight); int res = memcmp(pLeft->p, pRight, nCmp); return (res==0 ? (pLeft->n - nRight) : res); } - +#endif /* ** Compare the contents of the two buffers using memcmp(). If one buffer ** is a prefix of the other, it is considered the lesser. ** @@ -663,15 +628,18 @@ p->pReader = 0; sqlite3_blob_close(pReader); } } -static Fts5Data *fts5DataReadOrBuffer( - Fts5Index *p, - Fts5Buffer *pBuf, - i64 iRowid -){ + +/* +** Retrieve a record from the %_data table. +** +** If an error occurs, NULL is returned and an error left in the +** Fts5Index object. +*/ +static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ Fts5Data *pRet = 0; if( p->rc==SQLITE_OK ){ int rc = SQLITE_OK; if( p->pReader ){ @@ -687,12 +655,12 @@ fts5CloseReader(p); } if( rc==SQLITE_ABORT ) rc = SQLITE_OK; } - /* If the blob handle is not yet open, open and seek it. Otherwise, use - ** the blob_reopen() API to reseek the existing blob handle. */ + /* If the blob handle is not open at this point, open it and seek + ** to the requested entry. 
*/ if( p->pReader==0 && rc==SQLITE_OK ){ Fts5Config *pConfig = p->pConfig; rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader ); @@ -706,26 +674,17 @@ if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT; if( rc==SQLITE_OK ){ u8 *aOut = 0; /* Read blob data into this buffer */ int nByte = sqlite3_blob_bytes(p->pReader); - if( pBuf ){ - fts5BufferSize(pBuf, MAX(nByte, p->pConfig->pgsz) + 20); - pBuf->n = nByte; - aOut = pBuf->p; - if( aOut==0 ){ - rc = SQLITE_NOMEM; - } - }else{ - int nSpace = nByte + FTS5_DATA_PADDING; - pRet = (Fts5Data*)sqlite3_malloc(nSpace+sizeof(Fts5Data)); - if( pRet ){ - pRet->n = nByte; - aOut = pRet->p = (u8*)&pRet[1]; - }else{ - rc = SQLITE_NOMEM; - } + int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING; + pRet = (Fts5Data*)sqlite3_malloc(nAlloc); + if( pRet ){ + pRet->n = nByte; + aOut = pRet->p = (u8*)&pRet[1]; + }else{ + rc = SQLITE_NOMEM; } if( rc==SQLITE_OK ){ rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); } @@ -736,37 +695,14 @@ } p->rc = rc; p->nRead++; } - return pRet; -} - -/* -** Retrieve a record from the %_data table. -** -** If an error occurs, NULL is returned and an error left in the -** Fts5Index object. -*/ -static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ - Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid); assert( (pRet==0)==(p->rc!=SQLITE_OK) ); return pRet; } -/* -** Read a record from the %_data table into the buffer supplied as the -** second argument. -** -** If an error occurs, an error is left in the Fts5Index object. If an -** error has already occurred when this function is called, it is a -** no-op. -*/ -static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){ - (void)fts5DataReadOrBuffer(p, pBuf, iRowid); -} - /* ** Release a reference to data record returned by an earlier call to ** fts5DataRead(). 
*/ static void fts5DataRelease(Fts5Data *pData){ @@ -1031,23 +967,22 @@ */ static Fts5Structure *fts5StructureRead(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; Fts5Structure *pRet = 0; /* Object to return */ int iCookie; /* Configuration cookie */ + Fts5Data *pData; Fts5Buffer buf = {0, 0, 0}; - fts5DataBuffer(p, &buf, FTS5_STRUCTURE_ROWID); - if( buf.p==0 ) return 0; - assert( buf.nSpace>=(buf.n + FTS5_DATA_ZERO_PADDING) ); - memset(&buf.p[buf.n], 0, FTS5_DATA_ZERO_PADDING); - p->rc = fts5StructureDecode(buf.p, buf.n, &iCookie, &pRet); - + pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); + if( p->rc ) return 0; + memset(&pData->p[pData->n], 0, FTS5_DATA_PADDING); + p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet); if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); } - fts5BufferFree(&buf); + fts5DataRelease(pData); if( p->rc!=SQLITE_OK ){ fts5StructureRelease(pRet); pRet = 0; } return pRet; @@ -1226,66 +1161,10 @@ fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); } } -/* -** If the pIter->iOff offset currently points to an entry indicating one -** or more term-less nodes, advance past it and set pIter->nEmpty to -** the number of empty child nodes. -*/ -static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ - if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ - pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01; - pIter->iOff++; - pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); - }else{ - pIter->nEmpty = 0; - pIter->bDlidx = 0; - } -} - -/* -** Advance to the next entry within the node. 
-*/ -static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){ - if( pIter->iOff>=pIter->nData ){ - pIter->aData = 0; - pIter->iChild += pIter->nEmpty; - }else{ - int nPre, nNew; - pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nPre); - pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nNew); - pIter->term.n = nPre-2; - fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff); - pIter->iOff += nNew; - pIter->iChild += (1 + pIter->nEmpty); - fts5NodeIterGobbleNEmpty(pIter); - if( *pRc ) pIter->aData = 0; - } -} - - -/* -** Initialize the iterator object pIter to iterate through the internal -** segment node in pData. -*/ -static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){ - memset(pIter, 0, sizeof(*pIter)); - pIter->aData = aData; - pIter->nData = nData; - pIter->iOff = fts5GetVarint32(aData, pIter->iChild); - fts5NodeIterGobbleNEmpty(pIter); -} - -/* -** Free any memory allocated by the iterator object. -*/ -static void fts5NodeIterFree(Fts5NodeIter *pIter){ - fts5BufferFree(&pIter->term); -} - /* ** Advance the iterator passed as the only argument. If the end of the ** doclist-index page is reached, return non-zero. 
*/ static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ @@ -2039,123 +1918,10 @@ } pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); } -#ifdef SQLITE_DEBUG -static void fts5AssertNodeSeekOk( - Fts5Buffer *pNode, - const u8 *pTerm, int nTerm, /* Term to search for */ - int iExpectPg, - int bExpectDlidx -){ - int bDlidx; - int iPg; - int rc = SQLITE_OK; - Fts5NodeIter node; - - fts5NodeIterInit(pNode->p, pNode->n, &node); - assert( node.term.n==0 ); - iPg = node.iChild; - bDlidx = node.bDlidx; - for(fts5NodeIterNext(&rc, &node); - node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0; - fts5NodeIterNext(&rc, &node) - ){ - iPg = node.iChild; - bDlidx = node.bDlidx; - } - fts5NodeIterFree(&node); - - assert( rc!=SQLITE_OK || iPg==iExpectPg ); - assert( rc!=SQLITE_OK || bDlidx==bExpectDlidx ); -} -#else -#define fts5AssertNodeSeekOk(v,w,x,y,z) -#endif - -/* -** Argument pNode is an internal b-tree node. This function searches -** within the node for the largest term that is smaller than or equal -** to (pTerm/nTerm). -** -** It returns the associated page number. Or, if (pTerm/nTerm) is smaller -** than all terms within the node, the leftmost child page number. -** -** Before returning, (*pbDlidx) is set to true if the last term on the -** returned child page number has a doclist-index. Or left as is otherwise. -*/ -static int fts5NodeSeek( - Fts5Buffer *pNode, /* Node to search */ - const u8 *pTerm, int nTerm, /* Term to search for */ - int *pbDlidx /* OUT: True if dlidx flag is set */ -){ - int iPg; - u8 *pPtr = pNode->p; - u8 *pEnd = &pPtr[pNode->n]; - int nMatch = 0; /* Number of bytes of pTerm already matched */ - - assert( *pbDlidx==0 ); - - pPtr += fts5GetVarint32(pPtr, iPg); - while( pPtr=pEnd ) break; - } - - /* Read the next "term" pointer. Set nKeep to the number of bytes to - ** keep from the previous term, and nNew to the number of bytes of - ** new data that will be appended to it. 
*/ - nKeep = (int)*pPtr++; - nNew = (int)*pPtr++; - if( (nKeep | nNew) & 0x0080 ){ - pPtr -= 2; - pPtr += fts5GetVarint32(pPtr, nKeep); - pPtr += fts5GetVarint32(pPtr, nNew); - } - nKeep -= 2; - - /* Compare (pTerm/nTerm) to the current term on the node (the one described - ** by nKeep/nNew). If the node term is larger, break out of the while() - ** loop. - ** - ** Otherwise, if (pTerm/nTerm) is larger or the two terms are equal, - ** leave variable nMatch set to the size of the largest prefix common to - ** both terms in bytes. */ - if( nKeep==nMatch ){ - int nTst = MIN(nNew, nTerm-nMatch); - int i; - for(i=0; i pTerm[nMatch]) ) break; - }else if( nKeeprc==SQLITE_OK ){ + do{ if( bMove ) fts5SegIterNext(p, pIter, 0); if( pIter->pLeaf==0 ) break; if( bRev==0 && pIter->iRowid>=iMatch ) break; if( bRev!=0 && pIter->iRowid<=iMatch ) break; bMove = 1; - } + }while( p->rc==SQLITE_OK ); } /* ** Free the iterator object passed as the second argument. @@ -4457,17 +4223,13 @@ ** function populates it with the initial structure objects for each index, ** and the initial version of the "averages" record (a zero-byte blob). */ int sqlite3Fts5IndexReinit(Fts5Index *p){ Fts5Structure s; - - assert( p->rc==SQLITE_OK ); - p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); - memset(&s, 0, sizeof(Fts5Structure)); + fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0); fts5StructureWrite(p, &s); - return fts5IndexReturn(p); } /* ** Open a new Fts5Index handle. If the bCreate argument is true, create @@ -4785,17 +4547,32 @@ fts5CloseReader(pIndex); } } /* -** Read the "averages" record into the buffer supplied as the second -** argument. Return SQLITE_OK if successful, or an SQLite error code -** if an error occurs. +** Read and decode the "averages" record from the database. +** +** Parameter anSize must point to an array of size nCol, where nCol is +** the number of user defined columns in the FTS table. 
*/ -int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){ - assert( p->rc==SQLITE_OK ); - fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID); +int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ + int nCol = p->pConfig->nCol; + Fts5Data *pData; + + *pnRow = 0; + memset(anSize, 0, sizeof(i64) * nCol); + pData = fts5DataRead(p, FTS5_AVERAGES_ROWID); + if( p->rc==SQLITE_OK && pData->n ){ + int i = 0; + int iCol; + i += fts5GetVarint(&pData->p[i], (u64*)pnRow); + for(iCol=0; in && iColp[i], (u64*)&anSize[iCol]); + } + } + + fts5DataRelease(pData); return fts5IndexReturn(p); } /* ** Replace the current "averages" record with the contents of the buffer @@ -5485,77 +5262,57 @@ /* todo */ }else{ fts5DecodeStructure(&rc, &s, a, n); } }else{ - Fts5Buffer term; + int iTermOff = 0; + int iRowidOff = 0; + int iOff; + int nKeep = 0; + memset(&term, 0, sizeof(Fts5Buffer)); - if( iHeight==0 ){ - int iTermOff = 0; - int iRowidOff = 0; - int iOff; - int nKeep = 0; - - if( n>=4 ){ - iRowidOff = fts5GetU16(&a[0]); - iTermOff = fts5GetU16(&a[2]); - }else{ - sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt"); - goto decode_out; - } - - if( iRowidOff ){ - iOff = iRowidOff; - }else if( iTermOff ){ - iOff = iTermOff; - }else{ - iOff = n; - } - fts5DecodePoslist(&rc, &s, &a[4], iOff-4); - - assert( iRowidOff==0 || iOff==iRowidOff ); - if( iRowidOff ){ - iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); - } - - assert( iTermOff==0 || iOff==iTermOff ); - while( iOff=4 ){ + iRowidOff = fts5GetU16(&a[0]); + iTermOff = fts5GetU16(&a[2]); + }else{ + sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt"); + goto decode_out; + } + + if( iRowidOff ){ + iOff = iRowidOff; + }else if( iTermOff ){ + iOff = iTermOff; + }else{ + iOff = n; + } + fts5DecodePoslist(&rc, &s, &a[4], iOff-4); + + assert( iRowidOff==0 || iOff==iRowidOff ); + if( iRowidOff ){ + iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); + } + + assert( iTermOff==0 || iOff==iTermOff ); + while( 
iOffbase.pVtab); - return sqlite3Fts5Tokenize(pTab->pConfig, pText, nText, pUserData, xToken); + return sqlite3Fts5Tokenize( + pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken + ); } static int fts5ApiPhraseCount(Fts5Context *pCtx){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); @@ -1653,17 +1655,20 @@ return rc; } static int fts5ColumnSizeCb( void *pContext, /* Pointer to int */ + int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ int *pCnt = (int*)pContext; - *pCnt = *pCnt + 1; + if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){ + (*pCnt)++; + } return SQLITE_OK; } static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; @@ -1689,11 +1694,13 @@ const char *z; int n; void *p = (void*)(&pCsr->aColumnSize[i]); pCsr->aColumnSize[i] = 0; rc = fts5ApiColumnText(pCtx, i, &z, &n); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5Tokenize(pConfig, z, n, p, fts5ColumnSizeCb); + rc = sqlite3Fts5Tokenize( + pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb + ); } } } } CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE); @@ -1851,11 +1858,11 @@ Fts5Config *pConf = pTab->pConfig; pNew->ePlan = FTS5_PLAN_MATCH; pNew->iFirstRowid = SMALLEST_INT64; pNew->iLastRowid = LARGEST_INT64; pNew->base.pVtab = (sqlite3_vtab*)pTab; - rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr); + rc = sqlite3Fts5ExprClonePhrase(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr); } if( rc==SQLITE_OK ){ for(rc = fts5CursorFirst(pTab, pNew, 0); rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0; @@ -2342,11 +2349,11 @@ rc = SQLITE_NOMEM; }else{ void *p = (void*)pGlobal; memset(pGlobal, 0, sizeof(Fts5Global)); pGlobal->db = db; - pGlobal->api.iVersion = 1; + pGlobal->api.iVersion = 2; pGlobal->api.xCreateFunction = fts5CreateAux; pGlobal->api.xCreateTokenizer = 
fts5CreateTokenizer; pGlobal->api.xFindTokenizer = fts5FindTokenizer; rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); Index: ext/fts5/fts5_storage.c ================================================================== --- ext/fts5/fts5_storage.c +++ ext/fts5/fts5_storage.c @@ -357,19 +357,22 @@ /* ** Tokenization callback used when inserting tokens into the FTS index. */ static int fts5StorageInsertCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ + int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; - int iPos = pCtx->szCol++; - return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken); + if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ + pCtx->szCol++; + } + return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); } /* ** If a row with rowid iDel is present in the %_content table, add the ** delete-markers to the FTS index necessary to delete it. Do not actually @@ -392,10 +395,11 @@ rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol-1] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pSeek, iCol), sqlite3_column_bytes(pSeek, iCol), (void*)&ctx, fts5StorageInsertCallback ); @@ -449,26 +453,11 @@ ** occurs. 
*/ static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ int rc = SQLITE_OK; if( p->bTotalsValid==0 ){ - int nCol = p->pConfig->nCol; - Fts5Buffer buf; - memset(&buf, 0, sizeof(buf)); - - memset(p->aTotalSize, 0, sizeof(i64) * nCol); - p->nTotalRow = 0; - rc = sqlite3Fts5IndexGetAverages(p->pIndex, &buf); - if( rc==SQLITE_OK && buf.n ){ - int i = 0; - int iCol; - i += fts5GetVarint(&buf.p[i], (u64*)&p->nTotalRow); - for(iCol=0; iaTotalSize[iCol]); - } - } - sqlite3_free(buf.p); + rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize); p->bTotalsValid = bCache; } return rc; } @@ -563,10 +552,11 @@ rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iColnCol; iCol++){ if( pConfig->abUnindexed[iCol] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_value_text(apVal[iCol]), sqlite3_value_bytes(apVal[iCol]), (void*)&ctx, fts5StorageInsertCallback ); @@ -652,10 +642,11 @@ rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid); for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pScan, ctx.iCol+1), sqlite3_column_bytes(pScan, ctx.iCol+1), (void*)&ctx, fts5StorageInsertCallback ); @@ -769,10 +760,11 @@ } for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), sqlite3_value_bytes(apVal[ctx.iCol+2]), (void*)&ctx, fts5StorageInsertCallback ); @@ -836,19 +828,22 @@ /* ** Tokenization callback used by integrity check. 
*/ static int fts5StorageIntegrityCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ + int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; - int iPos = pCtx->szCol++; + if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ + pCtx->szCol++; + } pCtx->cksum ^= sqlite3Fts5IndexCksum( - pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken + pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken ); return SQLITE_OK; } /* @@ -879,23 +874,27 @@ int rc2; while( SQLITE_ROW==sqlite3_step(pScan) ){ int i; ctx.iRowid = sqlite3_column_int64(pScan, 0); ctx.szCol = 0; - rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); + if( pConfig->bColumnsize ){ + rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); + } for(i=0; rc==SQLITE_OK && inCol; i++){ if( pConfig->abUnindexed[i] ) continue; ctx.iCol = i; ctx.szCol = 0; - rc = sqlite3Fts5Tokenize( - pConfig, + rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pScan, i+1), sqlite3_column_bytes(pScan, i+1), (void*)&ctx, fts5StorageIntegrityCallback ); - if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT; + if( pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){ + rc = FTS5_CORRUPT; + } aTotalSize[i] += ctx.szCol; } if( rc!=SQLITE_OK ) break; } rc2 = sqlite3_reset(pScan); @@ -916,11 +915,11 @@ if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ i64 nRow; rc = fts5StorageCount(p, "content", &nRow); if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } - if( rc==SQLITE_OK ){ + if( rc==SQLITE_OK && pConfig->bColumnsize ){ i64 nRow; rc = fts5StorageCount(p, "docsize", &nRow); if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } @@ -1000,13 +999,16 @@ ** ** An SQLite error code is returned if an error occurs, or SQLITE_OK ** otherwise. 
*/ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ - int nCol = p->pConfig->nCol; - sqlite3_stmt *pLookup = 0; - int rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0); + int nCol = p->pConfig->nCol; /* Number of user columns in table */ + sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */ + int rc; /* Return Code */ + + assert( p->pConfig->bColumnsize ); + rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0); if( rc==SQLITE_OK ){ int bCorrupt = 1; sqlite3_bind_int64(pLookup, 1, iRowid); if( SQLITE_ROW==sqlite3_step(pLookup) ){ const u8 *aBlob = sqlite3_column_blob(pLookup, 0); Index: ext/fts5/fts5_tcl.c ================================================================== --- ext/fts5/fts5_tcl.c +++ ext/fts5/fts5_tcl.c @@ -139,10 +139,11 @@ Tcl_Obj *pObj; }; static int xTokenizeCb( void *pCtx, + int tflags, const char *zToken, int nToken, int iStart, int iEnd ){ F5tFunction *p = (F5tFunction*)pCtx; Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript); @@ -582,10 +583,11 @@ const char *zInput; }; static int xTokenizeCb2( void *pCtx, + int tflags, const char *zToken, int nToken, int iStart, int iEnd ){ F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx; if( p->bSubst ){ @@ -664,11 +666,13 @@ pRet = Tcl_NewObj(); Tcl_IncrRefCount(pRet); ctx.bSubst = (objc==5); ctx.pRet = pRet; ctx.zInput = zText; - rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2); + rc = tokenizer.xTokenize( + pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, zText, nText, xTokenizeCb2 + ); tokenizer.xDelete(pTok); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0); Tcl_DecrRefCount(pRet); return TCL_ERROR; @@ -686,19 +690,25 @@ */ typedef struct F5tTokenizerContext F5tTokenizerContext; typedef struct F5tTokenizerCb F5tTokenizerCb; typedef struct F5tTokenizerModule F5tTokenizerModule; -typedef struct F5tTokenizerModule F5tTokenizerInstance; +typedef struct F5tTokenizerInstance F5tTokenizerInstance; struct 
F5tTokenizerContext { void *pCtx; - int (*xToken)(void*, const char*, int, int, int); + int (*xToken)(void*, int, const char*, int, int, int); }; struct F5tTokenizerModule { Tcl_Interp *interp; + Tcl_Obj *pScript; + F5tTokenizerContext *pContext; +}; + +struct F5tTokenizerInstance { + Tcl_Interp *interp; Tcl_Obj *pScript; F5tTokenizerContext *pContext; }; static int f5tTokenizerCreate( @@ -746,39 +756,66 @@ } static int f5tTokenizerTokenize( Fts5Tokenizer *p, void *pCtx, + int flags, const char *pText, int nText, - int (*xToken)(void*, const char*, int, int, int) + int (*xToken)(void*, int, const char*, int, int, int) ){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; void *pOldCtx; - int (*xOldToken)(void*, const char*, int, int, int); + int (*xOldToken)(void*, int, const char*, int, int, int); Tcl_Obj *pEval; int rc; + const char *zFlags; pOldCtx = pInst->pContext->pCtx; xOldToken = pInst->pContext->xToken; + pInst->pContext->pCtx = pCtx; + pInst->pContext->xToken = xToken; + + assert( + flags==FTS5_TOKENIZE_DOCUMENT + || flags==FTS5_TOKENIZE_AUX + || flags==FTS5_TOKENIZE_QUERY + || flags==(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX) + ); pEval = Tcl_DuplicateObj(pInst->pScript); Tcl_IncrRefCount(pEval); - rc = Tcl_ListObjAppendElement( - pInst->interp, pEval, Tcl_NewStringObj(pText, nText) - ); - if( rc==TCL_OK ){ - rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY); + switch( flags ){ + case FTS5_TOKENIZE_DOCUMENT: + zFlags = "document"; + break; + case FTS5_TOKENIZE_AUX: + zFlags = "aux"; + break; + case FTS5_TOKENIZE_QUERY: + zFlags = "query"; + break; + case (FTS5_TOKENIZE_PREFIX | FTS5_TOKENIZE_QUERY): + zFlags = "prefixquery"; + break; + default: + assert( 0 ); + zFlags = "invalid"; + break; } + + Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(zFlags, -1)); + Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(pText,nText)); + rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY); Tcl_DecrRefCount(pEval); 
pInst->pContext->pCtx = pOldCtx; pInst->pContext->xToken = xOldToken; return rc; } /* -** sqlite3_fts5_token TEXT START END POS +** sqlite3_fts5_token ?-colocated? TEXT START END */ static int f5tTokenizerReturn( void * clientData, Tcl_Interp *interp, int objc, @@ -786,35 +823,47 @@ ){ F5tTokenizerContext *p = (F5tTokenizerContext*)clientData; int iStart; int iEnd; int nToken; + int tflags = 0; char *zToken; int rc; - assert( p ); - if( objc!=4 ){ - Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END"); + if( objc==5 ){ + int nArg; + char *zArg = Tcl_GetStringFromObj(objv[1], &nArg); + if( nArg<=10 && nArg>=2 && memcmp("-colocated", zArg, nArg)==0 ){ + tflags |= FTS5_TOKEN_COLOCATED; + }else{ + goto usage; + } + }else if( objc!=4 ){ + goto usage; + } + + zToken = Tcl_GetStringFromObj(objv[objc-3], &nToken); + if( Tcl_GetIntFromObj(interp, objv[objc-2], &iStart) + || Tcl_GetIntFromObj(interp, objv[objc-1], &iEnd) + ){ return TCL_ERROR; } + if( p->xToken==0 ){ Tcl_AppendResult(interp, "sqlite3_fts5_token may only be used by tokenizer callback", 0 ); return TCL_ERROR; } - zToken = Tcl_GetStringFromObj(objv[1], &nToken); - if( Tcl_GetIntFromObj(interp, objv[2], &iStart) - || Tcl_GetIntFromObj(interp, objv[3], &iEnd) - ){ - return TCL_ERROR; - } - - rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd); + rc = p->xToken(p->pCtx, tflags, zToken, nToken, iStart, iEnd); Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE); return TCL_OK; + + usage: + Tcl_WrongNumArgs(interp, 1, objv, "?-colocated? 
TEXT START END"); + return TCL_ERROR; } static void f5tDelTokenizer(void *pCtx){ F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx; Tcl_DecrRefCount(pMod->pScript); Index: ext/fts5/fts5_test_mi.c ================================================================== --- ext/fts5/fts5_test_mi.c +++ ext/fts5/fts5_test_mi.c @@ -350,11 +350,11 @@ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ const char *zArg; Fts5MatchinfoCtx *p; - int rc; + int rc = SQLITE_OK; if( nVal>0 ){ zArg = (const char*)sqlite3_value_text(apVal[0]); }else{ zArg = "pcx"; @@ -361,15 +361,20 @@ } p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0); if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){ p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg); - pApi->xSetAuxdata(pFts, p, sqlite3_free); - if( p==0 ) return; + if( p==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); + } } - rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb); + if( rc==SQLITE_OK ){ + rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb); + } if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); }else{ /* No errors has occured, so return a copy of the array of integers. */ int nByte = p->nRet * sizeof(u32); Index: ext/fts5/fts5_tokenize.c ================================================================== --- ext/fts5/fts5_tokenize.c +++ ext/fts5/fts5_tokenize.c @@ -114,12 +114,13 @@ ** Tokenize some text using the ascii tokenizer. 
*/ static int fts5AsciiTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, + int flags, const char *pText, int nText, - int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) + int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) ){ AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer; int rc = SQLITE_OK; int ie; int is = 0; @@ -156,11 +157,11 @@ nFold = nByte*2; } asciiFold(pFold, &pText[is], nByte); /* Invoke the token callback */ - rc = xToken(pCtx, pFold, nByte, is, ie); + rc = xToken(pCtx, 0, pFold, nByte, is, ie); is = ie+1; } if( pFold!=aFold ) sqlite3_free(pFold); if( rc==SQLITE_DONE ) rc = SQLITE_OK; @@ -383,12 +384,13 @@ } static int fts5UnicodeTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, + int flags, const char *pText, int nText, - int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) + int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) ){ Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; int rc = SQLITE_OK; unsigned char *a = p->aTokenChar; @@ -473,11 +475,11 @@ } ie = zCsr - (unsigned char*)pText; } /* Invoke the token callback */ - rc = xToken(pCtx, aFold, zOut-aFold, is, ie); + rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie); } tokenize_done: if( rc==SQLITE_DONE ) rc = SQLITE_OK; return rc; @@ -551,11 +553,11 @@ } typedef struct PorterContext PorterContext; struct PorterContext { void *pCtx; - int (*xToken)(void*, const char*, int, int, int); + int (*xToken)(void*, int, const char*, int, int, int); char *aBuf; }; typedef struct PorterRule PorterRule; struct PorterRule { @@ -1116,10 +1118,11 @@ } } static int fts5PorterCb( void *pCtx, + int tflags, const char *pToken, int nToken, int iStart, int iEnd ){ @@ -1173,32 +1176,33 @@ && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) ){ nBuf--; } - return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd); + return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd); pass_through: - return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd); 
+ return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd); } /* ** Tokenize using the porter tokenizer. */ static int fts5PorterTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, + int flags, const char *pText, int nText, - int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) + int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) ){ PorterTokenizer *p = (PorterTokenizer*)pTokenizer; PorterContext sCtx; sCtx.xToken = xToken; sCtx.pCtx = pCtx; sCtx.aBuf = p->aBuf; return p->tokenizer.xTokenize( - p->pTokenizer, (void*)&sCtx, pText, nText, fts5PorterCb + p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb ); } /* ** Register all built-in tokenizers with FTS5. @@ -1223,9 +1227,9 @@ &aBuiltin[i].x, 0 ); } - return SQLITE_OK; + return rc; } Index: ext/fts5/test/fts5_common.tcl ================================================================== --- ext/fts5/test/fts5_common.tcl +++ ext/fts5/test/fts5_common.tcl @@ -292,6 +292,39 @@ } proc NOT {a b} { if {[llength $b]>0} { return [list] } return $a } + +#------------------------------------------------------------------------- +# This command is similar to [split], except that it also provides the +# start and end offsets of each token. 
For example: +# +# [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8} +# + +proc gobble_whitespace {textvar} { + upvar $textvar t + regexp {([ ]*)(.*)} $t -> space t + return [string length $space] +} + +proc gobble_text {textvar wordvar} { + upvar $textvar t + upvar $wordvar w + regexp {([^ ]*)(.*)} $t -> w t + return [string length $w] +} + +proc fts5_tokenize_split {text} { + set token "" + set ret [list] + set iOff [gobble_whitespace text] + while {[set nToken [gobble_text text word]]} { + lappend ret $word $iOff [expr $iOff+$nToken] + incr iOff $nToken + incr iOff [gobble_whitespace text] + } + + set ret +} Index: ext/fts5/test/fts5aa.test ================================================================== --- ext/fts5/test/fts5aa.test +++ ext/fts5/test/fts5aa.test @@ -341,11 +341,11 @@ do_execsql_test 13.5 { SELECT rowid FROM t1 WHERE t1 MATCH 'o'; } {1} do_execsql_test 13.6 { - SELECT rowid FROM t1 WHERE t1 MATCH '.'; + SELECT rowid FROM t1 WHERE t1 MATCH '""'; } {} #------------------------------------------------------------------------- # reset_db @@ -503,9 +503,39 @@ SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c } {1 1} do_execsql_test 18.3 { SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c } {1 1} + +#-------------------------------------------------------------------- +# fts5 table in the temp schema. +# +reset_db +do_execsql_test 19.0 { + CREATE VIRTUAL TABLE temp.t1 USING fts5(x); + INSERT INTO t1 VALUES('x y z'); + INSERT INTO t1 VALUES('w x 1'); + SELECT rowid FROM t1 WHERE t1 MATCH 'x'; +} {1 2} + +#-------------------------------------------------------------------- +# Test that 6 and 7 byte varints can be read. 
+# +reset_db +do_execsql_test 20.0 { + CREATE VIRTUAL TABLE temp.tmp USING fts5(x); +} +set ::ids [list \ + 0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43] +] +do_test 20.1 { + foreach id $::ids { + execsql { INSERT INTO tmp(rowid, x) VALUES($id, 'x y z') } + } + execsql { SELECT rowid FROM tmp WHERE tmp MATCH 'y' } +} $::ids + + finish_test Index: ext/fts5/test/fts5columnsize.test ================================================================== --- ext/fts5/test/fts5columnsize.test +++ ext/fts5/test/fts5columnsize.test @@ -132,7 +132,20 @@ SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a' } { 1 {-1 0 -1} 2 {-1 0 -1} } +#------------------------------------------------------------------------- +# Test the integrity-check +# +do_execsql_test 4.1.1 { + CREATE VIRTUAL TABLE t5 USING fts5(x, columnsize=0); + INSERT INTO t5 VALUES('1 2 3 4'); + INSERT INTO t5 VALUES('2 4 6 8'); +} + +breakpoint +do_execsql_test 4.1.2 { + INSERT INTO t5(t5) VALUES('integrity-check'); +} finish_test Index: ext/fts5/test/fts5ea.test ================================================================== --- ext/fts5/test/fts5ea.test +++ ext/fts5/test/fts5ea.test @@ -85,9 +85,15 @@ # do_execsql_test 4.0 { SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"'); } {{"a" AND """"}} +#------------------------------------------------------------------------- +# Check that an unquoted | character in a query is reported as a syntax error. +# +do_catchsql_test 5.0 { + SELECT fts5_expr('abc | def'); +} {1 {fts5: syntax error near "|"}} finish_test Index: ext/fts5/test/fts5eb.test ================================================================== --- ext/fts5/test/fts5eb.test +++ ext/fts5/test/fts5eb.test @@ -28,22 +28,22 @@ set ::se_expr $expr do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res] } foreach {tn expr res} { - 1 {abc} {"abc"} - 2 {abc .} {"abc"} - 3 {.} {} - 4 {abc OR .} {"abc"} - 5 {abc NOT .} {"abc"} - 6 {abc AND .} {"abc"} - 7 {. 
OR abc} {"abc"} - 8 {. NOT abc} {"abc"} - 9 {. AND abc} {"abc"} - 10 {abc + . + def} {"abc" + "def"} - 11 {abc . def} {"abc" AND "def"} - 12 {r+e OR w} {"r" + "e" OR "w"} + 1 {abc} {"abc"} + 2 {abc ""} {"abc"} + 3 {""} {} + 4 {abc OR ""} {"abc"} + 5 {abc NOT ""} {"abc"} + 6 {abc AND ""} {"abc"} + 7 {"" OR abc} {"abc"} + 8 {"" NOT abc} {"abc"} + 9 {"" AND abc} {"abc"} + 10 {abc + "" + def} {"abc" + "def"} + 11 {abc "" def} {"abc" AND "def"} + 12 {r+e OR w} {"r" + "e" OR "w"} } { do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] } do_catchsql_test 2.1 { Index: ext/fts5/test/fts5fault6.test ================================================================== --- ext/fts5/test/fts5fault6.test +++ ext/fts5/test/fts5fault6.test @@ -19,10 +19,11 @@ # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } + #------------------------------------------------------------------------- # OOM while rebuilding an FTS5 table. # do_execsql_test 1.0 { @@ -146,7 +147,151 @@ } } -test { faultsim_test_result {0 {}} } +#------------------------------------------------------------------------- +# +# 5.2.* OOM while running a query that includes synonyms and matchinfo(). +# +# 5.3.* OOM while running a query that returns a row containing instances +# of more than 4 synonyms for a single term. 
+# +proc mit {blob} { + set scan(littleEndian) i* + set scan(bigEndian) I* + binary scan $blob $scan($::tcl_platform(byteOrder)) r + return $r +} +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {$tflags=="query" && [string length $w]==1} { + for {set i 2} {$i < 7} {incr i} { + sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd + } + } + } +} +proc tcl_create {args} { return "tcl_tokenize" } +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +db func mit mit +sqlite3_fts5_register_matchinfo db +do_test 5.0 { + execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl) } + execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 32) } + foreach {rowid text} { + 1 {aaaa cc b aaaaa cc aa} + 2 {aa aa bb a bbb} + 3 {bb aaaaa aaaaa b aaaa aaaaa} + 4 {aa a b aaaa aa} + 5 {aa b ccc aaaaa cc} + 6 {aa aaaaa bbbb cc aaa} + 7 {aaaaa aa aa ccccc bb} + 8 {ccc bbbbb ccccc bbb c} + 9 {cccccc bbbb a aaa cccc c} + + 20 {ddd f ddd eeeee fff ffff eeee ddd fff eeeee dddddd eeee} + 21 {fffff eee dddd fffff dd ee ee eeeee eee eeeeee ee dd e} + 22 {fffff d eeee dddd fffff dddddd ffff ddddd eeeee ee eee dddd ddddd} + 23 {ddddd fff ddd eeeee ffff eeee ddd ff ff ffffff eeeeee dddd ffffff} + 24 {eee dd ee dddd dddd eeeeee e eee fff ffff} + 25 {ddddd ffffff dddddd fff ddd ddddd ddd f eeee fff dddd f} + 26 {f ffff fff fff eeeeee dddd d dddddd ddddd eee ff eeeee} + 27 {eee fff dddddd eeeee eeeee dddd ddddd ffff f eeeee eee dddddd ddddd d} + 28 {dd ddddd d ddd d fff d dddd ee dddd ee ddd dddddd dddddd} + 29 {eeee dddd ee dddd eeee dddd dd fffff f ddd eeeee ddd ee} + 30 {ff ffffff eeeeee eeeee eee ffffff ff ffff f fffff eeeee} + 31 {fffff eeeeee dddd eeee eeee eeeeee eee fffff d ddddd ffffff ffff dddddd} + 32 {dddddd fffff ee eeeeee eeee ee fff dddd fff eeee ffffff eeeeee ffffff} + 33 {ddddd eeee dd ffff dddddd fff eeee ddddd ffff eeee ddd} + 34 {ee dddd ddddd dddddd eeee eeeeee f dd ee dddddd 
ffffff} + 35 {ee dddd dd eeeeee ddddd eee d eeeeee dddddd eee dddd fffff} + 36 {eee ffffff ffffff e fffff eeeee ff dddddd dddddd fff} + 37 {eeeee fffff dddddd dddd ffffff fff f dd ee dd dd eeeee} + 38 {eeeeee ee d ff eeeeee eeeeee eee eeeee ee ffffff dddd eeee dddddd ee} + 39 {eeeeee ddd fffff e dddd ee eee eee ffffff ee f d dddd} + 40 {ffffff dddddd eee ee ffffff eee eeee ddddd ee eeeeee f} + 41 {ddd ddd fff fffff ee fffff f fff ddddd fffff} + 42 {dddd ee ff d f ffffff fff ffffff ff dd dddddd f eeee} + 43 {d dd fff fffff d f fff e dddd ee ee} + 44 {ff ffff eee ddd d dd ffff dddd d eeee d eeeeee} + 45 {eeee f eeeee ee e ffff f ddd e fff} + 46 {ffff d ffff eeee ffff eeeee f ffff ddddd eee} + 47 {dd dd dddddd ddddd fffff dddddd ddd ddddd eeeeee ffff eeee eee ee} + 48 {ffff ffff e dddd ffffff dd dd dddd f fffff} + 49 {ffffff d dddddd ffff eeeee f ffff ffff d dd fffff eeeee} + + 50 {x e} + } { + execsql { INSERT INTO t1(rowid, a) VALUES($rowid, $text) } + } +} {} + +set res [list {*}{ + 1 {3 24 8 2 12 6} + 5 {2 24 8 2 12 6} + 6 {3 24 8 1 12 6} + 7 {3 24 8 1 12 6} + 9 {2 24 8 3 12 6} +}] +do_execsql_test 5.1.1 { + SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c' +} $res +do_execsql_test 5.1.2 { + SELECT count(*) FROM t1 WHERE t1 MATCH 'd e f' +} 29 + +faultsim_save_and_close +do_faultsim_test 5.2 -faults oom* -prep { + faultsim_restore_and_reopen + sqlite3_fts5_create_tokenizer db tcl tcl_create + sqlite3_fts5_register_matchinfo db + db func mit mit +} -body { + db eval { + SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c' + } +} -test { + faultsim_test_result [list 0 $::res] +} + +do_faultsim_test 5.3 -faults oom* -prep { + faultsim_restore_and_reopen + sqlite3_fts5_create_tokenizer db tcl tcl_create +} -body { + db eval { + SELECT count(*) FROM t1 WHERE t1 MATCH 'd AND e AND f' + } +} -test { + faultsim_test_result {0 29} +} + +do_faultsim_test 5.4 -faults oom* -prep { + faultsim_restore_and_reopen + 
+ sqlite3_fts5_create_tokenizer db tcl tcl_create +} -body { + db eval { + SELECT count(*) FROM t1 WHERE t1 MATCH 'x + e' + } +} -test { + faultsim_test_result {0 1} +} + +#------------------------------------------------------------------------- +catch { db close } +breakpoint +do_faultsim_test 6 -faults oom* -prep { + sqlite_orig db test.db + sqlite3_db_config_lookaside db 0 0 0 +} -body { + load_static_extension db fts5 +} -test { + faultsim_test_result {0 {}} {1 {initialization of fts5 failed: }} + if {$testrc==0} { + db eval { CREATE VIRTUAL TABLE temp.t1 USING fts5(x) } + } + db close +} finish_test ADDED ext/fts5/test/fts5fault7.test Index: ext/fts5/test/fts5fault7.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5fault7.test @@ -0,0 +1,45 @@ +# 2015 September 3 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# This file is focused on OOM errors. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5fault2 + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +#------------------------------------------------------------------------- +# Test fault-injection on a query that uses xColumnSize() on columnsize=0 +# table. 
+# +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0); + INSERT INTO t1 VALUES('a b c d e f g'); + INSERT INTO t1 VALUES('a b c d'); + INSERT INTO t1 VALUES('a b c d e f g h i j'); +} + + +fts5_aux_test_functions db +do_faultsim_test 1 -faults oom* -body { + execsql { SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH 'b' } +} -test { + faultsim_test_result {0 {7 4 10}} {1 SQLITE_NOMEM} +} + +finish_test + Index: ext/fts5/test/fts5matchinfo.test ================================================================== --- ext/fts5/test/fts5matchinfo.test +++ ext/fts5/test/fts5matchinfo.test @@ -353,14 +353,14 @@ } {1 1 one 2 2 two 3 3 three} #--------------------------------------------------------------------------- # Test the 'y' matchinfo flag # -set sqlite_fts3_enable_parentheses 1 reset_db +sqlite3_fts5_register_matchinfo db do_execsql_test 11.0 { - CREATE VIRTUAL TABLE tt USING fts3(x, y); + CREATE VIRTUAL TABLE tt USING fts5(x, y); INSERT INTO tt VALUES('c d a c d d', 'e a g b d a'); -- 1 INSERT INTO tt VALUES('c c g a e b', 'c g d g e c'); -- 2 INSERT INTO tt VALUES('b e f d e g', 'b a c b c g'); -- 3 INSERT INTO tt VALUES('a c f f g d', 'd b f d e g'); -- 4 INSERT INTO tt VALUES('g a c f c f', 'd g g b c c'); -- 5 @@ -430,28 +430,26 @@ do_execsql_test 11.1.$tn.2 { SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 } -set sqlite_fts3_enable_parentheses 0 #--------------------------------------------------------------------------- # Test the 'b' matchinfo flag # -set sqlite_fts3_enable_parentheses 1 reset_db +sqlite3_fts5_register_matchinfo db db func mit mit do_test 12.0 { set cols [list] for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" } - execsql "CREATE VIRTUAL TABLE tt USING fts3([join $cols ,])" + execsql "CREATE VIRTUAL TABLE tt USING fts5([join $cols ,])" } {} do_execsql_test 12.1 { INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc'); SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc'; } 
[list [list [expr 1<<4] [expr 1<<(45-32)]]] -set sqlite_fts3_enable_parentheses 0 finish_test ADDED ext/fts5/test/fts5synonym.test Index: ext/fts5/test/fts5synonym.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5synonym.test @@ -0,0 +1,460 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on custom tokenizers that support synonyms. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5synonym + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +foreach S { + {zero 0} + {one 1 i} + {two 2 ii} + {three 3 iii} + {four 4 iv} + {five 5 v} + {six 6 vi} + {seven 7 vii} + {eight 8 viii} + {nine 9 ix} +} { + foreach s $S { + set o [list] + foreach x $S {if {$x!=$s} {lappend o $x}} + set ::syn($s) $o + } +} + +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + } +} + +proc tcl_create {args} { + return "tcl_tokenize" +} + +sqlite3_fts5_create_tokenizer db tcl tcl_create + +#------------------------------------------------------------------------- +# Warm body test for the code in fts5_tcl.c. 
+# +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); + INSERT INTO ft VALUES('abc def ghi'); + INSERT INTO ft VALUES('jkl mno pqr'); + SELECT rowid, x FROM ft WHERE ft MATCH 'def'; + SELECT x, rowid FROM ft WHERE ft MATCH 'pqr'; +} {1 {abc def ghi} {jkl mno pqr} 2} + +#------------------------------------------------------------------------- +# Test a tokenizer that supports synonyms by adding extra entries to the +# FTS index. +# + +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {$tflags=="document" && [info exists ::syn($w)]} { + foreach s $::syn($w) { + sqlite3_fts5_token -colo $s $iStart $iEnd + } + } + } +} +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create + +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); + INSERT INTO ft VALUES('one two three'); + INSERT INTO ft VALUES('four five six'); + INSERT INTO ft VALUES('eight nine ten'); +} {} + +foreach {tn expr res} { + 1 "3" 1 + 2 "eight OR 8 OR 5" {2 3} + 3 "10" {} + 4 "1*" {1} + 5 "1 + 2" {1} +} { + do_execsql_test 2.1.$tn { + SELECT rowid FROM ft WHERE ft MATCH $expr + } $res +} + +#------------------------------------------------------------------------- +# Test some broken tokenizers: +# +# 3.1.*: A tokenizer that declares the very first token to be colocated. +# +# 3.2.*: A tokenizer that reports two identical tokens at the same position. +# This is allowed. 
+# +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +proc tcl_tokenize {tflags text} { + set bColo 1 + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + if {$bColo} { + sqlite3_fts5_token -colo $w $iStart $iEnd + set bColo 0 + } { + sqlite3_fts5_token $w $iStart $iEnd + } + } +} +do_execsql_test 3.1.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); + INSERT INTO ft VALUES('one two three'); + CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row); + SELECT * FROM vv; +} { + one 1 1 three 1 1 two 1 1 +} + +do_execsql_test 3.1.1 { + INSERT INTO ft(ft) VALUES('integrity-check'); +} {} + +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + } +} + +do_execsql_test 3.1.2 { + SELECT rowid FROM ft WHERE ft MATCH 'one two three' +} {1} + +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + sqlite3_fts5_token -colo $w $iStart $iEnd + } +} +do_execsql_test 3.2.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); + INSERT INTO ft VALUES('one one two three'); + CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row); + SELECT * FROM vv; +} { + one 1 4 three 1 2 two 1 2 +} +do_execsql_test 3.2.1 { + SELECT rowid FROM ft WHERE ft MATCH 'one'; +} {1} +do_execsql_test 3.2.2 { + SELECT rowid FROM ft WHERE ft MATCH 'one two three'; +} {1} +do_execsql_test 3.2.3 { + SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three'; +} {1} +do_execsql_test 3.2.4 { + SELECT rowid FROM ft WHERE ft MATCH 'one two two three'; +} {1} +do_execsql_test 3.2.5 { + SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three'; +} {} + +#------------------------------------------------------------------------- +# Check that expressions with synonyms can be parsed and executed. 
+# +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {$tflags=="query" && [info exists ::syn($w)]} { + foreach s $::syn($w) { + sqlite3_fts5_token -colo $s $iStart $iEnd + } + } + } +} + +foreach {tn expr res} { + 1 {abc} {"abc"} + 2 {one} {"one"|"i"|"1"} + 3 {3} {"3"|"iii"|"three"} + 4 {3*} {"3"|"iii"|"three" *} +} { + do_execsql_test 4.1.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res] +} + +do_execsql_test 4.2.1 { + CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tcl); + INSERT INTO xx VALUES('one two'); + INSERT INTO xx VALUES('three four'); +} + +do_execsql_test 4.2.2 { + SELECT rowid FROM xx WHERE xx MATCH '2' +} {1} + +do_execsql_test 4.2.3 { + SELECT rowid FROM xx WHERE xx MATCH '3' +} {2} + +do_test 5.0 { + execsql { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tcl) + } + foreach {rowid a b} { + 1 {four v 4 i three} {1 3 five five 4 one} + 2 {5 1 3 4 i} {2 2 v two 4} + 3 {5 i 5 2 four 4 1} {iii ii five two 1} + 4 {ii four 4 one 5 three five} {one 5 1 iii 4 3} + 5 {three i v i four 4 1} {ii five five five iii} + 6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv} + 7 {ii ii two three 2 5} {iii i ii iii iii one one} + 8 {2 ii i two 3 three 2} {two iv v iii 3 five} + 9 {i 2 iv 3 five four v} {iii 4 three i three ii 1} + } { + execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) } + } +} {} + + +foreach {tn q res} { + 1 {one} { + 1 {four v 4 [i] three} {[1] 3 five five 4 [one]} + 2 {5 [1] 3 4 [i]} {2 2 v two 4} + 3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]} + 4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3} + 5 {three [i] v [i] four 4 [1]} {ii five five five iii} + 6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv} + 7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]} + 8 {2 ii [i] two 3 three 2} {two iv v iii 3 five} + 9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]} + } + 2 {five 
four} { + 1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one} + 2 {[5] 1 3 [4] i} {2 2 [v] two [4]} + 3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1} + 4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3} + 5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii} + 8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]} + 9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1} + } + 3 {one OR two OR iii OR 4 OR v} { + 1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]} + 2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]} + 3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]} + 4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]} + 5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]} + 6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]} + 7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]} + 8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]} + 9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]} + } + + 4 {5 + 1} { + 2 {[5 1] 3 4 i} {2 2 v two 4} + 3 {[5 i] 5 2 four 4 1} {iii ii five two 1} + 4 {ii four 4 one 5 three five} {one [5 1] iii 4 3} + 5 {three i [v i] four 4 1} {ii five five five iii} + } + + 5 {one + two + three} { + 7 {ii ii two three 2 5} {iii [i ii iii] iii one one} + 8 {2 ii [i two 3] three 2} {two iv v iii 3 five} + } + + 6 {"v v"} { + 1 {four v 4 i three} {1 3 [five five] 4 one} + 5 {three i v i four 4 1} {ii [five five five] iii} + } +} { + do_execsql_test 5.1.$tn { + SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']') + FROM t1 WHERE t1 MATCH $q + } $res +} + +# Test that the xQueryPhrase() API works with synonyms. 
+# +proc mit {blob} { + set scan(littleEndian) i* + set scan(bigEndian) I* + binary scan $blob $scan($::tcl_platform(byteOrder)) r + return $r +} +db func mit mit +sqlite3_fts5_register_matchinfo db + +foreach {tn q res} { + 1 {one} { + 1 {1 11 7 2 12 6} 2 {2 11 7 0 12 6} + 3 {2 11 7 1 12 6} 4 {1 11 7 2 12 6} + 5 {3 11 7 0 12 6} 6 {0 11 7 2 12 6} + 7 {0 11 7 3 12 6} 8 {1 11 7 0 12 6} + 9 {1 11 7 2 12 6} + } +} { + do_execsql_test 5.2.$tn { + SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q + } $res +} + + +#------------------------------------------------------------------------- +# Test terms with more than 4 synonyms. +# +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {$tflags=="query" && [string length $w]==1} { + for {set i 2} {$i<=10} {incr i} { + sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd + } + } + } +} + +do_execsql_test 6.0.1 { + CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl); + INSERT INTO t1 VALUES('yy xx qq'); + INSERT INTO t1 VALUES('yy xx xx'); +} +do_execsql_test 6.0.2 { + SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)'; +} {{yy xx qq}} + +do_test 6.0.3 { + execsql { + CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl) + } + foreach {rowid a b} { + 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa} + 2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq} + 3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj} + 4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii} + 5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n} + 6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq} + 7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu} + 8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss} + 9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg} + } { + execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) } + } +} {} + +foreach 
{tn q res} { + 1 {a} { + 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]} + 3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj} + 4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii} + 6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq} + } + + 2 {a AND q} { + 1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]} + 6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]} + } + + 3 {o OR (q AND a)} { + 1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]} + 2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq} + 5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n} + 6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]} + 9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg} + } + + 4 {NEAR(q y, 20)} { + 1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa} + 2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]} + } +} { + do_execsql_test 6.1.$tn.asc { + SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']') + FROM t2 WHERE t2 MATCH $q + } $res + + set res2 [list] + foreach {rowid a b} $res { + set res2 [concat [list $rowid $a $b] $res2] + } + + do_execsql_test 6.1.$tn.desc { + SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']') + FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC + } $res2 +} + +do_execsql_test 6.2.1 { + INSERT INTO t2(rowid, a, b) VALUES(13, + 'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy' + ); + SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')') + FROM t2 WHERE t2 MATCH 'x OR y' +} { + 1 { vvvvv qq oo vvvv eee} {ffff uu r qq aaaa} + 2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq} + 4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii} + 13 { } + {(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)} +} + +#------------------------------------------------------------------------- +# Test that the xColumnSize() API is not confused by colocated tokens. 
+# +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +fts5_aux_test_functions db +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {[string length $w]==1} { + for {set i 2} {$i<=10} {incr i} { + sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd + } + } + } +} + +do_execsql_test 7.0.1 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl); + INSERT INTO t1 VALUES('0 2 3', '4 5 6 7'); + INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0'); + SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0'; +} {{3 4} {2 10}} + +do_execsql_test 7.0.2 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +do_execsql_test 7.1.1 { + CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl); + INSERT INTO t2 VALUES('0 2 3', '4 5 6 7'); + INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0'); + SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0'; +} {{3 4} {2 10}} + +do_execsql_test 7.1.2 { + INSERT INTO t2(t2) VALUES('integrity-check'); +} + +finish_test + Index: main.mk ================================================================== --- main.mk +++ main.mk @@ -45,10 +45,11 @@ # This is how we compile # TCCX = $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3 TCCX += -I$(TOP)/ext/async -I$(TOP)/ext/userauth +TCCX += -I$(TOP)/ext/fts5 # Object files for the SQLite library. 
# LIBOBJ+= vdbe.o parse.o \ alter.o analyze.o attach.o auth.o \ @@ -227,10 +228,33 @@ SRC += \ $(TOP)/ext/rbu/sqlite3rbu.c \ $(TOP)/ext/rbu/sqlite3rbu.h + +# FTS5 things +# +FTS5_HDR = \ + $(TOP)/ext/fts5/fts5.h \ + $(TOP)/ext/fts5/fts5Int.h \ + fts5parse.h + +FTS5_SRC = \ + $(TOP)/ext/fts5/fts5_aux.c \ + $(TOP)/ext/fts5/fts5_buffer.c \ + $(TOP)/ext/fts5/fts5_main.c \ + $(TOP)/ext/fts5/fts5_config.c \ + $(TOP)/ext/fts5/fts5_expr.c \ + $(TOP)/ext/fts5/fts5_hash.c \ + $(TOP)/ext/fts5/fts5_index.c \ + fts5parse.c \ + $(TOP)/ext/fts5/fts5_storage.c \ + $(TOP)/ext/fts5/fts5_tokenize.c \ + $(TOP)/ext/fts5/fts5_unicode2.c \ + $(TOP)/ext/fts5/fts5_varint.c \ + $(TOP)/ext/fts5/fts5_vocab.c \ + # Generated source code files # SRC += \ keywordhash.h \ @@ -634,40 +658,20 @@ $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c -# FTS5 things -# -FTS5_SRC = \ - $(TOP)/ext/fts5/fts5.h \ - $(TOP)/ext/fts5/fts5Int.h \ - $(TOP)/ext/fts5/fts5_aux.c \ - $(TOP)/ext/fts5/fts5_buffer.c \ - $(TOP)/ext/fts5/fts5_main.c \ - $(TOP)/ext/fts5/fts5_config.c \ - $(TOP)/ext/fts5/fts5_expr.c \ - $(TOP)/ext/fts5/fts5_hash.c \ - $(TOP)/ext/fts5/fts5_index.c \ - fts5parse.c fts5parse.h \ - $(TOP)/ext/fts5/fts5_storage.c \ - $(TOP)/ext/fts5/fts5_tokenize.c \ - $(TOP)/ext/fts5/fts5_unicode2.c \ - $(TOP)/ext/fts5/fts5_varint.c \ - $(TOP)/ext/fts5/fts5_vocab.c \ - fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . rm -f fts5parse.h ./lemon $(OPTS) fts5parse.y fts5parse.h: fts5parse.c -fts5.c: $(FTS5_SRC) +fts5.c: $(FTS5_SRC) $(FTS5_HDR) tclsh $(TOP)/ext/fts5/tool/mkfts5c.tcl cp $(TOP)/ext/fts5/fts5.h . - userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c sqlite3rbu.o: $(TOP)/ext/rbu/sqlite3rbu.c $(HDR) $(EXTHDR)