Index: ext/fts5/fts5.c ================================================================== --- ext/fts5/fts5.c +++ ext/fts5/fts5.c @@ -16,22 +16,30 @@ #include "fts5Int.h" typedef struct Fts5Table Fts5Table; typedef struct Fts5Cursor Fts5Cursor; +/* +** Virtual-table object. +*/ struct Fts5Table { sqlite3_vtab base; /* Base class used by SQLite core */ Fts5Config *pConfig; /* Virtual table configuration */ Fts5Index *pIndex; /* Full-text index */ Fts5Storage *pStorage; /* Document store */ }; +/* +** Virtual-table cursor object. +*/ struct Fts5Cursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ int idxNum; /* idxNum passed to xFilter() */ sqlite3_stmt *pStmt; /* Statement used to read %_content */ int bEof; /* True at EOF */ + Fts5Expr *pExpr; /* Expression for MATCH queries */ + int bSeekRequired; }; /* ** Close a virtual table handle opened by fts5InitVtab(). If the bDestroy ** argument is non-zero, attempt delete the shadow tables from teh database @@ -163,19 +171,22 @@ #define FTS5_PLAN(idxNum) ((idxNum) & 0x7) #define FTS5_ORDER_DESC 8 /* ORDER BY rowid DESC */ #define FTS5_ORDER_ASC 16 /* ORDER BY rowid ASC */ - +/* +** Search the object passed as the first argument for a usable constraint +** on column iCol using operator eOp. If one is found, return its index in +** the pInfo->aConstraint[] array. If no such constraint is found, return +** a negative value. +*/ static int fts5FindConstraint(sqlite3_index_info *pInfo, int eOp, int iCol){ int i; - for(i=0; inConstraint; i++){ struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; if( p->usable && p->iColumn==iCol && p->op==eOp ) return i; } - return -1; } /* ** Implementation of the xBestIndex method for FTS5 tables. 
There @@ -251,10 +262,11 @@ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; if( pCsr->pStmt ){ int eStmt = fts5StmtType(pCsr->idxNum); sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); } + sqlite3Fts5ExprFree(pCsr->pExpr); sqlite3_free(pCsr); return SQLITE_OK; } @@ -269,19 +281,22 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int ePlan = FTS5_PLAN(pCsr->idxNum); int rc = SQLITE_OK; - assert( ePlan!=FTS5_PLAN_MATCH ); if( ePlan!=FTS5_PLAN_MATCH ){ rc = sqlite3_step(pCsr->pStmt); if( rc!=SQLITE_ROW ){ pCsr->bEof = 1; rc = sqlite3_reset(pCsr->pStmt); }else{ rc = SQLITE_OK; } + }else{ + rc = sqlite3Fts5ExprNext(pCsr->pExpr); + pCsr->bEof = sqlite3Fts5ExprEof(pCsr->pExpr); + pCsr->bSeekRequired = 1; } return rc; } @@ -300,23 +315,34 @@ Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int rc = SQLITE_OK; int ePlan = FTS5_PLAN(idxNum); int eStmt = fts5StmtType(idxNum); + int bAsc = ((idxNum & FTS5_ORDER_ASC) ? 1 : 0); - assert( ePlan!=FTS5_PLAN_MATCH ); memset(&pCursor[1], 0, sizeof(Fts5Cursor) - sizeof(sqlite3_vtab_cursor)); pCsr->idxNum = idxNum; rc = sqlite3Fts5StorageStmt(pTab->pStorage, eStmt, &pCsr->pStmt); - if( ePlan==FTS5_PLAN_ROWID ){ - sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); - } - if( rc==SQLITE_OK ){ - rc = fts5NextMethod(pCursor); + if( ePlan==FTS5_PLAN_MATCH ){ + char **pzErr = &pTab->base.zErrMsg; + const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); + rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc); + pCsr->bEof = sqlite3Fts5ExprEof(pCsr->pExpr); + pCsr->bSeekRequired = 1; + } + }else{ + if( ePlan==FTS5_PLAN_ROWID ){ + sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + } + rc = fts5NextMethod(pCursor); + } } + return rc; } /* ** This is the xEof method of the virtual table. 
SQLite calls this @@ -336,14 +362,14 @@ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int ePlan = FTS5_PLAN(pCsr->idxNum); assert( pCsr->bEof==0 ); - assert( ePlan!=FTS5_PLAN_MATCH ); - if( ePlan!=FTS5_PLAN_MATCH ){ *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); + }else{ + *pRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); } return SQLITE_OK; } @@ -356,17 +382,32 @@ sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ int iCol /* Index of column to read value from */ ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int ePlan = FTS5_PLAN(pCsr->idxNum); + int rc = SQLITE_OK; assert( pCsr->bEof==0 ); - assert( ePlan!=FTS5_PLAN_MATCH ); - if( ePlan!=FTS5_PLAN_MATCH ){ + if( pCsr->bSeekRequired ){ + assert( ePlan==FTS5_PLAN_MATCH && pCsr->pExpr ); + sqlite3_reset(pCsr->pStmt); + sqlite3_bind_int64(pCsr->pStmt, 1, sqlite3Fts5ExprRowid(pCsr->pExpr)); + rc = sqlite3_step(pCsr->pStmt); + if( rc==SQLITE_ROW ){ + rc = SQLITE_OK; + }else{ + rc = sqlite3_reset(pCsr->pStmt); + if( rc==SQLITE_OK ){ + rc = SQLITE_CORRUPT_VTAB; + } + } + } + + if( rc==SQLITE_OK ){ sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); } - return SQLITE_OK; + return rc; } /* ** This function is called to handle an FTS INSERT command. In other words, ** an INSERT statement of the form: Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -72,18 +72,18 @@ ** to access the data stored in the %_data table. */ typedef struct Fts5Index Fts5Index; typedef struct Fts5IndexIter Fts5IndexIter; + /* ** Values used as part of the flags argument passed to IndexQuery(). 
*/ #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_ASC 0x0002 /* Docs in ascending rowid order */ #define FTS5INDEX_QUERY_MATCH 0x0004 /* Use the iMatch arg to Next() */ -#define FTS5INDEX_QUERY_DELETE 0x0008 /* Visit delete markers */ /* ** Create/destroy an Fts5Index object. */ int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**); @@ -112,12 +112,11 @@ /* ** Docid list iteration. */ int sqlite3Fts5IterEof(Fts5IndexIter*); void sqlite3Fts5IterNext(Fts5IndexIter*, i64 iMatch); -int sqlite3Fts5IterSeek(Fts5IndexIter*, i64 iDocid); -i64 sqlite3Fts5IterDocid(Fts5IndexIter*); +i64 sqlite3Fts5IterRowid(Fts5IndexIter*); /* ** Position list iteration. ** ** for( @@ -126,12 +125,12 @@ ** iPos=sqlite3Fts5IterNextPos(pIter) ** ){ ** // token appears at position iPos of column iCol of the current document ** } */ -int sqlite3Fts5IterFirstPos(Fts5IndexIter*, int iCol); -int sqlite3Fts5IterNextPos(Fts5IndexIter*); +// int sqlite3Fts5IterFirstPos(Fts5IndexIter*, int iCol); +// int sqlite3Fts5IterNextPos(Fts5IndexIter*); /* ** Close an iterator opened by sqlite3Fts5IndexQuery(). */ void sqlite3Fts5IterClose(Fts5IndexIter*); @@ -211,10 +210,15 @@ /************************************************************************** ** Interface to code in fts5_storage.c. fts5_storage.c contains contains ** code to access the data stored in the %_content and %_docsize tables. */ + +#define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ +#define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */ +#define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? 
*/ + typedef struct Fts5Storage Fts5Storage; int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**); int sqlite3Fts5StorageClose(Fts5Storage *p, int bDestroy); @@ -224,17 +228,12 @@ int sqlite3Fts5StorageDelete(Fts5Storage *p, i64); int sqlite3Fts5StorageInsert(Fts5Storage *p, sqlite3_value **apVal, int, i64*); int sqlite3Fts5StorageIntegrity(Fts5Storage *p); -#define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ -#define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */ -#define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */ - int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt **); void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); - /* ** End of interface to code in fts5_storage.c. **************************************************************************/ @@ -242,10 +241,11 @@ /************************************************************************** ** Interface to code in fts5_expr.c. */ typedef struct Fts5Expr Fts5Expr; +typedef struct Fts5ExprNode Fts5ExprNode; typedef struct Fts5Parse Fts5Parse; typedef struct Fts5Token Fts5Token; typedef struct Fts5ExprPhrase Fts5ExprPhrase; typedef struct Fts5ExprNearset Fts5ExprNearset; @@ -252,27 +252,33 @@ struct Fts5Token { const char *p; /* Token text (not NULL terminated) */ int n; /* Size of buffer p in bytes */ }; +/* Parse a MATCH expression. 
*/ int sqlite3Fts5ExprNew( Fts5Config *pConfig, - Fts5Index *pIdx, const char *zExpr, Fts5Expr **ppNew, char **pzErr ); -int sqlite3Fts5ExprFirst(Fts5Expr *p); -int sqlite3Fts5ExprNext(Fts5Expr *p); -int sqlite3Fts5ExprEof(Fts5Expr *p); -i64 sqlite3Fts5ExprRowid(Fts5Expr *p); - -void sqlite3Fts5ExprFree(Fts5Expr *p); - -// int sqlite3Fts5IterFirstPos(Fts5Expr*, int iCol, int *piPos); -// int sqlite3Fts5IterNextPos(Fts5Expr*, int *piPos); +/* +** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, bAsc); +** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr); +** rc = sqlite3Fts5ExprNext(pExpr) +** ){ +** // The document with rowid iRowid matches the expression! +** i64 iRowid = sqlite3Fts5ExprRowid(pExpr); +** } +*/ +int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, int bAsc); +int sqlite3Fts5ExprNext(Fts5Expr*); +int sqlite3Fts5ExprEof(Fts5Expr*); +i64 sqlite3Fts5ExprRowid(Fts5Expr*); + +void sqlite3Fts5ExprFree(Fts5Expr*); /* Called during startup to register a UDF with SQLite */ int sqlite3Fts5ExprInit(sqlite3*); /******************************************* @@ -280,15 +286,15 @@ ** C code in this module. The interfaces below this point are called by ** the parser code in fts5parse.y. 
*/ void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...); -Fts5Expr *sqlite3Fts5ParseExpr( - Fts5Parse *pParse, - int eType, - Fts5Expr *pLeft, - Fts5Expr *pRight, +Fts5ExprNode *sqlite3Fts5ParseNode( + Fts5Parse *pParse, + int eType, + Fts5ExprNode *pLeft, + Fts5ExprNode *pRight, Fts5ExprNearset *pNear ); Fts5ExprPhrase *sqlite3Fts5ParseTerm( Fts5Parse *pParse, @@ -303,17 +309,18 @@ Fts5ExprPhrase* ); void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); +void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); void sqlite3Fts5ParseSetColumn(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); -void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5Expr *p); +void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); /* ** End of interface to code in fts5_expr.c. **************************************************************************/ #endif Index: ext/fts5/fts5_expr.c ================================================================== --- ext/fts5/fts5_expr.c +++ ext/fts5/fts5_expr.c @@ -27,33 +27,42 @@ */ void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(size_t)); void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); +struct Fts5Expr { + Fts5Index *pIndex; + Fts5ExprNode *pRoot; + int bAsc; +}; + /* ** eType: ** Expression node type. 
Always one of: ** ** FTS5_AND (pLeft, pRight valid) ** FTS5_OR (pLeft, pRight valid) ** FTS5_NOT (pLeft, pRight valid) ** FTS5_STRING (pNear valid) */ -struct Fts5Expr { +struct Fts5ExprNode { int eType; /* Node type */ - Fts5Expr *pLeft; /* Left hand child node */ - Fts5Expr *pRight; /* Right hand child node */ + Fts5ExprNode *pLeft; /* Left hand child node */ + Fts5ExprNode *pRight; /* Right hand child node */ Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ + int bEof; /* True at EOF */ + i64 iRowid; }; /* ** An instance of the following structure represents a single search term ** or term prefix. */ struct Fts5ExprTerm { int bPrefix; /* True for a prefix term */ char *zTerm; /* nul-terminated term */ + Fts5IndexIter *pIter; /* Iterator for this term */ }; /* ** A phrase. One or more terms that must appear in a contiguous sequence ** within a document for it to match. @@ -80,11 +89,11 @@ */ struct Fts5Parse { Fts5Config *pConfig; char *zErr; int rc; - Fts5Expr *pExpr; /* Result of a successful parse */ + Fts5ExprNode *pExpr; /* Result of a successful parse */ }; void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ if( pParse->rc==SQLITE_OK ){ va_list ap; @@ -166,52 +175,169 @@ static void *fts5ParseAlloc(size_t t){ return sqlite3_malloc((int)t); } static void fts5ParseFree(void *p){ sqlite3_free(p); } int sqlite3Fts5ExprNew( - Fts5Config *pConfig, - Fts5Index *pIdx, + Fts5Config *pConfig, /* FTS5 Configuration */ const char *zExpr, /* Expression text */ Fts5Expr **ppNew, char **pzErr ){ Fts5Parse sParse; Fts5Token token; const char *z = zExpr; int t; /* Next token type */ void *pEngine; + Fts5Expr *pNew; *ppNew = 0; *pzErr = 0; memset(&sParse, 0, sizeof(sParse)); pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); - if( pEngine==0 ) return SQLITE_NOMEM; + if( pEngine==0 ){ return SQLITE_NOMEM; } sParse.pConfig = pConfig; do { t = fts5ExprGetToken(&sParse, &z, &token); sqlite3Fts5Parser(pEngine, t, token, &sParse); }while( 
sParse.rc==SQLITE_OK && t!=FTS5_EOF ); sqlite3Fts5ParserFree(pEngine, fts5ParseFree); assert( sParse.pExpr==0 || (sParse.rc==SQLITE_OK && sParse.zErr==0) ); - *ppNew = sParse.pExpr; + if( sParse.rc==SQLITE_OK ){ + *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr)); + if( pNew==0 ){ + sParse.rc = SQLITE_NOMEM; + }else{ + pNew->pRoot = sParse.pExpr; + pNew->pIndex = 0; + } + } + *pzErr = sParse.zErr; return sParse.rc; } /* -** Free the object passed as the only argument. +** Free the expression node object passed as the only argument. */ -void sqlite3Fts5ExprFree(Fts5Expr *p){ +void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ if( p ){ - sqlite3Fts5ExprFree(p->pLeft); - sqlite3Fts5ExprFree(p->pRight); + sqlite3Fts5ParseNodeFree(p->pLeft); + sqlite3Fts5ParseNodeFree(p->pRight); sqlite3Fts5ParseNearsetFree(p->pNear); sqlite3_free(p); } } + +/* +** Free the expression object passed as the only argument. +*/ +void sqlite3Fts5ExprFree(Fts5Expr *p){ + if( p ){ + sqlite3Fts5ParseNodeFree(p->pRoot); + sqlite3_free(p); + } +} + +/* +** +*/ +static int fts5ExprNodeTest(Fts5Expr *pExpr, Fts5ExprNode *pNode){ + assert( 0 ); + return SQLITE_OK; +} + +static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ + int rc = SQLITE_OK; + + pNode->bEof = 0; + if( pNode->eType==FTS5_STRING ){ + Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; + Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; + assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); + + pTerm->pIter = sqlite3Fts5IndexQuery( + pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), + (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | + (pExpr->bAsc ? 
FTS5INDEX_QUERY_ASC : 0) + ); + if( sqlite3Fts5IterEof(pTerm->pIter) ){ + pNode->bEof = 1; + }else{ + pNode->iRowid = sqlite3Fts5IterRowid(pTerm->pIter); + } + + }else{ + rc = fts5ExprNodeFirst(pExpr, pNode->pLeft); + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeFirst(pExpr, pNode->pRight); + } + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeTest(pExpr, pNode); + } + } + return rc; +} + +static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ + int rc = SQLITE_OK; + + if( pNode->eType==FTS5_STRING ){ + Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; + Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; + assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); + sqlite3Fts5IterNext(pTerm->pIter, 0); + if( sqlite3Fts5IterEof(pTerm->pIter) ){ + pNode->bEof = 1; + }else{ + pNode->iRowid = sqlite3Fts5IterRowid(pTerm->pIter); + } + }else{ + assert( 0 ); + } + return rc; +} + + + +/* +** Begin iterating through the set of documents in index pIdx matched by +** the MATCH expression passed as the first argument. If the "bAsc" parameter +** is passed a non-zero value, iteration is in ascending rowid order. Or, +** if it is zero, in descending order. +** +** Return SQLITE_OK if successful, or an SQLite error code otherwise. It +** is not considered an error if the query does not match any documents. +*/ +int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bAsc){ + int rc; + p->pIndex = pIdx; + p->bAsc = bAsc; + rc = fts5ExprNodeFirst(p, p->pRoot); + return rc; +} + +/* +** Move to the next document +** +** Return SQLITE_OK if successful, or an SQLite error code otherwise. It +** is not considered an error if the query does not match any documents. +*/ +int sqlite3Fts5ExprNext(Fts5Expr *p){ + int rc; + rc = fts5ExprNodeNext(p, p->pRoot); + return rc; +} + +int sqlite3Fts5ExprEof(Fts5Expr *p){ + return p->pRoot->bEof; +} + +i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ + return p->pRoot->iRowid; +} /* ** Argument pIn points to a buffer of nIn bytes. 
This function allocates ** and returns a new buffer populated with a copy of (pIn/nIn) with a ** nul-terminator byte appended to it. @@ -227,11 +353,11 @@ } return zRet; } static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ - *pz = sqlite3_mprintf("%.*s", pToken->n, pToken->p); + *pz = fts5Strdup(pToken->p, pToken->n); if( *pz==0 ) return SQLITE_NOMEM; return SQLITE_OK; } /* @@ -239,11 +365,15 @@ */ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ if( pPhrase ){ int i; for(i=0; inTerm; i++){ - sqlite3_free(pPhrase->aTerm[i].zTerm); + Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; + sqlite3_free(pTerm->zTerm); + if( pTerm->pIter ){ + sqlite3Fts5IterClose(pTerm->pIter); + } } sqlite3_free(pPhrase); } } @@ -355,11 +485,11 @@ } sqlite3_free(pNear); } } -void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5Expr *p){ +void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ assert( pParse->pExpr==0 ); pParse->pExpr = p; } /* @@ -399,11 +529,11 @@ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ if( pParse->rc==SQLITE_OK ){ if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ sqlite3Fts5ParseError( - pParse, "syntax error near \"%.*s\"", pTok->n, pTok->p + pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p ); } } } @@ -458,24 +588,24 @@ /* ** Allocate and return a new expression object. If anything goes wrong (i.e. ** OOM error), leave an error code in pParse and return NULL. 
*/ -Fts5Expr *sqlite3Fts5ParseExpr( +Fts5ExprNode *sqlite3Fts5ParseNode( Fts5Parse *pParse, /* Parse context */ int eType, /* FTS5_STRING, AND, OR or NOT */ - Fts5Expr *pLeft, /* Left hand child expression */ - Fts5Expr *pRight, /* Right hand child expression */ + Fts5ExprNode *pLeft, /* Left hand child expression */ + Fts5ExprNode *pRight, /* Right hand child expression */ Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */ ){ - Fts5Expr *pRet = 0; + Fts5ExprNode *pRet = 0; if( pParse->rc==SQLITE_OK ){ assert( (eType!=FTS5_STRING && pLeft && pRight && !pNear) || (eType==FTS5_STRING && !pLeft && !pRight && pNear) ); - pRet = (Fts5Expr*)sqlite3_malloc(sizeof(Fts5Expr)); + pRet = (Fts5ExprNode*)sqlite3_malloc(sizeof(Fts5ExprNode)); if( pRet==0 ){ pParse->rc = SQLITE_NOMEM; }else{ memset(pRet, 0, sizeof(*pRet)); pRet->eType = eType; @@ -485,12 +615,12 @@ } } if( pRet==0 ){ assert( pParse->rc!=SQLITE_OK ); - sqlite3Fts5ExprFree(pLeft); - sqlite3Fts5ExprFree(pRight); + sqlite3Fts5ParseNodeFree(pLeft); + sqlite3Fts5ParseNodeFree(pRight); sqlite3Fts5ParseNearsetFree(pNear); } return pRet; } @@ -527,11 +657,11 @@ } sqlite3_free(zApp); return zNew; } -static char *fts5ExprPrint(Fts5Config *pConfig, Fts5Expr *pExpr){ +static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ char *zRet = 0; if( pExpr->eType==FTS5_STRING ){ Fts5ExprNearset *pNear = pExpr->pNear; int i; int iTerm; @@ -632,14 +762,14 @@ } zExpr = (const char*)sqlite3_value_text(apVal[0]); rc = sqlite3Fts5ConfigParse(db, nConfig, azConfig, &pConfig, &zErr); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5ExprNew(pConfig, 0, zExpr, &pExpr, &zErr); + rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr); } if( rc==SQLITE_OK ){ - char *zText = fts5ExprPrint(pConfig, pExpr); + char *zText = fts5ExprPrint(pConfig, pExpr->pRoot); if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT); sqlite3_free(zText); } } Index: ext/fts5/fts5_index.c 
================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -294,10 +294,17 @@ /* State used by the fts5DataXXX() functions. */ sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ }; + +struct Fts5IndexIter { + Fts5Index *pIndex; + Fts5Structure *pStruct; + Fts5MultiSegIter *pMulti; +}; + /* ** Buffer object for the incremental building of string data. */ struct Fts5Buffer { @@ -423,14 +430,19 @@ ** Buffer containing current leaf page data. Set to NULL at EOF. ** ** iTermLeafPgno, iTermLeafOffset: ** Leaf page number containing the last term read from the segment. And ** the offset immediately following the term data. +** +** bOneTerm: +** If true, set the iterator to point to EOF after the current doclist has +** been exhausted. Do not proceed to the next term in the segment. */ struct Fts5SegIter { Fts5StructureSegment *pSeg; /* Segment to iterate through */ int iIdx; /* Byte offset within current leaf */ + int bOneTerm; /* If true, iterate through single doclist */ int iLeafPgno; /* Current leaf page number */ Fts5Data *pLeaf; /* Current leaf data */ int iLeafOffset; /* Byte offset within current leaf */ int iTermLeafPgno; @@ -654,10 +666,27 @@ const u8 *pData ){ pBuf->n = 0; fts5BufferAppendBlob(pRc, pBuf, nData, pData); } + +/* +** Compare the contents of the pLeft buffer with the pRight/nRight blob. +** +** Return -ve if pLeft is smaller than pRight, 0 if they are equal or +** +ve if pRight is smaller than pLeft. In other words: +** +** res = *pLeft - *pRight +*/ +static int fts5BufferCompareBlob( + Fts5Buffer *pLeft, /* Left hand side of comparison */ + const u8 *pRight, int nRight /* Right hand side of comparison */ +){ + int nCmp = MIN(pLeft->n, nRight); + int res = memcmp(pLeft->p, pRight, nCmp); + return (res==0 ? 
(pLeft->n - nRight) : res); +} /* ** Compare the contents of the two buffers using memcmp(). If one buffer ** is a prefix of the other, it is considered the lesser. ** @@ -737,11 +766,10 @@ ** Fts5Index object. */ static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid); assert( (pRet==0)==(p->rc!=SQLITE_OK) ); -assert( pRet ); return pRet; } /* ** Read a record from the %_data table into the buffer supplied as the @@ -1003,10 +1031,64 @@ fts5DataWrite(p, FTS5_STRUCTURE_ROWID(iIdx), buf.p, buf.n); fts5BufferFree(&buf); } + +/* +** If the pIter->iOff offset currently points to an entry indicating one +** or more term-less nodes, advance past it and set pIter->nEmpty to +** the number of empty child nodes. +*/ +static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ + if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ + pIter->iOff++; + pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); + }else{ + pIter->nEmpty = 0; + } +} + +/* +** Advance to the next entry within the node. +*/ +static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){ + if( pIter->iOff>=pIter->nData ){ + pIter->aData = 0; + pIter->iChild += pIter->nEmpty; + }else{ + int nPre, nNew; + pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nPre); + pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nNew); + pIter->term.n = nPre-2; + fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff); + pIter->iOff += nNew; + pIter->iChild += (1 + pIter->nEmpty); + fts5NodeIterGobbleNEmpty(pIter); + if( *pRc ) pIter->aData = 0; + } +} + + +/* +** Initialize the iterator object pIter to iterate through the internal +** segment node in pData. 
+*/ +static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){ + memset(pIter, 0, sizeof(*pIter)); + pIter->aData = aData; + pIter->nData = nData; + pIter->iOff = getVarint32(aData, pIter->iChild); + fts5NodeIterGobbleNEmpty(pIter); +} + +/* +** Free any memory allocated by the iterator object. +*/ +static void fts5NodeIterFree(Fts5NodeIter *pIter){ + fts5BufferFree(&pIter->term); +} /* ** Load the next leaf page into the segment iterator. */ static void fts5SegIterNextPage( @@ -1076,10 +1158,81 @@ u8 *a = pIter->pLeaf->p; pIter->iLeafOffset = fts5GetU16(&a[2]); fts5SegIterLoadTerm(p, pIter, 0); } } + +/* +** Initialize the object pIter to point to term pTerm/nTerm within segment +** pSeg, index iIdx. If there is no such term in the index, the iterator +** is set to EOF. +** +** If an error occurs, Fts5Index.rc is set to an appropriate error code. If +** an error has already occurred when this function is called, it is a no-op. +*/ +static void fts5SegIterSeekInit( + Fts5Index *p, /* FTS5 backend */ + int iIdx, /* Config.aHash[] index of FTS index */ + const u8 *pTerm, int nTerm, /* Term to seek to */ + Fts5StructureSegment *pSeg, /* Description of segment */ + Fts5SegIter *pIter /* Object to populate */ +){ + int iPg = 1; + int h; + + assert( pTerm && nTerm ); + memset(pIter, 0, sizeof(*pIter)); + pIter->pSeg = pSeg; + pIter->iIdx = iIdx; + pIter->bOneTerm = 1; + + for(h=pSeg->nHeight-1; h>0; h--){ + Fts5NodeIter node; /* For iterating through internal nodes */ + i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, h, iPg); + Fts5Data *pNode = fts5DataRead(p, iRowid); + if( pNode==0 ) break; + + fts5NodeIterInit(pNode->p, pNode->n, &node); + assert( node.term.n==0 ); + + iPg = node.iChild; + for(fts5NodeIterNext(&p->rc, &node); + node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)>=0; + fts5NodeIterNext(&p->rc, &node) + ){ + iPg = node.iChild; + } + } + + if( iPg>=pSeg->pgnoFirst ){ + int res; + pIter->iLeafPgno = iPg - 1; + 
fts5SegIterNextPage(p, pIter); + if( pIter->pLeaf ){ + u8 *a = pIter->pLeaf->p; + int n = pIter->pLeaf->n; + + pIter->iLeafOffset = fts5GetU16(&a[2]); + fts5SegIterLoadTerm(p, pIter, 0); + + while( (res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)) ){ + if( res<0 ){ + /* Search for the end of the position list within the current page. */ + int iOff; + for(iOff=pIter->iLeafOffset; iOffiLeafOffset = iOff+1; + if( iOffpLeaf); + pIter->pLeaf = 0; + break; + } + } + } +} /* ** Advance iterator pIter to the next entry. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It @@ -1135,13 +1288,17 @@ } } } /* Check if the iterator is now at EOF. If so, return early. */ - if( pIter->pLeaf==0 ) return; - if( bNewTerm ){ - fts5SegIterLoadTerm(p, pIter, nKeep); + if( pIter->pLeaf && bNewTerm ){ + if( pIter->bOneTerm ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + }else{ + fts5SegIterLoadTerm(p, pIter, nKeep); + } } } } /* @@ -1261,10 +1418,11 @@ */ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ int iIdx, /* Config.aHash[] index of FTS index */ + const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */ Fts5MultiSegIter **ppOut /* New object */ ){ int nSeg; /* Number of segments merged */ @@ -1271,10 +1429,12 @@ int nSlot; /* Power of two >= nSeg */ int iIter = 0; /* */ int iSeg; /* Used to iterate through segments */ Fts5StructureLevel *pLvl; Fts5MultiSegIter *pNew; + + assert( (pTerm==0 && nTerm==0) || iLevel<0 ); /* Allocate space for the new multi-seg-iterator. */ if( iLevel<0 ){ nSeg = fts5StructureCountSegments(pStruct); }else{ @@ -1294,11 +1454,16 @@ /* Initialize each of the component segment iterators. 
*/ if( iLevel<0 ){ Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; for(pLvl=&pStruct->aLevel[0]; pLvlnSeg-1; iSeg>=0; iSeg--){ - fts5SegIterInit(p, iIdx, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); + Fts5SegIter *pIter = &pNew->aSeg[iIter++]; + if( pTerm==0 ){ + fts5SegIterInit(p, iIdx, &pLvl->aSeg[iSeg], pIter); + }else{ + fts5SegIterSeekInit(p, iIdx, pTerm, nTerm, &pLvl->aSeg[iSeg], pIter); + } } } }else{ pLvl = &pStruct->aLevel[iLevel]; for(iSeg=nSeg-1; iSeg>=0; iSeg--){ @@ -1699,64 +1864,10 @@ if( pOld[i]!=pNew[i] ) break; } return i; } -/* -** If the pIter->iOff offset currently points to an entry indicating one -** or more term-less nodes, advance past it and set pIter->nEmpty to -** the number of empty child nodes. -*/ -static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ - if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ - pIter->iOff++; - pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); - }else{ - pIter->nEmpty = 0; - } -} - -/* -** Advance to the next entry within the node. -*/ -static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){ - if( pIter->iOff>=pIter->nData ){ - pIter->aData = 0; - pIter->iChild += pIter->nEmpty; - }else{ - int nPre, nNew; - pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nPre); - pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nNew); - pIter->term.n = nPre-2; - fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff); - pIter->iOff += nNew; - pIter->iChild += (1 + pIter->nEmpty); - fts5NodeIterGobbleNEmpty(pIter); - if( *pRc ) pIter->aData = 0; - } -} - - -/* -** Initialize the iterator object pIter to iterate through the internal -** segment node in pData. 
-*/ -static void fts5NodeIterInit(int nData, const u8 *aData, Fts5NodeIter *pIter){ - memset(pIter, 0, sizeof(*pIter)); - pIter->aData = aData; - pIter->nData = nData; - pIter->iOff = getVarint32(aData, pIter->iChild); - fts5NodeIterGobbleNEmpty(pIter); -} - -/* -** Free any memory allocated by the iterator object. -*/ -static void fts5NodeIterFree(Fts5NodeIter *pIter){ - fts5BufferFree(&pIter->term); -} - /* ** This is called once for each leaf page except the first that contains ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that ** is larger than all terms written to earlier leaves, and equal to or @@ -2060,11 +2171,11 @@ Fts5PageWriter *pPg = &pWriter->aWriter[i]; pPg->pgno = pgno; fts5DataBuffer(p, &pPg->buf, iRowid); if( p->rc==SQLITE_OK ){ Fts5NodeIter ss; - fts5NodeIterInit(pPg->buf.n, pPg->buf.p, &ss); + fts5NodeIterInit(pPg->buf.p, pPg->buf.n, &ss); while( ss.aData ) fts5NodeIterNext(&p->rc, &ss); fts5BufferSet(&p->rc, &pPg->term, ss.term.n, ss.term.p); pgno = ss.iChild; fts5NodeIterFree(&ss); } @@ -2165,11 +2276,11 @@ #if 0 fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl); fflush(stdout); #endif - for(fts5MultiIterNew(p, pStruct, iIdx, iLvl, nInput, &pIter); + for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter) ){ Fts5PosIter sPos; /* Used to iterate through position list */ int iCol = 0; /* Current output column */ @@ -2522,11 +2633,11 @@ for(i=0; p->rc==SQLITE_OK && inLvl; i++){ i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, i+1, 1); Fts5Data *pData; pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid); if( pData ){ - fts5NodeIterInit(pData->n, pData->p, &pIter->aLvl[i].s); + fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s); } } if( pIter->nLvl==0 || p->rc ){ pIter->bEof = 1; @@ -2561,11 +2672,11 @@ for(i--; i>=0; i--){ Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; i64 iRowid = 
FTS5_SEGMENT_ROWID(pIter->iIdx,iSegid,i+1,pLvl[1].s.iChild); pLvl->pData = fts5DataRead(p, iRowid); if( pLvl->pData ){ - fts5NodeIterInit(pLvl->pData->n, pLvl->pData->p, &pLvl->s); + fts5NodeIterInit(pLvl->pData->p, pLvl->pData->n, &pLvl->s); } } } pIter->nEmpty = pIter->aLvl[0].s.nEmpty; @@ -2665,11 +2776,11 @@ /* Check that the checksum of the index matches the argument checksum */ for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ Fts5MultiSegIter *pIter; Fts5Structure *pStruct = fts5StructureRead(p, iIdx); - for(fts5MultiIterNew(p, pStruct, iIdx, -1, 0, &pIter); + for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, -1, 0, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter) ){ Fts5PosIter sPos; /* Used to iterate through position list */ int n; /* Size of term in bytes */ @@ -2891,11 +3002,11 @@ } } fts5BufferFree(&term); }else{ Fts5NodeIter ss; - for(fts5NodeIterInit(n, a, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){ + for(fts5NodeIterInit(a, n, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){ if( ss.term.n==0 ){ fts5BufferAppendPrintf(&rc, &s, " left=%d", ss.iChild); }else{ fts5BufferAppendPrintf(&rc,&s, " \"%.*s\"", ss.term.n, ss.term.p); } @@ -2934,6 +3045,91 @@ ** Set the target page size for the index object. */ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz){ p->pgsz = pgsz; } + +/* +** Open a new iterator to iterate through all docids that match the +** specified token or token prefix. +** +** If FTS5INDEX_QUERY_PREFIX is set in flags, the prefix index whose +** pConfig->aPrefix[] entry equals nToken is queried; otherwise index 0 is. +** Returns NULL if the allocation fails or an error is recorded in p->rc. +** A non-NULL result is closed with sqlite3Fts5IterClose(). +*/ +Fts5IndexIter *sqlite3Fts5IndexQuery( + Fts5Index *p, /* FTS index to query */ + const char *pToken, int nToken, /* Token (or prefix) to query for */ + int flags /* Mask of FTS5INDEX_QUERY_X flags */ +){ + Fts5IndexIter *pRet; + int iIdx = 0; + + if( flags & FTS5INDEX_QUERY_PREFIX ){ + Fts5Config *pConfig = p->pConfig; + for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ + if( pConfig->aPrefix[iIdx-1]==nToken ) break; + } + if( iIdx>pConfig->nPrefix ){ + /* No matching prefix index. todo: deal with this. 
*/ + assert( 0 ); + } + } + + pRet = (Fts5IndexIter*)sqlite3_malloc(sizeof(Fts5IndexIter)); + if( pRet ){ + /* Zero the object: if fts5StructureRead() fails, pMulti is never set and + ** sqlite3Fts5IterClose() below would otherwise pass garbage to + ** fts5MultiIterFree(). NOTE(review): assumes that accepts NULL - confirm. */ + memset(pRet, 0, sizeof(Fts5IndexIter)); + /* Read the structure of the index being queried, matching the iIdx + ** passed to fts5MultiIterNew(). */ + pRet->pStruct = fts5StructureRead(p, iIdx); + if( pRet->pStruct ){ + fts5MultiIterNew(p, + pRet->pStruct, iIdx, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti + ); + } + pRet->pIndex = p; + } + + if( p->rc ){ + sqlite3Fts5IterClose(pRet); + pRet = 0; + } + return pRet; +} + +/* +** Return true if the iterator passed as the only argument is at EOF. +*/ +int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ + return fts5MultiIterEof(pIter->pIndex, pIter->pMulti); +} + +/* +** Move to the next matching rowid. +*/ +void sqlite3Fts5IterNext(Fts5IndexIter *pIter, i64 iMatch){ + fts5MultiIterNext(pIter->pIndex, pIter->pMulti); +} + +/* +** Return the current rowid. +*/ +i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ + return fts5MultiIterRowid(pIter->pMulti); +} + +/* +** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). +*/ +void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ + if( pIter ){ + fts5MultiIterFree(pIter->pIndex, pIter->pMulti); + fts5StructureRelease(pIter->pStruct); + fts5CloseReader(pIter->pIndex); + sqlite3_free(pIter); + } +} Index: test/fts5ab.test ================================================================== --- test/fts5ab.test +++ test/fts5ab.test @@ -51,7 +51,27 @@ } {} do_execsql_test 1.6 { SELECT * FROM t1 WHERE rowid=1.99; } {} + +#------------------------------------------------------------------------- + +reset_db +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1 VALUES('one'); + INSERT INTO t1 VALUES('two'); + INSERT INTO t1 VALUES('three'); +} + +do_catchsql_test 2.2 { + SELECT rowid, * FROM t1 WHERE t1 MATCH 'AND AND' +} {1 {fts5: syntax error near "AND"}} + +do_execsql_test 2.3 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'two' } {2 two} +do_execsql_test 2.4 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'three' } {3 three} +do_execsql_test 2.5 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'one' } {1 
one} + + finish_test Index: test/fts5ea.test ================================================================== --- test/fts5ea.test +++ test/fts5ea.test @@ -54,16 +54,16 @@ do_execsql_test 2.$tn {SELECT fts5_expr($expr, 'c1', 'c2')} [list $res] } breakpoint foreach {tn expr err} { - 1 {AND} {syntax error near "AND"} - 2 {abc def AND} {syntax error near ""} - 3 {abc OR AND} {syntax error near "AND"} - 4 {(a OR b) abc} {syntax error near "abc"} - 5 {NEaR (a b)} {syntax error near "NEaR"} - 6 {(a OR b) NOT c)} {syntax error near ")"} + 1 {AND} {fts5: syntax error near "AND"} + 2 {abc def AND} {fts5: syntax error near ""} + 3 {abc OR AND} {fts5: syntax error near "AND"} + 4 {(a OR b) abc} {fts5: syntax error near "abc"} + 5 {NEaR (a b)} {fts5: syntax error near "NEaR"} + 6 {(a OR b) NOT c)} {fts5: syntax error near ")"} 7 {nosuch: a nosuch2: b} {no such column: nosuch} 8 {addr: a nosuch2: b} {no such column: nosuch2} } { do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err] } Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -219,10 +219,16 @@ fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test fts4check.test fts4unicode.test fts4noti.test fts3varint.test fts4growth.test fts4growth2.test } + +test_suite "fts5" -prefix "" -description { + All FTS5 tests. +} -files { + fts5aa.test fts5ab.test fts5ea.test +} test_suite "nofaultsim" -prefix "" -description { "Very" quick test suite. Runs in less than 5 minutes on a workstation. This test suite is the same as the "quick" tests, except that some files that test malloc and IO errors are omitted.