Index: src/expr.c ================================================================== --- src/expr.c +++ src/expr.c @@ -2588,10 +2588,13 @@ } if( pDef->flags & SQLITE4_FUNC_NEEDCOLL ){ if( !pColl ) pColl = db->pDfltColl; sqlite4VdbeAddOp4(v, OP_CollSeq, 0, 0, 0, (char *)pColl, P4_COLLSEQ); } + if( pDef->bMatchinfo ){ + sqlite4VdbeAddOp1(v, OP_Mifunction, pFarg->a[0].pExpr->iTable); + } sqlite4VdbeAddOp4(v, OP_Function, constMask, r1, target, (char*)pDef, P4_FUNCDEF); sqlite4VdbeChangeP5(v, (u8)nFarg); if( nFarg ){ sqlite4ReleaseTempRange(pParse, r1, nFarg); Index: src/fts5.c ================================================================== --- src/fts5.c +++ src/fts5.c @@ -12,10 +12,73 @@ */ #include "sqliteInt.h" #include "vdbeInt.h" +/* +** Stream numbers must be lower than this. +** +** For optimization purposes, it is assumed that a given tokenizer uses +** a set of contiguous stream numbers starting with 0. And that most +** tokens belong to stream 0. +** +** The hard limit is 63 (due to the format of "row size" records). +*/ +#define SQLITE4_FTS5_NSTREAM 32 + +/* +** Records stored within the index: +** +** Row size record: +** There is one "row size" record in the index for each row in the +** indexed table. The "row size" record contains the number of tokens +** in the associated row for each combination of a stream and column +** number (i.e. contains the data required to find the number of +** tokens associated with stream S present in column C of the row for +** all S and C). +** +** The key for the row size record is a single 0x00 byte followed by +** a copy of the PK blob for the table row. +** +** The value is a series of varints. Each column of the table is +** represented by one or more varints packed into the array. +** +** If a column contains only stream 0 tokens, then it is represented +** by a single varint - (nToken << 1), where nToken is the number of +** stream 0 tokens stored in the column. 
+** +** Or, if the column contains tokens from multiple streams, the first +** varint contains a bitmask indicating which of the streams are present +** (stored as ((bitmask << 1) | 0x01)). Following the bitmask is a +** varint containing the number of tokens for each stream present, in +** ascending order of stream number. +** +** TODO: The format above is not currently implemented! Instead, there +** is a simpler place-holder format (which consumes more space). +** +** Global size record: +** There is a single "global size" record stored in the database. The +** database key for this record is a single byte - 0x00. +** +** The data for this record is a series of varint values. The first +** varint is the total number of rows in the table. The subsequent +** varints make up a "row size" record containing the total number of +** tokens for each S/C combination in all rows of the table. +** +** FTS index records: +** The FTS index records implement the following mapping: +** +** (token, document-pk) -> (list of instances) +** +** The key for each index record is in the same format as the keys for +** regular text indexes. An 0x24 byte, followed by the utf-8 representation +** of the token, followed by 0x00, followed by the PK blob for the table +** row. +** +** TODO: Describe value format. +*/ + /* ** Default distance value for NEAR operators. */ #define FTS5_DEFAULT_NEAR 10 @@ -78,19 +141,21 @@ ** expr := expr OR expr ** expr := LP expr RP */ /* -** Context object used by expression parser. +** Structure types used by this module. 
*/ typedef struct Fts5Expr Fts5Expr; typedef struct Fts5ExprNode Fts5ExprNode; typedef struct Fts5List Fts5List; +typedef struct Fts5MatchIter Fts5MatchIter; typedef struct Fts5Parser Fts5Parser; typedef struct Fts5ParserToken Fts5ParserToken; typedef struct Fts5Phrase Fts5Phrase; typedef struct Fts5Prefix Fts5Prefix; +typedef struct Fts5Size Fts5Size; typedef struct Fts5Str Fts5Str; typedef struct Fts5Token Fts5Token; struct Fts5ParserToken { @@ -170,23 +235,45 @@ const u8 *aPk; /* Primary key of current entry (or null) */ int nPk; /* Size of aPk[] in bytes */ }; struct Fts5Expr { - Fts5ExprNode *pRoot; + Fts5ExprNode *pRoot; /* Root node of expression */ + int nPhrase; /* Number of Fts5Str objects in query */ + Fts5Str **apPhrase; /* All Fts5Str objects */ }; /* ** FTS5 specific cursor data. */ struct Fts5Cursor { sqlite4 *db; Fts5Info *pInfo; Fts5Expr *pExpr; /* MATCH expression for this cursor */ - + char *zExpr; /* Full text of MATCH expression */ KVByteArray *aKey; /* Buffer for primary key */ int nKeyAlloc; /* Bytes allocated at aKey[] */ + + KVCursor *pCsr; /* Cursor used to retrive values */ + Mem *aMem; /* Array of column values */ + int bMemValid; /* True if contents of aMem[] are valid */ + + Fts5Size *pSz; /* Local size data */ + Fts5Size *pGlobal; /* Global size data */ + i64 nGlobal; /* Total number of rows in table */ + int *anRow; + + Fts5MatchIter *pIter; /* Used by mi_match_detail() */ +}; + +/* +** A deserialized 'size record' (see above). +*/ +struct Fts5Size { + int nCol; /* Number of columns in indexed table */ + int nStream; /* Number of streams */ + i64 *aSz; /* Token count for each C/S */ }; /* ** This type is used when reading (decoding) an instance-list. */ @@ -196,13 +283,24 @@ int nList; int iList; /* The current entry */ int iCol; - int iWeight; + int iStream; int iOff; }; + +/* +** An instance of this structure is used by the sqlite4_mi_match_detail() +** API to iterate through matches. 
+*/ +struct Fts5MatchIter { + int bValid; /* True if aList[] is current row */ + int iCurrent; /* Current index in aList[] (or -1) */ + int iMatch; /* Current iMatch value */ + InstanceList *aList; /* One iterator for each phrase in expr */ +}; /* ** Return true for EOF, or false if the next entry is valid. */ static int fts5InstanceListNext(InstanceList *p){ @@ -215,11 +313,11 @@ if( (iVal & 0x03)==0x01 ){ p->iCol = (iVal>>2); p->iOff = 0; } else if( (iVal & 0x03)==0x03 ){ - p->iWeight = (iVal>>2); + p->iStream = (iVal>>2); } else{ p->iOff += (iVal>>1); bRet = 0; } @@ -237,11 +335,11 @@ } static void fts5InstanceListAppend( InstanceList *p, /* Instance list to append to */ int iCol, /* Column of new entry */ - int iWeight, /* Weight of new entry */ + int iStream, /* Weight of new entry */ int iOff /* Offset of new entry */ ){ assert( iCol>=p->iCol ); assert( iCol>p->iCol || iOff>=p->iOff ); @@ -249,13 +347,13 @@ p->iList += putVarint32(&p->aList[p->iList], (iCol<<2)|0x01); p->iCol = iCol; p->iOff = 0; } - if( iWeight!=p->iWeight ){ - p->iList += putVarint32(&p->aList[p->iList], (iWeight<<2)|0x03); - p->iWeight = iWeight; + if( iStream!=p->iStream ){ + p->iList += putVarint32(&p->aList[p->iList], (iStream<<2)|0x03); + p->iStream = iStream; } p->iList += putVarint32(&p->aList[p->iList], (iOff-p->iOff)<<1); p->iOff = iOff; @@ -444,11 +542,11 @@ /* ** Callback for fts5CountTokens(). 
*/ static int fts5CountTokensCb( void *pCtx, - int iWeight, + int iStream, int iOff, const char *z, int n, int iSrc, int nSrc ){ (*((int *)pCtx))++; @@ -479,11 +577,11 @@ Fts5Str *pStr; }; static int fts5AppendTokensCb( void *pCtx, - int iWeight, + int iStream, int iOff, const char *z, int n, int iSrc, int nSrc ){ struct AppendTokensCtx *p = (struct AppendTokensCtx *)pCtx; @@ -690,10 +788,11 @@ } static void fts5ExpressionFree(sqlite4 *db, Fts5Expr *pExpr){ if( pExpr ){ fts5FreeExprNode(db, pExpr->pRoot); + sqlite4DbFree(db, pExpr->apPhrase); sqlite4DbFree(db, pExpr); } } typedef struct ExprHier ExprHier; @@ -746,10 +845,25 @@ (*paHier)[*pnHier].nOpen = 0; (*pnHier)++; return SQLITE4_OK; } + +static void fts5FindStrings(Fts5ExprNode *p, Fts5Str ***papStr){ + if( p ){ + if( p->eType==TOKEN_PRIMITIVE ){ + int i; + Fts5Str *aStr = p->pPhrase->aStr; + for(i=0; ipPhrase->nStr; i++){ + **papStr = &aStr[i]; + (*papStr)++; + } + } + fts5FindStrings(p->pLeft, papStr); + fts5FindStrings(p->pRight, papStr); + } +} static int fts5ParseExpression( sqlite4 *db, /* Database handle */ Fts5Tokenizer *pTokenizer, /* Tokenizer module */ sqlite4_tokenizer *p, /* Tokenizer instance */ @@ -760,10 +874,11 @@ Fts5Expr **ppExpr, /* OUT: Expression object */ char **pzErr /* OUT: Error message */ ){ int rc = SQLITE4_OK; Fts5Parser sParse; + int nStr = 0; int nExpr; int i; Fts5Expr *pExpr; int nHier = 0; @@ -815,10 +930,11 @@ pNode->eType = TOKEN_PRIMITIVE; pNode->pPhrase = pPhrase; *pp = pNode; } } + nStr += pPhrase->nStr; break; } case TOKEN_AND: case TOKEN_OR: @@ -883,17 +999,28 @@ rc = SQLITE4_ERROR; } for(i=0; rc==SQLITE4_OK && i0 ) rc = SQLITE4_ERROR; } + + if( rc==SQLITE4_OK ){ + pExpr->nPhrase = nStr; + pExpr->apPhrase = (Fts5Str**)sqlite4DbMallocZero(db, sizeof(Fts5Str*)*nStr); + if( pExpr->apPhrase==0 ){ + rc = SQLITE4_NOMEM; + }else{ + Fts5Str **a = pExpr->apPhrase; + fts5FindStrings(pExpr->pRoot, &a); + } + } if( rc!=SQLITE4_OK ){ fts5ExpressionFree(db, pExpr); *pzErr = 
sParse.zErr; - }else{ - *ppExpr = pExpr; + pExpr = 0; } + *ppExpr = pExpr; sqlite4DbFree(db, aHier); return rc; } /* @@ -1032,10 +1159,11 @@ nByte += sqlite4Strlen30(pItem->pExpr->u.zToken) + 1; } nByte += sizeof(char *) * (j-i); pFts->azTokenizer = (char **)sqlite4DbMallocZero(pParse->db, nByte); if( pFts->azTokenizer==0 ) return; + pFts->nTokenizer = (j-i); pSpace = (char *)&pFts->azTokenizer[j-i]; for(j=i; jnExpr; j++){ ExprListItem *pItem = &pArgs->a[j]; if( pItem->zName && j>i ){ @@ -1092,16 +1220,19 @@ typedef struct TokenizeCtx TokenizeCtx; typedef struct TokenizeTerm TokenizeTerm; struct TokenizeCtx { int rc; int iCol; + int nCol; /* Number of columns in table */ sqlite4 *db; int nMax; + i64 *aSz; /* Number of tokens in each column/stream */ + int nStream; /* Number of streams in document */ Hash hash; }; struct TokenizeTerm { - int iWeight; /* Weight of previous entry */ + int iStream; /* Weight of previous entry */ int iCol; /* Column containing previous entry */ int iOff; /* Token offset of previous entry */ int nToken; /* Size of token in bytes */ int nData; /* Bytes of data in value */ int nAlloc; /* Bytes of data allocated */ @@ -1127,23 +1258,37 @@ return pTerm; } static int fts5TokenizeCb( void *pCtx, - int iWeight, + int iStream, int iOff, const char *zToken, int nToken, int iSrc, int nSrc ){ TokenizeCtx *p = (TokenizeCtx *)pCtx; + sqlite4 *db = p->db; TokenizeTerm *pTerm = 0; TokenizeTerm *pOrig = 0; + /* TODO: Error here if iStream is out of range */ + if( nToken>p->nMax ) p->nMax = nToken; + if( iStream>=p->nStream ){ + int nOld = p->nStream; + int nNew = 4; + while( nNew<=iStream ) nNew = nNew*2; + p->aSz = (i64*)sqlite4DbReallocOrFree(db, p->aSz, nNew*p->nCol*sizeof(i64)); + if( p->aSz==0 ) goto tokenize_cb_out; + memset(&p->aSz[nOld * p->nCol], 0, (nNew-nOld)*p->nCol*sizeof(i64)); + p->nStream = nNew; + } + p->aSz[iStream*p->nCol + p->iCol]++; + pTerm = (TokenizeTerm *)sqlite4HashFind(&p->hash, zToken, nToken); if( pTerm==0 ){ /* Size the 
initial allocation so that it fits in the lookaside buffer */ int nAlloc = sizeof(TokenizeTerm) + nToken + 32; @@ -1161,14 +1306,14 @@ if( pTerm==0 ) goto tokenize_cb_out; } } pOrig = pTerm; - if( iWeight!=pTerm->iWeight ){ - pTerm = fts5TokenizeAppendInt(p, pTerm, (iWeight << 2) | 0x00000003); + if( iStream!=pTerm->iStream ){ + pTerm = fts5TokenizeAppendInt(p, pTerm, (iStream << 2) | 0x00000003); if( !pTerm ) goto tokenize_cb_out; - pTerm->iWeight = iWeight; + pTerm->iStream = iStream; } if( pTerm && p->iCol!=pTerm->iCol ){ pTerm = fts5TokenizeAppendInt(p, pTerm, (p->iCol << 2) | 0x00000001); if( !pTerm ) goto tokenize_cb_out; @@ -1189,10 +1334,150 @@ return 1; } return 0; } + +static int fts5LoadSizeRecord( + sqlite4 *db, /* Database handle */ + u8 *aKey, int nKey, /* KVStore key */ + int nMinStream, /* Space for at least this many streams */ + Fts5Info *pInfo, /* Info record */ + i64 *pnRow, /* non-NULL when reading global record */ + Fts5Size **ppSz /* OUT: Loaded size record */ +){ + Fts5Size *pSz = 0; /* Size object */ + KVCursor *pCsr = 0; /* Cursor used to read global record */ + int rc; + + rc = sqlite4KVStoreOpenCursor(db->aDb[pInfo->iDb].pKV, &pCsr); + if( rc==SQLITE4_OK ){ + rc = sqlite4KVCursorSeek(pCsr, aKey, nKey, 0); + if( rc==SQLITE4_NOTFOUND ){ + if( pnRow ){ + int nByte = sizeof(Fts5Size) + sizeof(i64) * pInfo->nCol * nMinStream; + pSz = sqlite4DbMallocZero(db, nByte); + if( pSz==0 ){ + rc = SQLITE4_NOMEM; + }else{ + pSz->aSz = (i64 *)&pSz[1]; + pSz->nStream = nMinStream; + pSz->nCol = pInfo->nCol; + *pnRow = 0; + rc = SQLITE4_OK; + } + }else{ + rc = SQLITE4_CORRUPT_BKPT; + } + }else if( rc==SQLITE4_OK ){ + const u8 *aData = 0; + int nData = 0; + rc = sqlite4KVCursorData(pCsr, 0, -1, &aData, &nData); + if( rc==SQLITE4_OK ){ + int iOff = 0; + int nStream = 0; + int nAlloc; + + /* If pnRow is not NULL, then this is the global record. Read the + ** number of documents in the table from the start of the record. 
*/ + if( pnRow ){ + iOff += sqlite4GetVarint(&aData[iOff], (u64 *)pnRow); + } + iOff += getVarint32(&aData[iOff], nStream); + nAlloc = (nStream < nMinStream ? nMinStream : nStream); + + pSz = sqlite4DbMallocZero(db, + sizeof(Fts5Size) + sizeof(i64) * pInfo->nCol * nAlloc + ); + if( pSz==0 ){ + rc = SQLITE4_NOMEM; + }else{ + int iCol = 0; + pSz->aSz = (i64 *)&pSz[1]; + pSz->nCol = pInfo->nCol; + pSz->nStream = nAlloc; + while( iOffaSz[iCol*nAlloc]; + for(i=0; i=0 ){ + iOff += sqlite4PutVarint(&a[iOff], nRow); + } + iOff += sqlite4PutVarint(&a[iOff], pSz->nStream); + + for(iCol=0; iColnCol; iCol++){ + int i; + for(i=0; inStream; i++){ + iOff += sqlite4PutVarint(&a[iOff], pSz->aSz[i*pSz->nCol+iCol]); + } + } + + return sqlite4KVStoreReplace(p, aKey, nKey, a, iOff); +} + +static int fts5CsrLoadGlobal(Fts5Cursor *pCsr){ + int rc = SQLITE4_OK; + if( pCsr->pGlobal==0 ){ + int nKey; + u8 aKey[10]; + nKey = putVarint32(aKey, pCsr->pInfo->iRoot); + aKey[nKey++] = 0x00; + rc = fts5LoadSizeRecord( + pCsr->db, aKey, nKey, 0, pCsr->pInfo, &pCsr->nGlobal, &pCsr->pGlobal + ); + } + return rc; +} + +static int fts5CsrLoadSz(Fts5Cursor *pCsr){ + int rc = SQLITE4_OK; + if( pCsr->pSz==0 ){ + sqlite4 *db = pCsr->db; + Fts5Info *pInfo = pCsr->pInfo; + u8 *aKey; + int nKey = 0; + int nPk = pCsr->pExpr->pRoot->nPk; + + aKey = (u8 *)sqlite4DbMallocZero(db, 10 + nPk); + if( !aKey ) return SQLITE4_NOMEM; + + nKey = putVarint32(aKey, pInfo->iRoot); + aKey[nKey++] = 0x00; + memcpy(&aKey[nKey], pCsr->pExpr->pRoot->aPk, nPk); + nKey += nPk; + + rc = fts5LoadSizeRecord(pCsr->db, aKey, nKey, 0, pInfo, 0, &pCsr->pSz); + sqlite4DbFree(db, aKey); + } + + return rc; +} + /* ** Update an fts index. 
*/ int sqlite4Fts5Update( @@ -1205,23 +1490,25 @@ ){ int i; int rc = SQLITE4_OK; KVStore *pStore; TokenizeCtx sCtx; - u8 *aKey = 0; - int nKey = 0; int nTnum = 0; u32 dummy = 0; + + u8 *aSpace = 0; + int nSpace = 0; const u8 *pPK; int nPK; HashElem *pElem; pStore = db->aDb[pInfo->iDb].pKV; - sCtx.rc = SQLITE4_OK; + + memset(&sCtx, 0, sizeof(sCtx)); sCtx.db = db; - sCtx.nMax = 0; + sCtx.nCol = pInfo->nCol; sqlite4HashInit(db->pEnv, &sCtx.hash, 1); pPK = (const u8 *)sqlite4_value_blob(pKey); nPK = sqlite4_value_bytes(pKey); @@ -1234,26 +1521,41 @@ if( pArg->flags & MEM_Str ){ const char *zText; int nText; zText = (const char *)sqlite4_value_text(pArg); - nText = sqlite4_value_bytes(pArg); sCtx.iCol = i; + nText = sqlite4_value_bytes(pArg); + sCtx.iCol = i; rc = pInfo->pTokenizer->xTokenize( &sCtx, pInfo->p, zText, nText, fts5TokenizeCb ); } } - nKey = sqlite4VarintLen(pInfo->iRoot) + 2 + sCtx.nMax + nPK; - aKey = sqlite4DbMallocRaw(db, nKey); - if( aKey==0 ) rc = SQLITE4_NOMEM; + /* Allocate enough space to serialize all the stuff that needs to + ** be inserted into the database. Specifically: + ** + ** * Space for index record keys, + ** * space for the size record and key for this document, and + ** * space for the updated global size record for the document set. + ** + ** To make it easier, the below allocates enough space to simultaneously + ** store the largest index record key and the largest possible global + ** size record. 
+ */ + nSpace = (sqlite4VarintLen(pInfo->iRoot) + 2 + sCtx.nMax + nPK) + + (9 * (2 + pInfo->nCol * sCtx.nStream)); + aSpace = sqlite4DbMallocRaw(db, nSpace); + if( aSpace==0 ) rc = SQLITE4_NOMEM; for(pElem=sqliteHashFirst(&sCtx.hash); pElem; pElem=sqliteHashNext(pElem)){ TokenizeTerm *pTerm = (TokenizeTerm *)sqliteHashData(pElem); if( rc==SQLITE4_OK ){ int nToken = sqliteHashKeysize(pElem); char *zToken = (char *)sqliteHashKey(pElem); + u8 *aKey = aSpace; + int nKey; nKey = putVarint32(aKey, pInfo->iRoot); aKey[nKey++] = 0x24; memcpy(&aKey[nKey], zToken, nToken); nKey += nToken; @@ -1271,12 +1573,63 @@ rc = sqlite4KVStoreReplace(pStore, aKey, nKey, aData, pTerm->nData); } } sqlite4DbFree(db, pTerm); } + + /* Write the size record into the db */ + if( rc==SQLITE4_OK ){ + u8 *aKey = aSpace; + int nKey; + + nKey = putVarint32(aKey, pInfo->iRoot); + aKey[nKey++] = 0x00; + memcpy(&aKey[nKey], pPK, nPK); + nKey += nPK; + + if( bDel==0 ){ + Fts5Size sSz; + sSz.nCol = pInfo->nCol; + sSz.nStream = sCtx.nStream; + sSz.aSz = sCtx.aSz; + rc = fts5StoreSizeRecord(pStore, aKey, nKey, &sSz, -1, &aKey[nKey]); + }else{ + rc = sqlite4KVStoreReplace(pStore, aKey, nKey, 0, -1); + } + } + + /* Update the global record */ + if( rc==SQLITE4_OK ){ + Fts5Size *pSz; /* Deserialized global size record */ + i64 nRow; /* Number of rows in indexed table */ + u8 *aKey = aSpace; /* Space to format the global record key */ + int nKey; /* Size of global record key in bytes */ + + nKey = putVarint32(aKey, pInfo->iRoot); + aKey[nKey++] = 0x00; + rc = fts5LoadSizeRecord(db, aKey, nKey, sCtx.nStream, pInfo, &nRow, &pSz); + assert( rc!=SQLITE4_OK || pSz->nStream>=sCtx.nStream ); + + if( rc==SQLITE4_OK ){ + int iCol; + for(iCol=0; iColnCol; iCol++){ + int iStr; + i64 *aIn = &sCtx.aSz[iCol * sCtx.nStream]; + i64 *aOut = &pSz->aSz[iCol * pSz->nStream]; + for(iStr=0; iStriDb = sqlite4SchemaToIndex(db, pIdx->pSchema); pInfo->iRoot = pIdx->tnum; + pInfo->iTbl = sqlite4FindPrimaryKey(pIdx->pTable, 0)->tnum; 
pInfo->nCol = pIdx->pTable->nCol; fts5TokenizerCreate(pParse, pIdx->pFts, &pInfo->pTokenizer, &pInfo->p); if( pInfo->p==0 ){ assert( pParse->nErr ); @@ -1399,11 +1753,11 @@ ** * the term offset. */ static i64 fts5TermInstanceCksum( const u8 *aTerm, int nTerm, const u8 *aPk, int nPk, - int iWeight, + int iStream, int iCol, int iOff ){ int i; i64 cksum = 0; @@ -1417,11 +1771,11 @@ for(i=0; ipPK, p->nPK, - (const u8 *)zToken, nToken, iWeight, p->iCol, iOff + (const u8 *)zToken, nToken, iStream, p->iCol, iOff ); p->cksum = (p->cksum ^ cksum); return 0; } @@ -1660,11 +2015,11 @@ pAdv = &in1; }else{ pAdv = &in2; } - fts5InstanceListAppend(&out, pAdv->iCol, pAdv->iWeight, pAdv->iOff); + fts5InstanceListAppend(&out, pAdv->iCol, pAdv->iStream, pAdv->iOff); fts5InstanceListNext(pAdv); } if( bFree ){ sqlite4DbFree(db, p1->aData); @@ -1900,14 +2255,26 @@ */ static int fts5OpenCursors(sqlite4 *db, Fts5Info *pInfo, Fts5Cursor *pCsr){ return fts5OpenExprCursors(db, pInfo, pCsr->pExpr->pRoot); } -void sqlite4Fts5Close(sqlite4 *db, Fts5Cursor *pCsr){ +void sqlite4Fts5Close(Fts5Cursor *pCsr){ if( pCsr ){ + sqlite4 *db = pCsr->db; + if( pCsr->aMem ){ + int i; + for(i=0; ipInfo->nCol; i++){ + sqlite4DbFree(db, pCsr->aMem[i].zMalloc); + } + sqlite4DbFree(db, pCsr->aMem); + } + + sqlite4KVCursorClose(pCsr->pCsr); fts5ExpressionFree(db, pCsr->pExpr); + sqlite4DbFree(db, pCsr->pIter); sqlite4DbFree(db, pCsr->aKey); + sqlite4DbFree(db, pCsr->anRow); sqlite4DbFree(db, pCsr); } } static int fts5TokenAdvanceToMatch( @@ -1927,12 +2294,11 @@ } return (p->iCol==pFirst->iCol && p->iOff==iReq); } -static int fts5StringFindInstances(Fts5Cursor *pCsr, int iCol, Fts5Str *pStr){ - sqlite4 *db = pCsr->db; +static int fts5StringFindInstances(sqlite4 *db, int iCol, Fts5Str *pStr){ int i; int rc = SQLITE4_OK; int bEof = 0; int nByte = sizeof(InstanceList) * pStr->nToken; InstanceList *aIn; @@ -1971,11 +2337,11 @@ int bMatch = fts5TokenAdvanceToMatch(&aIn[i], &aIn[0], i, &bEof); if( bMatch==0 || bEof ) 
break; } if( i==pStr->nToken && (iCol<0 || aIn[0].iCol==iCol) ){ /* Record a match here */ - fts5InstanceListAppend(&out, aIn[0].iCol, aIn[0].iWeight, aIn[0].iOff); + fts5InstanceListAppend(&out, aIn[0].iCol, aIn[0].iStream, aIn[0].iOff); } bEof = fts5InstanceListNext(&aIn[0]); } pStr->nList = out.iList; @@ -1990,11 +2356,10 @@ } return 0; } static int fts5StringNearTrim( - Fts5Cursor *pCsr, /* Cursor object that owns both strings */ Fts5Str *pTrim, /* Trim this instance list */ Fts5Str *pNext, /* According to this one */ int nNear ){ if( pNext->nList==0 ){ @@ -2018,19 +2383,19 @@ if( fts5IsNear(&near, &in, nTrail) || fts5IsNear(&in, &near, nLead) ){ /* The current position is a match. Append an entry to the output ** and advance the input cursor. */ - fts5InstanceListAppend(&out, in.iCol, in.iWeight, in.iOff); + fts5InstanceListAppend(&out, in.iCol, in.iStream, in.iOff); bEof = fts5InstanceListNext(&in); }else{ if( near.iColnStr; i++){ Fts5Str *pStr = &pPhrase->aStr[i]; - rc = fts5StringFindInstances(pCsr, pPhrase->iCol, pStr); + rc = fts5StringFindInstances(db, pPhrase->iCol, pStr); } /* Trim the instance lists according to any NEAR constraints. 
*/ for(i=1; rc==SQLITE4_OK && inStr; i++){ int n = pPhrase->aiNear[i-1]; - rc = fts5StringNearTrim(pCsr, &pPhrase->aStr[i], &pPhrase->aStr[i-1], n); + rc = fts5StringNearTrim(&pPhrase->aStr[i], &pPhrase->aStr[i-1], n); } for(i=pPhrase->nStr-1; rc==SQLITE4_OK && i>0; i--){ int n = pPhrase->aiNear[i-1]; - rc = fts5StringNearTrim(pCsr, &pPhrase->aStr[i-1], &pPhrase->aStr[i], n); + rc = fts5StringNearTrim(&pPhrase->aStr[i-1], &pPhrase->aStr[i], n); } *pbMatch = (pPhrase->aStr[0].nList>0); return rc; } -static int fts5PhraseAdvanceToMatch(Fts5Cursor *pCsr, Fts5Phrase *pPhrase){ +static int fts5PhraseAdvanceToMatch(sqlite4 *db, Fts5Phrase *pPhrase){ int rc; do { int bMatch; Fts5Token *pAdvance = 0; - rc = fts5PhraseIsMatch(pCsr, pPhrase, &bMatch, &pAdvance); + rc = fts5PhraseIsMatch(db, pPhrase, &bMatch, &pAdvance); if( rc!=SQLITE4_OK || bMatch ) break; - rc = fts5TokenAdvance(pCsr->db, pAdvance); + rc = fts5TokenAdvance(db, pAdvance); }while( rc==SQLITE4_OK ); return rc; } -static int fts5ExprAdvance(Fts5Cursor *pCsr, Fts5ExprNode *p, int bFirst){ +static int fts5ExprAdvance(sqlite4 *db, Fts5ExprNode *p, int bFirst){ int rc = SQLITE4_OK; switch( p->eType ){ case TOKEN_PRIMITIVE: { Fts5Phrase *pPhrase = p->pPhrase; if( bFirst==0 ){ - rc = fts5TokenAdvance(pCsr->db, &pPhrase->aStr[0].aToken[0]); + rc = fts5TokenAdvance(db, &pPhrase->aStr[0].aToken[0]); } - if( rc==SQLITE4_OK ) rc = fts5PhraseAdvanceToMatch(pCsr, pPhrase); + if( rc==SQLITE4_OK ) rc = fts5PhraseAdvanceToMatch(db, pPhrase); if( rc==SQLITE4_OK ){ rc = fts5TokenPk(&pPhrase->aStr[0].aToken[0], &p->aPk, &p->nPk); }else{ p->aPk = 0; p->nPk = 0; @@ -2141,20 +2506,20 @@ } case TOKEN_AND: p->aPk = 0; p->nPk = 0; - rc = fts5ExprAdvance(pCsr, p->pLeft, bFirst); - if( rc==SQLITE4_OK ) rc = fts5ExprAdvance(pCsr, p->pRight, bFirst); + rc = fts5ExprAdvance(db, p->pLeft, bFirst); + if( rc==SQLITE4_OK ) rc = fts5ExprAdvance(db, p->pRight, bFirst); while( rc==SQLITE4_OK && p->pLeft->aPk && p->pRight->aPk ){ int res = 
fts5KeyCompare( p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk ); if( res<0 ){ - rc = fts5ExprAdvance(pCsr, p->pLeft, 0); + rc = fts5ExprAdvance(db, p->pLeft, 0); }else if( res>0 ){ - rc = fts5ExprAdvance(pCsr, p->pRight, 0); + rc = fts5ExprAdvance(db, p->pRight, 0); }else{ p->aPk = p->pLeft->aPk; p->nPk = p->pLeft->nPk; break; } @@ -2167,13 +2532,13 @@ res = fts5KeyCompare( p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk ); } - if( res<=0 ) rc = fts5ExprAdvance(pCsr, p->pLeft, bFirst); + if( res<=0 ) rc = fts5ExprAdvance(db, p->pLeft, bFirst); if( rc==SQLITE4_OK && res>=0 ){ - rc = fts5ExprAdvance(pCsr, p->pRight, bFirst); + rc = fts5ExprAdvance(db, p->pRight, bFirst); } res = fts5KeyCompare( p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk ); @@ -2192,25 +2557,25 @@ default: assert( p->eType==TOKEN_NOT ); p->aPk = 0; p->nPk = 0; - rc = fts5ExprAdvance(pCsr, p->pLeft, bFirst); + rc = fts5ExprAdvance(db, p->pLeft, bFirst); if( bFirst && rc==SQLITE4_OK ){ - rc = fts5ExprAdvance(pCsr, p->pRight, bFirst); + rc = fts5ExprAdvance(db, p->pRight, bFirst); } while( rc==SQLITE4_OK && p->pLeft->aPk && p->pRight->aPk ){ int res = fts5KeyCompare( p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk ); if( res<0 ){ break; }else if( res>0 ){ - rc = fts5ExprAdvance(pCsr, p->pRight, 0); + rc = fts5ExprAdvance(db, p->pRight, 0); }else{ - rc = fts5ExprAdvance(pCsr, p->pLeft, 0); + rc = fts5ExprAdvance(db, p->pLeft, 0); } } p->aPk = p->pLeft->aPk; p->nPk = p->pLeft->nPk; @@ -2220,11 +2585,14 @@ assert( rc!=SQLITE4_NOTFOUND ); return rc; } int sqlite4Fts5Next(Fts5Cursor *pCsr){ - return fts5ExprAdvance(pCsr, pCsr->pExpr->pRoot, 0); + sqlite4DbFree(pCsr->db, pCsr->pSz); + pCsr->pSz = 0; + pCsr->bMemValid = 0; + return fts5ExprAdvance(pCsr->db, pCsr->pExpr->pRoot, 0); } int sqlite4Fts5Open( sqlite4 *db, /* Database handle */ Fts5Info *pInfo, /* Index description */ @@ -2233,15 +2601,19 @@ Fts5Cursor **ppCsr, /* OUT: New FTS cursor object */ 
char **pzErr /* OUT: Error message */ ){ int rc = SQLITE4_OK; Fts5Cursor *pCsr; + int nMatch = sqlite4Strlen30(zMatch); - pCsr = sqlite4DbMallocZero(db, sizeof(Fts5Cursor)); + pCsr = sqlite4DbMallocZero(db, sizeof(Fts5Cursor) + nMatch + 1); + if( !pCsr ){ rc = SQLITE4_NOMEM; }else{ + pCsr->zExpr = (char *)&pCsr[1]; + memcpy(pCsr->zExpr, zMatch, nMatch); pCsr->pInfo = pInfo; pCsr->db = db; rc = fts5ParseExpression(db, pInfo->pTokenizer, pInfo->p, pInfo->iRoot, pInfo->azCol, pInfo->nCol, zMatch, &pCsr->pExpr, pzErr ); @@ -2251,14 +2623,14 @@ /* Open a KV cursor for each term in the expression. Set each cursor ** to point to the first entry in the range it will scan. */ rc = fts5OpenCursors(db, pInfo, pCsr); } if( rc!=SQLITE4_OK ){ - sqlite4Fts5Close(db, pCsr); + sqlite4Fts5Close(pCsr); pCsr = 0; }else{ - rc = fts5ExprAdvance(pCsr, pCsr->pExpr->pRoot, 1); + rc = fts5ExprAdvance(db, pCsr->pExpr->pRoot, 1); } *ppCsr = pCsr; return rc; } @@ -2270,12 +2642,12 @@ return( pCsr->pExpr->pRoot->aPk!=0 ); } int sqlite4Fts5Pk( Fts5Cursor *pCsr, - int iTbl, - KVByteArray **paKey, + int iTbl, + KVByteArray **paKey, KVSize *pnKey ){ int i; int nReq; const u8 *aPk; @@ -2296,10 +2668,520 @@ *paKey = pCsr->aKey; *pnKey = nReq; return SQLITE4_OK; } + +int sqlite4_mi_column_count(sqlite4_context *pCtx, int *pn){ + int rc = SQLITE4_OK; + if( pCtx->pFts ){ + *pn = pCtx->pFts->pInfo->nCol; + }else{ + rc = SQLITE4_MISUSE; + } + return rc; +} + +int sqlite4_mi_phrase_count(sqlite4_context *pCtx, int *pn){ + int rc = SQLITE4_OK; + if( pCtx->pFts ){ + *pn = pCtx->pFts->pExpr->nPhrase; + }else{ + rc = SQLITE4_MISUSE; + } + return rc; +} + +int sqlite4_mi_phrase_token_count(sqlite4_context *pCtx, int iP, int *pn){ + int rc = SQLITE4_OK; + if( pCtx->pFts ){ + Fts5Expr *pExpr = pCtx->pFts->pExpr; + if( iP>pExpr->nPhrase || iP<0 ){ + *pn = 0; + }else{ + *pn = pExpr->apPhrase[iP]->nToken; + } + }else{ + rc = SQLITE4_MISUSE; + } + return rc; +} + +int sqlite4_mi_stream_count(sqlite4_context *pCtx, 
int *pn){ + int rc = SQLITE4_OK; + Fts5Cursor *pCsr = pCtx->pFts; + if( pCsr ){ + rc = fts5CsrLoadGlobal(pCtx->pFts); + if( rc==SQLITE4_OK ) *pn = pCsr->pGlobal->nStream; + }else{ + rc = SQLITE4_MISUSE; + } + return rc; +} + +static int fts5GetSize(Fts5Size *pSz, int iC, int iS){ + int nToken = 0; + int i; + + if( iC<0 && iS<0 ){ + int nFin = pSz->nCol * pSz->nStream; + for(i=0; iaSz[i]; + }else if( iC<0 ){ + for(i=0; inCol; i++) nToken += pSz->aSz[i*pSz->nStream + iS]; + }else if( iS<0 ){ + for(i=0; inStream; i++) nToken += pSz->aSz[pSz->nStream*iC + i]; + }else if( iCnCol && iSnStream ){ + nToken = pSz->aSz[iC * pSz->nStream + iS]; + } + + return nToken; +} + +int sqlite4_mi_size(sqlite4_context *pCtx, int iC, int iS, int *pn){ + int rc = SQLITE4_OK; + Fts5Cursor *pCsr = pCtx->pFts; + + if( pCsr==0 ){ + rc = SQLITE4_MISUSE; + }else{ + rc = fts5CsrLoadSz(pCsr); + if( rc==SQLITE4_OK ){ + *pn = fts5GetSize(pCsr->pSz, iC, iS); + } + } + return rc; +} + +int sqlite4_mi_total_size(sqlite4_context *pCtx, int iC, int iS, int *pn){ + int rc = SQLITE4_OK; + Fts5Cursor *pCsr = pCtx->pFts; + + if( pCsr==0 ){ + rc = SQLITE4_MISUSE; + }else{ + rc = fts5CsrLoadGlobal(pCsr); + if( rc==SQLITE4_OK ){ + *pn = fts5GetSize(pCsr->pGlobal, iC, iS); + } + } + return rc; +} + +int sqlite4_mi_total_rows(sqlite4_context *pCtx, int *pn){ + int rc = SQLITE4_OK; + Fts5Cursor *pCsr = pCtx->pFts; + if( pCsr==0 ){ + rc = SQLITE4_MISUSE; + }else{ + rc = fts5CsrLoadGlobal(pCsr); + if( rc==SQLITE4_OK ) *pn = pCsr->nGlobal; + } + return rc; +} + +int sqlite4_mi_column_value( + sqlite4_context *pCtx, + int iCol, + sqlite4_value **ppVal +){ + int rc = SQLITE4_OK; + Fts5Cursor *pCsr = pCtx->pFts; + if( pCsr==0 ){ + rc = SQLITE4_MISUSE; + }else{ + if( pCsr->bMemValid==0 ){ + sqlite4 *db = pCsr->db; + + Fts5Info *pInfo = pCsr->pInfo; + if( pCsr->aMem==0 ){ + int nByte = sizeof(Mem) * pInfo->nCol; + pCsr->aMem = (Mem *)sqlite4DbMallocZero(db, nByte); + if( pCsr->aMem==0 ){ + rc = SQLITE4_NOMEM; + }else{ + 
int i; + for(i=0; inCol; i++){ + pCsr->aMem[i].db = db; + } + } + } + + if( pCsr->pCsr==0 && rc==SQLITE4_OK ){ + KVStore *pStore = db->aDb[pInfo->iDb].pKV; + rc = sqlite4KVStoreOpenCursor(pStore, &pCsr->pCsr); + } + + if( rc==SQLITE4_OK ){ + u8 *aKey = 0; int nKey; /* Primary key for current row */ + const u8 *aData; int nData; /* Data record for current row */ + + rc = sqlite4Fts5Pk(pCsr, pInfo->iTbl, &aKey, &nKey); + if( rc==SQLITE4_OK ){ + rc = sqlite4KVCursorSeek(pCsr->pCsr, aKey, nKey, 0); + if( rc==SQLITE4_NOTFOUND ){ + rc = SQLITE4_CORRUPT_BKPT; + } + } + + if( rc==SQLITE4_OK ){ + rc = sqlite4KVCursorData(pCsr->pCsr, 0, -1, &aData, &nData); + } + + if( rc==SQLITE4_OK ){ + int i; + ValueDecoder *pCodec; /* The decoder object */ + + rc = sqlite4VdbeCreateDecoder(db, aData, nData, pInfo->nCol, &pCodec); + for(i=0; rc==SQLITE4_OK && inCol; i++){ + rc = sqlite4VdbeDecodeValue(pCodec, i, 0, &pCsr->aMem[i]); + } + sqlite4VdbeDestroyDecoder(pCodec); + } + + if( rc==SQLITE4_OK ) pCsr->bMemValid = 1; + } + } + + if( rc==SQLITE4_OK ){ + assert( pCsr->bMemValid ); + *ppVal = &pCsr->aMem[iCol]; + } + } + + return rc; +} + +int sqlite4_mi_tokenize( + sqlite4_context *pCtx, + const char *zText, + int nText, + void *p, + int(*x)(void *, int, int, const char *, int, int, int) +){ + int rc = SQLITE4_OK; + Fts5Cursor *pCsr = pCtx->pFts; + + if( pCsr==0 ){ + rc = SQLITE4_MISUSE; + }else{ + Fts5Info *pInfo = pCsr->pInfo; + rc = pInfo->pTokenizer->xTokenize(p, pInfo->p, zText, nText, x); + } + return rc; +} + +static Fts5Str *fts5FindStr(Fts5ExprNode *p, int *piStr){ + Fts5Str *pRet = 0; + if( p->eType==TOKEN_PRIMITIVE ){ + int iStr = *piStr; + if( iStrpPhrase->nStr ){ + pRet = &p->pPhrase->aStr[iStr]; + }else{ + *piStr = iStr - p->pPhrase->nStr; + } + }else{ + pRet = fts5FindStr(p->pLeft, piStr); + if( pRet==0 ) pRet = fts5FindStr(p->pRight, piStr); + } + return pRet; +} + +int sqlite4_mi_match_count( + sqlite4_context *pCtx, + int iC, + int iS, + int iPhrase, + int *pnMatch +){ 
+ int rc = SQLITE4_OK; + Fts5Cursor *pCsr = pCtx->pFts; + if( pCsr ){ + int nMatch = 0; + Fts5Str *pStr; + int iCopy = iPhrase; + InstanceList sList; + + pStr = fts5FindStr(pCsr->pExpr->pRoot, &iCopy); + assert( pStr ); + + fts5InstanceListInit(pStr->aList, pStr->nList, &sList); + while( 0==fts5InstanceListNext(&sList) ){ + if( (iC<0 || sList.iCol==iC) && (iS<0 || sList.iStream==iS) ) nMatch++; + } + *pnMatch = nMatch; + }else{ + rc = SQLITE4_MISUSE; + } + return rc; +} + +int sqlite4_mi_match_offset( + sqlite4_context *pCtx, + int iCol, + int iPhrase, + int iMatch, + int *piOff +){ + return SQLITE4_OK; +} + +int sqlite4_mi_total_match_count( + sqlite4_context *pCtx, + int iCol, + int iPhrase, + int *pnMatch, + int *pnDoc, + int *pnRelevant +){ + return SQLITE4_OK; +} + +static void fts5StrLoadRowcounts(Fts5Str *pStr, int nStream, int *anRow){ + u32 mask = 0; + int iPrevCol = 0; + InstanceList sList; + + fts5InstanceListInit(pStr->aList, pStr->nList, &sList); + while( 0==fts5InstanceListNext(&sList) ){ + if( sList.iCol!=iPrevCol ) mask = 0; + if( (mask & (1<eType==TOKEN_PRIMITIVE ){ + int *anRow = *panRow; + Fts5Phrase *pPhrase = pNode->pPhrase; + + rc = fts5ExprAdvance(db, pNode, 1); + while( rc==SQLITE4_OK ){ + int nIncr = pInfo->nCol * nStream; /* Values for each Fts5Str */ + int i; + for(i=0; inStr; i++){ + fts5StrLoadRowcounts(&pPhrase->aStr[i], nStream, &anRow[i*nIncr]); + } + rc = fts5ExprAdvance(db, pNode, 0); + } + + *panRow = &anRow[pInfo->nCol * nStream * pPhrase->nStr]; + } + + if( rc==SQLITE4_OK ){ + rc = fts5ExprLoadRowcounts(db, pInfo, nStream, pNode->pLeft, panRow); + } + if( rc==SQLITE4_OK ){ + rc = fts5ExprLoadRowcounts(db, pInfo, nStream, pNode->pRight, panRow); + } + } + + return rc; +} + +static int fts5CsrLoadRowcounts(Fts5Cursor *pCsr){ + int rc = SQLITE4_OK; + + if( pCsr->anRow==0 ){ + int nStream = pCsr->pGlobal->nStream; + sqlite4 *db = pCsr->db; + Fts5Expr *pCopy; + Fts5Expr *pExpr = pCsr->pExpr; + Fts5Info *pInfo = pCsr->pInfo; + int 
*anRow; + + pCsr->anRow = anRow = (int *)sqlite4DbMallocZero(db, + pExpr->nPhrase * pInfo->nCol * pCsr->pGlobal->nStream * sizeof(int) + ); + if( !anRow ) return SQLITE4_NOMEM; + + rc = fts5ParseExpression(db, pInfo->pTokenizer, pInfo->p, + pInfo->iRoot, pInfo->azCol, pInfo->nCol, pCsr->zExpr, &pCopy, 0 + ); + if( rc==SQLITE4_OK ){ + rc = fts5OpenExprCursors(db, pInfo, pExpr->pRoot); + } + if( rc==SQLITE4_OK ){ + rc = fts5ExprLoadRowcounts(db, pInfo, nStream, pCopy->pRoot, &anRow); + } + + fts5ExpressionFree(db, pCopy); + } + + return rc; +} + +int sqlite4_mi_row_count( + sqlite4_context *pCtx, /* Context object passed to mi function */ + int iC, /* Specific column (or -ve for all columns) */ + int iS, /* Specific stream (or -ve for all streams) */ + int iP, /* Specific phrase */ + int *pn /* Total number of rows containing C/S/P */ +){ + int rc = SQLITE4_OK; + Fts5Cursor *pCsr = pCtx->pFts; + if( pCsr==0 ){ + rc = SQLITE4_MISUSE; + }else{ + rc = fts5CsrLoadGlobal(pCsr); + if( rc==SQLITE4_OK ) rc = fts5CsrLoadRowcounts(pCsr); + + if( rc==SQLITE4_OK ){ + int i; + int nRow = 0; + int nStream = pCsr->pGlobal->nStream; + int nCol = pCsr->pInfo->nCol; + int *aRow = &pCsr->anRow[iP * nStream * nCol]; + + if( iC<0 && iS<0 ){ + int nFin = nCol * nStream; + for(i=0; iaList[i]; + if( fts5InstanceListEof(p)==0 ){ + if( (pBest==0) + || (p->iColiCol) + || (p->iCol==pBest->iCol && p->iOffiOff) + ){ + pBest = p; + } + } + } + + if( pBest==0 ){ + pIter->iCurrent = -1; + }else{ + pIter->iCurrent = pBest - pIter->aList; + } +} + +static void fts5InitExprIterator( + const u8 *aPk, + int nPk, + Fts5ExprNode *p, + Fts5MatchIter *pIter +){ + if( p ){ + if( p->eType==TOKEN_PRIMITIVE ){ + if( p->nPk==nPk && 0==memcmp(aPk, p->aPk, nPk) ){ + int i; + for(i=0; ipPhrase->nStr; i++){ + Fts5Str *pStr = &p->pPhrase->aStr[i]; + InstanceList *pList = &pIter->aList[pIter->iCurrent++]; + fts5InstanceListInit(pStr->aList, pStr->nList, pList); + fts5InstanceListNext(pList); + } + }else{ + 
memset(&pIter->aList[pIter->iCurrent], 0, sizeof(InstanceList)); + pIter->iCurrent += p->pPhrase->nStr; + } + } + fts5InitExprIterator(aPk, nPk, p->pLeft, pIter); + fts5InitExprIterator(aPk, nPk, p->pRight, pIter); + } +} + +static void fts5InitIterator(Fts5Cursor *pCsr){ + Fts5MatchIter *pIter = pCsr->pIter; + Fts5ExprNode *pRoot = pCsr->pExpr->pRoot; + + pIter->iCurrent = 0; + fts5InitExprIterator(pRoot->aPk, pRoot->nPk, pRoot, pIter); + pIter->iMatch = 0; + pIter->bValid = 1; + fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase); +} + +int sqlite4_mi_match_detail( + sqlite4_context *pCtx, /* Context object passed to mi function */ + int iMatch, /* Index of match */ + int *piOff, /* OUT: Token offset of match */ + int *piC, /* OUT: Column number of match iMatch */ + int *piS, /* OUT: Stream number of match iMatch */ + int *piP /* OUT: Phrase number of match iMatch */ +){ + int rc = SQLITE4_OK; + Fts5Cursor *pCsr = pCtx->pFts; + if( pCsr==0 ){ + rc = SQLITE4_MISUSE; + }else{ + int nPhrase = pCsr->pExpr->nPhrase; + Fts5MatchIter *pIter = pCsr->pIter; + if( pIter==0 ){ + pCsr->pIter = pIter = (Fts5MatchIter *)sqlite4DbMallocZero( + pCsr->db, sizeof(Fts5MatchIter) + sizeof(InstanceList)*nPhrase + ); + if( pIter ){ + pIter->aList = (InstanceList *)&pIter[1]; + }else{ + rc = SQLITE4_NOMEM; + } + } + + if( rc==SQLITE4_OK && (pIter->bValid==0 || iMatchiMatch) ){ + fts5InitIterator(pCsr); +#if 0 + int i; + for(i=0; ipExpr->nPhrase; i++){ + Fts5Str *pStr = pCsr->pExpr->apPhrase[i]; + fts5InstanceListInit(pStr->aList, pStr->nList, &pIter->aList[i]); + fts5InstanceListNext(&pIter->aList[i]); + } + pIter->iMatch = 0; + fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase); +#endif + } + + if( rc==SQLITE4_OK ){ + assert( pIter->iMatch<=iMatch ); + while( pIter->iCurrent>=0 && pIter->iMatchaList[pIter->iCurrent]); + fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase); + pIter->iMatch++; + } + if( pIter->iCurrent<0 ){ + rc = SQLITE4_NOTFOUND; + }else{ + InstanceList *p = 
&pIter->aList[pIter->iCurrent]; + *piOff = p->iOff; + *piC = p->iCol; + *piS = p->iStream; + *piP = pIter->iCurrent; + } + } + } + return rc; +} /************************************************************************** *************************************************************************** ** Below this point is test code. */ Index: src/fts5func.c ================================================================== --- src/fts5func.c +++ src/fts5func.c @@ -8,12 +8,26 @@ ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* */ + +/* +** The BM25 and BM25F implementations in this file are based on information +** found in: +** +** Stephen Robertson and Hugo Zaragoza: "The Probablistic Relevance +** Framework: BM25 and Beyond", 2009. +*/ #include "sqliteInt.h" +#include /* temporary: For log() */ + +static char fts5Tolower(char c){ + if( c>='A' && c<='Z' ) c = c + ('a' - 'A'); + return c; +} static int fts5SimpleCreate( void *pCtx, const char **azArg, int nArg, @@ -25,18 +39,438 @@ static int fts5SimpleDestroy(sqlite4_tokenizer *p){ return SQLITE4_OK; } -static char fts5Tolower(char c){ - if( c>='A' && c<='Z' ) c = c + ('a' - 'A'); - return c; +typedef struct Fts5RankCtx Fts5RankCtx; +struct Fts5RankCtx { + sqlite4 *db; + double avgdl; /* Average document size in tokens */ + int nPhrase; /* Number of phrases in query */ + double *aIdf; /* IDF weights for each phrase in query */ +}; + +static void fts5RankFreeCtx(void *pCtx){ + if( pCtx ){ + Fts5RankCtx *p = (Fts5RankCtx *)pCtx; + sqlite4DbFree(p->db, p); + } +} + +/* +** A BM25 based ranking function for fts5. +** +** This is based on the information in the Robertson/Zaragoza paper +** referenced above. As there is no way to provide relevance feedback +** IDF weights (equation 3.3 in R/Z) are used instead of RSJ for each phrase. 
+** The rest of the implementation is as presented in equation 3.15. +** +** R and Z observe that the experimental evidence suggests that reasonable +** values for free parameters "b" and "k1" are often in the ranges +** (0.5 < b < 0.8) and (1.2 < k1 < 2), although the optimal values depend +** on the nature of both the documents and queries. The implementation +** below sets each parameter to the midpoint of the suggested range. +*/ +static void fts5Rank(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){ + const double b = 0.65; + const double k1 = 1.6; + + int rc = SQLITE4_OK; /* Error code */ + Fts5RankCtx *p; /* Structure to store reusable values */ + int i; /* Used to iterate through phrases */ + double rank = 0.0; /* UDF return value */ + + p = sqlite4_get_auxdata(pCtx, 0); + if( p==0 ){ + sqlite4 *db = sqlite4_context_db_handle(pCtx); + int nPhrase; /* Number of phrases in query expression */ + int nByte; /* Number of bytes of data to allocate */ + + sqlite4_mi_phrase_count(pCtx, &nPhrase); + nByte = sizeof(Fts5RankCtx) + nPhrase * sizeof(double); + p = (Fts5RankCtx *)sqlite4DbMallocZero(db, nByte); + sqlite4_set_auxdata(pCtx, 0, (void *)p, fts5RankFreeCtx); + p = sqlite4_get_auxdata(pCtx, 0); + + if( !p ){ + rc = SQLITE4_NOMEM; + }else{ + int N; /* Total number of docs in collection */ + int ni; /* Number of docs with phrase i */ + + p->db = db; + p->nPhrase = nPhrase; + p->aIdf = (double *)&p[1]; + + /* Determine the IDF weight for each phrase in the query. */ + rc = sqlite4_mi_total_rows(pCtx, &N); + for(i=0; rc==SQLITE4_OK && iaIdf[i] = log((0.5 + N - ni) / (0.5 + ni)); + } + } + + /* Determine the average document length */ + if( rc==SQLITE4_OK ){ + int nTotal; + rc = sqlite4_mi_total_size(pCtx, -1, -1, &nTotal); + if( rc==SQLITE4_OK ){ + p->avgdl = (double)nTotal / (double)N; + } + } + } + } + + for(i=0; rc==SQLITE4_OK && inPhrase; i++){ + int tf; /* Occurences of phrase i in row (term freq.) 
*/ + int dl; /* Tokens in this row (document length) */ + double L; /* Normalized document length */ + double prank; /* Contribution to rank of this phrase */ + + /* Set variable tf to the total number of occurrences of phrase iPhrase + ** in this row (within any column). And dl to the number of tokens in + ** the current row (again, in any column). */ + rc = sqlite4_mi_match_count(pCtx, -1, -1, i, &tf); + if( rc==SQLITE4_OK ) rc = sqlite4_mi_size(pCtx, -1, -1, &dl); + + /* Calculate the normalized document length */ + L = (double)dl / p->avgdl; + + /* Calculate the contribution to the rank made by this phrase. Then + ** add it to variable rank. */ + prank = (p->aIdf[i] * tf) / (k1 * ( (1.0 - b) + b * L) + tf); + rank += prank; + } + + if( rc==SQLITE4_OK ){ + sqlite4_result_double(pCtx, rank); + }else{ + sqlite4_result_error_code(pCtx, rc); + } +} + +typedef struct Snippet Snippet; +typedef struct SnippetText SnippetText; + +struct Snippet { + int iCol; + int iOff; + u64 hlmask; +}; + +struct SnippetText { + char *zOut; /* Pointer to snippet text */ + int nOut; /* Size of zOut in bytes */ + int nAlloc; /* Bytes of space allocated at zOut */ +}; + +typedef struct SnippetCtx SnippetCtx; +struct SnippetCtx { + sqlite4 *db; /* Database handle */ + int nToken; /* Number of tokens in snippet */ + int iOff; /* First token in snippet */ + u64 mask; /* Snippet mask. 
Highlight these terms */ + const char *zStart; + const char *zEnd; + const char *zEllipses; + + SnippetText *pOut; + + int iFrom; + int iTo; + const char *zText; /* Document to extract snippet from */ + int rc; /* Set to NOMEM if OOM is encountered */ +}; + +static void fts5SnippetAppend(SnippetCtx *p, const char *z, int n){ + if( p->rc==SQLITE4_OK ){ + SnippetText *pOut = p->pOut; + if( n<0 ) n = strlen(z); + if( (pOut->nOut + n) > pOut->nAlloc ){ + int nNew = (pOut->nOut+n) * 2; + + pOut->zOut = sqlite4DbReallocOrFree(p->db, pOut->zOut, nNew); + if( pOut->zOut==0 ){ + p->rc = SQLITE4_NOMEM; + return; + } + pOut->nAlloc = sqlite4DbMallocSize(p->db, pOut->zOut); + } + + memcpy(&pOut->zOut[pOut->nOut], z, n); + pOut->nOut += n; + } +} + +static int fts5SnippetCb( + void *pCtx, + int iStream, + int iOff, + const char *z, int n, + int iSrc, int nSrc +){ + SnippetCtx *p = (SnippetCtx *)pCtx; + + if( iOffiOff ){ + return 0; + }else if( iOff>=(p->iOff + p->nToken) ){ + fts5SnippetAppend(p, &p->zText[p->iFrom], p->iTo - p->iFrom); + fts5SnippetAppend(p, "...", 3); + p->iFrom = -1; + return 1; + }else{ + int bHighlight; /* True to highlight term */ + + bHighlight = (p->mask & (1 << (iOff-p->iOff))); + + if( p->iFrom==0 && p->iOff!=0 ){ + p->iFrom = iSrc; + if( p->pOut->nOut==0 ) fts5SnippetAppend(p, p->zEllipses, -1); + } + + if( bHighlight ){ + fts5SnippetAppend(p, &p->zText[p->iFrom], iSrc - p->iFrom); + fts5SnippetAppend(p, p->zStart, -1); + fts5SnippetAppend(p, &p->zText[iSrc], nSrc); + fts5SnippetAppend(p, p->zEnd, -1); + p->iTo = p->iFrom = iSrc+nSrc; + }else{ + p->iTo = iSrc + nSrc; + } + } + + return 0; +} + +static int fts5SnippetText( + sqlite4_context *pCtx, + Snippet *pSnip, + SnippetText *pText, + int nToken, + const char *zStart, + const char *zEnd, + const char *zEllipses +){ + int rc; + sqlite4_value *pVal = 0; + + u64 mask = pSnip->hlmask; + int iOff = pSnip->iOff; + int iCol = pSnip->iCol; + + rc = sqlite4_mi_column_value(pCtx, iCol, &pVal); + if( 
rc==SQLITE4_OK ){ + SnippetCtx sCtx; + int nText; + + nText = sqlite4_value_bytes(pVal); + memset(&sCtx, 0, sizeof(sCtx)); + sCtx.zText = (const char *)sqlite4_value_text(pVal); + sCtx.db = sqlite4_context_db_handle(pCtx); + sCtx.nToken = nToken; + sCtx.iOff = iOff; + sCtx.mask = mask; + sCtx.zStart = zStart; + sCtx.zEnd = zEnd; + sCtx.zEllipses = zEllipses; + sCtx.pOut = pText; + + sqlite4_mi_tokenize(pCtx, sCtx.zText, nText, &sCtx, fts5SnippetCb); + if( sCtx.rc==SQLITE4_OK && sCtx.iFrom>0 ){ + fts5SnippetAppend(&sCtx, &sCtx.zText[sCtx.iFrom], nText - sCtx.iFrom); + } + rc = sCtx.rc; + } + + return rc; +} + +static int fts5BestSnippet( + sqlite4_context *pCtx, /* Context snippet() was called in */ + int iColumn, /* In this column (-1 means any column) */ + u64 *pMask, /* IN/OUT: Mask of high-priority phrases */ + int nToken, /* Number of tokens in requested snippet */ + Snippet *pSnip /* Populate this object */ +){ + sqlite4 *db = sqlite4_context_db_handle(pCtx); + int nPhrase; + int rc = SQLITE4_OK; + int i; + int iPrev = 0; + int iPrevCol = 0; + u64 *aMask; + u64 mask = *pMask; + u64 allmask = 0; + + int iBestOff = nToken-1; + int iBestCol = (iColumn >= 0 ? iColumn : 0); + int nBest = 0; + u64 hlmask = 0; /* Highlight mask associated with iBestOff */ + u64 missmask = 0; /* Mask of missing terms in iBestOff snip. */ + + sqlite4_mi_phrase_count(pCtx, &nPhrase); + aMask = sqlite4DbMallocZero(db, sizeof(u64) * nPhrase); + if( !aMask ) return SQLITE4_NOMEM; + + /* Iterate through all matches for all phrases */ + for(i=0; rc==SQLITE4_OK; i++){ + int iOff; + int iCol; + int iStream; + int iPhrase; + + rc = sqlite4_mi_match_detail(pCtx, i, &iOff, &iCol, &iStream, &iPhrase); + if( rc==SQLITE4_OK ){ + u64 tmask = 0; + u64 miss = 0; + int iMask; + int nShift; + int nScore = 0; + + int nPTok; + int iPTok; + + if( iColumn>=0 && iColumn!=iCol ) continue; + + allmask |= (1 << iPhrase); + + nShift = ((iPrevCol==iCol) ? 
(iOff-iPrev) : 100); + + for(iMask=0; iMask> nShift; + }else{ + aMask[iMask] = 0; + } + } + sqlite4_mi_phrase_token_count(pCtx, iPhrase, &nPTok); + for(iPTok=0; iPToknBest ){ + hlmask = tmask; + missmask = miss; + nBest = nScore; + iBestOff = iOff; + iBestCol = iCol; + } + + iPrev = iOff; + iPrevCol = iCol; + } + } + if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK; + + pSnip->iOff = iBestOff-nToken+1; + pSnip->iCol = iBestCol; + pSnip->hlmask = hlmask; + *pMask = mask & missmask & allmask; + + sqlite4DbFree(db, aMask); + return rc; +} + +static void fts5SnippetImprove( + sqlite4_context *pCtx, + int nToken, /* Size of required snippet */ + int nSz, /* Total size of column in tokens */ + Snippet *pSnip +){ + int i; + int nLead = 0; + int nShift = 0; + + u64 mask = pSnip->hlmask; + int iOff = pSnip->iOff; + + if( mask==0 ) return; + assert( mask & (1 << (nToken-1)) ); + + for(i=0; (mask & (1< nSz-nToken ) nShift = (nSz-nToken) - iOff; + if( iOff+nShift < 0 ) nShift = -1 * iOff; + + iOff += nShift; + mask = mask >> nShift; + + pSnip->iOff = iOff; + pSnip->hlmask = mask; +} + +static void fts5Snippet(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){ + Snippet aSnip[4]; + int nSnip; + int iCol = -1; + int nToken = -15; + int rc; + int nPhrase; + + const char *zStart = ""; + const char *zEnd = ""; + const char *zEllipses = "..."; + + if( nArg>0 ) zStart = (const char *)sqlite4_value_text(apArg[0]); + if( nArg>1 ) zEnd = (const char *)sqlite4_value_text(apArg[1]); + if( nArg>2 ) zEllipses = (const char *)sqlite4_value_text(apArg[2]); + if( nArg>3 ) iCol = sqlite4_value_int(apArg[3]); + if( nArg>4 ) nToken = sqlite4_value_int(apArg[4]); + + rc = sqlite4_mi_phrase_count(pCtx, &nPhrase); + for(nSnip=1; rc==SQLITE4_OK && nSnip<5; nSnip = ((nSnip==2) ? 
3 : (nSnip+1))){ + int nTok; + int i; + u64 mask = ((u64)1 << nPhrase) - 1; + + if( nToken<0 ){ + nTok = nToken * -1; + }else{ + nTok = (nToken + (nSnip-1)) / nSnip; + } + + memset(aSnip, 0, sizeof(aSnip)); + for(i=0; rc==SQLITE4_OK && imutex); + return rc; +} + +int sqlite4_create_mi_function( + sqlite4 *db, + const char *zFunc, + int nArg, + int enc, + void *p, + void (*xFunc)(sqlite4_context*,int,sqlite4_value **), + void (*xDestroy)(void *) +){ + int rc; + int n; + + n = nArg + (nArg>=0); + sqlite4_mutex_enter(db->mutex); + rc = sqlite4_create_function_v2(db, zFunc, n, enc, p, xFunc, 0,0,xDestroy); + if( rc==SQLITE4_OK ){ + FuncDef *p = sqlite4FindFunction(db, zFunc, -1, n, enc, 0); + p->bMatchinfo = 1; + } rc = sqlite4ApiExit(db, rc); sqlite4_mutex_leave(db->mutex); return rc; } Index: src/resolve.c ================================================================== --- src/resolve.c +++ src/resolve.c @@ -433,10 +433,38 @@ pItem->colUsed |= ((Bitmask)1)<<(iCol>=BMS ? BMS-1 : iCol); ExprSetProperty(p, EP_Resolved); } return p; } + +static void resolveMatchArg(Parse *pParse, NameContext *pNC, Expr *pExpr){ + SrcList *pSrc = pNC->pSrcList; + SrcListItem *pItem; + char *zLhs; + int i; + Index *pIdx; + + if( pExpr->op!=TK_ID || pSrc==0 || pExpr==0 ){ + sqlite4ErrorMsg(pParse, "first argument xxx must be a table name"); + return; + } + zLhs = pExpr->u.zToken; + + for(i=0; inSrc; i++){ + pItem = &pSrc->a[i]; + if( pItem->zAlias && sqlite4StrICmp(zLhs, pItem->zAlias)==0 ) break; + if( pItem->zAlias==0 && sqlite4StrICmp(zLhs, pItem->zName)==0 ) break; + } + if( i==pSrc->nSrc ){ + sqlite4ErrorMsg(pParse, "no such table: %s", zLhs); + return; + } + + pExpr->op = TK_NULL; + pExpr->iTable = pItem->iCursor; + ExprSetProperty(pExpr, EP_Resolved); +} static void resolveMatch(Parse *pParse, NameContext *pNC, Expr *pExpr){ Expr *pLeft = pExpr->pLeft; SrcList *pSrc = pNC->pSrcList; SrcListItem *pItem; @@ -614,13 +642,20 @@ } if( is_agg ){ pExpr->op = TK_AGG_FUNCTION; pNC->hasAgg 
= 1; } - if( is_agg ) pNC->allowAgg = 0; - sqlite4WalkExprList(pWalker, pList); - if( is_agg ) pNC->allowAgg = 1; + + if( pParse->nErr==0 ){ + if( pDef->bMatchinfo ){ + resolveMatchArg(pParse, pNC, n>0 ? pList->a[0].pExpr : 0); + } + if( is_agg ) pNC->allowAgg = 0; + sqlite4WalkExprList(pWalker, pList); + if( is_agg ) pNC->allowAgg = 1; + } + /* FIX ME: Compute pExpr->affinity based on the expected return ** type of the function */ return WRC_Prune; } Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -4401,10 +4401,116 @@ int(*x)(void *ctx, int iWeight, int iOff, const char *zToken, int nToken, int iSrc, int nSrc) ), int (*xDestroy)(sqlite4_tokenizer *) ); + +/* +** CAPI4REF: Register a matchinfo function. +*/ +int sqlite4_create_mi_function( + sqlite4 *db, + const char *zFunc, + int nArg, + int enc, + void *p, + void (*xFunc)(sqlite4_context*,int,sqlite4_value **), + void (*xDestroy)(void *) +); + +/* +** CAPIREF: Matchinfo APIs. +** +** Special functions that may be called from within matchinfo UDFs. All +** return an SQLite error code - SQLITE4_OK if successful, or some other +** error code otherwise. +** +** sqlite4_mi_column_count(): +** Set *pn to the number of columns in the queried table. +** +** sqlite4_mi_phrase_count(): +** Set *pn to the number of phrases in the query. +** +** sqlite4_mi_stream_count(): +** Set *pn to the number of streams in the FTS index. +** +** sqlite4_mi_phrase_token_count(): +** Set *pn to the number of tokens in phrase iP of the query. +** +** sqlite4_mi_size(): +** Set *pn to the number of tokens belonging to stream iS in the value +** stored in column iC of the current row. +** +** Either or both of iS and iC may be negative. If iC is negative, then the +** output value is the total number of tokens for the specified stream (or +** streams) across all table columns. 
Similarly, if iS is negative, the +** output value is the total number of tokens in the specified column or +** columns, regardless of stream. +** +** sqlite4_mi_total_size(): +** Similar to sqlite4_mi_size(), except the output parameter is set to +** the total number of tokens belonging to the specified column(s) +** and stream(s) in all rows of the table, not just the current row. +** +** sqlite4_mi_total_rows(): +** Set *pn to the total number of rows in the indexed table. +** +** sqlite4_mi_row_count(): +** Set the output parameter to the total number of rows in the table that +** contain at least one instance of the phrase identified by parameter +** iP in the column(s) and stream(s) identified by parameters iC and iS. +** +** sqlite4_mi_match_count(): +** Set the output parameter to the total number of occurrences of phrase +** iP in the current row that belong to the column(s) and stream(s) +** identified by parameters iC and iS. +** +** Parameter iP may also be negative. In this case, the output value is +** set to the total number of occurrences of all query phrases in the +** current row, subject to the constraints imposed by iC and iS. +** +** sqlite4_mi_match_detail(): +** This function is used to access the details of the iMatch'th match +** (of any phrase) in the current row. Matches are sorted in order of +** occurrence. If parameter iMatch is equal to or greater than the number +** of matches in the current row, SQLITE4_NOTFOUND is returned. Otherwise, +** unless an error occurs, SQLITE4_OK is returned and the *piOff, *piC, *piS, +** and *piP output parameters are set to the token offset, column number, +** stream number and phrase number respectively. +** +** It is anticipated that this function be used to iterate through matches +** in order of occurrence. It is optimized so that it is fastest when +** called with the iMatch parameter set to 0, P or P+1, where P is the +** iMatch value passed to the previous call. 
+** +** sqlite4_mi_column_value(): +** Set *ppVal to point to an sqlite4_value object containing the value +** read from column iCol of the current row. This object is valid until +** the function callback returns. +*/ +int sqlite4_mi_column_count(sqlite4_context *, int *pn); +int sqlite4_mi_phrase_count(sqlite4_context *, int *pn); +int sqlite4_mi_stream_count(sqlite4_context *, int *pn); +int sqlite4_mi_phrase_token_count(sqlite4_context *, int iP, int *pn); + +int sqlite4_mi_total_size(sqlite4_context *, int iC, int iS, int *pn); +int sqlite4_mi_total_rows(sqlite4_context *, int *pn); + +int sqlite4_mi_row_count(sqlite4_context *, int iC, int iS, int iP, int *pn); + +int sqlite4_mi_size(sqlite4_context *, int iC, int iS, int *pn); +int sqlite4_mi_match_count(sqlite4_context *, int iC, int iS, int iP, int *pn); +int sqlite4_mi_match_detail( + sqlite4_context *, int iMatch, int *piOff, int *piC, int *piS, int *piP +); +int sqlite4_mi_column_value(sqlite4_context *, int iC, sqlite4_value **ppVal); + +int sqlite4_mi_tokenize(sqlite4_context *, const char *, int, void *, + int(*x)(void *, int, int, const char *, int, int, int) +); + + /* ** Undo the hack that converts floating point types to integer for ** builds on processors without floating point support. */ Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -637,10 +637,11 @@ void (*xStep)(sqlite4_context*,int,sqlite4_value**); /* Aggregate step */ void (*xFinalize)(sqlite4_context*); /* Aggregate finalizer */ char *zName; /* SQL name of the function. */ FuncDef *pNextName; /* Next function with a different name */ FuncDestructor *pDestructor; /* Reference counted destructor function */ + u8 bMatchinfo; /* True for matchinfo function */ }; /* ** A table of SQL functions. ** @@ -2486,10 +2487,11 @@ ** related vdbe opcodes. 
*/ struct Fts5Info { int iDb; /* Database containing this index */ int iRoot; /* Root page number of index */ + int iTbl; /* Root page number of indexed table */ int nCol; /* Number of columns in indexed table */ char **azCol; /* Column names for table */ Fts5Tokenizer *pTokenizer; /* Tokenizer module */ sqlite4_tokenizer *p; /* Tokenizer instance */ }; @@ -3277,7 +3279,13 @@ void sqlite4Fts5CodeCksum(Parse *, Index *, int, int, int); void sqlite4Fts5CodeQuery(Parse *, Index *, int, int, int); int sqlite4Fts5Pk(Fts5Cursor *, int, KVByteArray **, KVSize *); int sqlite4Fts5Next(Fts5Cursor *pCsr); + +int sqlite4Fts5EntryCksum(sqlite4 *, Fts5Info *, Mem *, Mem *, i64 *); +int sqlite4Fts5RowCksum(sqlite4 *, Fts5Info *, Mem *, Mem *, i64 *); +int sqlite4Fts5Open(sqlite4*, Fts5Info*, const char*, int, Fts5Cursor**,char**); +int sqlite4Fts5Valid(Fts5Cursor *); +void sqlite4Fts5Close(Fts5Cursor *); #endif /* _SQLITEINT_H_ */ Index: src/vdbe.c ================================================================== --- src/vdbe.c +++ src/vdbe.c @@ -1287,10 +1287,18 @@ */ case OP_CollSeq: { assert( pOp->p4type==P4_COLLSEQ ); break; } + +/* Opcode: Mifunction P1 +*/ +case OP_Mifunction: { + pc++; + pOp++; + /* fall through to OP_Function */ +}; /* Opcode: Function P1 P2 P3 P4 P5 ** ** Invoke a user function (P4 is a pointer to a Function structure that ** defines the function) with P5 arguments taken from register P2 and @@ -1342,10 +1350,17 @@ ctx.s.flags = MEM_Null; ctx.s.db = db; ctx.s.xDel = 0; ctx.s.zMalloc = 0; + if( pOp[-1].opcode==OP_Mifunction ){ + ctx.pFts = p->apCsr[pOp[-1].p1]->pFts; + apVal++; + n--; + }else{ + ctx.pFts = 0; + } /* The output cell may already have a buffer allocated. Move ** the pointer to ctx.s so in case the user-function can use ** the already allocated buffer instead of allocating a new one. 
*/ Index: src/vdbeInt.h ================================================================== --- src/vdbeInt.h +++ src/vdbeInt.h @@ -239,10 +239,11 @@ VdbeFunc *pVdbeFunc; /* Auxilary data, if created. */ Mem s; /* The return value is stored here */ Mem *pMem; /* Memory cell used to store aggregate context */ int isError; /* Error code returned by the function. */ CollSeq *pColl; /* Collating sequence */ + Fts5Cursor *pFts; /* fts5 cursor for matchinfo functions */ }; /* ** An Explain object accumulates indented output which is helpful ** in describing recursive data structures. Index: src/vdbeaux.c ================================================================== --- src/vdbeaux.c +++ src/vdbeaux.c @@ -1498,11 +1498,11 @@ */ void sqlite4VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){ if( pCx==0 ){ return; } - sqlite4Fts5Close(p->db, pCx->pFts); + sqlite4Fts5Close(pCx->pFts); if( pCx->pKVCur ){ sqlite4KVCursorClose(pCx->pKVCur); } if( pCx->pTmpKV ){ sqlite4KVStoreClose(pCx->pTmpKV); Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -5227,10 +5227,28 @@ pOp->p1 = pLevel->iIdxCur; } pOp++; } } + + if( (pLevel->plan.wsFlags & WHERE_INDEXED) + && (pLevel->plan.u.pIdx->eIndexType==SQLITE4_INDEX_FTS5) + ){ + VdbeOp *pOp; + VdbeOp *pEnd; + + assert( pLevel->iTabCur!=pLevel->iIdxCur ); + pOp = sqlite4VdbeGetOp(v, pWInfo->iTop); + pEnd = &pOp[sqlite4VdbeCurrentAddr(v) - pWInfo->iTop]; + + while( pOpp1==pLevel->iTabCur && pOp->opcode==OP_Mifunction ){ + pOp->p1 = pLevel->iIdxCur; + } + pOp++; + } + } } /* Final cleanup */ pParse->nQueryLoop = pWInfo->savedNQueryLoop; Index: test/csr1.test ================================================================== --- test/csr1.test +++ test/csr1.test @@ -75,11 +75,11 @@ populate_db_2 do_execsql_test 3.1 { BEGIN; INSERT INTO t1 VALUES(10, randstr(910, 910)); } -do_test 3.2 { sqlite4_lsm_config db main autoflush } [expr 2*1024*1024] +do_test 3.2 { sqlite4_lsm_config db 
main autoflush } [expr 1*1024*1024] do_test 3.3 { sqlite4_lsm_config db main autoflush 4096 } 4096 do_test 3.4 { set res [list] db eval { SELECT a, length(b) AS l FROM t1 } { Index: test/fts5create.test ================================================================== --- test/fts5create.test +++ test/fts5create.test @@ -72,13 +72,13 @@ do_catchsql_test 2.3 { CREATE INDEX ft ON t2 USING fts5("a b c"); } {1 {unrecognized argument: "a b c"}} -do_catchsql_test 2.4 { +breakpoint +do_catchsql_test 2.4 { CREATE INDEX ft ON t2 USING fts5(tokenizer="nosuch"); } {1 {no such tokenizer: "nosuch"}} finish_test - Index: test/fts5query1.test ================================================================== --- test/fts5query1.test +++ test/fts5query1.test @@ -132,8 +132,53 @@ 4 {c:a} {1 2} 5 {a:a*} {1} } { do_execsql_test 7.$tn {SELECT docid FROM t7 WHERE t7 MATCH $expr} $res } + +#------------------------------------------------------------------------- +# +do_execsql_test 8.0 { + CREATE TABLE t8(a PRIMARY KEY, b, c); + CREATE INDEX i8 ON t8 USING fts5(); + INSERT INTO t8 VALUES('one', 'a b c', 'a a a'); + INSERT INTO t8 VALUES('two', 'd e f', 'b b b'); +} + +#do_execsql_test 8.1 { +# SELECT rank(t8) FROM t8 WHERE t8 MATCH 'b a' +#} + +do_execsql_test 9.0 { + CREATE TABLE t9(a PRIMARY KEY, b); + CREATE INDEX i9 ON t9 USING fts5(); + INSERT INTO t9 VALUES('one', + 'a b c d e f g h i j k l m n o p q r s t u v w x y z ' || + 'a b c d e f g h i j k l m n o p q r s t u v w x y z' + ); +} + +#do_execsql_test 9.1 { +# SELECT snippet(t9) FROM t9 WHERE t9 MATCH 'b' +#} + +do_execsql_test 10.1 { + CREATE TABLE ft(content); + CREATE INDEX fti ON ft USING fts5(); +} +do_execsql_test 10.2 { + INSERT INTO ft VALUES('a b c d e'); + INSERT INTO ft VALUES('f g h i j'); +} +do_execsql_test 10.3 { SELECT rowid FROM ft WHERE ft MATCH 'c' } {1} +do_execsql_test 10.4 { SELECT rowid FROM ft WHERE ft MATCH 'f' } {2} + +do_execsql_test 10.5 { + DELETE FROM ft; + CREATE TABLE ft2(a, b, c); + CREATE 
INDEX fti2 ON ft2 USING fts5(); + INSERT INTO ft2 VALUES('1 2 3 4 5', '6 7 8 9 10', '11 12 13 14 15'); + SELECT snippet(ft2, '[', ']', '...', -1, 3) FROM ft2 WHERE ft2 MATCH '5'; +} {{...3 4 [5]}} finish_test ADDED test/fts5snippet.test Index: test/fts5snippet.test ================================================================== --- /dev/null +++ test/fts5snippet.test @@ -0,0 +1,292 @@ +# 2013 January 10 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# The tests in this file test the FTS5 snippet() function. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# If SQLITE4_ENABLE_FTS3 is not defined, omit this file. +source $testdir/fts3_common.tcl + +set DO_MALLOC_TEST 0 + +# Transform the list $L to its "normal" form. So that it can be compared to +# another list with the same set of elements using [string compare]. +# +proc normalize {L} { + set ret [list] + foreach l $L {lappend ret $l} + return $ret +} + +# Document text used by a few tests. Contains the English names of all +# integers between 1 and 300. 
+# +set numbers [normalize { + one two three four five six seven eight nine ten eleven twelve thirteen + fourteen fifteen sixteen seventeen eighteen nineteen twenty twentyone + twentytwo twentythree twentyfour twentyfive twentysix twentyseven + twentyeight twentynine thirty thirtyone thirtytwo thirtythree thirtyfour + thirtyfive thirtysix thirtyseven thirtyeight thirtynine forty fortyone + fortytwo fortythree fortyfour fortyfive fortysix fortyseven fortyeight + fortynine fifty fiftyone fiftytwo fiftythree fiftyfour fiftyfive fiftysix + fiftyseven fiftyeight fiftynine sixty sixtyone sixtytwo sixtythree sixtyfour + sixtyfive sixtysix sixtyseven sixtyeight sixtynine seventy seventyone + seventytwo seventythree seventyfour seventyfive seventysix seventyseven + seventyeight seventynine eighty eightyone eightytwo eightythree eightyfour + eightyfive eightysix eightyseven eightyeight eightynine ninety ninetyone + ninetytwo ninetythree ninetyfour ninetyfive ninetysix ninetyseven + ninetyeight ninetynine onehundred onehundredone onehundredtwo + onehundredthree onehundredfour onehundredfive onehundredsix onehundredseven + onehundredeight onehundrednine onehundredten onehundredeleven + onehundredtwelve onehundredthirteen onehundredfourteen onehundredfifteen + onehundredsixteen onehundredseventeen onehundredeighteen onehundrednineteen + onehundredtwenty onehundredtwentyone onehundredtwentytwo + onehundredtwentythree onehundredtwentyfour onehundredtwentyfive + onehundredtwentysix onehundredtwentyseven onehundredtwentyeight + onehundredtwentynine onehundredthirty onehundredthirtyone + onehundredthirtytwo onehundredthirtythree onehundredthirtyfour + onehundredthirtyfive onehundredthirtysix onehundredthirtyseven + onehundredthirtyeight onehundredthirtynine onehundredforty + onehundredfortyone onehundredfortytwo onehundredfortythree + onehundredfortyfour onehundredfortyfive onehundredfortysix + onehundredfortyseven onehundredfortyeight onehundredfortynine + onehundredfifty 
onehundredfiftyone onehundredfiftytwo onehundredfiftythree + onehundredfiftyfour onehundredfiftyfive onehundredfiftysix + onehundredfiftyseven onehundredfiftyeight onehundredfiftynine + onehundredsixty onehundredsixtyone onehundredsixtytwo onehundredsixtythree + onehundredsixtyfour onehundredsixtyfive onehundredsixtysix + onehundredsixtyseven onehundredsixtyeight onehundredsixtynine + onehundredseventy onehundredseventyone onehundredseventytwo + onehundredseventythree onehundredseventyfour onehundredseventyfive + onehundredseventysix onehundredseventyseven onehundredseventyeight + onehundredseventynine onehundredeighty onehundredeightyone + onehundredeightytwo onehundredeightythree onehundredeightyfour + onehundredeightyfive onehundredeightysix onehundredeightyseven + onehundredeightyeight onehundredeightynine onehundredninety + onehundredninetyone onehundredninetytwo onehundredninetythree + onehundredninetyfour onehundredninetyfive onehundredninetysix + onehundredninetyseven onehundredninetyeight onehundredninetynine twohundred + twohundredone twohundredtwo twohundredthree twohundredfour twohundredfive + twohundredsix twohundredseven twohundredeight twohundrednine twohundredten + twohundredeleven twohundredtwelve twohundredthirteen twohundredfourteen + twohundredfifteen twohundredsixteen twohundredseventeen twohundredeighteen + twohundrednineteen twohundredtwenty twohundredtwentyone twohundredtwentytwo + twohundredtwentythree twohundredtwentyfour twohundredtwentyfive + twohundredtwentysix twohundredtwentyseven twohundredtwentyeight + twohundredtwentynine twohundredthirty twohundredthirtyone + twohundredthirtytwo twohundredthirtythree twohundredthirtyfour + twohundredthirtyfive twohundredthirtysix twohundredthirtyseven + twohundredthirtyeight twohundredthirtynine twohundredforty + twohundredfortyone twohundredfortytwo twohundredfortythree + twohundredfortyfour twohundredfortyfive twohundredfortysix + twohundredfortyseven twohundredfortyeight twohundredfortynine + 
twohundredfifty twohundredfiftyone twohundredfiftytwo twohundredfiftythree + twohundredfiftyfour twohundredfiftyfive twohundredfiftysix + twohundredfiftyseven twohundredfiftyeight twohundredfiftynine + twohundredsixty twohundredsixtyone twohundredsixtytwo twohundredsixtythree + twohundredsixtyfour twohundredsixtyfive twohundredsixtysix + twohundredsixtyseven twohundredsixtyeight twohundredsixtynine + twohundredseventy twohundredseventyone twohundredseventytwo + twohundredseventythree twohundredseventyfour twohundredseventyfive + twohundredseventysix twohundredseventyseven twohundredseventyeight + twohundredseventynine twohundredeighty twohundredeightyone + twohundredeightytwo twohundredeightythree twohundredeightyfour + twohundredeightyfive twohundredeightysix twohundredeightyseven + twohundredeightyeight twohundredeightynine twohundredninety + twohundredninetyone twohundredninetytwo twohundredninetythree + twohundredninetyfour twohundredninetyfive twohundredninetysix + twohundredninetyseven twohundredninetyeight twohundredninetynine + threehundred +}] + +foreach {DO_MALLOC_TEST enc} { + 0 utf8 + 1 utf8 + 1 utf16 +} { +if {$DO_MALLOC_TEST || $enc=="utf16"} continue + + db close + forcedelete test.db + sqlite4 db test.db + sqlite4_db_config_lookaside db 0 0 0 + db eval "PRAGMA encoding = \"$enc\"" + + # Set variable $T to the test name prefix for this iteration of the loop. + # + set T "fts5snippet-$enc" + + ########################################################################## + # Test the snippet function. 
+ # + proc do_snippet_test {name expr iCol nTok args} { + set res [list] + foreach a $args { lappend res [string trim $a] } + do_select_test $name { + SELECT snippet(ft,'{','}','...',$iCol,$nTok) FROM ft WHERE ft MATCH $expr + } $res + } + do_test $T.3.1 { + execsql { + DROP TABLE IF EXISTS ft; + CREATE TABLE ft(content); + CREATE INDEX fti ON ft USING fts5(); + + INSERT INTO ft VALUES('one two three four five six seven eight nine ten'); + } + } {} + do_snippet_test $T.3.2 one 0 5 "{one} two three four five..." + do_snippet_test $T.3.3 two 0 5 "one {two} three four five..." + do_snippet_test $T.3.4 three 0 5 "one two {three} four five..." + do_snippet_test $T.3.5 four 0 5 "...two three {four} five six..." + do_snippet_test $T.3.6 five 0 5 "...three four {five} six seven..." + do_snippet_test $T.3.7 six 0 5 "...four five {six} seven eight..." + do_snippet_test $T.3.8 seven 0 5 "...five six {seven} eight nine..." + do_snippet_test $T.3.9 eight 0 5 "...six seven {eight} nine ten" + do_snippet_test $T.3.10 nine 0 5 "...six seven eight {nine} ten" + do_snippet_test $T.3.11 ten 0 5 "...six seven eight nine {ten}" + + do_test $T.4.1 { + execsql { + INSERT INTO ft VALUES( + 'one two three four five ' + || 'six seven eight nine ten ' + || 'eleven twelve thirteen fourteen fifteen ' + || 'sixteen seventeen eighteen nineteen twenty ' + || 'one two three four five ' + || 'six seven eight nine ten ' + || 'eleven twelve thirteen fourteen fifteen ' + || 'sixteen seventeen eighteen nineteen twenty' + ); + } + } {} + + do_snippet_test $T.4.2 {one nine} 0 5 { + {one} two three...eight {nine} ten + } { + {one} two three...eight {nine} ten... + } + + do_snippet_test $T.4.3 {one nine} 0 -5 { + {one} two three four five...six seven eight {nine} ten + } { + {one} two three four five...seven eight {nine} ten eleven... + } + do_snippet_test $T.4.3 {one nineteen} 0 -5 { + ...eighteen {nineteen} twenty {one} two... 
+ } + do_snippet_test $T.4.4 {two nineteen} 0 -5 { + ...eighteen {nineteen} twenty one {two}... + } + do_snippet_test $T.4.5 {three nineteen} 0 -5 { + ...{nineteen} twenty one two {three}... + } + + do_snippet_test $T.4.6 {four nineteen} 0 -5 { + ...two three {four} five six...seventeen eighteen {nineteen} twenty one... + } + do_snippet_test $T.4.7 {four NEAR nineteen} 0 -5 { + ...seventeen eighteen {nineteen} twenty one...two three {four} five six... + } + + do_snippet_test $T.4.8 {four nineteen} 0 5 { + ...three {four} five...eighteen {nineteen} twenty... + } + do_snippet_test $T.4.9 {four NEAR nineteen} 0 5 { + ...eighteen {nineteen} twenty...three {four} five... + } + do_snippet_test $T.4.10 {four NEAR nineteen} 0 -5 { + ...seventeen eighteen {nineteen} twenty one...two three {four} five six... + } + do_snippet_test $T.4.11 {four NOT (nineteen+twentyone)} 0 5 { + ...two three {four} five six... + } { + ...two three {four} five six... + } + do_snippet_test $T.4.12 {four OR nineteen NEAR twentyone} 0 5 { + ...two three {four} five six... + } { + ...two three {four} five six... + } + + do_test $T.5.1 { + execsql { + DROP TABLE IF EXISTS ft; + CREATE TABLE ft(a, b, c); + CREATE INDEX fti ON ft USING fts5(); + INSERT INTO ft VALUES( + 'one two three four five', + 'four five six seven eight', + 'seven eight nine ten eleven' + ); + } + } {} + + do_snippet_test $T.5.2 {five} -1 3 {...three four {five}} + do_snippet_test $T.5.3 {five} 0 3 {...three four {five}} + do_snippet_test $T.5.4 {five} 1 3 {four {five} six...} + do_snippet_test $T.5.5 {five} 2 3 {seven eight nine...} + + do_test $T.5.6 { + execsql { UPDATE ft SET b = NULL } + } {} + + do_snippet_test $T.5.7 {five} -1 3 {...three four {five}} + do_snippet_test $T.5.8 {five} 0 3 {...three four {five}} + do_snippet_test $T.5.9 {five} 1 3 {} + do_snippet_test $T.5.10 {five} 2 3 {seven eight nine...} + + do_snippet_test $T.5.11 {one "seven eight nine"} -1 -3 { + {one} two three...{seven} {eight} {nine}... 
+ } + + do_test $T.6.1 { + execsql { + DROP TABLE IF EXISTS ft; + CREATE TABLE ft(x); + CREATE INDEX fti ON ft USING fts5(); + INSERT INTO ft VALUES($numbers); + } + } {} + do_snippet_test $T.6.2 { + one fifty onehundred onehundredfifty twohundredfifty threehundred + } -1 4 { + {one}...{fifty}...{onehundred}...{onehundredfifty}... + } + do_snippet_test $T.6.3 { + one fifty onehundred onehundredfifty twohundredfifty threehundred + } -1 -4 { + {one} two three four...fortyeight fortynine {fifty} fiftyone...ninetyeight ninetynine {onehundred} onehundredone...onehundredfortyeight onehundredfortynine {onehundredfifty} onehundredfiftyone... + } + + do_test $T.7.1 { + execsql { + BEGIN; + DROP TABLE IF EXISTS ft; + CREATE TABLE ft(x); + CREATE INDEX fti ON ft USING fts5(); + } + set testresults [list] + for {set i 1} {$i < 150} {incr i} { + set commas [string repeat , $i] + execsql {INSERT INTO ft VALUES('one' || $commas || 'two')} + lappend testresults "{one}$commas{two}" + } + execsql COMMIT + } {} + eval [list do_snippet_test $T.7.2 {one two} -1 3] $testresults + +} + +finish_test Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -138,10 +138,11 @@ lsm1.test lsm2.test csr1.test ckpt1.test mc1.test fts5expr1.test fts5query1.test fts5rnd1.test fts5create.test + fts5snippet.test aggerror.test attach.test autoindex1.test badutf.test