Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -509,10 +509,11 @@ typedef struct Fts5ExprNode Fts5ExprNode; typedef struct Fts5Parse Fts5Parse; typedef struct Fts5Token Fts5Token; typedef struct Fts5ExprPhrase Fts5ExprPhrase; typedef struct Fts5ExprNearset Fts5ExprNearset; +typedef struct Fts5ExprColset Fts5ExprColset; struct Fts5Token { const char *p; /* Token text (not NULL terminated) */ int n; /* Size of buffer p in bytes */ }; @@ -575,17 +576,23 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset( Fts5Parse*, Fts5ExprNearset*, Fts5ExprPhrase* ); + +Fts5ExprColset *sqlite3Fts5ParseColset( + Fts5Parse*, + Fts5ExprColset*, + Fts5Token * +); void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); -void sqlite3Fts5ParseSetColumn(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); +void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNearset*, Fts5ExprColset*); void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); /* ** End of interface to code in fts5_expr.c. Index: ext/fts5/fts5_expr.c ================================================================== --- ext/fts5/fts5_expr.c +++ ext/fts5/fts5_expr.c @@ -77,17 +77,27 @@ Fts5Buffer poslist; /* Current position list */ int nTerm; /* Number of entries in aTerm[] */ Fts5ExprTerm aTerm[0]; /* Terms that make up this phrase */ }; +/* +** If a NEAR() clump may only match a specific set of columns, then +** Fts5ExprNearset.pColset points to an object of the following type. +** Each entry in the aiCol[] array is the index of a column that may match. +*/ +struct Fts5ExprColset { + int nCol; + int aiCol[1]; +}; + /* ** One or more phrases that must appear within a certain token distance of ** each other within each matching document. 
*/ struct Fts5ExprNearset { int nNear; /* NEAR parameter */ - int iCol; /* Column to search (-1 -> all columns) */ + Fts5ExprColset *pColset; /* Columns to search (NULL -> all columns) */ int nPhrase; /* Number of entries in aPhrase[] array */ Fts5ExprPhrase *apPhrase[0]; /* Array of phrase pointers */ }; @@ -134,10 +144,12 @@ pToken->p = z; pToken->n = 1; switch( *z ){ case '(': tok = FTS5_LP; break; case ')': tok = FTS5_RP; break; + case '[': tok = FTS5_LSP; break; + case ']': tok = FTS5_RSP; break; case ':': tok = FTS5_COLON; break; case ',': tok = FTS5_COMMA; break; case '+': tok = FTS5_PLUS; break; case '*': tok = FTS5_STAR; break; case '\0': tok = FTS5_EOF; break; @@ -273,11 +285,10 @@ pNew->apExprPhrase[0] = pCopy; pNode->eType = FTS5_STRING; pNode->pNear = pNear; - pNear->iCol = -1; pNear->nPhrase = 1; pNear->apPhrase[0] = pCopy; pCopy->nTerm = pOrig->nTerm; pCopy->pNode = pNode; @@ -333,19 +344,20 @@ ** otherwise. It is not considered an error code if the current rowid is ** not a match. */ static int fts5ExprPhraseIsMatch( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - int iCol, /* If >=0, search for matches in iCol only */ + Fts5ExprColset *pColset, /* Restrict matches to these columns */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */ ){ Fts5PoslistWriter writer = {0}; Fts5PoslistReader aStatic[4]; Fts5PoslistReader *aIter = aStatic; int i; int rc = SQLITE_OK; + int iCol = pColset ? pColset->aiCol[0] : -1; fts5BufferZero(&pPhrase->poslist); /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. 
*/ @@ -662,11 +674,10 @@ static int fts5ExprExtractCol( const u8 **pa, /* IN/OUT: Pointer to poslist */ int n, /* IN: Size of poslist in bytes */ int iCol /* Column to extract from poslist */ ){ - int ii; int iCurrent = 0; const u8 *p = *pa; const u8 *pEnd = &p[n]; /* One byte past end of position list */ u8 prev = 0; @@ -714,40 +725,56 @@ ){ Fts5ExprNearset *pNear = pNode->pNear; int rc = SQLITE_OK; while( 1 ){ - int i; if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ /* If this "NEAR" object is actually a single phrase that consists ** of a single term only, then grab pointers into the poslist ** managed by the fts5_index.c iterator object. This is much faster ** than synthesizing a new poslist the way we have to for more ** complicated phrase or NEAR expressions. */ Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; - assert( pPhrase->poslist.nSpace==0 ); - rc = sqlite3Fts5IterPoslist(pIter, - (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n, &pNode->iRowid - ); + Fts5ExprColset *pColset = pNear->pColset; + const u8 *pPos; + int nPos; + + rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); /* If the term may match any column, then this must be a match. ** Return immediately in this case. Otherwise, try to find the ** part of the poslist that corresponds to the required column. ** If it can be found, return. If it cannot, the next iteration ** of the loop will test the next rowid in the database for this ** term. 
*/ - if( pNear->iCol<0 ) return rc; + if( pColset==0 ){ + assert( pPhrase->poslist.nSpace==0 ); + pPhrase->poslist.p = (u8*)pPos; + pPhrase->poslist.n = nPos; + }else if( pColset->nCol==1 ){ + assert( pPhrase->poslist.nSpace==0 ); + pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]); + pPhrase->poslist.p = (u8*)pPos; + }else{ + int i; + fts5BufferZero(&pPhrase->poslist); + for(i=0; i<pColset->nCol; i++){ + const u8 *pSub = pPos; + int nSub = fts5ExprExtractCol(&pSub, nPos, pColset->aiCol[i]); + if( nSub ){ + fts5BufferAppendBlob(&rc, &pPhrase->poslist, nSub, pSub); + } + } + } - pPhrase->poslist.n = fts5ExprExtractCol( - (const u8**)&pPhrase->poslist.p, - pPhrase->poslist.n, - pNear->iCol - ); if( pPhrase->poslist.n ) return rc; }else{ + int i; + + assert( pNear->pColset==0 || pNear->pColset->nCol==1 ); /* Advance the iterators until they all point to the same rowid */ rc = fts5ExprNearNextRowidMatch(pExpr, pNode); if( rc!=SQLITE_OK || pNode->bEof ) break; @@ -754,18 +781,18 @@ /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. 
*/ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - if( pPhrase->nTerm>1 || pNear->iCol>=0 ){ + if( pPhrase->nTerm>1 || pNear->pColset ){ int bMatch = 0; - rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch); + rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ rc = sqlite3Fts5IterPoslistBuffer( pPhrase->aTerm[0].pIter, &pPhrase->poslist - ); + ); } } if( rc==SQLITE_OK && i==pNear->nPhrase ){ int bMatch = 1; @@ -1150,11 +1177,10 @@ pRet = sqlite3_malloc(nByte); if( pRet==0 ){ pParse->rc = SQLITE_NOMEM; }else{ memset(pRet, 0, nByte); - pRet->iCol = -1; } }else if( (pNear->nPhrase % SZALLOC)==0 ){ int nNew = pNear->nPhrase + SZALLOC; int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*); @@ -1233,10 +1259,11 @@ if( pNear ){ int i; for(i=0; i<pNear->nPhrase; i++){ fts5ExprPhraseFree(pNear->apPhrase[i]); } + sqlite3_free(pNear->pColset); sqlite3_free(pNear); } } void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ @@ -1311,11 +1338,11 @@ } } void sqlite3Fts5ParseSetDistance( Fts5Parse *pParse, - Fts5ExprNearset *pNear, + Fts5ExprNearset *pNear, Fts5Token *p ){ int nNear = 0; int i; if( p->n ){ @@ -1333,34 +1360,104 @@ nNear = FTS5_DEFAULT_NEARDIST; } pNear->nNear = nNear; } -void sqlite3Fts5ParseSetColumn( - Fts5Parse *pParse, - Fts5ExprNearset *pNear, +/* +** The second argument passed to this function may be NULL, or it may be +** an existing Fts5ExprColset object. This function returns a pointer to +** a new colset object containing the contents of (p) with new value column +** number iCol appended. +** +** If an OOM error occurs, store an error code in pParse and return NULL. +** The old colset object (if any) is not freed in this case. 
+*/ +static Fts5ExprColset *fts5ParseColset( + Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ + Fts5ExprColset *p, /* Existing colset object */ + int iCol /* New column to add to colset object */ +){ + int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */ + Fts5ExprColset *pNew; /* New colset object to return */ + + assert( pParse->rc==SQLITE_OK ); + assert( iCol>=0 && iCol<pParse->pConfig->nCol ); + + pNew = sqlite3_realloc(p, sizeof(Fts5ExprColset) + sizeof(int)*nCol); + if( pNew==0 ){ + pParse->rc = SQLITE_NOMEM; + }else{ + int *aiCol = pNew->aiCol; + int i, j; + for(i=0; i<nCol; i++){ + if( aiCol[i]==iCol ) return pNew; + if( aiCol[i]>iCol ) break; + } + for(j=nCol; j>i; j--){ + aiCol[j] = aiCol[j-1]; + } + aiCol[i] = iCol; + pNew->nCol = nCol+1; + +#ifndef NDEBUG + /* Check that the array is in order and contains no duplicate entries. */ + for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] ); +#endif + } + + return pNew; +} + +Fts5ExprColset *sqlite3Fts5ParseColset( + Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ + Fts5ExprColset *pColset, /* Existing colset object */ Fts5Token *p ){ + Fts5ExprColset *pRet = 0; + if( pParse->rc==SQLITE_OK ){ + int iCol; char *z = 0; int rc = fts5ParseStringFromToken(p, &z); if( rc==SQLITE_OK ){ Fts5Config *pConfig = pParse->pConfig; - int i; - for(i=0; i<pConfig->nCol; i++){ - if( 0==sqlite3_stricmp(pConfig->azCol[i], z) ){ - pNear->iCol = i; + sqlite3Fts5Dequote(z); + for(iCol=0; iCol<pConfig->nCol; iCol++){ + if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ){ break; } } - if( i==pConfig->nCol ){ + if( iCol==pConfig->nCol ){ sqlite3Fts5ParseError(pParse, "no such column: %s", z); } sqlite3_free(z); }else{ pParse->rc = rc; } + + if( pParse->rc==SQLITE_OK ){ + pRet = fts5ParseColset(pParse, pColset, iCol); + } + } + + if( pParse->rc!=SQLITE_OK ){ + assert( pRet==0 ); + sqlite3_free(pColset); + } + + return pRet; +} + +void sqlite3Fts5ParseSetColset( + Fts5Parse *pParse, + Fts5ExprNearset *pNear, + Fts5ExprColset *pColset +){ + if( pNear ){ + pNear->pColset = 
pColset; + }else{ + sqlite3_free(pColset); } } /* ** Allocate and return a new expression object. If anything goes wrong (i.e. @@ -1461,12 +1558,22 @@ int i; int iTerm; zRet = fts5PrintfAppend(zRet, "[%s ", zNearsetCmd); if( zRet==0 ) return 0; - if( pNear->iCol>=0 ){ - zRet = fts5PrintfAppend(zRet, "-col %d ", pNear->iCol); + if( pNear->pColset ){ + int *aiCol = pNear->pColset->aiCol; + int nCol = pNear->pColset->nCol; + if( nCol==1 ){ + zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]); + }else{ + zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]); + for(i=1; i<pNear->pColset->nCol; i++){ + zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]); + } + zRet = fts5PrintfAppend(zRet, "} "); + } if( zRet==0 ) return 0; } if( pNear->nPhrase>1 ){ zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear); @@ -1528,12 +1635,13 @@ if( pExpr->eType==FTS5_STRING ){ Fts5ExprNearset *pNear = pExpr->pNear; int i; int iTerm; - if( pNear->iCol>=0 ){ - zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[pNear->iCol]); + if( pNear->pColset ){ + int iCol = pNear->pColset->aiCol[0]; + zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[iCol]); if( zRet==0 ) return 0; } if( pNear->nPhrase>1 ){ zRet = fts5PrintfAppend(zRet, "NEAR("); Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -4477,11 +4477,10 @@ /* ** Move to the next matching rowid. 
*/ int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ assert( pIter->pIndex->rc==SQLITE_OK ); - fts5BufferZero(&pIter->poslist); fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); return fts5IndexReturn(pIter->pIndex); } /* @@ -4492,11 +4491,10 @@ Fts5MultiSegIter *pMulti = pIter->pMulti; assert( pIter->pIndex->rc==SQLITE_OK ); assert( pMulti ); - fts5BufferZero(&pIter->poslist); fts5MultiIterNext(p, pMulti, 0, 0); if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ fts5DataRelease(pSeg->pLeaf); Index: ext/fts5/fts5parse.y ================================================================== --- ext/fts5/fts5parse.y +++ ext/fts5/fts5parse.y @@ -93,14 +93,31 @@ } cnearset(A) ::= nearset(X). { A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, X); } -cnearset(A) ::= STRING(X) COLON nearset(Y). { - sqlite3Fts5ParseSetColumn(pParse, Y, &X); +cnearset(A) ::= colset(X) COLON nearset(Y). { + sqlite3Fts5ParseSetColset(pParse, Y, X); A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, Y); } + +%type colset {Fts5ExprColset*} +%destructor colset { sqlite3_free($$); } +%type colsetlist {Fts5ExprColset*} +%destructor colsetlist { sqlite3_free($$); } + +colset(A) ::= LSP colsetlist(X) RSP. { A = X; } +colset(A) ::= STRING(X). { + A = sqlite3Fts5ParseColset(pParse, 0, &X); +} + +colsetlist(A) ::= colsetlist(Y) STRING(X). { + A = sqlite3Fts5ParseColset(pParse, Y, &X); } +colsetlist(A) ::= STRING(X). 
{ + A = sqlite3Fts5ParseColset(pParse, 0, &X); +} + %type nearset {Fts5ExprNearset*} %type nearphrases {Fts5ExprNearset*} %destructor nearset { sqlite3Fts5ParseNearsetFree($$); } %destructor nearphrases { sqlite3Fts5ParseNearsetFree($$); } Index: ext/fts5/test/fts5ac.test ================================================================== --- ext/fts5/test/fts5ac.test +++ ext/fts5/test/fts5ac.test @@ -123,17 +123,36 @@ 97 {u h h k m n k} {u b v n u a o c} 98 {s p e t c z d f n w f} {l s f j b l c e s h} 99 {r c v w i v h a t a c v c r e} {h h u m g o f b a e o} } +#------------------------------------------------------------------------- # Usage: # # poslist aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2... # +# This command is used to test if a document (set of column values) matches +# the logical equivalent of a single FTS5 NEAR() clump and, if so, return +# the equivalent of an FTS5 position list. +# +# Parameter $aCol is passed a list of the column values for the document +# to test. Parameters $phrase1 and so on are the phrases. +# +# The result is a list of phrase hits. Each phrase hit is formatted as +# three integers separated by "." characters, in the following format: +# +# <phrase number> . <column number> . <token offset> +# +# Options: +# +# -near N (NEAR distance. Default 10) +# -col C (List of column indexes to match against) +# -pc VARNAME (variable in caller frame to use for phrase numbering) +# proc poslist {aCol args} { set O(-near) 10 - set O(-col) -1 + set O(-col) {} set O(-pc) "" set nOpt [lsearch -exact $args --] if {$nOpt<0} { error "no -- option" } @@ -159,12 +178,11 @@ } set iCol -1 foreach col $aCol { incr iCol - if {$O(-col)>=0 && $O(-col)!=$iCol} continue - + if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue set nToken [llength $col] set iFL [expr $O(-near) >= $nToken ? 
$nToken - 1 : $O(-near)] for { } {$iFL < $nToken} {incr iFL} { for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { @@ -359,14 +377,28 @@ #------------------------------------------------------------------------- # Queries on a specific column. # foreach {tn expr} { - 1 "x:a" - 2 "y:a" - 3 "x:b" - 4 "y:b" + 1.1 "x:a" + 1.2 "y:a" + 1.3 "x:b" + 1.4 "y:b" + 2.1 "[x]:a" + 2.2 "[y]:a" + 2.3 "[x]:b" + 2.4 "[y]:b" + + 3.1 "[x y]:a" + 3.2 "[y x]:a" + 3.3 "[x x]:b" + 3.4 "[y y]:b" + + 4.1 {["x" "y"]:a} + 4.2 {["y" x]:a} + 4.3 {[x "x"]:b} + 4.4 {["y" y]:b} } { set res [matchdata 1 $expr] do_execsql_test $tn2.3.$tn.[llength $res] { SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr } $res