Index: ext/fts3/fts3.c ================================================================== --- ext/fts3/fts3.c +++ ext/fts3/fts3.c @@ -310,10 +310,15 @@ #ifndef SQLITE_CORE # include "sqlite3ext.h" SQLITE_EXTENSION_INIT1 #endif +static int fts3EvalNext(Fts3Cursor *pCsr); +static int fts3EvalStart(Fts3Cursor *pCsr); +static int fts3TermSegReaderCursor( + Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **); + /* ** Write a 64-bit variable-length integer to memory starting at p[0]. ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes. ** The number of bytes written is returned. */ @@ -818,31 +823,60 @@ } sqlite3_free(zFree); return zRet; } +/* +** This function interprets the string at (*pp) as a non-negative integer +** value. It reads the integer and sets *pnOut to the value read, then +** sets *pp to point to the byte immediately following the last byte of +** the integer value. +** +** Only decimal digits ('0'..'9') may be part of an integer value. +** +** If *pp does not being with a decimal digit SQLITE_ERROR is returned and +** the output value undefined. Otherwise SQLITE_OK is returned. +** +** This function is used when parsing the "prefix=" FTS4 parameter. +*/ static int fts3GobbleInt(const char **pp, int *pnOut){ - const char *p = *pp; - int nInt = 0; + const char *p = *pp; /* Iterator pointer */ + int nInt = 0; /* Output value */ + for(p=*pp; p[0]>='0' && p[0]<='9'; p++){ nInt = nInt * 10 + (p[0] - '0'); } if( p==*pp ) return SQLITE_ERROR; *pnOut = nInt; *pp = p; return SQLITE_OK; } - +/* +** This function is called to allocate an array of Fts3Index structures +** representing the indexes maintained by the current FTS table. FTS tables +** always maintain the main "terms" index, but may also maintain one or +** more "prefix" indexes, depending on the value of the "prefix=" parameter +** (if any) specified as part of the CREATE VIRTUAL TABLE statement. +** +** Argument zParam is passed the value of the "prefix=" option if one was +** specified, or NULL otherwise. +** +** If no error occurs, SQLITE_OK is returned and *apIndex set to point to +** the allocated array. *pnIndex is set to the number of elements in the +** array. If an error does occur, an SQLite error code is returned. +** +** Regardless of whether or not an error is returned, it is the responsibility +** of the caller to call sqlite3_free() on the output array to free it. +*/ static int fts3PrefixParameter( const char *zParam, /* ABC in prefix=ABC parameter to parse */ int *pnIndex, /* OUT: size of *apIndex[] array */ - struct Fts3Index **apIndex, /* OUT: Array of indexes for this table */ - struct Fts3Index **apFree /* OUT: Free this with sqlite3_free() */ + struct Fts3Index **apIndex /* OUT: Array of indexes for this table */ ){ - struct Fts3Index *aIndex; - int nIndex = 1; + struct Fts3Index *aIndex; /* Allocated array */ + int nIndex = 1; /* Number of entries in array */ if( zParam && zParam[0] ){ const char *p; nIndex++; for(p=zParam; *p; p++){ @@ -849,11 +883,11 @@ if( *p==',' ) nIndex++; } } aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex); - *apIndex = *apFree = aIndex; + *apIndex = aIndex; *pnIndex = nIndex; if( !aIndex ){ return SQLITE_NOMEM; } @@ -906,12 +940,11 @@ int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */ const char **aCol; /* Array of column names */ sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */ int nIndex; /* Size of aIndex[] array */ - struct Fts3Index *aIndex; /* Array of indexes for this table */ - struct Fts3Index *aFree = 0; /* Free this before returning */ + struct Fts3Index *aIndex = 0; /* Array of indexes for this table */ /* The results of parsing supported FTS4 key=value options: */ int bNoDocsize = 0; /* True to omit %_docsize table */ int bDescIdx = 0; /* True to store descending indexes */ char *zPrefix = 0; /* Prefix parameter value (or NULL) */ @@ -1044,11 +1077,11 @@ rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr); if( rc!=SQLITE_OK ) goto fts3_init_out; } assert( pTokenizer ); - rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex, &aFree); + rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex); if( rc==SQLITE_ERROR ){ assert( zPrefix ); *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix); } if( rc!=SQLITE_OK ) goto fts3_init_out; @@ -1131,11 +1164,11 @@ /* Declare the table schema to SQLite. */ fts3DeclareVtab(&rc, p); fts3_init_out: sqlite3_free(zPrefix); - sqlite3_free(aFree); + sqlite3_free(aIndex); sqlite3_free(zCompress); sqlite3_free(zUncompress); sqlite3_free((void *)aCol); if( rc!=SQLITE_OK ){ if( p ){ @@ -1722,12 +1755,10 @@ *pp1 = p1 + 1; *pp2 = p2 + 1; } /* -** nToken==1 searches for adjacent positions. -** ** This function is used to merge two position lists into one. When it is ** called, *pp1 and *pp2 must both point to position lists. A position-list is ** the part of a doclist that follows each document id. For example, if a row ** contains: ** @@ -1743,10 +1774,12 @@ ** If isSaveLeft is 0, an entry is added to the output position list for ** each position in *pp2 for which there exists one or more positions in ** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e. ** when the *pp1 token appears before the *pp2 token, but not more than nToken ** slots before it. +** +** e.g. nToken==1 searches for adjacent positions. */ static int fts3PoslistPhraseMerge( char **pp, /* IN/OUT: Preallocated output buffer */ int nToken, /* Maximum difference in token positions */ int isSaveLeft, /* Save the left position */ @@ -1909,26 +1942,38 @@ return res; } /* -** A pointer to an instance of this structure is used as the context -** argument to sqlite3Fts3SegReaderIterate() +** An instance of this function is used to merge together the (potentially +** large number of) doclists for each term that matches a prefix query. +** See function fts3TermSelectMerge() for details. */ typedef struct TermSelect TermSelect; struct TermSelect { - int isReqPos; - char *aaOutput[16]; /* Malloc'd output buffer */ - int anOutput[16]; /* Size of output in bytes */ + char *aaOutput[16]; /* Malloc'd output buffers */ + int anOutput[16]; /* Size each output buffer in bytes */ }; - +/* +** This function is used to read a single varint from a buffer. Parameter +** pEnd points 1 byte past the end of the buffer. When this function is +** called, if *pp points to pEnd or greater, then the end of the buffer +** has been reached. In this case *pp is set to 0 and the function returns. +** +** If *pp does not point to or past pEnd, then a single varint is read +** from *pp. *pp is then set to point 1 byte past the end of the read varint. +** +** If bDescIdx is false, the value read is added to *pVal before returning. +** If it is true, the value read is subtracted from *pVal before this +** function returns. +*/ static void fts3GetDeltaVarint3( - char **pp, - char *pEnd, - int bDescIdx, - sqlite3_int64 *pVal + char **pp, /* IN/OUT: Point to read varint from */ + char *pEnd, /* End of buffer */ + int bDescIdx, /* True if docids are descending */ + sqlite3_int64 *pVal /* IN/OUT: Integer value */ ){ if( *pp>=pEnd ){ *pp = 0; }else{ sqlite3_int64 iVal; @@ -1939,10 +1984,25 @@ *pVal += iVal; } } } +/* +** This function is used to write a single varint to a buffer. The varint +** is written to *pp. Before returning, *pp is set to point 1 byte past the +** end of the value written. +** +** If *pbFirst is zero when this function is called, the value written to +** the buffer is that of parameter iVal. +** +** If *pbFirst is non-zero when this function is called, then the value +** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal) +** (if bDescIdx is non-zero). +** +** Before returning, this function always sets *pbFirst to 1 and *piPrev +** to the value of parameter iVal. +*/ static void fts3PutDeltaVarint3( char **pp, /* IN/OUT: Output pointer */ int bDescIdx, /* True for descending docids */ sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ int *pbFirst, /* IN/OUT: True after first int written */ @@ -1959,14 +2019,38 @@ *pp += sqlite3Fts3PutVarint(*pp, iWrite); *piPrev = iVal; *pbFirst = 1; } -#define COMPARE_DOCID(i1, i2) ((bDescIdx?-1:1) * (i1-i2)) +/* +** This macro is used by various functions that merge doclists. The two +** arguments are 64-bit docid values. If the value of the stack variable +** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2). +** Otherwise, (i2-i1). +** +** Using this makes it easier to write code that can merge doclists that are +** sorted in either ascending or descending order. +*/ +#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2)) + +/* +** This function does an "OR" merge of two doclists (output contains all +** positions contained in either argument doclist). If the docids in the +** input doclists are sorted in ascending order, parameter bDescDoclist +** should be false. If they are sorted in ascending order, it should be +** passed a non-zero value. +** +** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer +** containing the output doclist and SQLITE_OK is returned. In this case +** *pnOut is set to the number of bytes in the output doclist. +** +** If an error occurs, an SQLite error code is returned. The output values +** are undefined in this case. +*/ static int fts3DoclistOrMerge( - int bDescIdx, /* True if arguments are desc */ + int bDescDoclist, /* True if arguments are desc */ char *a1, int n1, /* First doclist */ char *a2, int n2, /* Second doclist */ char **paOut, int *pnOut /* OUT: Malloc'd doclist */ ){ sqlite3_int64 i1 = 0; @@ -1987,35 +2071,47 @@ p = aOut; fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); while( p1 || p2 ){ - sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2); + sqlite3_int64 iDiff = DOCID_CMP(i1, i2); if( p2 && p1 && iDiff==0 ){ - fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1); + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); fts3PoslistMerge(&p, &p1, &p2); - fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); - fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); }else if( !p2 || (p1 && iDiff<0) ){ - fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1); + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); fts3PoslistCopy(&p, &p1); - fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); }else{ - fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i2); + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2); fts3PoslistCopy(&p, &p2); - fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); } } *paOut = aOut; *pnOut = (p-aOut); return SQLITE_OK; } +/* +** This function does a "phrase" merge of two doclists. In a phrase merge, +** the output contains a copy of each position from the right-hand input +** doclist for which there is a position in the left-hand input doclist +** exactly nDist tokens before it. +** +** If the docids in the input doclists are sorted in ascending order, +** parameter bDescDoclist should be false. If they are sorted in ascending +** order, it should be passed a non-zero value. +** +** The right-hand input doclist is overwritten by this function. +*/ static void fts3DoclistPhraseMerge( - int bDescIdx, /* True if arguments are desc */ + int bDescDoclist, /* True if arguments are desc */ int nDist, /* Distance from left to right (1=adjacent) */ char *aLeft, int nLeft, /* Left doclist */ char *aRight, int *pnRight /* IN/OUT: Right/output doclist */ ){ sqlite3_int64 i1 = 0; @@ -2034,30 +2130,30 @@ p = aOut; fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); while( p1 && p2 ){ - sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2); + sqlite3_int64 iDiff = DOCID_CMP(i1, i2); if( iDiff==0 ){ char *pSave = p; sqlite3_int64 iPrevSave = iPrev; int bFirstOutSave = bFirstOut; - fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1); + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){ p = pSave; iPrev = iPrevSave; bFirstOut = bFirstOutSave; } - fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); - fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); }else if( iDiff<0 ){ fts3PoslistCopy(0, &p1); - fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); }else{ fts3PoslistCopy(0, &p2); - fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); } } *pnRight = p - aOut; } @@ -2070,11 +2166,11 @@ ** ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is ** the responsibility of the caller to free any doclists left in the ** TermSelect.aaOutput[] array. */ -static int fts3TermSelectMerge(Fts3Table *p, TermSelect *pTS){ +static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){ char *aOut = 0; int nOut = 0; int i; /* Loop through the doclists in the aaOutput[] array. Merge them all @@ -2111,28 +2207,29 @@ pTS->anOutput[0] = nOut; return SQLITE_OK; } /* -** This function is used as the sqlite3Fts3SegReaderIterate() callback when -** querying the full-text index for a doclist associated with a term or -** term-prefix. +** Merge the doclist aDoclist/nDoclist into the TermSelect object passed +** as the first argument. The merge is an "OR" merge (see function +** fts3DoclistOrMerge() for details). +** +** This function is called with the doclist for each term that matches +** a queried prefix. It merges all these doclists into one, the doclist +** for the specified prefix. Since there can be a very large number of +** doclists to merge, the merging is done pair-wise using the TermSelect +** object. +** +** This function returns SQLITE_OK if the merge is successful, or an +** SQLite error code (SQLITE_NOMEM) if an error occurs. */ -static int fts3TermSelectCb( - Fts3Table *p, /* Virtual table object */ - void *pContext, /* Pointer to TermSelect structure */ - char *zTerm, - int nTerm, - char *aDoclist, - int nDoclist +static int fts3TermSelectMerge( + Fts3Table *p, /* FTS table handle */ + TermSelect *pTS, /* TermSelect object to merge into */ + char *aDoclist, /* Pointer to doclist */ + int nDoclist /* Size of aDoclist in bytes */ ){ - TermSelect *pTS = (TermSelect *)pContext; - - UNUSED_PARAMETER(p); - UNUSED_PARAMETER(zTerm); - UNUSED_PARAMETER(nTerm); - if( pTS->aaOutput[0]==0 ){ /* If this is the first term selected, copy the doclist to the output ** buffer using memcpy(). */ pTS->aaOutput[0] = sqlite3_malloc(nDoclist); pTS->anOutput[0] = nDoclist; @@ -2199,23 +2296,30 @@ } pCsr->apSegment[pCsr->nSegment++] = pNew; return SQLITE_OK; } +/* +** Add seg-reader objects to the Fts3MultiSegReader object passed as the +** 8th argument. +** +** This function returns SQLITE_OK if successful, or an SQLite error code +** otherwise. +*/ static int fts3SegReaderCursor( Fts3Table *p, /* FTS3 table handle */ int iIndex, /* Index to search (from 0 to p->nIndex-1) */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ int isScan, /* True to scan from zTerm to EOF */ - Fts3MultiSegReader *pCsr /* Cursor object to populate */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ ){ - int rc = SQLITE_OK; - int rc2; - sqlite3_stmt *pStmt = 0; + int rc = SQLITE_OK; /* Error code */ + sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */ + int rc2; /* Result of sqlite3_reset() */ /* If iLevel is less than 0 and this is not a scan, include a seg-reader ** for the pending-terms. If this is a scan, then this call must be being ** made by an fts4aux module, not an FTS table. In this case calling ** Fts3SegReaderPending might segfault, as the data structures used by @@ -2300,28 +2404,46 @@ return fts3SegReaderCursor( p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr ); } +/* +** In addition to its current configuration, have the Fts3MultiSegReader +** passed as the 4th argument also scan the doclist for term zTerm/nTerm. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ static int fts3SegReaderCursorAddZero( - Fts3Table *p, - const char *zTerm, - int nTerm, - Fts3MultiSegReader *pCsr + Fts3Table *p, /* FTS virtual table handle */ + const char *zTerm, /* Term to scan doclist of */ + int nTerm, /* Number of bytes in zTerm */ + Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */ ){ return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr); } - -int sqlite3Fts3TermSegReaderCursor( +/* +** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or, +** if isPrefix is true, to scan the doclist for all terms for which +** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write +** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return +** an SQLite error code. +** +** It is the responsibility of the caller to free this object by eventually +** passing it to fts3SegReaderCursorFree() +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +** Output parameter *ppSegcsr is set to 0 if an error occurs. +*/ +static int fts3TermSegReaderCursor( Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ ){ - Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ + Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ int rc = SQLITE_NOMEM; /* Return code */ pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader)); if( pSegcsr ){ int i; @@ -2361,62 +2483,53 @@ *ppSegcsr = pSegcsr; return rc; } +/* +** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor(). +*/ static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ sqlite3Fts3SegReaderFinish(pSegcsr); sqlite3_free(pSegcsr); } /* ** This function retreives the doclist for the specified term (or term -** prefix) from the database. -** -** The returned doclist may be in one of two formats, depending on the -** value of parameter isReqPos. If isReqPos is zero, then the doclist is -** a sorted list of delta-compressed docids (a bare doclist). If isReqPos -** is non-zero, then the returned list is in the same format as is stored -** in the database without the found length specifier at the start of on-disk -** doclists. +** prefix) from the database. */ static int fts3TermSelect( Fts3Table *p, /* Virtual table handle */ Fts3PhraseToken *pTok, /* Token to query for */ int iColumn, /* Column to query (or -ve for all columns) */ - int isReqPos, /* True to include position lists in output */ int *pnOut, /* OUT: Size of buffer at *ppOut */ char **ppOut /* OUT: Malloced result buffer */ ){ int rc; /* Return code */ - Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ - TermSelect tsc; /* Context object for fts3TermSelectCb() */ + Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ + TermSelect tsc; /* Object for pair-wise doclist merging */ Fts3SegFilter filter; /* Segment term filter configuration */ pSegcsr = pTok->pSegcsr; memset(&tsc, 0, sizeof(TermSelect)); - tsc.isReqPos = isReqPos; - filter.flags = FTS3_SEGMENT_IGNORE_EMPTY + filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) - | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0) | (iColumnnColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); filter.iCol = iColumn; filter.zTerm = pTok->z; filter.nTerm = pTok->n; rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); while( SQLITE_OK==rc && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) ){ - rc = fts3TermSelectCb(p, (void *)&tsc, - pSegcsr->zTerm, pSegcsr->nTerm, pSegcsr->aDoclist, pSegcsr->nDoclist - ); + rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist); } if( rc==SQLITE_OK ){ - rc = fts3TermSelectMerge(p, &tsc); + rc = fts3TermSelectFinishMerge(p, &tsc); } if( rc==SQLITE_OK ){ *ppOut = tsc.aaOutput[0]; *pnOut = tsc.anOutput[0]; }else{ @@ -2438,28 +2551,19 @@ ** If the isPoslist argument is true, then it is assumed that the doclist ** contains a position-list following each docid. Otherwise, it is assumed ** that the doclist is simply a list of docids stored as delta encoded ** varints. */ -static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){ +static int fts3DoclistCountDocids(char *aList, int nList){ int nDoc = 0; /* Return value */ if( aList ){ char *aEnd = &aList[nList]; /* Pointer to one byte after EOF */ char *p = aList; /* Cursor */ - if( !isPoslist ){ - /* The number of docids in the list is the same as the number of - ** varints. In FTS3 a varint consists of a single byte with the 0x80 - ** bit cleared and zero or more bytes with the 0x80 bit set. So to - ** count the varints in the buffer, just count the number of bytes - ** with the 0x80 bit clear. */ - while( piPrevId = sqlite3_column_int64(pCsr->pStmt, 0); rc = SQLITE_OK; } }else{ - rc = sqlite3Fts3EvalNext((Fts3Cursor *)pCursor); + rc = fts3EvalNext((Fts3Cursor *)pCursor); } assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); return rc; } @@ -2562,11 +2666,11 @@ } rc = sqlite3Fts3ReadLock(p); if( rc!=SQLITE_OK ) return rc; - rc = sqlite3Fts3EvalStart(pCsr, pCsr->pExpr, 1); + rc = fts3EvalStart(pCsr); sqlite3Fts3SegmentsClose(p); if( rc!=SQLITE_OK ) return rc; pCsr->pNextId = pCsr->aDoclist; pCsr->iPrevId = 0; @@ -2969,26 +3073,43 @@ p->zDb, p->zName, zName ); return rc; } +/* +** The xSavepoint() method. +** +** Flush the contents of the pending-terms table to disk. +*/ static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ UNUSED_PARAMETER(iSavepoint); assert( ((Fts3Table *)pVtab)->inTransaction ); assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint ); TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); return fts3SyncMethod(pVtab); } + +/* +** The xRelease() method. +** +** This is a no-op. +*/ static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); UNUSED_PARAMETER(iSavepoint); UNUSED_PARAMETER(pVtab); assert( p->inTransaction ); assert( p->mxSavepoint >= iSavepoint ); TESTONLY( p->mxSavepoint = iSavepoint-1 ); return SQLITE_OK; } + +/* +** The xRollbackTo() method. +** +** Discard the contents of the pending terms table. +*/ static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts3Table *p = (Fts3Table*)pVtab; UNUSED_PARAMETER(iSavepoint); assert( p->inTransaction ); assert( p->mxSavepoint >= iSavepoint ); @@ -3133,22 +3254,10 @@ sqlite3Fts3HashClear(pHash); sqlite3_free(pHash); } return rc; } - -#if !SQLITE_CORE -int sqlite3_extension_init( - sqlite3 *db, - char **pzErrMsg, - const sqlite3_api_routines *pApi -){ - SQLITE_EXTENSION_INIT2(pApi) - return sqlite3Fts3Init(db); -} -#endif - /* ** Allocate an Fts3MultiSegReader for each token in the expression headed ** by pExpr. ** @@ -3162,24 +3271,24 @@ ** there exists prefix b-tree of the right length) then it may be traversed ** and merged incrementally. Otherwise, it has to be merged into an in-memory ** doclist and then traversed. */ static void fts3EvalAllocateReaders( - Fts3Cursor *pCsr, - Fts3Expr *pExpr, + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Allocate readers for this expression */ int *pnToken, /* OUT: Total number of tokens in phrase. */ int *pnOr, /* OUT: Total number of OR nodes in expr. */ - int *pRc + int *pRc /* IN/OUT: Error code */ ){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_PHRASE ){ int i; int nToken = pExpr->pPhrase->nToken; *pnToken += nToken; for(i=0; ipPhrase->aToken[i]; - int rc = sqlite3Fts3TermSegReaderCursor(pCsr, + int rc = fts3TermSegReaderCursor(pCsr, pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr ); if( rc!=SQLITE_OK ){ *pRc = rc; return; @@ -3193,16 +3302,24 @@ fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); } } } +/* +** Arguments pList/nList contain the doclist for token iToken of phrase p. +** It is merged into the main doclist stored in p->doclist.aAll/nAll. +** +** This function assumes that pList points to a buffer allocated using +** sqlite3_malloc(). This function takes responsibility for eventually +** freeing the buffer. +*/ static void fts3EvalPhraseMergeToken( - Fts3Table *pTab, - Fts3Phrase *p, - int iToken, - char *pList, - int nList + Fts3Table *pTab, /* FTS Table pointer */ + Fts3Phrase *p, /* Phrase to merge pList/nList into */ + int iToken, /* Token pList/nList corresponds to */ + char *pList, /* Pointer to doclist */ + int nList /* Number of bytes in pList */ ){ assert( iToken!=p->iDoclistToken ); if( pList==0 ){ sqlite3_free(p->doclist.aAll); @@ -3247,13 +3364,19 @@ } if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken; } +/* +** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist +** does not take deferred tokens into account. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ static int fts3EvalPhraseLoad( - Fts3Cursor *pCsr, - Fts3Phrase *p + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p /* Phrase object */ ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int iToken; int rc = SQLITE_OK; @@ -3262,11 +3385,11 @@ assert( pToken->pDeferred==0 || pToken->pSegcsr==0 ); if( pToken->pSegcsr ){ int nThis = 0; char *pThis = 0; - rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis); + rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis); if( rc==SQLITE_OK ){ fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis); } } assert( pToken->pSegcsr==0 ); @@ -3273,18 +3396,26 @@ } return rc; } +/* +** This function is called on each phrase after the position lists for +** any deferred tokens have been loaded into memory. It updates the phrases +** current position list to include only those positions that are really +** instances of the phrase (after considering deferred tokens). If this +** means that the phrase does not appear in the current row, doclist.pList +** and doclist.nList are both zeroed. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ - int iToken; - int rc = SQLITE_OK; - - int nMaxUndeferred = pPhrase->iDoclistToken; - char *aPoslist = 0; - int nPoslist = 0; - int iPrev = -1; + int iToken; /* Used to iterate through phrase tokens */ + int rc = SQLITE_OK; /* Return code */ + char *aPoslist = 0; /* Position list for deferred tokens */ + int nPoslist = 0; /* Number of bytes in aPoslist */ + int iPrev = -1; /* Token number of previous deferred token */ assert( pPhrase->doclist.bFreeList==0 ); for(iToken=0; rc==SQLITE_OK && iTokennToken; iToken++){ Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; @@ -3326,10 +3457,11 @@ iPrev = iToken; } } if( iPrev>=0 ){ + int nMaxUndeferred = pPhrase->iDoclistToken; if( nMaxUndeferred<0 ){ pPhrase->doclist.pList = aPoslist; pPhrase->doclist.nList = nPoslist; pPhrase->doclist.iDocid = pCsr->iPrevId; pPhrase->doclist.bFreeList = 1; @@ -3374,13 +3506,19 @@ /* ** This function is called for each Fts3Phrase in a full-text query ** expression to initialize the mechanism for returning rows. Once this ** function has been called successfully on an Fts3Phrase, it may be ** used with fts3EvalPhraseNext() to iterate through the matching docids. +** +** If parameter bOptOk is true, then the phrase may (or may not) use the +** incremental loading strategy. Otherwise, the entire doclist is loaded into +** memory within this call. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. */ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ - int rc; + int rc; /* Error code */ Fts3PhraseToken *pFirst = &p->aToken[0]; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; if( pCsr->bDesc==pTab->bDescIdx && bOptOk==1 @@ -3404,11 +3542,17 @@ return rc; } /* ** This function is used to iterate backwards (from the end to start) -** through doclists. +** through doclists. It is used by this module to iterate through phrase +** doclists in reverse and by the fts3_write.c module to iterate through +** pending-terms lists when writing to databases with "order=desc". +** +** The doclist may be sorted in ascending (parameter bDescIdx==0) or +** descending (parameter bDescIdx==1) order of docid. Regardless, this +** function iterates from the end of the doclist to the beginning. */ void sqlite3Fts3DoclistPrev( int bDescIdx, /* True if the doclist is desc */ char *aDoclist, /* Pointer to entire doclist */ int nDoclist, /* Length of aDoclist in bytes */ @@ -3469,13 +3613,13 @@ ** If there is no "next" entry and no error occurs, then *pbEof is set to ** 1 before returning. Otherwise, if no error occurs and the iterator is ** successfully advanced, *pbEof is set to 0. */ static int fts3EvalPhraseNext( - Fts3Cursor *pCsr, - Fts3Phrase *p, - u8 *pbEof + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p, /* Phrase object to advance to next docid */ + u8 *pbEof /* OUT: Set to 1 if EOF */ ){ int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; @@ -3517,14 +3661,14 @@ fts3PoslistCopy(0, &pIter); pDL->nList = (pIter - pDL->pList); /* pIter now points just past the 0x00 that terminates the position- ** list for document pDL->iDocid. However, if this position-list was - ** edited in place by fts3EvalNearTrim2(), then pIter may not actually + ** edited in place by fts3EvalNearTrim(), then pIter may not actually ** point to the start of the next docid value. The following line deals ** with this case by advancing pIter past the zero-padding added by - ** fts3EvalNearTrim2(). */ + ** fts3EvalNearTrim(). */ while( pIterpNextDocid = pIter; assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); *pbEof = 0; @@ -3532,15 +3676,31 @@ } return rc; } +/* +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, fts3EvalPhraseStart() is called on all phrases within the +** expression. Also the Fts3Expr.bDeferred variable is set to true for any +** expressions for which all descendent tokens are deferred. +** +** If parameter bOptOk is zero, then it is guaranteed that the +** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for +** each phrase in the expression (subject to deferred token processing). +** Or, if bOptOk is non-zero, then one or more tokens within the expression +** may be loaded incrementally, meaning doclist.aAll/nAll is not available. +** +** If an error occurs within this function, *pRc is set to an SQLite error +** code before returning. +*/ static void fts3EvalStartReaders( - Fts3Cursor *pCsr, - Fts3Expr *pExpr, - int bOptOk, - int *pRc + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pExpr, /* Expression to initialize phrases in */ + int bOptOk, /* True to enable incremental loading */ + int *pRc /* IN/OUT: Error code */ ){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_PHRASE ){ int i; int nToken = pExpr->pPhrase->nToken; @@ -3555,27 +3715,46 @@ pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } } +/* +** An array of the following structures is assembled as part of the process +** of selecting tokens to defer before the query starts executing (as part +** of the xFilter() method). There is one element in the array for each +** token in the FTS expression. +** +** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong +** to phrases that are connected only by AND and NEAR operators (not OR or +** NOT). When determining tokens to defer, each AND/NEAR cluster is considered +** separately. The root of a tokens AND/NEAR cluster is stored in +** Fts3TokenAndCost.pRoot. +*/ typedef struct Fts3TokenAndCost Fts3TokenAndCost; struct Fts3TokenAndCost { Fts3Phrase *pPhrase; /* The phrase the token belongs to */ int iToken; /* Position of token in phrase */ Fts3PhraseToken *pToken; /* The token itself */ - Fts3Expr *pRoot; - int nOvfl; + Fts3Expr *pRoot; /* Root of NEAR/AND cluster */ + int nOvfl; /* Number of overflow pages to load doclist */ int iCol; /* The column the token must match */ }; +/* +** This function is used to populate an allocated Fts3TokenAndCost array. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, if an error occurs during execution, *pRc is set to an +** SQLite error code. +*/ static void fts3EvalTokenCosts( - Fts3Cursor *pCsr, - Fts3Expr *pRoot, - Fts3Expr *pExpr, - Fts3TokenAndCost **ppTC, - Fts3Expr ***ppOr, - int *pRc + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pRoot, /* Root of current AND/NEAR cluster */ + Fts3Expr *pExpr, /* Expression to consider */ + Fts3TokenAndCost **ppTC, /* Write new entries to *(*ppTC)++ */ + Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */ + int *pRc /* IN/OUT: Error code */ ){ if( *pRc==SQLITE_OK && pExpr ){ if( pExpr->eType==FTSQUERY_PHRASE ){ Fts3Phrase *pPhrase = pExpr->pPhrase; int i; @@ -3603,23 +3782,34 @@ fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc); } } } +/* +** Determine the average document (row) size in pages. If successful, +** write this value to *pnPage and return SQLITE_OK. Otherwise, return +** an SQLite error code. +** +** The average document size in pages is calculated by first calculating +** determining the average size in bytes, B. If B is less than the amount +** of data that will fit on a single leaf page of an intkey table in +** this database, then the average docsize is 1. Otherwise, it is 1 plus +** the number of overflow pages consumed by a record B bytes in size. +*/ static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ if( pCsr->nRowAvg==0 ){ /* The average document size, which is required to calculate the cost - ** of each doclist, has not yet been determined. Read the required - ** data from the %_stat table to calculate it. - ** - ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 - ** varints, where nCol is the number of columns in the FTS3 table. - ** The first varint is the number of documents currently stored in - ** the table. The following nCol varints contain the total amount of - ** data stored in all rows of each column of the table, from left - ** to right. - */ + ** of each doclist, has not yet been determined. Read the required + ** data from the %_stat table to calculate it. + ** + ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 + ** varints, where nCol is the number of columns in the FTS3 table. + ** The first varint is the number of documents currently stored in + ** the table. The following nCol varints contain the total amount of + ** data stored in all rows of each column of the table, from left + ** to right. + */ int rc; Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; sqlite3_stmt *pStmt; sqlite3_int64 nDoc = 0; sqlite3_int64 nByte = 0; @@ -3650,109 +3840,151 @@ *pnPage = pCsr->nRowAvg; return SQLITE_OK; } +/* +** This function is called to select the tokens (if any) that will be +** deferred. The array aTC[] has already been populated when this is +** called. +** +** This function is called once for each AND/NEAR cluster in the +** expression. Each invocation determines which tokens to defer within +** the cluster with root node pRoot. See comments above the definition +** of struct Fts3TokenAndCost for more details. +** +** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken() +** called on each token to defer. Otherwise, an SQLite error code is +** returned. +*/ static int fts3EvalSelectDeferred( - Fts3Cursor *pCsr, - Fts3Expr *pRoot, - Fts3TokenAndCost *aTC, - int nTC + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pRoot, /* Consider tokens with this root node */ + Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */ + int nTC /* Number of entries in aTC[] */ ){ - int nDocSize = 0; - int nDocEst = 0; - int rc = SQLITE_OK; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int ii; + int nDocSize = 0; /* Number of pages per doc loaded */ + int rc = SQLITE_OK; /* Return code */ + int ii; /* Iterator variable for various purposes */ + int nOvfl = 0; /* Total overflow pages used by doclists */ + int nToken = 0; /* Total number of tokens in cluster */ - int nOvfl = 0; - int nTerm = 0; + int nMinEst = 0; /* The minimum count for any phrase so far. */ + int nLoad4 = 1; /* (Phrases that will be loaded)^4. */ + /* Count the tokens in this AND/NEAR cluster. If none of the doclists + ** associated with the tokens spill onto overflow pages, or if there is + ** only 1 token, exit early. No tokens to defer in this case. */ for(ii=0; iinOvfl) + assert( rc!=SQLITE_OK || nDocSize>0 ); + + + /* Iterate through all tokens in this AND/NEAR cluster, in ascending order + ** of the number of overflow pages that will be loaded by the pager layer + ** to retrieve the entire doclist for the token from the full-text index. + ** Load the doclists for tokens that are either: + ** + ** a. The cheapest token in the entire query (i.e. the one visited by the + ** first iteration of this loop), or + ** + ** b. Part of a multi-token phrase. + ** + ** After each token doclist is loaded, merge it with the others from the + ** same phrase and count the number of documents that the merged doclist + ** contains. Set variable "nMinEst" to the smallest number of documents in + ** any phrase doclist for which 1 or more token doclists have been loaded. + ** Let nOther be the number of other phrases for which it is certain that + ** one or more tokens will not be deferred. + ** + ** Then, for each token, defer it if loading the doclist would result in + ** loading N or more overflow pages into memory, where N is computed as: + ** + ** (nMinEst + 4^nOther - 1) / (4^nOther) + */ + for(ii=0; iinOvfl) ){ - pTC = &aTC[jj]; + pTC = &aTC[iTC]; } } assert( pTC ); - /* At this point pTC points to the cheapest remaining token. */ - if( ii==0 ){ - if( pTC->nOvfl ){ - nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10; - }else{ + if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){ + /* The number of overflow pages to load for this (and therefore all + ** subsequent) tokens is greater than the estimated number of pages + ** that will be loaded if all subsequent tokens are deferred. + */ + Fts3PhraseToken *pToken = pTC->pToken; + rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); + fts3SegReaderCursorFree(pToken->pSegcsr); + pToken->pSegcsr = 0; + }else{ + nLoad4 = nLoad4*4; + if( ii==0 || pTC->pPhrase->nToken>1 ){ + /* Either this is the cheapest token in the entire query, or it is + ** part of a multi-token phrase. Either way, the entire doclist will + ** (eventually) be loaded into memory. It may as well be now. */ Fts3PhraseToken *pToken = pTC->pToken; int nList = 0; char *pList = 0; - rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList); + rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList); assert( rc==SQLITE_OK || pList==0 ); - if( rc==SQLITE_OK ){ - nDocEst = fts3DoclistCountDocids(1, pList, nList); + int nCount; fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList); + nCount = fts3DoclistCountDocids( + pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll + ); + if( ii==0 || nCountnOvfl>=(nDocEst*nDocSize) ){ - Fts3PhraseToken *pToken = pTC->pToken; - rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); - fts3SegReaderCursorFree(pToken->pSegcsr); - pToken->pSegcsr = 0; - } - nDocEst = 1 + (nDocEst/4); } pTC->pToken = 0; } return rc; } -int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){ +/* +** This function is called from within the xFilter method. It initializes +** the full-text query currently stored in pCsr->pExpr. To iterate through +** the results of a query, the caller does: +** +** fts3EvalStart(pCsr); +** while( 1 ){ +** fts3EvalNext(pCsr); +** if( pCsr->bEof ) break; +** ... return row pCsr->iPrevId to the caller ... +** } +*/ +static int fts3EvalStart(Fts3Cursor *pCsr){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc = SQLITE_OK; int nToken = 0; int nOr = 0; /* Allocate a MultiSegReader for each token in the expression. */ - fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &nOr, &rc); - - /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO: - ** This call will eventually also be responsible for determining which - ** tokens are 'deferred' until the document text is loaded into memory. - ** - ** Each token in each phrase is dealt with using one of the following - ** three strategies: - ** - ** 1. Entire doclist loaded into memory as part of the - ** fts3EvalStartReaders() call. - ** - ** 2. Doclist loaded into memory incrementally, as part of each - ** sqlite3Fts3EvalNext() call. - ** - ** 3. Token doclist is never loaded. Instead, documents are loaded into - ** memory and scanned for the token as part of the sqlite3Fts3EvalNext() - ** call. This is known as a "deferred" token. - */ - - /* If bOptOk is true, check if there are any tokens that should be deferred. - */ - if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){ + fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc); + + /* Determine which, if any, tokens in the expression should be deferred. */ + if( rc==SQLITE_OK && nToken>1 && pTab->bHasStat ){ Fts3TokenAndCost *aTC; Fts3Expr **apOr; aTC = (Fts3TokenAndCost *)sqlite3_malloc( sizeof(Fts3TokenAndCost) * nToken + sizeof(Fts3Expr *) * nOr * 2 @@ -3764,11 +3996,11 @@ }else{ int ii; Fts3TokenAndCost *pTC = aTC; Fts3Expr **ppOr = apOr; - fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc); + fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc); nToken = pTC-aTC; nOr = ppOr-apOr; if( rc==SQLITE_OK ){ rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken); @@ -3779,25 +4011,50 @@ sqlite3_free(aTC); } } - fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc); + fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc); return rc; } -static void fts3EvalZeroPoslist(Fts3Phrase *pPhrase){ +/* +** Invalidate the current position list for phrase pPhrase. +*/ +static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){ if( pPhrase->doclist.bFreeList ){ sqlite3_free(pPhrase->doclist.pList); } pPhrase->doclist.pList = 0; pPhrase->doclist.nList = 0; pPhrase->doclist.bFreeList = 0; } -static int fts3EvalNearTrim2( - int nNear, +/* +** This function is called to edit the position list associated with +** the phrase object passed as the fifth argument according to a NEAR +** condition. For example: +** +** abc NEAR/5 "def ghi" +** +** Parameter nNear is passed the NEAR distance of the expression (5 in +** the example above). When this function is called, *paPoslist points to +** the position list, and *pnToken is the number of phrase tokens in, the +** phrase on the other side of the NEAR operator to pPhrase. For example, +** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to +** the position list associated with phrase "abc". +** +** All positions in the pPhrase position list that are not sufficiently +** close to a position in the *paPoslist position list are removed. If this +** leaves 0 positions, zero is returned. Otherwise, non-zero. +** +** Before returning, *paPoslist is set to point to the position lsit +** associated with pPhrase. And *pnToken is set to the number of tokens in +** pPhrase. +*/ +static int fts3EvalNearTrim( + int nNear, /* NEAR distance. As in "NEAR/nNear". */ char *aTmp, /* Temporary space to use */ char **paPoslist, /* IN/OUT: Position list */ int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ ){ @@ -3825,10 +4082,176 @@ } return res; } +/* +** This function is a no-op if *pRc is other than SQLITE_OK when it is called. +** Otherwise, it advances the expression passed as the second argument to +** point to the next matching row in the database. Expressions iterate through +** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero, +** or descending if it is non-zero. +** +** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if +** successful, the following variables in pExpr are set: +** +** Fts3Expr.bEof (non-zero if EOF - there is no next row) +** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row) +** +** If the expression is of type FTSQUERY_PHRASE, and the expression is not +** at EOF, then the following variables are populated with the position list +** for the phrase for the visited row: +** +** FTs3Expr.pPhrase->doclist.nList (length of pList in bytes) +** FTs3Expr.pPhrase->doclist.pList (pointer to position list) +** +** It says above that this function advances the expression to the next +** matching row. This is usually true, but there are the following exceptions: +** +** 1. Deferred tokens are not taken into account. If a phrase consists +** entirely of deferred tokens, it is assumed to match every row in +** the db. In this case the position-list is not populated at all. +** +** Or, if a phrase contains one or more deferred tokens and one or +** more non-deferred tokens, then the expression is advanced to the +** next possible match, considering only non-deferred tokens. In other +** words, if the phrase is "A B C", and "B" is deferred, the expression +** is advanced to the next row that contains an instance of "A * C", +** where "*" may match any single token. The position list in this case +** is populated as for "A * C" before returning. +** +** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is +** advanced to point to the next row that matches "x AND y". +** +** See fts3EvalTestDeferredAndNear() for details on testing if a row is +** really a match, taking into account deferred tokens and NEAR operators. +*/ +static void fts3EvalNextRow( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pExpr, /* Expr. to advance to next matching row */ + int *pRc /* IN/OUT: Error code */ +){ + if( *pRc==SQLITE_OK ){ + int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */ + assert( pExpr->bEof==0 ); + pExpr->bStart = 1; + + switch( pExpr->eType ){ + case FTSQUERY_NEAR: + case FTSQUERY_AND: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + assert( !pLeft->bDeferred || !pRight->bDeferred ); + + if( pLeft->bDeferred ){ + /* LHS is entirely deferred. So we assume it matches every row. + ** Advance the RHS iterator to find the next row visited. */ + fts3EvalNextRow(pCsr, pRight, pRc); + pExpr->iDocid = pRight->iDocid; + pExpr->bEof = pRight->bEof; + }else if( pRight->bDeferred ){ + /* RHS is entirely deferred. So we assume it matches every row. + ** Advance the LHS iterator to find the next row visited. */ + fts3EvalNextRow(pCsr, pLeft, pRc); + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + }else{ + /* Neither the RHS or LHS are deferred. */ + fts3EvalNextRow(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); + while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ + sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( iDiff==0 ) break; + if( iDiff<0 ){ + fts3EvalNextRow(pCsr, pLeft, pRc); + }else{ + fts3EvalNextRow(pCsr, pRight, pRc); + } + } + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = (pLeft->bEof || pRight->bEof); + } + break; + } + + case FTSQUERY_OR: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + + assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid ); + assert( pRight->bStart || pLeft->iDocid==pRight->iDocid ); + + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + fts3EvalNextRow(pCsr, pLeft, pRc); + }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){ + fts3EvalNextRow(pCsr, pRight, pRc); + }else{ + fts3EvalNextRow(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); + } + + pExpr->bEof = (pLeft->bEof && pRight->bEof); + iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + pExpr->iDocid = pLeft->iDocid; + }else{ + pExpr->iDocid = pRight->iDocid; + } + + break; + } + + case FTSQUERY_NOT: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + + if( pRight->bStart==0 ){ + fts3EvalNextRow(pCsr, pRight, pRc); + assert( *pRc!=SQLITE_OK || pRight->bStart ); + } + + fts3EvalNextRow(pCsr, pLeft, pRc); + if( pLeft->bEof==0 ){ + while( !*pRc + && !pRight->bEof + && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 + ){ + fts3EvalNextRow(pCsr, pRight, pRc); + } + } + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + break; + } + + default: { + Fts3Phrase *pPhrase = pExpr->pPhrase; + fts3EvalInvalidatePoslist(pPhrase); + *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); + pExpr->iDocid = pPhrase->doclist.iDocid; + break; + } + } + } +} + +/* +** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR +** cluster, then this function returns 1 immediately. +** +** Otherwise, it checks if the current row really does match the NEAR +** expression, using the data currently stored in the position lists +** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. +** +** If the current row is a match, the position list associated with each +** phrase in the NEAR expression is edited in place to contain only those +** phrase instances sufficiently close to their peers to satisfy all NEAR +** constraints. In this case it returns 1. If the NEAR expression does not +** match the current row, 0 is returned. The position lists may or may not +** be edited if 0 is returned. +*/ static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ int res = 1; /* The following block runs if pExpr is the root of a NEAR query. ** For example, the query: @@ -3846,11 +4269,11 @@ ** | | ** "w" "x" ** ** The right-hand child of a NEAR node is always a phrase. The ** left-hand child may be either a phrase or a NEAR node. There are - ** no exceptions to this. + ** no exceptions to this - it's the way the parser in fts3_expr.c works. */ if( *pRc==SQLITE_OK && pExpr->eType==FTSQUERY_NEAR && pExpr->bEof==0 && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) @@ -3873,21 +4296,21 @@ int nToken = p->pPhrase->nToken; for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ Fts3Phrase *pPhrase = p->pRight->pPhrase; int nNear = p->nNear; - res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase); + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); } aPoslist = pExpr->pRight->pPhrase->doclist.pList; nToken = pExpr->pRight->pPhrase->nToken; for(p=pExpr->pLeft; p && res; p=p->pLeft){ int nNear = p->pParent->nNear; Fts3Phrase *pPhrase = ( p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase ); - res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase); + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); } } sqlite3_free(aTmp); } @@ -3894,132 +4317,33 @@ return res; } /* -** This macro is used by the fts3EvalNext() function. The two arguments are -** 64-bit docid values. If the current query is "ORDER BY docid ASC", then -** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then -** it returns (i2 - i1). This allows the same code to be used for merging -** doclists in ascending or descending order. -*/ -#define DOCID_CMP(i1, i2) ((pCsr->bDesc?-1:1) * (i1-i2)) - -static void fts3EvalNext( - Fts3Cursor *pCsr, - Fts3Expr *pExpr, - int *pRc -){ - if( *pRc==SQLITE_OK ){ - assert( pExpr->bEof==0 ); - pExpr->bStart = 1; - - switch( pExpr->eType ){ - case FTSQUERY_NEAR: - case FTSQUERY_AND: { - Fts3Expr *pLeft = pExpr->pLeft; - Fts3Expr *pRight = pExpr->pRight; - assert( !pLeft->bDeferred || !pRight->bDeferred ); - if( pLeft->bDeferred ){ - fts3EvalNext(pCsr, pRight, pRc); - pExpr->iDocid = pRight->iDocid; - pExpr->bEof = pRight->bEof; - }else if( pRight->bDeferred ){ - fts3EvalNext(pCsr, pLeft, pRc); - pExpr->iDocid = pLeft->iDocid; - pExpr->bEof = pLeft->bEof; - }else{ - fts3EvalNext(pCsr, pLeft, pRc); - fts3EvalNext(pCsr, pRight, pRc); - - while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ - sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid); - if( iDiff==0 ) break; - if( iDiff<0 ){ - fts3EvalNext(pCsr, pLeft, pRc); - }else{ - fts3EvalNext(pCsr, pRight, pRc); - } - } - - pExpr->iDocid = pLeft->iDocid; - pExpr->bEof = (pLeft->bEof || pRight->bEof); - } - break; - } - - case FTSQUERY_OR: { - Fts3Expr *pLeft = pExpr->pLeft; - Fts3Expr *pRight = pExpr->pRight; - sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); - - assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid ); - assert( pRight->bStart || pLeft->iDocid==pRight->iDocid ); - - if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ - fts3EvalNext(pCsr, pLeft, pRc); - }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){ - fts3EvalNext(pCsr, pRight, pRc); - }else{ - fts3EvalNext(pCsr, pLeft, pRc); - fts3EvalNext(pCsr, pRight, pRc); - } - - pExpr->bEof = (pLeft->bEof && pRight->bEof); - iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); - if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ - pExpr->iDocid = pLeft->iDocid; - }else{ - pExpr->iDocid = pRight->iDocid; - } - - break; - } - - case FTSQUERY_NOT: { - Fts3Expr *pLeft = pExpr->pLeft; - Fts3Expr *pRight = pExpr->pRight; - - if( pRight->bStart==0 ){ - fts3EvalNext(pCsr, pRight, pRc); - assert( *pRc!=SQLITE_OK || pRight->bStart ); - } - - fts3EvalNext(pCsr, pLeft, pRc); - if( pLeft->bEof==0 ){ - while( !*pRc - && !pRight->bEof - && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 - ){ - fts3EvalNext(pCsr, pRight, pRc); - } - } - pExpr->iDocid = pLeft->iDocid; - pExpr->bEof = pLeft->bEof; - break; - } - - default: { - Fts3Phrase *pPhrase = pExpr->pPhrase; - fts3EvalZeroPoslist(pPhrase); - *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); - pExpr->iDocid = pPhrase->doclist.iDocid; - break; - } - } - } -} - -static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){ - int bHit = 1; +** This function is a helper function for fts3EvalTestDeferredAndNear(). +** Assuming no error occurs or has occurred, It returns non-zero if the +** expression passed as the second argument matches the row that pCsr +** currently points to, or zero if it does not. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** If an error occurs during execution of this function, *pRc is set to +** the appropriate SQLite error code. In this case the returned value is +** undefined. +*/ +static int fts3EvalTestExpr( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Expr to test. May or may not be root. */ + int *pRc /* IN/OUT: Error code */ +){ + int bHit = 1; /* Return value */ if( *pRc==SQLITE_OK ){ switch( pExpr->eType ){ case FTSQUERY_NEAR: case FTSQUERY_AND: bHit = ( - fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) - && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) + fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) + && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) && fts3EvalNearTest(pExpr, pRc) ); /* If the NEAR expression does not match any rows, zero the doclist for ** all phrases involved in the NEAR. This is because the snippet(), @@ -4041,31 +4365,31 @@ && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) ){ Fts3Expr *p; for(p=pExpr; p->pPhrase==0; p=p->pLeft){ if( p->pRight->iDocid==pCsr->iPrevId ){ - fts3EvalZeroPoslist(p->pRight->pPhrase); + fts3EvalInvalidatePoslist(p->pRight->pPhrase); } } if( p->iDocid==pCsr->iPrevId ){ - fts3EvalZeroPoslist(p->pPhrase); + fts3EvalInvalidatePoslist(p->pPhrase); } } break; case FTSQUERY_OR: { - int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc); - int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc); + int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc); + int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc); bHit = bHit1 || bHit2; break; } case FTSQUERY_NOT: bHit = ( - fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) - && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) + fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) + && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) ); break; default: { if( pCsr->pDeferred @@ -4072,11 +4396,11 @@ && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred) ){ Fts3Phrase *pPhrase = pExpr->pPhrase; assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 ); if( pExpr->bDeferred ){ - fts3EvalZeroPoslist(pPhrase); + fts3EvalInvalidatePoslist(pPhrase); } *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); bHit = (pPhrase->doclist.pList!=0); pExpr->iDocid = pCsr->iPrevId; }else{ @@ -4088,31 +4412,53 @@ } return bHit; } /* -** Return 1 if both of the following are true: +** This function is called as the second part of each xNext operation when +** iterating through the results of a full-text query. At this point the +** cursor points to a row that matches the query expression, with the +** following caveats: +** +** * Up until this point, "NEAR" operators in the expression have been +** treated as "AND". +** +** * Deferred tokens have not yet been considered. +** +** If *pRc is not SQLITE_OK when this function is called, it immediately +** returns 0. Otherwise, it tests whether or not after considering NEAR +** operators and deferred tokens the current row is still a match for the +** expression. It returns 1 if both of the following are true: ** ** 1. *pRc is SQLITE_OK when this function returns, and ** ** 2. After scanning the current FTS table row for the deferred tokens, -** it is determined that the row does not match the query. +** it is determined that the row does *not* match the query. ** ** Or, if no error occurs and it seems the current row does match the FTS ** query, return 0. */ -static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){ +static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){ int rc = *pRc; int bMiss = 0; if( rc==SQLITE_OK ){ + + /* If there are one or more deferred tokens, load the current row into + ** memory and scan it to determine the position list for each deferred + ** token. Then, see if this row is really a match, considering deferred + ** tokens and NEAR operators (neither of which were taken into account + ** earlier, by fts3EvalNextRow()). + */ if( pCsr->pDeferred ){ rc = fts3CursorSeek(0, pCsr); if( rc==SQLITE_OK ){ rc = sqlite3Fts3CacheDeferredDoclists(pCsr); } } - bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc)); + bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc)); + + /* Free the position-lists accumulated for each deferred token above. */ sqlite3Fts3FreeDeferredDoclists(pCsr); *pRc = rc; } return (rc==SQLITE_OK && bMiss); } @@ -4119,11 +4465,11 @@ /* ** Advance to the next document that matches the FTS expression in ** Fts3Cursor.pExpr. */ -int sqlite3Fts3EvalNext(Fts3Cursor *pCsr){ +static int fts3EvalNext(Fts3Cursor *pCsr){ int rc = SQLITE_OK; /* Return Code */ Fts3Expr *pExpr = pCsr->pExpr; assert( pCsr->isEof==0 ); if( pExpr==0 ){ pCsr->isEof = 1; @@ -4131,23 +4477,23 @@ do { if( pCsr->isRequireSeek==0 ){ sqlite3_reset(pCsr->pStmt); } assert( sqlite3_data_count(pCsr->pStmt)==0 ); - fts3EvalNext(pCsr, pExpr, &rc); + fts3EvalNextRow(pCsr, pExpr, &rc); pCsr->isEof = pExpr->bEof; pCsr->isRequireSeek = 1; pCsr->isMatchinfoNeeded = 1; pCsr->iPrevId = pExpr->iDocid; - }while( pCsr->isEof==0 && fts3EvalLoadDeferred(pCsr, &rc) ); + }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) ); } return rc; } /* ** Restart interation for expression pExpr so that the next call to -** sqlite3Fts3EvalNext() visits the first row. Do not allow incremental +** fts3EvalNext() visits the first row. Do not allow incremental ** loading or merging of phrase doclists for this iteration. ** ** If *pRc is other than SQLITE_OK when this function is called, it is ** a no-op. If an error occurs within this function, *pRc is set to an ** SQLite error code before returning. @@ -4159,11 +4505,11 @@ ){ if( pExpr && *pRc==SQLITE_OK ){ Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase ){ - fts3EvalZeroPoslist(pPhrase); + fts3EvalInvalidatePoslist(pPhrase); if( pPhrase->bIncr ){ assert( pPhrase->nToken==1 ); assert( pPhrase->aToken[0].pSegcsr ); sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr); *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); @@ -4275,18 +4621,18 @@ /* Ensure the %_content statement is reset. */ if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt); assert( sqlite3_data_count(pCsr->pStmt)==0 ); /* Advance to the next document */ - fts3EvalNext(pCsr, pRoot, &rc); + fts3EvalNextRow(pCsr, pRoot, &rc); pCsr->isEof = pRoot->bEof; pCsr->isRequireSeek = 1; pCsr->isMatchinfoNeeded = 1; pCsr->iPrevId = pRoot->iDocid; }while( pCsr->isEof==0 && pRoot->eType==FTSQUERY_NEAR - && fts3EvalLoadDeferred(pCsr, &rc) + && fts3EvalTestDeferredAndNear(pCsr, &rc) ); if( rc==SQLITE_OK && pCsr->isEof==0 ){ fts3EvalUpdateCounts(pRoot); } @@ -4304,14 +4650,14 @@ ** ** do {...} while( pRoot->iDocidbEof==0 ); }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); - fts3EvalLoadDeferred(pCsr, &rc); + fts3EvalTestDeferredAndNear(pCsr, &rc); } } return rc; } @@ -4438,15 +4784,29 @@ */ void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ if( pPhrase ){ int i; sqlite3_free(pPhrase->doclist.aAll); - fts3EvalZeroPoslist(pPhrase); + fts3EvalInvalidatePoslist(pPhrase); memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); for(i=0; inToken; i++){ fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); pPhrase->aToken[i].pSegcsr = 0; } } } + +#if !SQLITE_CORE +/* +** Initialize API pointer table, if required. +*/ +int sqlite3_extension_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + SQLITE_EXTENSION_INIT2(pApi) + return sqlite3Fts3Init(db); +} +#endif #endif Index: ext/fts3/fts3Int.h ================================================================== --- ext/fts3/fts3Int.h +++ ext/fts3/fts3Int.h @@ -505,13 +505,10 @@ Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ ); void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *); -int sqlite3Fts3EvalStart(Fts3Cursor *, Fts3Expr *, int); -int sqlite3Fts3EvalNext(Fts3Cursor *pCsr); - int sqlite3Fts3MsrIncrStart( Fts3Table*, Fts3MultiSegReader*, int, const char*, int); int sqlite3Fts3MsrIncrNext( Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *); char *sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol); Index: test/fts3auto.test ================================================================== --- test/fts3auto.test +++ test/fts3auto.test @@ -105,19 +105,21 @@ SELECT docid, mit(matchinfo($tbl, 'x')) FROM $tbl WHERE $tbl MATCH '$match' ORDER BY docid ASC " $matchinfo_asc } -# fts3_make_deferrable TABLE TOKEN +# fts3_make_deferrable TABLE TOKEN ?NROW? # -proc fts3_make_deferrable {tbl token} { +proc fts3_make_deferrable {tbl token {nRow 0}} { set stmt [sqlite3_prepare db "SELECT * FROM $tbl" -1 dummy] set name [sqlite3_column_name $stmt 0] sqlite3_finalize $stmt - set nRow [db one "SELECT count(*) FROM $tbl"] + if {$nRow==0} { + set nRow [db one "SELECT count(*) FROM $tbl"] + } set pgsz [db one "PRAGMA page_size"] execsql BEGIN for {set i 0} {$i < ($nRow * $pgsz * 1.2)/100} {incr i} { set doc [string repeat "$token " 100] execsql "INSERT INTO $tbl ($name) VALUES(\$doc)" @@ -650,9 +652,53 @@ do_fts3query_test 6.$tn.4 t1 {a:C OR b:G OR c:K OR d:C} do_fts3query_test 6.$tn.5 t1 {a:G OR b:G} catchsql { COMMIT } } + +foreach {tn create} { + 1 "fts4(x)" + 2 "fts4(x, order=DESC)" +} { + execsql [subst { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING $create; + }] + + foreach {x} { + "F E N O T K X V A X I E X A P G Q V H U" + "R V A E T C V Q N I E L O N U G J K L U" + "U Y I G W M V F J L X I D C H F P J Q B" + "S G D Z X R P G S S Y B K A S G A I L L" + "L S I C H T Z S R Q P R N K J X L F M J" + "C C C D P X B Z C M A D A C X S B T X V" + "W Y J M D R G V R K B X S A W R I T N C" + "P K L W T M S P O Y Y V V O E H Q A I R" + "C D Y I C Z F H J C O Y A Q F L S B D K" + "P G S C Y C Y V I M B D S Z D D Y W I E" + "Z K Z U E E S F Y X T U A L W O U J C Q" + "P A T Z S W L P L Q V Y Y I P W U X S S" + "I U I H U O F Z F R H R F T N D X A G M" + "N A B M S H K X S O Y D T X S B R Y H Z" + "L U D A S K I L S V Z J P U B E B Y H M" + } { + execsql { INSERT INTO t1 VALUES($x) } + } + + # Add extra documents to the database such that token "B" will be considered + # deferrable if considering the other tokens means that 2 or fewer documents + # will be loaded into memory. + # + fts3_make_deferrable t1 B 2 + + # B is not deferred in either of the first two tests below, since filtering + # on "M" or "D" returns 10 documents or so. But filtering on "M * D" only + # returns 2, so B is deferred in this case. + # + do_fts3query_test 7.$tn.1 t1 {"M B"} + do_fts3query_test 7.$tn.2 t1 {"B D"} + do_fts3query_test 7.$tn.3 -deferred B t1 {"M B D"} +} set sqlite_fts3_enable_parentheses $sfep finish_test