/ Check-in [8230d831]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix some of the code issues (missing comments etc.) in the new FTS code.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts3-changes
Files: files | file ages | folders
SHA1: 8230d83120e0f4d217fde56e22c6f05aa5adee09
User & Date: dan 2011-06-23 17:09:51
Context
2011-06-27
11:15
Changes to improve the selection of deferred tokens within phrases. check-in: 2c4bbd90 user: dan tags: fts3-changes
2011-06-23
17:09
Fix some of the code issues (missing comments etc.) in the new FTS code. check-in: 8230d831 user: dan tags: fts3-changes
16:40
Add a test for ticket [91e2e8ba6f]. No changes to code. check-in: c271f7e8 user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

   308    308   
   309    309   #include "fts3.h"
   310    310   #ifndef SQLITE_CORE 
   311    311   # include "sqlite3ext.h"
   312    312     SQLITE_EXTENSION_INIT1
   313    313   #endif
   314    314   
          315  +static int fts3EvalNext(Fts3Cursor *pCsr);
          316  +static int fts3EvalStart(Fts3Cursor *pCsr);
          317  +static int fts3TermSegReaderCursor(
          318  +    Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **);
          319  +
   315    320   /* 
   316    321   ** Write a 64-bit variable-length integer to memory starting at p[0].
   317    322   ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes.
   318    323   ** The number of bytes written is returned.
   319    324   */
   320    325   int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){
   321    326     unsigned char *q = (unsigned char *) p;
................................................................................
   816    821     for(i=0; i<p->nColumn; i++){
   817    822       fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
   818    823     }
   819    824     sqlite3_free(zFree);
   820    825     return zRet;
   821    826   }
   822    827   
          828  +/*
          829  +** This function interprets the string at (*pp) as a non-negative integer
          830  +** value. It reads the integer and sets *pnOut to the value read, then 
          831  +** sets *pp to point to the byte immediately following the last byte of
          832  +** the integer value.
          833  +**
          834  +** Only decimal digits ('0'..'9') may be part of an integer value. 
          835  +**
          836  +** If *pp does not begin with a decimal digit, SQLITE_ERROR is returned and
          837  +** the output value is undefined. Otherwise SQLITE_OK is returned.
          838  +**
          839  +** This function is used when parsing the "prefix=" FTS4 parameter.
          840  +*/
   823    841   static int fts3GobbleInt(const char **pp, int *pnOut){
   824         -  const char *p = *pp;
   825         -  int nInt = 0;
          842  +  const char *p = *pp;            /* Iterator pointer */
          843  +  int nInt = 0;                   /* Output value */
          844  +
   826    845     for(p=*pp; p[0]>='0' && p[0]<='9'; p++){
   827    846       nInt = nInt * 10 + (p[0] - '0');
   828    847     }
   829    848     if( p==*pp ) return SQLITE_ERROR;
   830    849     *pnOut = nInt;
   831    850     *pp = p;
   832    851     return SQLITE_OK;
   833    852   }
   834    853   
   835         -
          854  +/*
          855  +** This function is called to allocate an array of Fts3Index structures
          856  +** representing the indexes maintained by the current FTS table. FTS tables
          857  +** always maintain the main "terms" index, but may also maintain one or
          858  +** more "prefix" indexes, depending on the value of the "prefix=" parameter
          859  +** (if any) specified as part of the CREATE VIRTUAL TABLE statement.
          860  +**
          861  +** Argument zParam is passed the value of the "prefix=" option if one was
          862  +** specified, or NULL otherwise.
          863  +**
          864  +** If no error occurs, SQLITE_OK is returned and *apIndex set to point to
          865  +** the allocated array. *pnIndex is set to the number of elements in the
          866  +** array. If an error does occur, an SQLite error code is returned.
          867  +**
          868  +** Regardless of whether or not an error is returned, it is the responsibility
          869  +** of the caller to call sqlite3_free() on the output array to free it.
          870  +*/
   836    871   static int fts3PrefixParameter(
   837    872     const char *zParam,             /* ABC in prefix=ABC parameter to parse */
   838    873     int *pnIndex,                   /* OUT: size of *apIndex[] array */
   839         -  struct Fts3Index **apIndex,     /* OUT: Array of indexes for this table */
   840         -  struct Fts3Index **apFree       /* OUT: Free this with sqlite3_free() */
          874  +  struct Fts3Index **apIndex      /* OUT: Array of indexes for this table */
   841    875   ){
   842         -  struct Fts3Index *aIndex;
   843         -  int nIndex = 1;
          876  +  struct Fts3Index *aIndex;       /* Allocated array */
          877  +  int nIndex = 1;                 /* Number of entries in array */
   844    878   
   845    879     if( zParam && zParam[0] ){
   846    880       const char *p;
   847    881       nIndex++;
   848    882       for(p=zParam; *p; p++){
   849    883         if( *p==',' ) nIndex++;
   850    884       }
   851    885     }
   852    886   
   853    887     aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex);
   854         -  *apIndex = *apFree = aIndex;
          888  +  *apIndex = aIndex;
   855    889     *pnIndex = nIndex;
   856    890     if( !aIndex ){
   857    891       return SQLITE_NOMEM;
   858    892     }
   859    893   
   860    894     memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex);
   861    895     if( zParam ){
................................................................................
   904    938     int nDb;                        /* Bytes required to hold database name */
   905    939     int nName;                      /* Bytes required to hold table name */
   906    940     int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */
   907    941     const char **aCol;              /* Array of column names */
   908    942     sqlite3_tokenizer *pTokenizer = 0;        /* Tokenizer for this table */
   909    943   
   910    944     int nIndex;                     /* Size of aIndex[] array */
   911         -  struct Fts3Index *aIndex;       /* Array of indexes for this table */
   912         -  struct Fts3Index *aFree = 0;    /* Free this before returning */
          945  +  struct Fts3Index *aIndex = 0;   /* Array of indexes for this table */
   913    946   
   914    947     /* The results of parsing supported FTS4 key=value options: */
   915    948     int bNoDocsize = 0;             /* True to omit %_docsize table */
   916    949     int bDescIdx = 0;               /* True to store descending indexes */
   917    950     char *zPrefix = 0;              /* Prefix parameter value (or NULL) */
   918    951     char *zCompress = 0;            /* compress=? parameter (or NULL) */
   919    952     char *zUncompress = 0;          /* uncompress=? parameter (or NULL) */
................................................................................
  1042   1075   
  1043   1076     if( pTokenizer==0 ){
  1044   1077       rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr);
  1045   1078       if( rc!=SQLITE_OK ) goto fts3_init_out;
  1046   1079     }
  1047   1080     assert( pTokenizer );
  1048   1081   
  1049         -  rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex, &aFree);
         1082  +  rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex);
  1050   1083     if( rc==SQLITE_ERROR ){
  1051   1084       assert( zPrefix );
  1052   1085       *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix);
  1053   1086     }
  1054   1087     if( rc!=SQLITE_OK ) goto fts3_init_out;
  1055   1088   
  1056   1089     /* Allocate and populate the Fts3Table structure. */
................................................................................
  1129   1162     p->nNodeSize = p->nPgsz-35;
  1130   1163   
  1131   1164     /* Declare the table schema to SQLite. */
  1132   1165     fts3DeclareVtab(&rc, p);
  1133   1166   
  1134   1167   fts3_init_out:
  1135   1168     sqlite3_free(zPrefix);
  1136         -  sqlite3_free(aFree);
         1169  +  sqlite3_free(aIndex);
  1137   1170     sqlite3_free(zCompress);
  1138   1171     sqlite3_free(zUncompress);
  1139   1172     sqlite3_free((void *)aCol);
  1140   1173     if( rc!=SQLITE_OK ){
  1141   1174       if( p ){
  1142   1175         fts3DisconnectMethod((sqlite3_vtab *)p);
  1143   1176       }else if( pTokenizer ){
................................................................................
  1720   1753     *p++ = POS_END;
  1721   1754     *pp = p;
  1722   1755     *pp1 = p1 + 1;
  1723   1756     *pp2 = p2 + 1;
  1724   1757   }
  1725   1758   
  1726   1759   /*
  1727         -** nToken==1 searches for adjacent positions.
  1728         -**
  1729   1760   ** This function is used to merge two position lists into one. When it is
  1730   1761   ** called, *pp1 and *pp2 must both point to position lists. A position-list is
  1731   1762   ** the part of a doclist that follows each document id. For example, if a row
  1732   1763   ** contains:
  1733   1764   **
  1734   1765   **     'a b c'|'x y z'|'a b b a'
  1735   1766   **
................................................................................
  1741   1772   ** byte following the 0x00 terminator of their respective position lists.
  1742   1773   **
  1743   1774   ** If isSaveLeft is 0, an entry is added to the output position list for 
  1744   1775   ** each position in *pp2 for which there exists one or more positions in
  1745   1776   ** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e.
  1746   1777   ** when the *pp1 token appears before the *pp2 token, but not more than nToken
  1747   1778   ** slots before it.
         1779  +**
         1780  +** e.g. nToken==1 searches for adjacent positions.
  1748   1781   */
  1749   1782   static int fts3PoslistPhraseMerge(
  1750   1783     char **pp,                      /* IN/OUT: Preallocated output buffer */
  1751   1784     int nToken,                     /* Maximum difference in token positions */
  1752   1785     int isSaveLeft,                 /* Save the left position */
  1753   1786     int isExact,                    /* If *pp1 is exactly nTokens before *pp2 */
  1754   1787     char **pp1,                     /* IN/OUT: Left input list */
................................................................................
  1907   1940       res = 0;
  1908   1941     }
  1909   1942   
  1910   1943     return res;
  1911   1944   }
  1912   1945   
  1913   1946   /* 
  1914         -** A pointer to an instance of this structure is used as the context 
  1915         -** argument to sqlite3Fts3SegReaderIterate()
         1947  +** An instance of this structure is used to merge together the (potentially
         1948  +** large number of) doclists for each term that matches a prefix query.
         1949  +** See function fts3TermSelectMerge() for details.
  1916   1950   */
  1917   1951   typedef struct TermSelect TermSelect;
  1918   1952   struct TermSelect {
  1919         -  int isReqPos;
  1920         -  char *aaOutput[16];             /* Malloc'd output buffer */
  1921         -  int anOutput[16];               /* Size of output in bytes */
         1953  +  char *aaOutput[16];             /* Malloc'd output buffers */
         1954  +  int anOutput[16];               /* Size of each output buffer in bytes */
  1922   1955   };
  1923   1956   
  1924         -
         1957  +/*
         1958  +** This function is used to read a single varint from a buffer. Parameter
         1959  +** pEnd points 1 byte past the end of the buffer. When this function is
         1960  +** called, if *pp points to pEnd or greater, then the end of the buffer
         1961  +** has been reached. In this case *pp is set to 0 and the function returns.
         1962  +**
         1963  +** If *pp does not point to or past pEnd, then a single varint is read
         1964  +** from *pp. *pp is then set to point 1 byte past the end of the read varint.
         1965  +**
         1966  +** If bDescIdx is false, the value read is added to *pVal before returning.
         1967  +** If it is true, the value read is subtracted from *pVal before this 
         1968  +** function returns.
         1969  +*/
  1925   1970   static void fts3GetDeltaVarint3(
  1926         -  char **pp, 
  1927         -  char *pEnd, 
  1928         -  int bDescIdx,
  1929         -  sqlite3_int64 *pVal
         1971  +  char **pp,                      /* IN/OUT: Point to read varint from */
         1972  +  char *pEnd,                     /* End of buffer */
         1973  +  int bDescIdx,                   /* True if docids are descending */
         1974  +  sqlite3_int64 *pVal             /* IN/OUT: Integer value */
  1930   1975   ){
  1931   1976     if( *pp>=pEnd ){
  1932   1977       *pp = 0;
  1933   1978     }else{
  1934   1979       sqlite3_int64 iVal;
  1935   1980       *pp += sqlite3Fts3GetVarint(*pp, &iVal);
  1936   1981       if( bDescIdx ){
................................................................................
  1937   1982         *pVal -= iVal;
  1938   1983       }else{
  1939   1984         *pVal += iVal;
  1940   1985       }
  1941   1986     }
  1942   1987   }
  1943   1988   
         1989  +/*
         1990  +** This function is used to write a single varint to a buffer. The varint
         1991  +** is written to *pp. Before returning, *pp is set to point 1 byte past the
         1992  +** end of the value written.
         1993  +**
         1994  +** If *pbFirst is zero when this function is called, the value written to
         1995  +** the buffer is that of parameter iVal. 
         1996  +**
         1997  +** If *pbFirst is non-zero when this function is called, then the value 
         1998  +** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal)
         1999  +** (if bDescIdx is non-zero).
         2000  +**
         2001  +** Before returning, this function always sets *pbFirst to 1 and *piPrev
         2002  +** to the value of parameter iVal.
         2003  +*/
  1944   2004   static void fts3PutDeltaVarint3(
  1945   2005     char **pp,                      /* IN/OUT: Output pointer */
  1946   2006     int bDescIdx,                   /* True for descending docids */
  1947   2007     sqlite3_int64 *piPrev,          /* IN/OUT: Previous value written to list */
  1948   2008     int *pbFirst,                   /* IN/OUT: True after first int written */
  1949   2009     sqlite3_int64 iVal              /* Write this value to the list */
  1950   2010   ){
................................................................................
  1957   2017     assert( *pbFirst || *piPrev==0 );
  1958   2018     assert( *pbFirst==0 || iWrite>0 );
  1959   2019     *pp += sqlite3Fts3PutVarint(*pp, iWrite);
  1960   2020     *piPrev = iVal;
  1961   2021     *pbFirst = 1;
  1962   2022   }
  1963   2023   
  1964         -#define COMPARE_DOCID(i1, i2) ((bDescIdx?-1:1) * (i1-i2))
  1965   2024   
         2025  +/*
         2026  +** This macro is used by various functions that merge doclists. The two
         2027  +** arguments are 64-bit docid values. If the value of the stack variable
         2028  +** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2). 
         2029  +** Otherwise, (i2-i1).
         2030  +**
         2031  +** Using this makes it easier to write code that can merge doclists that are
         2032  +** sorted in either ascending or descending order.
         2033  +*/
         2034  +#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2))
         2035  +
         2036  +/*
         2037  +** This function does an "OR" merge of two doclists (output contains all
         2038  +** positions contained in either argument doclist). If the docids in the 
         2039  +** input doclists are sorted in ascending order, parameter bDescDoclist
         2040  +** should be false. If they are sorted in descending order, it should be
         2041  +** passed a non-zero value.
         2042  +**
         2043  +** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer
         2044  +** containing the output doclist and SQLITE_OK is returned. In this case
         2045  +** *pnOut is set to the number of bytes in the output doclist.
         2046  +**
         2047  +** If an error occurs, an SQLite error code is returned. The output values
         2048  +** are undefined in this case.
         2049  +*/
  1966   2050   static int fts3DoclistOrMerge(
  1967         -  int bDescIdx,                   /* True if arguments are desc */
         2051  +  int bDescDoclist,               /* True if arguments are desc */
  1968   2052     char *a1, int n1,               /* First doclist */
  1969   2053     char *a2, int n2,               /* Second doclist */
  1970   2054     char **paOut, int *pnOut        /* OUT: Malloc'd doclist */
  1971   2055   ){
  1972   2056     sqlite3_int64 i1 = 0;
  1973   2057     sqlite3_int64 i2 = 0;
  1974   2058     sqlite3_int64 iPrev = 0;
................................................................................
  1985   2069     aOut = sqlite3_malloc(n1+n2);
  1986   2070     if( !aOut ) return SQLITE_NOMEM;
  1987   2071   
  1988   2072     p = aOut;
  1989   2073     fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
  1990   2074     fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
  1991   2075     while( p1 || p2 ){
  1992         -    sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2);
         2076  +    sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
  1993   2077   
  1994   2078       if( p2 && p1 && iDiff==0 ){
  1995         -      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
         2079  +      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
  1996   2080         fts3PoslistMerge(&p, &p1, &p2);
  1997         -      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
  1998         -      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
         2081  +      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
         2082  +      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
  1999   2083       }else if( !p2 || (p1 && iDiff<0) ){
  2000         -      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
         2084  +      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
  2001   2085         fts3PoslistCopy(&p, &p1);
  2002         -      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
         2086  +      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
  2003   2087       }else{
  2004         -      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i2);
         2088  +      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2);
  2005   2089         fts3PoslistCopy(&p, &p2);
  2006         -      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
         2090  +      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
  2007   2091       }
  2008   2092     }
  2009   2093   
  2010   2094     *paOut = aOut;
  2011   2095     *pnOut = (p-aOut);
  2012   2096     return SQLITE_OK;
  2013   2097   }
  2014   2098   
         2099  +/*
         2100  +** This function does a "phrase" merge of two doclists. In a phrase merge,
         2101  +** the output contains a copy of each position from the right-hand input
         2102  +** doclist for which there is a position in the left-hand input doclist
         2103  +** exactly nDist tokens before it.
         2104  +**
         2105  +** If the docids in the input doclists are sorted in ascending order,
         2106  +** parameter bDescDoclist should be false. If they are sorted in descending
         2107  +** order, it should be passed a non-zero value.
         2108  +**
         2109  +** The right-hand input doclist is overwritten by this function.
         2110  +*/
  2015   2111   static void fts3DoclistPhraseMerge(
  2016         -  int bDescIdx,                   /* True if arguments are desc */
         2112  +  int bDescDoclist,               /* True if arguments are desc */
  2017   2113     int nDist,                      /* Distance from left to right (1=adjacent) */
  2018   2114     char *aLeft, int nLeft,         /* Left doclist */
  2019   2115     char *aRight, int *pnRight      /* IN/OUT: Right/output doclist */
  2020   2116   ){
  2021   2117     sqlite3_int64 i1 = 0;
  2022   2118     sqlite3_int64 i2 = 0;
  2023   2119     sqlite3_int64 iPrev = 0;
................................................................................
  2032   2128     assert( nDist>0 );
  2033   2129   
  2034   2130     p = aOut;
  2035   2131     fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
  2036   2132     fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
  2037   2133   
  2038   2134     while( p1 && p2 ){
  2039         -    sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2);
         2135  +    sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
  2040   2136       if( iDiff==0 ){
  2041   2137         char *pSave = p;
  2042   2138         sqlite3_int64 iPrevSave = iPrev;
  2043   2139         int bFirstOutSave = bFirstOut;
  2044   2140   
  2045         -      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
         2141  +      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
  2046   2142         if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){
  2047   2143           p = pSave;
  2048   2144           iPrev = iPrevSave;
  2049   2145           bFirstOut = bFirstOutSave;
  2050   2146         }
  2051         -      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
  2052         -      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
         2147  +      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
         2148  +      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
  2053   2149       }else if( iDiff<0 ){
  2054   2150         fts3PoslistCopy(0, &p1);
  2055         -      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
         2151  +      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
  2056   2152       }else{
  2057   2153         fts3PoslistCopy(0, &p2);
  2058         -      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
         2154  +      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
  2059   2155       }
  2060   2156     }
  2061   2157   
  2062   2158     *pnRight = p - aOut;
  2063   2159   }
  2064   2160   
  2065   2161   
................................................................................
  2068   2164   ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
  2069   2165   ** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
  2070   2166   **
  2071   2167   ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
  2072   2168   ** the responsibility of the caller to free any doclists left in the
  2073   2169   ** TermSelect.aaOutput[] array.
  2074   2170   */
  2075         -static int fts3TermSelectMerge(Fts3Table *p, TermSelect *pTS){
         2171  +static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){
  2076   2172     char *aOut = 0;
  2077   2173     int nOut = 0;
  2078   2174     int i;
  2079   2175   
  2080   2176     /* Loop through the doclists in the aaOutput[] array. Merge them all
  2081   2177     ** into a single doclist.
  2082   2178     */
................................................................................
  2109   2205   
  2110   2206     pTS->aaOutput[0] = aOut;
  2111   2207     pTS->anOutput[0] = nOut;
  2112   2208     return SQLITE_OK;
  2113   2209   }
  2114   2210   
  2115   2211   /*
  2116         -** This function is used as the sqlite3Fts3SegReaderIterate() callback when
  2117         -** querying the full-text index for a doclist associated with a term or
  2118         -** term-prefix.
         2212  +** Merge the doclist aDoclist/nDoclist into the TermSelect object passed
         2213  +** as the first argument. The merge is an "OR" merge (see function
         2214  +** fts3DoclistOrMerge() for details).
         2215  +**
         2216  +** This function is called with the doclist for each term that matches
         2217  +** a queried prefix. It merges all these doclists into one, the doclist
         2218  +** for the specified prefix. Since there can be a very large number of
         2219  +** doclists to merge, the merging is done pair-wise using the TermSelect
         2220  +** object.
         2221  +**
         2222  +** This function returns SQLITE_OK if the merge is successful, or an
         2223  +** SQLite error code (SQLITE_NOMEM) if an error occurs.
  2119   2224   */
  2120         -static int fts3TermSelectCb(
  2121         -  Fts3Table *p,                   /* Virtual table object */
  2122         -  void *pContext,                 /* Pointer to TermSelect structure */
  2123         -  char *zTerm,
  2124         -  int nTerm,
  2125         -  char *aDoclist,
  2126         -  int nDoclist
         2225  +static int fts3TermSelectMerge(
         2226  +  Fts3Table *p,                   /* FTS table handle */
         2227  +  TermSelect *pTS,                /* TermSelect object to merge into */
         2228  +  char *aDoclist,                 /* Pointer to doclist */
         2229  +  int nDoclist                    /* Size of aDoclist in bytes */
  2127   2230   ){
  2128         -  TermSelect *pTS = (TermSelect *)pContext;
  2129         -
  2130         -  UNUSED_PARAMETER(p);
  2131         -  UNUSED_PARAMETER(zTerm);
  2132         -  UNUSED_PARAMETER(nTerm);
  2133         -
  2134   2231     if( pTS->aaOutput[0]==0 ){
  2135   2232       /* If this is the first term selected, copy the doclist to the output
  2136   2233       ** buffer using memcpy(). */
  2137   2234       pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
  2138   2235       pTS->anOutput[0] = nDoclist;
  2139   2236       if( pTS->aaOutput[0] ){
  2140   2237         memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
................................................................................
  2197   2294       }
  2198   2295       pCsr->apSegment = apNew;
  2199   2296     }
  2200   2297     pCsr->apSegment[pCsr->nSegment++] = pNew;
  2201   2298     return SQLITE_OK;
  2202   2299   }
  2203   2300   
         2301  +/*
         2302  +** Add seg-reader objects to the Fts3MultiSegReader object passed as the
         2303  +** 8th argument.
         2304  +**
         2305  +** This function returns SQLITE_OK if successful, or an SQLite error code
         2306  +** otherwise.
         2307  +*/
  2204   2308   static int fts3SegReaderCursor(
  2205   2309     Fts3Table *p,                   /* FTS3 table handle */
  2206   2310     int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
  2207   2311     int iLevel,                     /* Level of segments to scan */
  2208   2312     const char *zTerm,              /* Term to query for */
  2209   2313     int nTerm,                      /* Size of zTerm in bytes */
  2210   2314     int isPrefix,                   /* True for a prefix search */
  2211   2315     int isScan,                     /* True to scan from zTerm to EOF */
  2212         -  Fts3MultiSegReader *pCsr       /* Cursor object to populate */
         2316  +  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
  2213   2317   ){
  2214         -  int rc = SQLITE_OK;
  2215         -  int rc2;
  2216         -  sqlite3_stmt *pStmt = 0;
         2318  +  int rc = SQLITE_OK;             /* Error code */
         2319  +  sqlite3_stmt *pStmt = 0;        /* Statement to iterate through segments */
         2320  +  int rc2;                        /* Result of sqlite3_reset() */
  2217   2321   
  2218   2322     /* If iLevel is less than 0 and this is not a scan, include a seg-reader 
  2219   2323     ** for the pending-terms. If this is a scan, then this call must be being
  2220   2324     ** made by an fts4aux module, not an FTS table. In this case calling
  2221   2325     ** Fts3SegReaderPending might segfault, as the data structures used by 
  2222   2326     ** fts4aux are not completely populated. So it's easiest to filter these
  2223   2327     ** calls out here.  */
................................................................................
  2298   2402     memset(pCsr, 0, sizeof(Fts3MultiSegReader));
  2299   2403   
  2300   2404     return fts3SegReaderCursor(
  2301   2405         p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
  2302   2406     );
  2303   2407   }
  2304   2408   
         2409  +/*
         2410  +** In addition to its current configuration, have the Fts3MultiSegReader
         2411  +** passed as the 4th argument also scan the doclist for term zTerm/nTerm.
         2412  +**
         2413  +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
         2414  +*/
  2305   2415   static int fts3SegReaderCursorAddZero(
  2306         -  Fts3Table *p,
  2307         -  const char *zTerm,
  2308         -  int nTerm,
  2309         -  Fts3MultiSegReader *pCsr
         2416  +  Fts3Table *p,                   /* FTS virtual table handle */
         2417  +  const char *zTerm,              /* Term to scan doclist of */
         2418  +  int nTerm,                      /* Number of bytes in zTerm */
         2419  +  Fts3MultiSegReader *pCsr        /* Fts3MultiSegReader to modify */
  2310   2420   ){
  2311   2421     return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr);
  2312   2422   }
  2313   2423   
  2314         -
  2315         -int sqlite3Fts3TermSegReaderCursor(
         2424  +/*
         2425  +** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
         2426  +** if isPrefix is true, to scan the doclist for all terms for which 
         2427  +** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
         2428  +** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
         2429  +** an SQLite error code.
         2430  +**
         2431  +** It is the responsibility of the caller to free this object by eventually
         2432  +** passing it to fts3SegReaderCursorFree().
         2433  +**
         2434  +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
         2435  +** Output parameter *ppSegcsr is set to 0 if an error occurs.
         2436  +*/
         2437  +static int fts3TermSegReaderCursor(
  2316   2438     Fts3Cursor *pCsr,               /* Virtual table cursor handle */
  2317   2439     const char *zTerm,              /* Term to query for */
  2318   2440     int nTerm,                      /* Size of zTerm in bytes */
  2319   2441     int isPrefix,                   /* True for a prefix search */
  2320   2442     Fts3MultiSegReader **ppSegcsr   /* OUT: Allocated seg-reader cursor */
  2321   2443   ){
  2322         -  Fts3MultiSegReader *pSegcsr;   /* Object to allocate and return */
         2444  +  Fts3MultiSegReader *pSegcsr;    /* Object to allocate and return */
  2323   2445     int rc = SQLITE_NOMEM;          /* Return code */
  2324   2446   
  2325   2447     pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
  2326   2448     if( pSegcsr ){
  2327   2449       int i;
  2328   2450       int bFound = 0;               /* True once an index has been found */
  2329   2451       Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
................................................................................
  2359   2481       }
  2360   2482     }
  2361   2483   
  2362   2484     *ppSegcsr = pSegcsr;
  2363   2485     return rc;
  2364   2486   }
  2365   2487   
         2488  +/*
         2489  +** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor().
         2490  +*/
  2366   2491   static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
  2367   2492     sqlite3Fts3SegReaderFinish(pSegcsr);
  2368   2493     sqlite3_free(pSegcsr);
  2369   2494   }
  2370   2495   
  2371   2496   /*
  2372   2497   ** This function retrieves the doclist for the specified term (or term
  2373         -** prefix) from the database. 
  2374         -**
  2375         -** The returned doclist may be in one of two formats, depending on the 
  2376         -** value of parameter isReqPos. If isReqPos is zero, then the doclist is
  2377         -** a sorted list of delta-compressed docids (a bare doclist). If isReqPos
  2378         -** is non-zero, then the returned list is in the same format as is stored 
  2379         -** in the database without the found length specifier at the start of on-disk
  2380         -** doclists.
         2498  +** prefix) from the database.
  2381   2499   */
  2382   2500   static int fts3TermSelect(
  2383   2501     Fts3Table *p,                   /* Virtual table handle */
  2384   2502     Fts3PhraseToken *pTok,          /* Token to query for */
  2385   2503     int iColumn,                    /* Column to query (or -ve for all columns) */
  2386         -  int isReqPos,                   /* True to include position lists in output */
  2387   2504     int *pnOut,                     /* OUT: Size of buffer at *ppOut */
  2388   2505     char **ppOut                    /* OUT: Malloced result buffer */
  2389   2506   ){
  2390   2507     int rc;                         /* Return code */
  2391         -  Fts3MultiSegReader *pSegcsr;   /* Seg-reader cursor for this term */
  2392         -  TermSelect tsc;                 /* Context object for fts3TermSelectCb() */
         2508  +  Fts3MultiSegReader *pSegcsr;    /* Seg-reader cursor for this term */
         2509  +  TermSelect tsc;                 /* Object for pair-wise doclist merging */
  2393   2510     Fts3SegFilter filter;           /* Segment term filter configuration */
  2394   2511   
  2395   2512     pSegcsr = pTok->pSegcsr;
  2396   2513     memset(&tsc, 0, sizeof(TermSelect));
  2397         -  tsc.isReqPos = isReqPos;
  2398   2514   
  2399         -  filter.flags = FTS3_SEGMENT_IGNORE_EMPTY 
         2515  +  filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
  2400   2516           | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
  2401         -        | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0)
  2402   2517           | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
  2403   2518     filter.iCol = iColumn;
  2404   2519     filter.zTerm = pTok->z;
  2405   2520     filter.nTerm = pTok->n;
  2406   2521   
  2407   2522     rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter);
  2408   2523     while( SQLITE_OK==rc
  2409   2524         && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) 
  2410   2525     ){
  2411         -    rc = fts3TermSelectCb(p, (void *)&tsc, 
  2412         -        pSegcsr->zTerm, pSegcsr->nTerm, pSegcsr->aDoclist, pSegcsr->nDoclist
  2413         -    );
         2526  +    rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist);
  2414   2527     }
  2415   2528   
  2416   2529     if( rc==SQLITE_OK ){
  2417         -    rc = fts3TermSelectMerge(p, &tsc);
         2530  +    rc = fts3TermSelectFinishMerge(p, &tsc);
  2418   2531     }
  2419   2532     if( rc==SQLITE_OK ){
  2420   2533       *ppOut = tsc.aaOutput[0];
  2421   2534       *pnOut = tsc.anOutput[0];
  2422   2535     }else{
  2423   2536       int i;
  2424   2537       for(i=0; i<SizeofArray(tsc.aaOutput); i++){
................................................................................
  2483   2596         pCsr->isEof = 1;
  2484   2597         rc = sqlite3_reset(pCsr->pStmt);
  2485   2598       }else{
  2486   2599         pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0);
  2487   2600         rc = SQLITE_OK;
  2488   2601       }
  2489   2602     }else{
  2490         -    rc = sqlite3Fts3EvalNext((Fts3Cursor *)pCursor);
         2603  +    rc = fts3EvalNext((Fts3Cursor *)pCursor);
  2491   2604     }
  2492   2605     assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
  2493   2606     return rc;
  2494   2607   }
  2495   2608   
  2496   2609   /*
  2497   2610   ** This is the xFilter interface for the virtual table.  See
................................................................................
  2560   2673         }
  2561   2674         return rc;
  2562   2675       }
  2563   2676   
  2564   2677       rc = sqlite3Fts3ReadLock(p);
  2565   2678       if( rc!=SQLITE_OK ) return rc;
  2566   2679   
  2567         -    rc = sqlite3Fts3EvalStart(pCsr, pCsr->pExpr, 1);
         2680  +    rc = fts3EvalStart(pCsr);
  2568   2681   
  2569   2682       sqlite3Fts3SegmentsClose(p);
  2570   2683       if( rc!=SQLITE_OK ) return rc;
  2571   2684       pCsr->pNextId = pCsr->aDoclist;
  2572   2685       pCsr->iPrevId = 0;
  2573   2686     }
  2574   2687   
................................................................................
  2967   3080     fts3DbExec(&rc, db,
  2968   3081       "ALTER TABLE %Q.'%q_segdir'   RENAME TO '%q_segdir';",
  2969   3082       p->zDb, p->zName, zName
  2970   3083     );
  2971   3084     return rc;
  2972   3085   }
  2973   3086   
         3087  +/*
         3088  +** The xSavepoint() method.
         3089  +**
         3090  +** Flush the contents of the pending-terms table to disk.
         3091  +*/
  2974   3092   static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
  2975   3093     UNUSED_PARAMETER(iSavepoint);
  2976   3094     assert( ((Fts3Table *)pVtab)->inTransaction );
  2977   3095     assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint );
  2978   3096     TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint );
  2979   3097     return fts3SyncMethod(pVtab);
  2980   3098   }
         3099  +
         3100  +/*
         3101  +** The xRelease() method.
         3102  +**
         3103  +** This is a no-op.
         3104  +*/
  2981   3105   static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
  2982   3106     TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
  2983   3107     UNUSED_PARAMETER(iSavepoint);
  2984   3108     UNUSED_PARAMETER(pVtab);
  2985   3109     assert( p->inTransaction );
  2986   3110     assert( p->mxSavepoint >= iSavepoint );
  2987   3111     TESTONLY( p->mxSavepoint = iSavepoint-1 );
  2988   3112     return SQLITE_OK;
  2989   3113   }
         3114  +
         3115  +/*
         3116  +** The xRollbackTo() method.
         3117  +**
         3118  +** Discard the contents of the pending terms table.
         3119  +*/
  2990   3120   static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
  2991   3121     Fts3Table *p = (Fts3Table*)pVtab;
  2992   3122     UNUSED_PARAMETER(iSavepoint);
  2993   3123     assert( p->inTransaction );
  2994   3124     assert( p->mxSavepoint >= iSavepoint );
  2995   3125     TESTONLY( p->mxSavepoint = iSavepoint );
  2996   3126     sqlite3Fts3PendingTermsClear(p);
................................................................................
  3131   3261     assert( rc!=SQLITE_OK );
  3132   3262     if( pHash ){
  3133   3263       sqlite3Fts3HashClear(pHash);
  3134   3264       sqlite3_free(pHash);
  3135   3265     }
  3136   3266     return rc;
  3137   3267   }
  3138         -
  3139         -#if !SQLITE_CORE
  3140         -int sqlite3_extension_init(
  3141         -  sqlite3 *db, 
  3142         -  char **pzErrMsg,
  3143         -  const sqlite3_api_routines *pApi
  3144         -){
  3145         -  SQLITE_EXTENSION_INIT2(pApi)
  3146         -  return sqlite3Fts3Init(db);
  3147         -}
  3148         -#endif
  3149         -
  3150   3268   
  3151   3269   /*
  3152   3270   ** Allocate an Fts3MultiSegReader for each token in the expression headed
  3153   3271   ** by pExpr. 
  3154   3272   **
  3155   3273   ** An Fts3SegReader object is a cursor that can seek or scan a range of
  3156   3274   ** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple
................................................................................
  3160   3278   ** If the allocated Fts3MultiSegReader just seeks to a single entry in a
  3161   3279   ** segment b-tree (if the term is not a prefix or it is a prefix for which
  3162   3280   ** there exists a prefix b-tree of the right length) then it may be traversed
  3163   3281   ** and merged incrementally. Otherwise, it has to be merged into an in-memory 
  3164   3282   ** doclist and then traversed.
  3165   3283   */
  3166   3284   static void fts3EvalAllocateReaders(
  3167         -  Fts3Cursor *pCsr, 
  3168         -  Fts3Expr *pExpr, 
         3285  +  Fts3Cursor *pCsr,               /* FTS cursor handle */
         3286  +  Fts3Expr *pExpr,                /* Allocate readers for this expression */
  3169   3287     int *pnToken,                   /* OUT: Total number of tokens in phrase. */
  3170   3288     int *pnOr,                      /* OUT: Total number of OR nodes in expr. */
  3171         -  int *pRc
         3289  +  int *pRc                        /* IN/OUT: Error code */
  3172   3290   ){
  3173   3291     if( pExpr && SQLITE_OK==*pRc ){
  3174   3292       if( pExpr->eType==FTSQUERY_PHRASE ){
  3175   3293         int i;
  3176   3294         int nToken = pExpr->pPhrase->nToken;
  3177   3295         *pnToken += nToken;
  3178   3296         for(i=0; i<nToken; i++){
  3179   3297           Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
  3180         -        int rc = sqlite3Fts3TermSegReaderCursor(pCsr, 
         3298  +        int rc = fts3TermSegReaderCursor(pCsr, 
  3181   3299               pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
  3182   3300           );
  3183   3301           if( rc!=SQLITE_OK ){
  3184   3302             *pRc = rc;
  3185   3303             return;
  3186   3304           }
  3187   3305         }
................................................................................
  3191   3309         *pnOr += (pExpr->eType==FTSQUERY_OR);
  3192   3310         fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
  3193   3311         fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
  3194   3312       }
  3195   3313     }
  3196   3314   }
  3197   3315   
         3316  +/*
         3317  +** Arguments pList/nList contain the doclist for token iToken of phrase p.
         3318  +** It is merged into the main doclist stored in p->doclist.aAll/nAll.
         3319  +**
         3320  +** This function assumes that pList points to a buffer allocated using
         3321  +** sqlite3_malloc(). This function takes responsibility for eventually
         3322  +** freeing the buffer.
         3323  +*/
  3198   3324   static void fts3EvalPhraseMergeToken(
  3199         -  Fts3Table *pTab,
  3200         -  Fts3Phrase *p,
  3201         -  int iToken,
  3202         -  char *pList,
  3203         -  int nList
         3325  +  Fts3Table *pTab,                /* FTS Table pointer */
         3326  +  Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
         3327  +  int iToken,                     /* Token pList/nList corresponds to */
         3328  +  char *pList,                    /* Pointer to doclist */
         3329  +  int nList                       /* Number of bytes in pList */
  3204   3330   ){
  3205   3331     assert( iToken!=p->iDoclistToken );
  3206   3332   
  3207   3333     if( pList==0 ){
  3208   3334       sqlite3_free(p->doclist.aAll);
  3209   3335       p->doclist.aAll = 0;
  3210   3336       p->doclist.nAll = 0;
................................................................................
  3245   3371       p->doclist.aAll = pRight;
  3246   3372       p->doclist.nAll = nRight;
  3247   3373     }
  3248   3374   
  3249   3375     if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
  3250   3376   }
  3251   3377   
         3378  +/*
         3379  +** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
         3380  +** does not take deferred tokens into account.
         3381  +**
         3382  +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
         3383  +*/
  3252   3384   static int fts3EvalPhraseLoad(
  3253         -  Fts3Cursor *pCsr, 
  3254         -  Fts3Phrase *p
         3385  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         3386  +  Fts3Phrase *p                   /* Phrase object */
  3255   3387   ){
  3256   3388     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3257   3389     int iToken;
  3258   3390     int rc = SQLITE_OK;
  3259   3391   
  3260   3392     for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){
  3261   3393       Fts3PhraseToken *pToken = &p->aToken[iToken];
  3262   3394       assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );
  3263   3395   
  3264   3396       if( pToken->pSegcsr ){
  3265   3397         int nThis = 0;
  3266   3398         char *pThis = 0;
  3267         -      rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis);
         3399  +      rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
  3268   3400         if( rc==SQLITE_OK ){
  3269   3401           fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
  3270   3402         }
  3271   3403       }
  3272   3404       assert( pToken->pSegcsr==0 );
  3273   3405     }
  3274   3406   
  3275   3407     return rc;
  3276   3408   }
  3277   3409   
         3410  +/*
         3411  +** This function is called on each phrase after the position lists for
         3412  +** any deferred tokens have been loaded into memory. It updates the phrase's
         3413  +** current position list to include only those positions that are really
         3414  +** instances of the phrase (after considering deferred tokens). If this
         3415  +** means that the phrase does not appear in the current row, doclist.pList
         3416  +** and doclist.nList are both zeroed.
         3417  +**
         3418  +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
         3419  +*/
  3278   3420   static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
  3279         -  int iToken;
  3280         -  int rc = SQLITE_OK;
  3281         -
  3282         -  int nMaxUndeferred = pPhrase->iDoclistToken;
  3283         -  char *aPoslist = 0;
  3284         -  int nPoslist = 0;
  3285         -  int iPrev = -1;
         3421  +  int iToken;                     /* Used to iterate through phrase tokens */
         3422  +  int rc = SQLITE_OK;             /* Return code */
         3423  +  char *aPoslist = 0;             /* Position list for deferred tokens */
         3424  +  int nPoslist = 0;               /* Number of bytes in aPoslist */
         3425  +  int iPrev = -1;                 /* Token number of previous deferred token */
  3286   3426   
  3287   3427     assert( pPhrase->doclist.bFreeList==0 );
  3288   3428   
  3289   3429     for(iToken=0; rc==SQLITE_OK && iToken<pPhrase->nToken; iToken++){
  3290   3430       Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
  3291   3431       Fts3DeferredToken *pDeferred = pToken->pDeferred;
  3292   3432   
................................................................................
  3324   3464           }
  3325   3465         }
  3326   3466         iPrev = iToken;
  3327   3467       }
  3328   3468     }
  3329   3469   
  3330   3470     if( iPrev>=0 ){
         3471  +    int nMaxUndeferred = pPhrase->iDoclistToken;
  3331   3472       if( nMaxUndeferred<0 ){
  3332   3473         pPhrase->doclist.pList = aPoslist;
  3333   3474         pPhrase->doclist.nList = nPoslist;
  3334   3475         pPhrase->doclist.iDocid = pCsr->iPrevId;
  3335   3476         pPhrase->doclist.bFreeList = 1;
  3336   3477       }else{
  3337   3478         int nDistance;
................................................................................
  3372   3513   }
  3373   3514   
  3374   3515   /*
  3375   3516   ** This function is called for each Fts3Phrase in a full-text query 
  3376   3517   ** expression to initialize the mechanism for returning rows. Once this
  3377   3518   ** function has been called successfully on an Fts3Phrase, it may be
  3378   3519   ** used with fts3EvalPhraseNext() to iterate through the matching docids.
         3520  +**
         3521  +** If parameter bOptOk is true, then the phrase may (or may not) use the
         3522  +** incremental loading strategy. Otherwise, the entire doclist is loaded into
         3523  +** memory within this call.
         3524  +**
         3525  +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
  3379   3526   */
  3380   3527   static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
  3381         -  int rc;
         3528  +  int rc;                         /* Error code */
  3382   3529     Fts3PhraseToken *pFirst = &p->aToken[0];
  3383   3530     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3384   3531   
  3385   3532     if( pCsr->bDesc==pTab->bDescIdx 
  3386   3533      && bOptOk==1 
  3387   3534      && p->nToken==1 
  3388   3535      && pFirst->pSegcsr 
................................................................................
  3402   3549   
  3403   3550     assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
  3404   3551     return rc;
  3405   3552   }
  3406   3553   
  3407   3554   /*
  3408   3555   ** This function is used to iterate backwards (from the end to start) 
  3409         -** through doclists.
         3556  +** through doclists. It is used by this module to iterate through phrase
         3557  +** doclists in reverse and by the fts3_write.c module to iterate through
         3558  +** pending-terms lists when writing to databases with "order=desc".
         3559  +**
         3560  +** The doclist may be sorted in ascending (parameter bDescIdx==0) or 
         3561  +** descending (parameter bDescIdx==1) order of docid. Regardless, this
         3562  +** function iterates from the end of the doclist to the beginning.
  3410   3563   */
  3411   3564   void sqlite3Fts3DoclistPrev(
  3412   3565     int bDescIdx,                   /* True if the doclist is desc */
  3413   3566     char *aDoclist,                 /* Pointer to entire doclist */
  3414   3567     int nDoclist,                   /* Length of aDoclist in bytes */
  3415   3568     char **ppIter,                  /* IN/OUT: Iterator pointer */
  3416   3569     sqlite3_int64 *piDocid,         /* IN/OUT: Docid pointer */
................................................................................
  3467   3620   ** SQLITE_OK.
  3468   3621   **
  3469   3622   ** If there is no "next" entry and no error occurs, then *pbEof is set to
  3470   3623   ** 1 before returning. Otherwise, if no error occurs and the iterator is
  3471   3624   ** successfully advanced, *pbEof is set to 0.
  3472   3625   */
  3473   3626   static int fts3EvalPhraseNext(
  3474         -  Fts3Cursor *pCsr, 
  3475         -  Fts3Phrase *p, 
  3476         -  u8 *pbEof
         3627  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         3628  +  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
         3629  +  u8 *pbEof                       /* OUT: Set to 1 if EOF */
  3477   3630   ){
  3478   3631     int rc = SQLITE_OK;
  3479   3632     Fts3Doclist *pDL = &p->doclist;
  3480   3633     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3481   3634   
  3482   3635     if( p->bIncr ){
  3483   3636       assert( p->nToken==1 );
................................................................................
  3515   3668         }
  3516   3669         pDL->pList = pIter;
  3517   3670         fts3PoslistCopy(0, &pIter);
  3518   3671         pDL->nList = (pIter - pDL->pList);
  3519   3672   
  3520   3673         /* pIter now points just past the 0x00 that terminates the position-
  3521   3674         ** list for document pDL->iDocid. However, if this position-list was
  3522         -      ** edited in place by fts3EvalNearTrim2(), then pIter may not actually
         3675  +      ** edited in place by fts3EvalNearTrim(), then pIter may not actually
  3523   3676         ** point to the start of the next docid value. The following line deals
  3524   3677         ** with this case by advancing pIter past the zero-padding added by
  3525         -      ** fts3EvalNearTrim2().  */
         3678  +      ** fts3EvalNearTrim().  */
  3526   3679         while( pIter<pEnd && *pIter==0 ) pIter++;
  3527   3680   
  3528   3681         pDL->pNextDocid = pIter;
  3529   3682         assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
  3530   3683         *pbEof = 0;
  3531   3684       }
  3532   3685     }
  3533   3686   
  3534   3687     return rc;
  3535   3688   }
  3536   3689   
         3690  +/*
         3691  +**
         3692  +** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
         3693  +** Otherwise, fts3EvalPhraseStart() is called on all phrases within the
         3694  +** expression. Also the Fts3Expr.bDeferred variable is set to true for any
         3695  +** expressions for which all descendent tokens are deferred.
         3696  +**
         3697  +** If parameter bOptOk is zero, then it is guaranteed that the
         3698  +** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for
         3699  +** each phrase in the expression (subject to deferred token processing).
         3700  +** Or, if bOptOk is non-zero, then one or more tokens within the expression
         3701  +** may be loaded incrementally, meaning doclist.aAll/nAll is not available.
         3702  +**
         3703  +** If an error occurs within this function, *pRc is set to an SQLite error
         3704  +** code before returning.
         3705  +*/
  3537   3706   static void fts3EvalStartReaders(
  3538         -  Fts3Cursor *pCsr, 
  3539         -  Fts3Expr *pExpr, 
  3540         -  int bOptOk,
  3541         -  int *pRc
         3707  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         3708  +  Fts3Expr *pExpr,                /* Expression to initialize phrases in */
         3709  +  int bOptOk,                     /* True to enable incremental loading */
         3710  +  int *pRc                        /* IN/OUT: Error code */
  3542   3711   ){
  3543   3712     if( pExpr && SQLITE_OK==*pRc ){
  3544   3713       if( pExpr->eType==FTSQUERY_PHRASE ){
  3545   3714         int i;
  3546   3715         int nToken = pExpr->pPhrase->nToken;
  3547   3716         for(i=0; i<nToken; i++){
  3548   3717           if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
................................................................................
  3553   3722         fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
  3554   3723         fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
  3555   3724         pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
  3556   3725       }
  3557   3726     }
  3558   3727   }
  3559   3728   
         3729  +/*
         3730  +** An array of the following structures is assembled as part of the process
         3731  +** of selecting tokens to defer before the query starts executing (as part
         3732  +** of the xFilter() method). There is one element in the array for each
         3733  +** token in the FTS expression.
         3734  +**
         3735  +** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong
         3736  +** to phrases that are connected only by AND and NEAR operators (not OR or
         3737  +** NOT). When determining tokens to defer, each AND/NEAR cluster is considered
         3738  +** separately. The root of a token's AND/NEAR cluster is stored in 
         3739  +** Fts3TokenAndCost.pRoot.
         3740  +*/
  3560   3741   typedef struct Fts3TokenAndCost Fts3TokenAndCost;
  3561   3742   struct Fts3TokenAndCost {
  3562   3743     Fts3Phrase *pPhrase;            /* The phrase the token belongs to */
  3563   3744     int iToken;                     /* Position of token in phrase */
  3564   3745     Fts3PhraseToken *pToken;        /* The token itself */
  3565         -  Fts3Expr *pRoot; 
  3566         -  int nOvfl;
         3746  +  Fts3Expr *pRoot;                /* Root of NEAR/AND cluster */
         3747  +  int nOvfl;                      /* Number of overflow pages to load doclist */
  3567   3748     int iCol;                       /* The column the token must match */
  3568   3749   };
  3569   3750   
         3751  +/*
         3752  +** This function is used to populate an allocated Fts3TokenAndCost array.
         3753  +**
         3754  +** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
         3755  +** Otherwise, if an error occurs during execution, *pRc is set to an
         3756  +** SQLite error code.
         3757  +*/
  3570   3758   static void fts3EvalTokenCosts(
  3571         -  Fts3Cursor *pCsr, 
  3572         -  Fts3Expr *pRoot, 
  3573         -  Fts3Expr *pExpr, 
  3574         -  Fts3TokenAndCost **ppTC,
  3575         -  Fts3Expr ***ppOr,
  3576         -  int *pRc
         3759  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         3760  +  Fts3Expr *pRoot,                /* Root of current AND/NEAR cluster */
         3761  +  Fts3Expr *pExpr,                /* Expression to consider */
         3762  +  Fts3TokenAndCost **ppTC,        /* Write new entries to *(*ppTC)++ */
         3763  +  Fts3Expr ***ppOr,               /* Write new OR root to *(*ppOr)++ */
         3764  +  int *pRc                        /* IN/OUT: Error code */
  3577   3765   ){
  3578   3766     if( *pRc==SQLITE_OK && pExpr ){
  3579   3767       if( pExpr->eType==FTSQUERY_PHRASE ){
  3580   3768         Fts3Phrase *pPhrase = pExpr->pPhrase;
  3581   3769         int i;
  3582   3770         for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
  3583   3771           Fts3TokenAndCost *pTC = (*ppTC)++;
................................................................................
  3601   3789           (*ppOr)++;
  3602   3790         }
  3603   3791         fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc);
  3604   3792       }
  3605   3793     }
  3606   3794   }
  3607   3795   
         3796  +/*
         3797  +** Determine the average document (row) size in pages. If successful,
         3798  +** write this value to *pnPage and return SQLITE_OK. Otherwise, return
         3799  +** an SQLite error code.
         3800  +**
         3801  +** The average document size in pages is calculated by first
         3802  +** determining the average size in bytes, B. If B is less than the amount
         3803  +** of data that will fit on a single leaf page of an intkey table in
         3804  +** this database, then the average docsize is 1. Otherwise, it is 1 plus
         3805  +** the number of overflow pages consumed by a record B bytes in size.
         3806  +*/
  3608   3807   static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
  3609   3808     if( pCsr->nRowAvg==0 ){
  3610   3809       /* The average document size, which is required to calculate the cost
  3611         -     ** of each doclist, has not yet been determined. Read the required 
  3612         -     ** data from the %_stat table to calculate it.
  3613         -     **
  3614         -     ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 
  3615         -     ** varints, where nCol is the number of columns in the FTS3 table.
  3616         -     ** The first varint is the number of documents currently stored in
  3617         -     ** the table. The following nCol varints contain the total amount of
  3618         -     ** data stored in all rows of each column of the table, from left
  3619         -     ** to right.
  3620         -     */
         3810  +    ** of each doclist, has not yet been determined. Read the required 
         3811  +    ** data from the %_stat table to calculate it.
         3812  +    **
         3813  +    ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 
         3814  +    ** varints, where nCol is the number of columns in the FTS3 table.
         3815  +    ** The first varint is the number of documents currently stored in
         3816  +    ** the table. The following nCol varints contain the total amount of
         3817  +    ** data stored in all rows of each column of the table, from left
         3818  +    ** to right.
         3819  +    */
  3621   3820       int rc;
  3622   3821       Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
  3623   3822       sqlite3_stmt *pStmt;
  3624   3823       sqlite3_int64 nDoc = 0;
  3625   3824       sqlite3_int64 nByte = 0;
  3626   3825       const char *pEnd;
  3627   3826       const char *a;
................................................................................
  3648   3847       if( rc!=SQLITE_OK ) return rc;
  3649   3848     }
  3650   3849   
  3651   3850     *pnPage = pCsr->nRowAvg;
  3652   3851     return SQLITE_OK;
  3653   3852   }
  3654   3853   
         3854  +/*
         3855  +** This function is called to select the tokens (if any) that will be 
         3856  +** deferred. The array aTC[] has already been populated when this is
         3857  +** called.
         3858  +**
         3859  +** This function is called once for each AND/NEAR cluster in the 
         3860  +** expression. Each invocation determines which tokens to defer within
         3861  +** the cluster with root node pRoot. See comments above the definition
         3862  +** of struct Fts3TokenAndCost for more details.
         3863  +**
         3864  +** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken()
         3865  +** is called on each token to defer. Otherwise, an SQLite error code is
         3866  +** returned.
         3867  +*/
  3655   3868   static int fts3EvalSelectDeferred(
  3656         -  Fts3Cursor *pCsr,
  3657         -  Fts3Expr *pRoot,
  3658         -  Fts3TokenAndCost *aTC,
  3659         -  int nTC
         3869  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         3870  +  Fts3Expr *pRoot,                /* Consider tokens with this root node */
         3871  +  Fts3TokenAndCost *aTC,          /* Array of expression tokens and costs */
         3872  +  int nTC                         /* Number of entries in aTC[] */
  3660   3873   ){
  3661         -  int nDocSize = 0;
  3662         -  int nDocEst = 0;
  3663         -  int rc = SQLITE_OK;
  3664   3874     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3665         -  int ii;
         3875  +  int nDocSize = 0;               /* Number of pages per doc loaded */
         3876  +  int nDocEst = 0;                /* Est. docs if all other tokens deferred */
         3877  +  int rc = SQLITE_OK;             /* Return code */
         3878  +  int ii;                         /* Iterator variable for various purposes */
         3879  +  int nOvfl = 0;                  /* Total overflow pages used by doclists */
         3880  +  int nToken = 0;                 /* Total number of tokens in cluster */
  3666   3881   
  3667         -  int nOvfl = 0;
  3668         -  int nTerm = 0;
  3669         -
         3882  +  /* Count the tokens in this AND/NEAR cluster. If none of the doclists
         3883  +  ** associated with the tokens spill onto overflow pages, or if there is
         3884  +  ** only 1 token, exit early. No tokens to defer in this case. */
  3670   3885     for(ii=0; ii<nTC; ii++){
  3671   3886       if( aTC[ii].pRoot==pRoot ){
  3672   3887         nOvfl += aTC[ii].nOvfl;
  3673         -      nTerm++;
         3888  +      nToken++;
  3674   3889       }
  3675   3890     }
  3676         -  if( nOvfl==0 || nTerm<2 ) return SQLITE_OK;
         3891  +  if( nOvfl==0 || nToken<2 ) return SQLITE_OK;
  3677   3892   
         3893  +  /* Obtain the average docsize (in pages). */
  3678   3894     rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
         3895  +  assert( rc!=SQLITE_OK || nDocSize>0 );
  3679   3896   
  3680         -  for(ii=0; ii<nTerm && rc==SQLITE_OK; ii++){
  3681         -    int jj;
  3682         -    Fts3TokenAndCost *pTC = 0;
         3897  +  for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){
         3898  +    int iTC;                      /* Used to iterate through aTC[] array. */
         3899  +    Fts3TokenAndCost *pTC = 0;    /* Set to cheapest remaining token. */
  3683   3900   
  3684         -    for(jj=0; jj<nTC; jj++){
  3685         -      if( aTC[jj].pToken && aTC[jj].pRoot==pRoot 
  3686         -       && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) 
         3901  +    /* Set pTC to point to the cheapest remaining token. */
         3902  +    for(iTC=0; iTC<nTC; iTC++){
         3903  +      if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot 
         3904  +       && (!pTC || aTC[iTC].nOvfl<pTC->nOvfl) 
  3687   3905         ){
  3688         -        pTC = &aTC[jj];
         3906  +        pTC = &aTC[iTC];
  3689   3907         }
  3690   3908       }
  3691   3909       assert( pTC );
  3692   3910   
  3693         -    /* At this point pTC points to the cheapest remaining token. */
         3911  +    /* Determine if token pTC should be deferred. If not, update nDocEst. 
         3912  +    **
         3913  +    ** TODO: If there are performance regressions involving deferred tokens,
         3914  +    ** this (the logic that selects the tokens to be deferred) is probably
         3915  +    ** the bit that needs to change.
         3916  +    */
  3694   3917       if( ii==0 ){
  3695   3918         if( pTC->nOvfl ){
  3696   3919           nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
  3697   3920         }else{
  3698   3921           Fts3PhraseToken *pToken = pTC->pToken;
  3699   3922           int nList = 0;
  3700   3923           char *pList = 0;
  3701         -        rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList);
         3924  +        rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
  3702   3925           assert( rc==SQLITE_OK || pList==0 );
  3703         -
  3704   3926           if( rc==SQLITE_OK ){
  3705   3927             nDocEst = fts3DoclistCountDocids(1, pList, nList);
  3706   3928             fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
  3707   3929           }
  3708   3930         }
  3709   3931       }else{
  3710   3932         if( pTC->nOvfl>=(nDocEst*nDocSize) ){
................................................................................
  3717   3939       }
  3718   3940       pTC->pToken = 0;
  3719   3941     }
  3720   3942   
  3721   3943     return rc;
  3722   3944   }
  3723   3945   
  3724         -int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){
         3946  +/*
         3947  +** This function is called from within the xFilter method. It initializes
         3948  +** the full-text query currently stored in pCsr->pExpr. To iterate through
         3949  +** the results of a query, the caller does:
         3950  +**
         3951  +**    fts3EvalStart(pCsr);
         3952  +**    while( 1 ){
         3953  +**      fts3EvalNext(pCsr);
         3954  +**      if( pCsr->bEof ) break;
         3955  +**      ... return row pCsr->iPrevId to the caller ...
         3956  +**    }
         3957  +*/
         3958  +static int fts3EvalStart(Fts3Cursor *pCsr){
  3725   3959     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3726   3960     int rc = SQLITE_OK;
  3727   3961     int nToken = 0;
  3728   3962     int nOr = 0;
  3729   3963   
  3730   3964     /* Allocate a MultiSegReader for each token in the expression. */
  3731         -  fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &nOr, &rc);
         3965  +  fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);
  3732   3966   
  3733         -  /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO:
  3734         -  ** This call will eventually also be responsible for determining which
  3735         -  ** tokens are 'deferred' until the document text is loaded into memory.
  3736         -  **
  3737         -  ** Each token in each phrase is dealt with using one of the following
  3738         -  ** three strategies:
  3739         -  **
  3740         -  **   1. Entire doclist loaded into memory as part of the
  3741         -  **      fts3EvalStartReaders() call.
  3742         -  **
  3743         -  **   2. Doclist loaded into memory incrementally, as part of each
  3744         -  **      sqlite3Fts3EvalNext() call.
  3745         -  **
  3746         -  **   3. Token doclist is never loaded. Instead, documents are loaded into
  3747         -  **      memory and scanned for the token as part of the sqlite3Fts3EvalNext()
  3748         -  **      call. This is known as a "deferred" token.
  3749         -  */
  3750         -
  3751         -  /* If bOptOk is true, check if there are any tokens that should be deferred.
  3752         -  */
  3753         -  if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){
         3967  +  /* Determine which, if any, tokens in the expression should be deferred. */
         3968  +  if( rc==SQLITE_OK && nToken>1 && pTab->bHasStat ){
  3754   3969       Fts3TokenAndCost *aTC;
  3755   3970       Fts3Expr **apOr;
  3756   3971       aTC = (Fts3TokenAndCost *)sqlite3_malloc(
  3757   3972           sizeof(Fts3TokenAndCost) * nToken
  3758   3973         + sizeof(Fts3Expr *) * nOr * 2
  3759   3974       );
  3760   3975       apOr = (Fts3Expr **)&aTC[nToken];
................................................................................
  3762   3977       if( !aTC ){
  3763   3978         rc = SQLITE_NOMEM;
  3764   3979       }else{
  3765   3980         int ii;
  3766   3981         Fts3TokenAndCost *pTC = aTC;
  3767   3982         Fts3Expr **ppOr = apOr;
  3768   3983   
  3769         -      fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc);
         3984  +      fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc);
  3770   3985         nToken = pTC-aTC;
  3771   3986         nOr = ppOr-apOr;
  3772   3987   
  3773   3988         if( rc==SQLITE_OK ){
  3774   3989           rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
  3775   3990           for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){
  3776   3991             rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken);
................................................................................
  3777   3992           }
  3778   3993         }
  3779   3994   
  3780   3995         sqlite3_free(aTC);
  3781   3996       }
  3782   3997     }
  3783   3998   
  3784         -  fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc);
         3999  +  fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
  3785   4000     return rc;
  3786   4001   }
  3787   4002   
  3788         -static void fts3EvalZeroPoslist(Fts3Phrase *pPhrase){
         4003  +/*
         4004  +** Invalidate the current position list for phrase pPhrase.
         4005  +*/
         4006  +static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
  3789   4007     if( pPhrase->doclist.bFreeList ){
  3790   4008       sqlite3_free(pPhrase->doclist.pList);
  3791   4009     }
  3792   4010     pPhrase->doclist.pList = 0;
  3793   4011     pPhrase->doclist.nList = 0;
  3794   4012     pPhrase->doclist.bFreeList = 0;
  3795   4013   }
  3796   4014   
  3797         -static int fts3EvalNearTrim2(
  3798         -  int nNear,
         4015  +/*
         4016  +** This function is called to edit the position list associated with
         4017  +** the phrase object passed as the fifth argument according to a NEAR
         4018  +** condition. For example:
         4019  +**
         4020  +**     abc NEAR/5 "def ghi"
         4021  +**
         4022  +** Parameter nNear is passed the NEAR distance of the expression (5 in
         4023  +** the example above). When this function is called, *paPoslist points to
         4024  +** the position list, and *pnToken is the number of phrase tokens in the
         4025  +** phrase on the other side of the NEAR operator to pPhrase. For example,
         4026  +** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to
         4027  +** the position list associated with phrase "abc".
         4028  +**
         4029  +** All positions in the pPhrase position list that are not sufficiently
         4030  +** close to a position in the *paPoslist position list are removed. If this
         4031  +** leaves 0 positions, zero is returned. Otherwise, non-zero.
         4032  +**
         4033  +** Before returning, *paPoslist is set to point to the position list
         4034  +** associated with pPhrase. And *pnToken is set to the number of tokens in
         4035  +** pPhrase.
         4036  +*/
         4037  +static int fts3EvalNearTrim(
         4038  +  int nNear,                      /* NEAR distance. As in "NEAR/nNear". */
  3799   4039     char *aTmp,                     /* Temporary space to use */
  3800   4040     char **paPoslist,               /* IN/OUT: Position list */
  3801   4041     int *pnToken,                   /* IN/OUT: Tokens in phrase of *paPoslist */
  3802   4042     Fts3Phrase *pPhrase             /* The phrase object to trim the doclist of */
  3803   4043   ){
  3804   4044     int nParam1 = nNear + pPhrase->nToken;
  3805   4045     int nParam2 = nNear + *pnToken;
................................................................................
  3823   4063       *paPoslist = pPhrase->doclist.pList;
  3824   4064       *pnToken = pPhrase->nToken;
  3825   4065     }
  3826   4066   
  3827   4067     return res;
  3828   4068   }
  3829   4069   
         4070  +/*
         4071  +** This function is a no-op if *pRc is other than SQLITE_OK when it is called.
         4072  +** Otherwise, it advances the expression passed as the second argument to
         4073  +** point to the next matching row in the database. Expressions iterate through
         4074  +** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero,
         4075  +** or descending if it is non-zero.
         4076  +**
         4077  +** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if
         4078  +** successful, the following variables in pExpr are set:
         4079  +**
         4080  +**   Fts3Expr.bEof                (non-zero if EOF - there is no next row)
         4081  +**   Fts3Expr.iDocid              (valid if bEof==0. The docid of the next row)
         4082  +**
         4083  +** If the expression is of type FTSQUERY_PHRASE, and the expression is not
         4084  +** at EOF, then the following variables are populated with the position list
         4085  +** for the phrase for the visited row:
         4086  +**
         4087  +**   Fts3Expr.pPhrase->doclist.nList        (length of pList in bytes)
         4088  +**   Fts3Expr.pPhrase->doclist.pList        (pointer to position list)
         4089  +**
         4090  +** It says above that this function advances the expression to the next
         4091  +** matching row. This is usually true, but there are the following exceptions:
         4092  +**
         4093  +**   1. Deferred tokens are not taken into account. If a phrase consists
         4094  +**      entirely of deferred tokens, it is assumed to match every row in
         4095  +**      the db. In this case the position-list is not populated at all. 
         4096  +**
         4097  +**      Or, if a phrase contains one or more deferred tokens and one or
         4098  +**      more non-deferred tokens, then the expression is advanced to the 
         4099  +**      next possible match, considering only non-deferred tokens. In other
         4100  +**      words, if the phrase is "A B C", and "B" is deferred, the expression
         4101  +**      is advanced to the next row that contains an instance of "A * C", 
         4102  +**      where "*" may match any single token. The position list in this case
         4103  +**      is populated as for "A * C" before returning.
         4104  +**
         4105  +**   2. NEAR is treated as AND. If the expression is "x NEAR y", it is 
         4106  +**      advanced to point to the next row that matches "x AND y".
         4107  +** 
         4108  +** See fts3EvalTestDeferredAndNear() for details on testing if a row is
         4109  +** really a match, taking into account deferred tokens and NEAR operators.
         4110  +*/
         4111  +static void fts3EvalNextRow(
         4112  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         4113  +  Fts3Expr *pExpr,                /* Expr. to advance to next matching row */
         4114  +  int *pRc                        /* IN/OUT: Error code */
         4115  +){
         4116  +  if( *pRc==SQLITE_OK ){
         4117  +    int bDescDoclist = pCsr->bDesc;         /* Used by DOCID_CMP() macro */
         4118  +    assert( pExpr->bEof==0 );
         4119  +    pExpr->bStart = 1;
         4120  +
         4121  +    switch( pExpr->eType ){
         4122  +      case FTSQUERY_NEAR:
         4123  +      case FTSQUERY_AND: {
         4124  +        Fts3Expr *pLeft = pExpr->pLeft;
         4125  +        Fts3Expr *pRight = pExpr->pRight;
         4126  +        assert( !pLeft->bDeferred || !pRight->bDeferred );
         4127  +
         4128  +        if( pLeft->bDeferred ){
         4129  +          /* LHS is entirely deferred. So we assume it matches every row.
         4130  +          ** Advance the RHS iterator to find the next row visited. */
         4131  +          fts3EvalNextRow(pCsr, pRight, pRc);
         4132  +          pExpr->iDocid = pRight->iDocid;
         4133  +          pExpr->bEof = pRight->bEof;
         4134  +        }else if( pRight->bDeferred ){
         4135  +          /* RHS is entirely deferred. So we assume it matches every row.
         4136  +          ** Advance the LHS iterator to find the next row visited. */
         4137  +          fts3EvalNextRow(pCsr, pLeft, pRc);
         4138  +          pExpr->iDocid = pLeft->iDocid;
         4139  +          pExpr->bEof = pLeft->bEof;
         4140  +        }else{
         4141  +          /* Neither the RHS nor the LHS is deferred. */
         4142  +          fts3EvalNextRow(pCsr, pLeft, pRc);
         4143  +          fts3EvalNextRow(pCsr, pRight, pRc);
         4144  +          while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
         4145  +            sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
         4146  +            if( iDiff==0 ) break;
         4147  +            if( iDiff<0 ){
         4148  +              fts3EvalNextRow(pCsr, pLeft, pRc);
         4149  +            }else{
         4150  +              fts3EvalNextRow(pCsr, pRight, pRc);
         4151  +            }
         4152  +          }
         4153  +          pExpr->iDocid = pLeft->iDocid;
         4154  +          pExpr->bEof = (pLeft->bEof || pRight->bEof);
         4155  +        }
         4156  +        break;
         4157  +      }
         4158  +  
         4159  +      case FTSQUERY_OR: {
         4160  +        Fts3Expr *pLeft = pExpr->pLeft;
         4161  +        Fts3Expr *pRight = pExpr->pRight;
         4162  +        sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
         4163  +
         4164  +        assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid );
         4165  +        assert( pRight->bStart || pLeft->iDocid==pRight->iDocid );
         4166  +
         4167  +        if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
         4168  +          fts3EvalNextRow(pCsr, pLeft, pRc);
         4169  +        }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){
         4170  +          fts3EvalNextRow(pCsr, pRight, pRc);
         4171  +        }else{
         4172  +          fts3EvalNextRow(pCsr, pLeft, pRc);
         4173  +          fts3EvalNextRow(pCsr, pRight, pRc);
         4174  +        }
         4175  +
         4176  +        pExpr->bEof = (pLeft->bEof && pRight->bEof);
         4177  +        iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
         4178  +        if( pRight->bEof || (pLeft->bEof==0 &&  iCmp<0) ){
         4179  +          pExpr->iDocid = pLeft->iDocid;
         4180  +        }else{
         4181  +          pExpr->iDocid = pRight->iDocid;
         4182  +        }
         4183  +
         4184  +        break;
         4185  +      }
         4186  +
         4187  +      case FTSQUERY_NOT: {
         4188  +        Fts3Expr *pLeft = pExpr->pLeft;
         4189  +        Fts3Expr *pRight = pExpr->pRight;
         4190  +
         4191  +        if( pRight->bStart==0 ){
         4192  +          fts3EvalNextRow(pCsr, pRight, pRc);
         4193  +          assert( *pRc!=SQLITE_OK || pRight->bStart );
         4194  +        }
         4195  +
         4196  +        fts3EvalNextRow(pCsr, pLeft, pRc);
         4197  +        if( pLeft->bEof==0 ){
         4198  +          while( !*pRc 
         4199  +              && !pRight->bEof 
         4200  +              && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 
         4201  +          ){
         4202  +            fts3EvalNextRow(pCsr, pRight, pRc);
         4203  +          }
         4204  +        }
         4205  +        pExpr->iDocid = pLeft->iDocid;
         4206  +        pExpr->bEof = pLeft->bEof;
         4207  +        break;
         4208  +      }
         4209  +
         4210  +      default: {
         4211  +        Fts3Phrase *pPhrase = pExpr->pPhrase;
         4212  +        fts3EvalInvalidatePoslist(pPhrase);
         4213  +        *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
         4214  +        pExpr->iDocid = pPhrase->doclist.iDocid;
         4215  +        break;
         4216  +      }
         4217  +    }
         4218  +  }
         4219  +}
         4220  +
         4221  +/*
         4222  +** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR
         4223  +** cluster, then this function returns 1 immediately.
         4224  +**
         4225  +** Otherwise, it checks if the current row really does match the NEAR 
         4226  +** expression, using the data currently stored in the position lists 
         4227  +** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. 
         4228  +**
         4229  +** If the current row is a match, the position list associated with each
         4230  +** phrase in the NEAR expression is edited in place to contain only those
         4231  +** phrase instances sufficiently close to their peers to satisfy all NEAR
         4232  +** constraints. In this case it returns 1. If the NEAR expression does not 
         4233  +** match the current row, 0 is returned. The position lists may or may not
         4234  +** be edited if 0 is returned.
         4235  +*/
  3830   4236   static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
  3831   4237     int res = 1;
  3832   4238   
  3833   4239     /* The following block runs if pExpr is the root of a NEAR query.
  3834   4240     ** For example, the query:
  3835   4241     **
  3836   4242     **         "w" NEAR "x" NEAR "y" NEAR "z"
................................................................................
  3844   4250     **                     |        |
  3845   4251     **                +--NEAR--+   "y"
  3846   4252     **                |        |
  3847   4253     **               "w"      "x"
  3848   4254     **
  3849   4255     ** The right-hand child of a NEAR node is always a phrase. The 
  3850   4256     ** left-hand child may be either a phrase or a NEAR node. There are
  3851         -  ** no exceptions to this.
         4257  +  ** no exceptions to this - it's the way the parser in fts3_expr.c works.
  3852   4258     */
  3853   4259     if( *pRc==SQLITE_OK 
  3854   4260      && pExpr->eType==FTSQUERY_NEAR 
  3855   4261      && pExpr->bEof==0
  3856   4262      && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
  3857   4263     ){
  3858   4264       Fts3Expr *p; 
................................................................................
  3871   4277       }else{
  3872   4278         char *aPoslist = p->pPhrase->doclist.pList;
  3873   4279         int nToken = p->pPhrase->nToken;
  3874   4280   
  3875   4281         for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
  3876   4282           Fts3Phrase *pPhrase = p->pRight->pPhrase;
  3877   4283           int nNear = p->nNear;
  3878         -        res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
         4284  +        res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
  3879   4285         }
  3880   4286     
  3881   4287         aPoslist = pExpr->pRight->pPhrase->doclist.pList;
  3882   4288         nToken = pExpr->pRight->pPhrase->nToken;
  3883   4289         for(p=pExpr->pLeft; p && res; p=p->pLeft){
  3884   4290           int nNear = p->pParent->nNear;
  3885   4291           Fts3Phrase *pPhrase = (
  3886   4292               p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
  3887   4293           );
  3888         -        res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
         4294  +        res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
  3889   4295         }
  3890   4296       }
  3891   4297   
  3892   4298       sqlite3_free(aTmp);
  3893   4299     }
  3894   4300   
  3895   4301     return res;
  3896   4302   }
  3897   4303   
  3898   4304   /*
  3899         -** This macro is used by the fts3EvalNext() function. The two arguments are
  3900         -** 64-bit docid values. If the current query is "ORDER BY docid ASC", then
  3901         -** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then
  3902         -** it returns (i2 - i1). This allows the same code to be used for merging
  3903         -** doclists in ascending or descending order.
         4305  +** This function is a helper function for fts3EvalTestDeferredAndNear().
         4306  +** Assuming no error occurs or has occurred, it returns non-zero if the
         4307  +** expression passed as the second argument matches the row that pCsr 
         4308  +** currently points to, or zero if it does not.
         4309  +**
         4310  +** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
         4311  +** If an error occurs during execution of this function, *pRc is set to 
         4312  +** the appropriate SQLite error code. In this case the returned value is 
         4313  +** undefined.
  3904   4314   */
  3905         -#define DOCID_CMP(i1, i2) ((pCsr->bDesc?-1:1) * (i1-i2))
  3906         -
  3907         -static void fts3EvalNext(
  3908         -  Fts3Cursor *pCsr, 
  3909         -  Fts3Expr *pExpr, 
  3910         -  int *pRc
         4315  +static int fts3EvalTestExpr(
         4316  +  Fts3Cursor *pCsr,               /* FTS cursor handle */
         4317  +  Fts3Expr *pExpr,                /* Expr to test. May or may not be root. */
         4318  +  int *pRc                        /* IN/OUT: Error code */
  3911   4319   ){
  3912         -  if( *pRc==SQLITE_OK ){
  3913         -    assert( pExpr->bEof==0 );
  3914         -    pExpr->bStart = 1;
  3915         -
  3916         -    switch( pExpr->eType ){
  3917         -      case FTSQUERY_NEAR:
  3918         -      case FTSQUERY_AND: {
  3919         -        Fts3Expr *pLeft = pExpr->pLeft;
  3920         -        Fts3Expr *pRight = pExpr->pRight;
  3921         -        assert( !pLeft->bDeferred || !pRight->bDeferred );
  3922         -        if( pLeft->bDeferred ){
  3923         -          fts3EvalNext(pCsr, pRight, pRc);
  3924         -          pExpr->iDocid = pRight->iDocid;
  3925         -          pExpr->bEof = pRight->bEof;
  3926         -        }else if( pRight->bDeferred ){
  3927         -          fts3EvalNext(pCsr, pLeft, pRc);
  3928         -          pExpr->iDocid = pLeft->iDocid;
  3929         -          pExpr->bEof = pLeft->bEof;
  3930         -        }else{
  3931         -          fts3EvalNext(pCsr, pLeft, pRc);
  3932         -          fts3EvalNext(pCsr, pRight, pRc);
  3933         -
  3934         -          while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
  3935         -            sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
  3936         -            if( iDiff==0 ) break;
  3937         -            if( iDiff<0 ){
  3938         -              fts3EvalNext(pCsr, pLeft, pRc);
  3939         -            }else{
  3940         -              fts3EvalNext(pCsr, pRight, pRc);
  3941         -            }
  3942         -          }
  3943         -
  3944         -          pExpr->iDocid = pLeft->iDocid;
  3945         -          pExpr->bEof = (pLeft->bEof || pRight->bEof);
  3946         -        }
  3947         -        break;
  3948         -      }
  3949         -  
  3950         -      case FTSQUERY_OR: {
  3951         -        Fts3Expr *pLeft = pExpr->pLeft;
  3952         -        Fts3Expr *pRight = pExpr->pRight;
  3953         -        sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
  3954         -
  3955         -        assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid );
  3956         -        assert( pRight->bStart || pLeft->iDocid==pRight->iDocid );
  3957         -
  3958         -        if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
  3959         -          fts3EvalNext(pCsr, pLeft, pRc);
  3960         -        }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){
  3961         -          fts3EvalNext(pCsr, pRight, pRc);
  3962         -        }else{
  3963         -          fts3EvalNext(pCsr, pLeft, pRc);
  3964         -          fts3EvalNext(pCsr, pRight, pRc);
  3965         -        }
  3966         -
  3967         -        pExpr->bEof = (pLeft->bEof && pRight->bEof);
  3968         -        iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
  3969         -        if( pRight->bEof || (pLeft->bEof==0 &&  iCmp<0) ){
  3970         -          pExpr->iDocid = pLeft->iDocid;
  3971         -        }else{
  3972         -          pExpr->iDocid = pRight->iDocid;
  3973         -        }
  3974         -
  3975         -        break;
  3976         -      }
  3977         -
  3978         -      case FTSQUERY_NOT: {
  3979         -        Fts3Expr *pLeft = pExpr->pLeft;
  3980         -        Fts3Expr *pRight = pExpr->pRight;
  3981         -
  3982         -        if( pRight->bStart==0 ){
  3983         -          fts3EvalNext(pCsr, pRight, pRc);
  3984         -          assert( *pRc!=SQLITE_OK || pRight->bStart );
  3985         -        }
  3986         -
  3987         -        fts3EvalNext(pCsr, pLeft, pRc);
  3988         -        if( pLeft->bEof==0 ){
  3989         -          while( !*pRc 
  3990         -              && !pRight->bEof 
  3991         -              && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 
  3992         -          ){
  3993         -            fts3EvalNext(pCsr, pRight, pRc);
  3994         -          }
  3995         -        }
  3996         -        pExpr->iDocid = pLeft->iDocid;
  3997         -        pExpr->bEof = pLeft->bEof;
  3998         -        break;
  3999         -      }
  4000         -
  4001         -      default: {
  4002         -        Fts3Phrase *pPhrase = pExpr->pPhrase;
  4003         -        fts3EvalZeroPoslist(pPhrase);
  4004         -        *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
  4005         -        pExpr->iDocid = pPhrase->doclist.iDocid;
  4006         -        break;
  4007         -      }
  4008         -    }
  4009         -  }
  4010         -}
  4011         -
  4012         -static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){
  4013         -  int bHit = 1;
         4320  +  int bHit = 1;                   /* Return value */
  4014   4321     if( *pRc==SQLITE_OK ){
  4015   4322       switch( pExpr->eType ){
  4016   4323         case FTSQUERY_NEAR:
  4017   4324         case FTSQUERY_AND:
  4018   4325           bHit = (
  4019         -            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
  4020         -         && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
         4326  +            fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
         4327  +         && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
  4021   4328            && fts3EvalNearTest(pExpr, pRc)
  4022   4329           );
  4023   4330   
  4024   4331           /* If the NEAR expression does not match any rows, zero the doclist for 
  4025   4332           ** all phrases involved in the NEAR. This is because the snippet(),
  4026   4333           ** offsets() and matchinfo() functions are not supposed to recognize 
  4027   4334           ** any instances of phrases that are part of unmatched NEAR queries. 
................................................................................
  4039   4346           if( bHit==0 
  4040   4347            && pExpr->eType==FTSQUERY_NEAR 
  4041   4348            && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
  4042   4349           ){
  4043   4350             Fts3Expr *p;
  4044   4351             for(p=pExpr; p->pPhrase==0; p=p->pLeft){
  4045   4352               if( p->pRight->iDocid==pCsr->iPrevId ){
  4046         -              fts3EvalZeroPoslist(p->pRight->pPhrase);
         4353  +              fts3EvalInvalidatePoslist(p->pRight->pPhrase);
  4047   4354               }
  4048   4355             }
  4049   4356             if( p->iDocid==pCsr->iPrevId ){
  4050         -            fts3EvalZeroPoslist(p->pPhrase);
         4357  +            fts3EvalInvalidatePoslist(p->pPhrase);
  4051   4358             }
  4052   4359           }
  4053   4360   
  4054   4361           break;
  4055   4362   
  4056   4363         case FTSQUERY_OR: {
  4057         -        int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc);
  4058         -        int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc);
         4364  +        int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc);
         4365  +        int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc);
  4059   4366           bHit = bHit1 || bHit2;
  4060   4367           break;
  4061   4368         }
  4062   4369   
  4063   4370         case FTSQUERY_NOT:
  4064   4371           bHit = (
  4065         -            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
  4066         -         && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
         4372  +            fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
         4373  +         && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
  4067   4374           );
  4068   4375           break;
  4069   4376   
  4070   4377         default: {
  4071   4378           if( pCsr->pDeferred 
  4072   4379            && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred)
  4073   4380           ){
  4074   4381             Fts3Phrase *pPhrase = pExpr->pPhrase;
  4075   4382             assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 );
  4076   4383             if( pExpr->bDeferred ){
  4077         -            fts3EvalZeroPoslist(pPhrase);
         4384  +            fts3EvalInvalidatePoslist(pPhrase);
  4078   4385             }
  4079   4386             *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
  4080   4387             bHit = (pPhrase->doclist.pList!=0);
  4081   4388             pExpr->iDocid = pCsr->iPrevId;
  4082   4389           }else{
  4083   4390             bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId);
  4084   4391           }
................................................................................
  4086   4393         }
  4087   4394       }
  4088   4395     }
  4089   4396     return bHit;
  4090   4397   }
  4091   4398   
  4092   4399   /*
  4093         -** Return 1 if both of the following are true:
         4400  +** This function is called as the second part of each xNext operation when
         4401  +** iterating through the results of a full-text query. At this point the
         4402  +** cursor points to a row that matches the query expression, with the
         4403  +** following caveats:
         4404  +**
         4405  +**   * Up until this point, "NEAR" operators in the expression have been
         4406  +**     treated as "AND".
         4407  +**
         4408  +**   * Deferred tokens have not yet been considered.
         4409  +**
         4410  +** If *pRc is not SQLITE_OK when this function is called, it immediately
         4411  +** returns 0. Otherwise, it tests whether or not, after considering NEAR
         4412  +** operators and deferred tokens, the current row is still a match for the
         4413  +** expression. It returns 1 if both of the following are true:
  4094   4414   **
  4095   4415   **   1. *pRc is SQLITE_OK when this function returns, and
  4096   4416   **
  4097   4417   **   2. After scanning the current FTS table row for the deferred tokens,
  4098         -**      it is determined that the row does not match the query.
         4418  +**      it is determined that the row does *not* match the query.
  4099   4419   **
  4100   4420   ** Or, if no error occurs and it seems the current row does match the FTS
  4101   4421   ** query, return 0.
  4102   4422   */
  4103         -static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){
         4423  +static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){
  4104   4424     int rc = *pRc;
  4105   4425     int bMiss = 0;
  4106   4426     if( rc==SQLITE_OK ){
         4427  +
         4428  +    /* If there are one or more deferred tokens, load the current row into
         4429  +    ** memory and scan it to determine the position list for each deferred
         4430  +    ** token. Then, see if this row is really a match, considering deferred
         4431  +    ** tokens and NEAR operators (neither of which were taken into account
         4432  +    ** earlier, by fts3EvalNextRow()). 
         4433  +    */
  4107   4434       if( pCsr->pDeferred ){
  4108   4435         rc = fts3CursorSeek(0, pCsr);
  4109   4436         if( rc==SQLITE_OK ){
  4110   4437           rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
  4111   4438         }
  4112   4439       }
  4113         -    bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc));
         4440  +    bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc));
         4441  +
         4442  +    /* Free the position-lists accumulated for each deferred token above. */
  4114   4443       sqlite3Fts3FreeDeferredDoclists(pCsr);
  4115   4444       *pRc = rc;
  4116   4445     }
  4117   4446     return (rc==SQLITE_OK && bMiss);
  4118   4447   }
  4119   4448   
  4120   4449   /*
  4121   4450   ** Advance to the next document that matches the FTS expression in
  4122   4451   ** Fts3Cursor.pExpr.
  4123   4452   */
  4124         -int sqlite3Fts3EvalNext(Fts3Cursor *pCsr){
         4453  +static int fts3EvalNext(Fts3Cursor *pCsr){
  4125   4454     int rc = SQLITE_OK;             /* Return Code */
  4126   4455     Fts3Expr *pExpr = pCsr->pExpr;
  4127   4456     assert( pCsr->isEof==0 );
  4128   4457     if( pExpr==0 ){
  4129   4458       pCsr->isEof = 1;
  4130   4459     }else{
  4131   4460       do {
  4132   4461         if( pCsr->isRequireSeek==0 ){
  4133   4462           sqlite3_reset(pCsr->pStmt);
  4134   4463         }
  4135   4464         assert( sqlite3_data_count(pCsr->pStmt)==0 );
  4136         -      fts3EvalNext(pCsr, pExpr, &rc);
         4465  +      fts3EvalNextRow(pCsr, pExpr, &rc);
  4137   4466         pCsr->isEof = pExpr->bEof;
  4138   4467         pCsr->isRequireSeek = 1;
  4139   4468         pCsr->isMatchinfoNeeded = 1;
  4140   4469         pCsr->iPrevId = pExpr->iDocid;
  4141         -    }while( pCsr->isEof==0 && fts3EvalLoadDeferred(pCsr, &rc) );
         4470  +    }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) );
  4142   4471     }
  4143   4472     return rc;
  4144   4473   }
  4145   4474   
  4146   4475   /*
  4147   4476   ** Restart iteration for expression pExpr so that the next call to
  4148         -** sqlite3Fts3EvalNext() visits the first row. Do not allow incremental 
         4477  +** fts3EvalNext() visits the first row. Do not allow incremental 
  4149   4478   ** loading or merging of phrase doclists for this iteration.
  4150   4479   **
  4151   4480   ** If *pRc is other than SQLITE_OK when this function is called, it is
  4152   4481   ** a no-op. If an error occurs within this function, *pRc is set to an
  4153   4482   ** SQLite error code before returning.
  4154   4483   */
  4155   4484   static void fts3EvalRestart(
................................................................................
  4157   4486     Fts3Expr *pExpr,
  4158   4487     int *pRc
  4159   4488   ){
  4160   4489     if( pExpr && *pRc==SQLITE_OK ){
  4161   4490       Fts3Phrase *pPhrase = pExpr->pPhrase;
  4162   4491   
  4163   4492       if( pPhrase ){
  4164         -      fts3EvalZeroPoslist(pPhrase);
         4493  +      fts3EvalInvalidatePoslist(pPhrase);
  4165   4494         if( pPhrase->bIncr ){
  4166   4495           assert( pPhrase->nToken==1 );
  4167   4496           assert( pPhrase->aToken[0].pSegcsr );
  4168   4497           sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr);
  4169   4498           *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
  4170   4499         }
  4171   4500   
................................................................................
  4273   4602   
  4274   4603         do {
  4275   4604           /* Ensure the %_content statement is reset. */
  4276   4605           if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt);
  4277   4606           assert( sqlite3_data_count(pCsr->pStmt)==0 );
  4278   4607   
  4279   4608           /* Advance to the next document */
  4280         -        fts3EvalNext(pCsr, pRoot, &rc);
         4609  +        fts3EvalNextRow(pCsr, pRoot, &rc);
  4281   4610           pCsr->isEof = pRoot->bEof;
  4282   4611           pCsr->isRequireSeek = 1;
  4283   4612           pCsr->isMatchinfoNeeded = 1;
  4284   4613           pCsr->iPrevId = pRoot->iDocid;
  4285   4614         }while( pCsr->isEof==0 
  4286   4615              && pRoot->eType==FTSQUERY_NEAR 
  4287         -           && fts3EvalLoadDeferred(pCsr, &rc) 
         4616  +           && fts3EvalTestDeferredAndNear(pCsr, &rc) 
  4288   4617         );
  4289   4618   
  4290   4619         if( rc==SQLITE_OK && pCsr->isEof==0 ){
  4291   4620           fts3EvalUpdateCounts(pRoot);
  4292   4621         }
  4293   4622       }
  4294   4623   
................................................................................
  4302   4631         ** order. For this reason, even though it seems more defensive, the 
  4303   4632         ** do loop cannot be written:
  4304   4633         **
  4305   4634         **   do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK );
  4306   4635         */
  4307   4636         fts3EvalRestart(pCsr, pRoot, &rc);
  4308   4637         do {
  4309         -        fts3EvalNext(pCsr, pRoot, &rc);
         4638  +        fts3EvalNextRow(pCsr, pRoot, &rc);
  4310   4639           assert( pRoot->bEof==0 );
  4311   4640         }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK );
  4312         -      fts3EvalLoadDeferred(pCsr, &rc);
         4641  +      fts3EvalTestDeferredAndNear(pCsr, &rc);
  4313   4642       }
  4314   4643     }
  4315   4644     return rc;
  4316   4645   }
  4317   4646   
  4318   4647   /*
  4319   4648   ** This function is used by the matchinfo() module to query a phrase 
................................................................................
  4436   4765   **   * the contents of pPhrase->doclist, and
  4437   4766   **   * any Fts3MultiSegReader objects held by phrase tokens.
  4438   4767   */
  4439   4768   void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
  4440   4769     if( pPhrase ){
  4441   4770       int i;
  4442   4771       sqlite3_free(pPhrase->doclist.aAll);
  4443         -    fts3EvalZeroPoslist(pPhrase);
         4772  +    fts3EvalInvalidatePoslist(pPhrase);
  4444   4773       memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist));
  4445   4774       for(i=0; i<pPhrase->nToken; i++){
  4446   4775         fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr);
  4447   4776         pPhrase->aToken[i].pSegcsr = 0;
  4448   4777       }
  4449   4778     }
  4450   4779   }
         4780  +
         4781  +#if !SQLITE_CORE
         4782  +/*
         4783  +** Initialize API pointer table, if required.
         4784  +*/
         4785  +int sqlite3_extension_init(
         4786  +  sqlite3 *db, 
         4787  +  char **pzErrMsg,
         4788  +  const sqlite3_api_routines *pApi
         4789  +){
         4790  +  SQLITE_EXTENSION_INIT2(pApi)
         4791  +  return sqlite3Fts3Init(db);
         4792  +}
         4793  +#endif
  4451   4794   
  4452   4795   #endif

Changes to ext/fts3/fts3Int.h.

   496    496     int nTerm,                      /* Size of zTerm in bytes */
   497    497     int isPrefix,                   /* True for a prefix search */
   498    498     Fts3MultiSegReader **ppSegcsr   /* OUT: Allocated seg-reader cursor */
   499    499   );
   500    500   
   501    501   void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
   502    502   
   503         -int sqlite3Fts3EvalStart(Fts3Cursor *, Fts3Expr *, int);
   504         -int sqlite3Fts3EvalNext(Fts3Cursor *pCsr);
   505         -
   506    503   int sqlite3Fts3MsrIncrStart(
   507    504       Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
   508    505   int sqlite3Fts3MsrIncrNext(
   509    506       Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
   510    507   char *sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol); 
   511    508   int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
   512    509   int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
   513    510   
   514    511   int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
   515    512   
   516    513   #endif /* SQLITE_ENABLE_FTS3 */
   517    514   #endif /* _FTSINT_H */