SQLite

Check-in [f25cfa1aec]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:The FTS1 tables have a new automatic column named "offset" that returns a string containing byte offset information for all matching terms. Also added a large test case based on SQLite mailing list entries. (CVS 3417)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: f25cfa1aec0e4c1fe07176039a1b7f4e6a2c66ec
User & Date: drh 2006-09-14 01:17:31.000
Context
2006-09-14
13:47
Enhanced I/O error simulation. (CVS 3418) (check-in: 86931854fc user: drh tags: trunk)
01:17
The FTS1 tables have a new automatic column named "offset" that returns a string containing byte offset information for all matching terms. Also added a large test case based on SQLite mailing list entries. (CVS 3417) (check-in: f25cfa1aec user: drh tags: trunk)
2006-09-13
20:22
Modify the ".dump" command in the command-line shell so that it works with virtual tables. (CVS 3416) (check-in: afd40184b7 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts1/fts1.c.
37
38
39
40
41
42
43
44

45
46
47
48
49

50
51
52
53
54
55


56





57
58
59
60
61
62
63
#else
# define TRACE(A)
#endif

/* utility functions */

typedef struct StringBuffer {
  int len;  /* length, not including null terminator */

  char *s;
} StringBuffer;

void initStringBuffer(StringBuffer *sb){
  sb->len = 0;

  sb->s = malloc(1);
  sb->s[0] = '\0';
}

void append(StringBuffer *sb, const char *zFrom){
  int nFrom = strlen(zFrom);


  sb->s = realloc(sb->s, sb->len + nFrom + 1);





  strcpy(sb->s + sb->len, zFrom);
  sb->len += nFrom;
}

/* We encode variable-length integers in little-endian order using seven bits
 * per byte as follows:
**







|
>
|




>
|





>
>
|
>
>
>
>
>







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#else
# define TRACE(A)
#endif

/* utility functions */

typedef struct StringBuffer {
  int len;      /* length, not including null terminator */
  int alloced;  /* Space allocated for s[] */ 
  char *s;      /* Content of the string */
} StringBuffer;

void initStringBuffer(StringBuffer *sb){
  sb->len = 0;
  sb->alloced = 100;
  sb->s = malloc(100);
  sb->s[0] = '\0';
}

void append(StringBuffer *sb, const char *zFrom){
  int nFrom = strlen(zFrom);
  if( sb->len + nFrom >= sb->alloced ){
    sb->alloced = sb->len + nFrom + 100;
    sb->s = realloc(sb->s, sb->alloced+1);
    if( sb->s==0 ){
      initStringBuffer(sb);
      return;
    }
  }
  strcpy(sb->s + sb->len, zFrom);
  sb->len += nFrom;
}

/* We encode variable-length integers in little-endian order using seven bits
 * per byte as follows:
**
841
842
843
844
845
846
847

848
849
850
851
852
853
854
typedef struct fulltext_vtab fulltext_vtab;

/* A single term in a query is represented by an instances of
** the following structure.
*/
typedef struct QueryTerm {
  short int nPhrase; /* How many following terms are part of the same phrase */

  short int iColumn; /* Column of the index that must match this term */
  signed char isOr;  /* this term is preceded by "OR" */
  signed char isNot; /* this term is preceded by "-" */
  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
  int nTerm;         /* Number of bytes in pTerm[] */
} QueryTerm;








>







850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
typedef struct fulltext_vtab fulltext_vtab;

/* A single term in a query is represented by an instances of
** the following structure.
*/
typedef struct QueryTerm {
  short int nPhrase; /* How many following terms are part of the same phrase */
  short int iPhrase; /* This is the i-th term of a phrase. */
  short int iColumn; /* Column of the index that must match this term */
  signed char isOr;  /* this term is preceded by "OR" */
  signed char isNot; /* this term is preceded by "-" */
  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
  int nTerm;         /* Number of bytes in pTerm[] */
} QueryTerm;

886
887
888
889
890
891
892


















893
894
895
896
897
898
899
  int nTerms;           /* Number of terms in the query */
  QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
  int nextIsOr;         /* Set the isOr flag on the next inserted term */
  int nextColumn;       /* Next word parsed must be in this column */
  int dfltColumn;       /* The default column */
} Query;




















typedef enum QueryType {
  QUERY_GENERIC,   /* table scan */
  QUERY_ROWID,     /* lookup by rowid */
  QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
} QueryType;








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
  int nTerms;           /* Number of terms in the query */
  QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
  int nextIsOr;         /* Set the isOr flag on the next inserted term */
  int nextColumn;       /* Next word parsed must be in this column */
  int dfltColumn;       /* The default column */
} Query;


/*
** An instance of the following structure keeps track of generated
** matching-word offset information and snippets.
*/
typedef struct Snippet {
  int nMatch;     /* Total number of matches */
  int nAlloc;     /* Space allocated for aMatch[] */
  struct {        /* One entry for each matching term */
    int iCol;        /* The column that contains the match */
    int iTerm;       /* The index in Query.pTerms[] of the matching term */
    int iStart;      /* The offset to the first character of the term */
    int nByte;       /* Number of bytes in the term */
  } *aMatch;      /* Points to space obtained from malloc */
  char *zOffset;  /* Text rendering of aMatch[] */
  int nOffset;    /* strlen(zOffset) */
} Snippet;


typedef enum QueryType {
  QUERY_GENERIC,   /* table scan */
  QUERY_ROWID,     /* lookup by rowid */
  QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
} QueryType;

970
971
972
973
974
975
976
977
978
979
980


981
982
983
984
985
986
987
/*
** When the core wants to do a query, it create a cursor using a
** call to xOpen.  This structure is an instance of a cursor.  It
** is destroyed by xClose.
*/
typedef struct fulltext_cursor {
  sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
  QueryType iCursorType;           /* Type of cursor */
  sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
  int eof;                         /* True if at End Of Results */
  Query q;                         /* Parsed query string */


  DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */ 
} fulltext_cursor;

static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}








|



>
>







998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
/*
** When the core wants to do a query, it create a cursor using a
** call to xOpen.  This structure is an instance of a cursor.  It
** is destroyed by xClose.
*/
typedef struct fulltext_cursor {
  sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
  QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
  sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
  int eof;                         /* True if at End Of Results */
  Query q;                         /* Parsed query string */
  Snippet snippet;                 /* Cached snippet for the current row */
  int iColumn;                     /* Column being searched */
  DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */ 
} fulltext_cursor;

static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}

1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
*/
typedef struct TableSpec {
  const char *zName;       /* Name of the full-text index */
  int nColumn;             /* Number of columns to be indexed */
  char **azColumn;         /* Original names of columns to be indexed */
  char *zColumnList;       /* Comma-separated list of names for %_content */
  char **azTokenizer;      /* Name of tokenizer and its arguments */
  char **azDelimiter;      /* Delimiters used for snippets */
} TableSpec;

/*
** Reclaim all of the memory used by a TableSpec
*/
void clearTableSpec(TableSpec *p) {
  free(p->azColumn);
  free(p->zColumnList);
  free(p->azTokenizer);
  free(p->azDelimiter);
}

/* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
 *
 * CREATE VIRTUAL TABLE email
 *        USING fts1(subject, body, tokenize mytokenizer(myarg))
 *
 * We return parsed information in a TableSpec structure.
 * 
 */
int parseSpec(TableSpec *pSpec, int argc, const char *const*argv, char**pzErr){
  int i, j, n;
  char *z, *zDummy;
  char **azArg;
  const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */
  const char *zDelimiter = 0;    /* argv[] entry describing the delimiters */

  assert( argc>=3 );
  /* Current interface:
  ** argv[0] - module name
  ** argv[1] - database name
  ** argv[2] - table name
  ** argv[3..] - columns, optionally followed by tokenizer specification







<









<















<







1627
1628
1629
1630
1631
1632
1633

1634
1635
1636
1637
1638
1639
1640
1641
1642

1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657

1658
1659
1660
1661
1662
1663
1664
*/
typedef struct TableSpec {
  const char *zName;       /* Name of the full-text index */
  int nColumn;             /* Number of columns to be indexed */
  char **azColumn;         /* Original names of columns to be indexed */
  char *zColumnList;       /* Comma-separated list of names for %_content */
  char **azTokenizer;      /* Name of tokenizer and its arguments */

} TableSpec;

/*
** Reclaim all of the memory used by a TableSpec
*/
void clearTableSpec(TableSpec *p) {
  free(p->azColumn);
  free(p->zColumnList);
  free(p->azTokenizer);

}

/* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
 *
 * CREATE VIRTUAL TABLE email
 *        USING fts1(subject, body, tokenize mytokenizer(myarg))
 *
 * We return parsed information in a TableSpec structure.
 * 
 */
int parseSpec(TableSpec *pSpec, int argc, const char *const*argv, char**pzErr){
  int i, j, n;
  char *z, *zDummy;
  char **azArg;
  const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */


  assert( argc>=3 );
  /* Current interface:
  ** argv[0] - module name
  ** argv[1] - database name
  ** argv[2] - table name
  ** argv[3..] - columns, optionally followed by tokenizer specification
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
  /* Identify the column names and the tokenizer and delimiter arguments
  ** in the argv[][] array.
  */
  pSpec->zName = azArg[2];
  pSpec->nColumn = 0;
  pSpec->azColumn = azArg;
  zTokenizer = "tokenize simple";
  zDelimiter = "delimiters('[',']','...')";
  n = 0;
  for(i=3, j=0; i<argc; ++i){
    if( startsWith(azArg[i],"tokenize") ){
      zTokenizer = azArg[i];
    }else if( startsWith(azArg[i],"delimiters") ){
      zDelimiter = azArg[i];
    }else{
      z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
      pSpec->nColumn++;
      n += strlen(z) + 6;
    }
  }
  if( pSpec->nColumn==0 ){







<




<
<







1687
1688
1689
1690
1691
1692
1693

1694
1695
1696
1697


1698
1699
1700
1701
1702
1703
1704
  /* Identify the column names and the tokenizer and delimiter arguments
  ** in the argv[][] array.
  */
  pSpec->zName = azArg[2];
  pSpec->nColumn = 0;
  pSpec->azColumn = azArg;
  zTokenizer = "tokenize simple";

  n = 0;
  for(i=3, j=0; i<argc; ++i){
    if( startsWith(azArg[i],"tokenize") ){
      zTokenizer = azArg[i];


    }else{
      z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
      pSpec->nColumn++;
      n += strlen(z) + 6;
    }
  }
  if( pSpec->nColumn==0 ){
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759

  /*
  ** Parse the tokenizer specification string.
  */
  pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
  tokenListToIdList(pSpec->azTokenizer);

  /*
  ** Parse the delimiter specification string.
  */
  pSpec->azDelimiter = tokenizeString(zDelimiter, &n);
  tokenListToIdList(pSpec->azDelimiter);

  return SQLITE_OK;
}

/*
** Generate a CREATE TABLE statement that describes the schema of
** the virtual table.  Return a pointer to this schema.  
**
** If the addAllColumn parameter is true, then add a column named
** "_all" to the end of the schema.
**
** Space is obtained from sqlite3_mprintf() and should be freed
** using sqlite3_free().
*/
static char *fulltextSchema(
  int nColumn,                  /* Number of columns */
  const char *const* azColumn   /* List of columns */
){
  int i;
  char *zSchema, *zNext;
  const char *zSep = "(";
  zSchema = sqlite3_mprintf("CREATE TABLE x");
  for(i=0; i<nColumn; i++){
    zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]);
    sqlite3_free(zSchema);
    zSchema = zNext;
    zSep = ",";
  }
  zNext = sqlite3_mprintf("%s,_all)", zSchema);
  sqlite3_free(zSchema);
  return zNext;
}

/*
** Build a new sqlite3_vtab structure that will describe the
** fulltext index defined by spec.







<
<
<
<
<
<








|


















|







1736
1737
1738
1739
1740
1741
1742






1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777

  /*
  ** Parse the tokenizer specification string.
  */
  pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
  tokenListToIdList(pSpec->azTokenizer);







  return SQLITE_OK;
}

/*
** Generate a CREATE TABLE statement that describes the schema of
** the virtual table.  Return a pointer to this schema.  
**
** If the addAllColumn parameter is true, then add a column named
** "_all" to the end of the schema.  Also add the "offset" column.
**
** Space is obtained from sqlite3_mprintf() and should be freed
** using sqlite3_free().
*/
static char *fulltextSchema(
  int nColumn,                  /* Number of columns */
  const char *const* azColumn   /* List of columns */
){
  int i;
  char *zSchema, *zNext;
  const char *zSep = "(";
  zSchema = sqlite3_mprintf("CREATE TABLE x");
  for(i=0; i<nColumn; i++){
    zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]);
    sqlite3_free(zSchema);
    zSchema = zNext;
    zSep = ",";
  }
  zNext = sqlite3_mprintf("%s,_all,offset)", zSchema);
  sqlite3_free(zSchema);
  return zNext;
}

/*
** Build a new sqlite3_vtab structure that will describe the
** fulltext index defined by spec.
1971
1972
1973
1974
1975
1976
1977



































































































































































1978
1979
1980
1981
1982
1983
1984
1985
1986
1987

1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000

2001
2002
2003
2004
2005
2006
2007
  int i;
  for(i = 0; i < q->nTerms; ++i){
    free(q->pTerms[i].pTerm);
  }
  free(q->pTerms);
  memset(q, 0, sizeof(*q));
}




































































































































































/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  TRACE(("FTS1 Close %p\n", c));
  sqlite3_finalize(c->pStmt);
  queryClear(&c->q);

  if( c->result.pDoclist!=NULL ){
    docListDelete(c->result.pDoclist);
  }
  free(c);
  return SQLITE_OK;
}

static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  sqlite_int64 iDocid;
  int rc;

  TRACE(("FTS1 Next %p\n", pCursor));

  if( c->iCursorType < QUERY_FULLTEXT ){
    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
    rc = sqlite3_step(c->pStmt);
    switch( rc ){
      case SQLITE_ROW:
        c->eof = 0;
        return SQLITE_OK;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>










>













>







1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
  int i;
  for(i = 0; i < q->nTerms; ++i){
    free(q->pTerms[i].pTerm);
  }
  free(q->pTerms);
  memset(q, 0, sizeof(*q));
}

/* Free all of the dynamically allocated memory held by the
** Snippet
*/
static void snippetClear(Snippet *p){
  free(p->aMatch);
  free(p->zOffset);
  memset(p, 0, sizeof(*p));
}
/*
** Append a single entry to the p->aMatch[] log.
*/
static void snippetAppendMatch(
  Snippet *p,               /* Append the entry to this snippet */
  int iCol, int iTerm,      /* The column and query term */
  int iStart, int nByte     /* Offset and size of the match */
){
  int i;
  if( p->nMatch+1>=p->nAlloc ){
    p->nAlloc = p->nAlloc*2 + 10;
    p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
    if( p->aMatch==0 ){
      p->nMatch = 0;
      p->nAlloc = 0;
      return;
    }
  }
  i = p->nMatch++;
  p->aMatch[i].iCol = iCol;
  p->aMatch[i].iTerm = iTerm;
  p->aMatch[i].iStart = iStart;
  p->aMatch[i].nByte = nByte;
}

/*
** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
*/
#define FTS1_ROTOR_SZ   (32)
#define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1)

/*
** Add entries to pSnippet->aMatch[] for every match that occurs against
** document zDoc[0..nDoc-1] which is stored in column iColumn.
*/
static void snippetOffsetsOfColumn(
  Query *pQuery,
  Snippet *pSnippet,
  int iColumn,
  const char *zDoc,
  int nDoc
){
  const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
  sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
  sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
  fulltext_vtab *pVtab;                /* The full text index */
  int nColumn;                         /* Number of columns in the index */
  const QueryTerm *aTerm;              /* Query string terms */
  int nTerm;                           /* Number of query string terms */  
  int i, j;                            /* Loop counters */
  int rc;                              /* Return code */
  unsigned int match, prevMatch;       /* Phrase search bitmasks */
  const char *zToken;                  /* Next token from the tokenizer */
  int nToken;                          /* Size of zToken */
  int iBegin, iEnd, iPos;              /* Offsets of beginning and end */

  /* The following variables keep a circular buffer of the last
  ** few tokens */
  unsigned int iRotor = 0;             /* Index of current token */
  int iRotorBegin[FTS1_ROTOR_SZ];      /* Beginning offset of token */
  int iRotorLen[FTS1_ROTOR_SZ];        /* Length of token */

  pVtab = pQuery->pFts;
  nColumn = pVtab->nColumn;
  pTokenizer = pVtab->pTokenizer;
  pTModule = pTokenizer->pModule;
  rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
  if( rc ) return;
  pTCursor->pTokenizer = pTokenizer;
  aTerm = pQuery->pTerms;
  nTerm = pQuery->nTerms;
  if( nTerm>=FTS1_ROTOR_SZ ){
    nTerm = FTS1_ROTOR_SZ - 1;
  }
  prevMatch = 0;
  while(1){
    rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
    if( rc ) break;
    iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin;
    iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin;
    match = 0;
    for(i=0; i<nTerm; i++){
      int iCol;
      iCol = aTerm[i].iColumn;
      if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
      if( aTerm[i].nTerm!=nToken ) continue;
      if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue;
      if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
      match |= 1<<i;
      if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
        for(j=aTerm[i].iPhrase-1; j>=0; j--){
          int k = (iRotor-j) & FTS1_ROTOR_MASK;
          snippetAppendMatch(pSnippet, iColumn, i-j,
                iRotorBegin[k], iRotorLen[k]);
        }
      }
    }
    prevMatch = match<<1;
    iRotor++;
  }
  pTModule->xClose(pTCursor);  
}


/*
** Compute all offsets for the current row of the query.  
** If the offsets have already been computed, this routine is a no-op.
*/
static void snippetAllOffsets(fulltext_cursor *p){
  int nColumn;
  int iColumn, i;
  int iFirst, iLast;
  fulltext_vtab *pFts;

  if( p->snippet.nMatch ) return;
  if( p->q.nTerms==0 ) return;
  pFts = p->q.pFts;
  nColumn = pFts->nColumn;
  iColumn = p->iCursorType;
  if( iColumn<0 || iColumn>=nColumn ){
    iFirst = 0;
    iLast = nColumn-1;
  }else{
    iFirst = iColumn;
    iLast = iColumn;
  }
  for(i=iFirst; i<=iLast; i++){
    const char *zDoc;
    int nDoc;
    zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
    nDoc = sqlite3_column_bytes(p->pStmt, i+1);
    snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
  }
}

/*
** Convert the information in the aMatch[] array of the snippet
** into the string zOffset[0..nOffset-1].
*/
static void snippetOffsetText(Snippet *p){
  int i;
  StringBuffer sb;
  char zBuf[200];
  if( p->zOffset ) return;
  initStringBuffer(&sb);
  for(i=0; i<p->nMatch; i++){
    zBuf[0] = ' ';
    sprintf(&zBuf[i>0], "%d %d %d %d", p->aMatch[i].iCol,
        p->aMatch[i].iTerm, p->aMatch[i].iStart, p->aMatch[i].nByte);
    append(&sb, zBuf);
  }
  p->zOffset = sb.s;
  p->nOffset = sb.len;
}

/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  TRACE(("FTS1 Close %p\n", c));
  sqlite3_finalize(c->pStmt);
  queryClear(&c->q);
  snippetClear(&c->snippet);
  if( c->result.pDoclist!=NULL ){
    docListDelete(c->result.pDoclist);
  }
  free(c);
  return SQLITE_OK;
}

static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  sqlite_int64 iDocid;
  int rc;

  TRACE(("FTS1 Next %p\n", pCursor));
  snippetClear(&c->snippet);
  if( c->iCursorType < QUERY_FULLTEXT ){
    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
    rc = sqlite3_step(c->pStmt);
    switch( rc ){
      case SQLITE_ROW:
        c->eof = 0;
        return SQLITE_OK;
2128
2129
2130
2131
2132
2133
2134

2135
2136
2137
2138
2139
2140
2141
  int inPhrase,                           /* True if within "..." */
  Query *pQuery                           /* Append results here */
){
  const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
  sqlite3_tokenizer_cursor *pCursor;
  int firstIndex = pQuery->nTerms;
  int iCol;

  
  int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
  if( rc!=SQLITE_OK ) return rc;
  pCursor->pTokenizer = pTokenizer;

  while( 1 ){
    const char *pToken;







>







2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
  int inPhrase,                           /* True if within "..." */
  Query *pQuery                           /* Append results here */
){
  const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
  sqlite3_tokenizer_cursor *pCursor;
  int firstIndex = pQuery->nTerms;
  int iCol;
  int nTerm = 1;
  
  int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
  if( rc!=SQLITE_OK ) return rc;
  pCursor->pTokenizer = pTokenizer;

  while( 1 ){
    const char *pToken;
2155
2156
2157
2158
2159
2160
2161




2162
2163
2164
2165
2166
2167
2168
         && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
      pQuery->nextIsOr = 1;
      continue;
    }
    queryAdd(pQuery, pToken, nToken);
    if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
      pQuery->pTerms[pQuery->nTerms-1].isNot = 1;




    }
  }

  if( inPhrase && pQuery->nTerms>firstIndex ){
    pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
  }








>
>
>
>







2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
         && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
      pQuery->nextIsOr = 1;
      continue;
    }
    queryAdd(pQuery, pToken, nToken);
    if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
      pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
    }
    pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
    if( inPhrase ){
      nTerm++;
    }
  }

  if( inPhrase && pQuery->nTerms>firstIndex ){
    pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
  }

2352
2353
2354
2355
2356
2357
2358




2359
2360
2361
2362
2363






2364
2365
2366
2367
2368
2369



2370

2371
2372
2373
2374


2375

2376
2377
2378
2379
2380




2381
2382
2383
2384
2385
2386
2387

  rc = fulltextNext(pCursor);

out:
  return rc;
}





static int fulltextEof(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  return c->eof;
}







static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
                          sqlite3_context *pContext, int idxCol){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  fulltext_vtab *v = cursor_vtab(c);
  const char *s;




  if( idxCol==v->nColumn ){  /* a request for _all */

    sqlite3_result_null(pContext);
  } else {
    assert( idxCol<v->nColumn );
    s = (const char *) sqlite3_column_text(c->pStmt, idxCol+1);


    sqlite3_result_text(pContext, s, -1, SQLITE_TRANSIENT);

  }

  return SQLITE_OK;
}





static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;

  *pRowid = sqlite3_column_int64(c->pStmt, 0);
  return SQLITE_OK;
}








>
>
>
>





>
>
>
>
>
>




<

>
>
>
|
>

<
|
|
>
>
|
>

<



>
>
>
>







2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565

2566
2567
2568
2569
2570
2571
2572

2573
2574
2575
2576
2577
2578
2579

2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593

  rc = fulltextNext(pCursor);

out:
  return rc;
}

/* This is the xEof method of the virtual table.  The SQLite core
** calls this routine to find out if it has reached the end of
** a query's results set.
*/
static int fulltextEof(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  return c->eof;
}

/* This is the xColumn method of the virtual table.  The SQLite
** core calls this method during a query when it needs the value
** of a column from the virtual table.  This method needs to use
** one of the sqlite3_result_*() routines to store the requested
** value back in the pContext.
*/
static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
                          sqlite3_context *pContext, int idxCol){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  fulltext_vtab *v = cursor_vtab(c);


  if( idxCol<v->nColumn ){
    sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
    sqlite3_result_value(pContext, pVal);
  }else if( idxCol==v->nColumn ){
    /* The _all column */
    sqlite3_result_null(pContext);

  }else if( idxCol==v->nColumn+1 ){
    /* The offset column */
    snippetAllOffsets(c);
    snippetOffsetText(&c->snippet);
    sqlite3_result_text(pContext, c->snippet.zOffset, c->snippet.nOffset,
                                   SQLITE_STATIC);
  }

  return SQLITE_OK;
}

/* This is the xRowid method.  The SQLite core calls this routine to
** retrive the rowid for the current row of the result set.  The
** rowid should be written to *pRowid.
*/
static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;

  *pRowid = sqlite3_column_int64(c->pStmt, 0);
  return SQLITE_OK;
}

2585
2586
2587
2588
2589
2590
2591
2592


2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612





2613
2614
2615
2616
2617
2618
2619

  if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
    return SQLITE_ERROR;   /* an update; not yet supported */
  }

  /* ppArg[1] = rowid
   * ppArg[2..2+v->nColumn-1] = values
   * ppArg[2+v->nColumn] = value for _all (we ignore this) */


  assert( nArg==2+v->nColumn+1);    

  return index_insert(v, ppArg[1], &ppArg[2], pRowid);
}

static const sqlite3_module fulltextModule = {
  0,
  fulltextCreate,
  fulltextConnect,
  fulltextBestIndex,
  fulltextDisconnect,
  fulltextDestroy,
  fulltextOpen,
  fulltextClose,
  fulltextFilter,
  fulltextNext,
  fulltextEof,
  fulltextColumn,
  fulltextRowid,
  fulltextUpdate





};

int sqlite3Fts1Init(sqlite3 *db){
 return sqlite3_create_module(db, "fts1", &fulltextModule, 0);
}

#if !SQLITE_CORE







|
>
>
|





|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>







2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832

  if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
    return SQLITE_ERROR;   /* an update; not yet supported */
  }

  /* ppArg[1] = rowid
   * ppArg[2..2+v->nColumn-1] = values
   * ppArg[2+v->nColumn] = value for _all (we ignore this)
   * ppArg[3+v->nColumn] = value of offset (we ignore this too)
   */
  assert( nArg==2+v->nColumn+2);    

  return index_insert(v, ppArg[1], &ppArg[2], pRowid);
}

static const sqlite3_module fulltextModule = {
  /* iVersion      */ 0,
  /* xCreate       */ fulltextCreate,
  /* xConnect      */ fulltextConnect,
  /* xBestIndex    */ fulltextBestIndex,
  /* xDisconnect   */ fulltextDisconnect,
  /* xDestroy      */ fulltextDestroy,
  /* xOpen         */ fulltextOpen,
  /* xClose        */ fulltextClose,
  /* xFilter       */ fulltextFilter,
  /* xNext         */ fulltextNext,
  /* xEof          */ fulltextEof,
  /* xColumn       */ fulltextColumn,
  /* xRowid        */ fulltextRowid,
  /* xUpdate       */ fulltextUpdate,
  /* xBegin        */ 0, 
  /* xSync         */ 0,
  /* xCommit       */ 0,
  /* xRollback     */ 0,
  /* xFindFunction */ 0,
};

int sqlite3Fts1Init(sqlite3 *db){
 return sqlite3_create_module(db, "fts1", &fulltextModule, 0);
}

#if !SQLITE_CORE