SQLite

Check-in [607d928ce9]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:In FTS1: Retain the Query structure as part of the cursor. It will be used laster as part of snippet generation. (CVS 3414)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 607d928ce91f3efa9c7019fc789a9cd3c41cfc92
User & Date: drh 2006-09-13 19:18:29.000
Context
2006-09-13
19:21
Remove unused malloc failure test. (Ticket #1976) Also include fixes for other problems discovered while investigating ticket #1976. (CVS 3415) (check-in: f4ab546b2e user: drh tags: trunk)
19:18
In FTS1: Retain the Query structure as part of the cursor. It will be used laster as part of snippet generation. (CVS 3414) (check-in: 607d928ce9 user: drh tags: trunk)
18:40
Earlier refactoring changed name in fts1.c but not fts1.h. (CVS 3413) (check-in: d4edb8035c user: shess tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts1/fts1.c.
832
833
834
835
836
837
838






















































839
840
841
842
843
844
845
  TRACE(("FTS1 prepare: %s\n", zCommand));
  rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
  free(zCommand);
  return rc;
}

/* end utility functions */























































typedef enum QueryType {
  QUERY_GENERIC,   /* table scan */
  QUERY_ROWID,     /* lookup by rowid */
  QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
} QueryType;








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
  TRACE(("FTS1 prepare: %s\n", zCommand));
  rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
  free(zCommand);
  return rc;
}

/* end utility functions */

/* Forward reference */
typedef struct fulltext_vtab fulltext_vtab;

/* A single term in a query is represented by an instances of
** the following structure.
*/
typedef struct QueryTerm {
  short int nPhrase; /* How many following terms are part of the same phrase */
  short int iColumn; /* Column of the index that must match this term */
  signed char isOr;  /* this term is preceded by "OR" */
  signed char isNot; /* this term is preceded by "-" */
  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
  int nTerm;         /* Number of bytes in pTerm[] */
} QueryTerm;


/* A query string is parsed into a Query structure.
 *
 * We could, in theory, allow query strings to be complicated
 * nested expressions with precedence determined by parentheses.
 * But none of the major search engines do this.  (Perhaps the
 * feeling is that an parenthesized expression is two complex of
 * an idea for the average user to grasp.)  Taking our lead from
 * the major search engines, we will allow queries to be a list
 * of terms (with an implied AND operator) or phrases in double-quotes,
 * with a single optional "-" before each non-phrase term to designate
 * negation and an optional OR connector.
 *
 * OR binds more tightly than the implied AND, which is what the
 * major search engines seem to do.  So, for example:
 * 
 *    [one two OR three]     ==>    one AND (two OR three)
 *    [one OR two three]     ==>    (one OR two) AND three
 *
 * A "-" before a term matches all entries that lack that term.
 * The "-" must occur immediately before the term with in intervening
 * space.  This is how the search engines do it.
 *
 * A NOT term cannot be the right-hand operand of an OR.  If this
 * occurs in the query string, the NOT is ignored:
 *
 *    [one OR -two]          ==>    one OR two
 *
 */
typedef struct Query {
  fulltext_vtab *pFts;  /* The full text index */
  int nTerms;           /* Number of terms in the query */
  QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
  int nextIsOr;         /* Set the isOr flag on the next inserted term */
  int nextColumn;       /* Next word parsed must be in this column */
  int dfltColumn;       /* The default column */
} Query;


typedef enum QueryType {
  QUERY_GENERIC,   /* table scan */
  QUERY_ROWID,     /* lookup by rowid */
  QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
} QueryType;

887
888
889
890
891
892
893







894
895
896
897
898
899
900
901
902
903
904
905
906
907
908





909
910
911
912
913
914
915
916
917
918
919
920
921
922
  "select doclist from %_term where term = ? order by segment",
  /* TERM_INSERT */
  "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
  /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
  /* TERM_DELETE */ "delete from %_term where rowid = ?",
};








typedef struct fulltext_vtab {
  sqlite3_vtab base;
  sqlite3 *db;
  const char *zName;               /* virtual table name */
  int nColumn;                     /* number of columns in virtual table */
  char **azColumn;                 /* column names.  malloced */
  char *zColumnList;               /* comma-separate list of column names */
  sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */

  /* Precompiled statements which we keep as long as the table is
  ** open.
  */
  sqlite3_stmt *pFulltextStatements[MAX_STMT];
} fulltext_vtab;






typedef struct fulltext_cursor {
  sqlite3_vtab_cursor base;
  QueryType iCursorType;

  sqlite3_stmt *pStmt;
  int eof;

  DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */ 
} fulltext_cursor;

static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}








>
>
>
>
>
>
>
|
|
|










|

>
>
>
>
>

|
|
<
|
|
|







941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977

978
979
980
981
982
983
984
985
986
987
  "select doclist from %_term where term = ? order by segment",
  /* TERM_INSERT */
  "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
  /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
  /* TERM_DELETE */ "delete from %_term where rowid = ?",
};

/*
** A connection to a fulltext index is an instance of the following
** structure.  The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
** arguments.
*/
struct fulltext_vtab {
  sqlite3_vtab base;               /* Base class used by SQLite core */
  sqlite3 *db;                     /* The database connection */
  const char *zName;               /* virtual table name */
  int nColumn;                     /* number of columns in virtual table */
  char **azColumn;                 /* column names.  malloced */
  char *zColumnList;               /* comma-separate list of column names */
  sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */

  /* Precompiled statements which we keep as long as the table is
  ** open.
  */
  sqlite3_stmt *pFulltextStatements[MAX_STMT];
};

/*
** When the core wants to do a query, it create a cursor using a
** call to xOpen.  This structure is an instance of a cursor.  It
** is destroyed by xClose.
*/
typedef struct fulltext_cursor {
  sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
  QueryType iCursorType;           /* Type of cursor */

  sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
  int eof;                         /* True if at End Of Results */
  Query q;                         /* Parsed query string */
  DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */ 
} fulltext_cursor;

static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}

1895
1896
1897
1898
1899
1900
1901
















1902
1903
1904
1905

1906
1907
1908
1909
1910
1911
1912
  /* sqlite will initialize c->base */
  *ppCursor = &c->base;
  TRACE(("FTS1 Open %p: %p\n", pVTab, c));

  return SQLITE_OK;
}

















static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  TRACE(("FTS1 Close %p\n", c));
  sqlite3_finalize(c->pStmt);

  if( c->result.pDoclist!=NULL ){
    docListDelete(c->result.pDoclist);
  }
  free(c);
  return SQLITE_OK;
}








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




>







1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
  /* sqlite will initialize c->base */
  *ppCursor = &c->base;
  TRACE(("FTS1 Open %p: %p\n", pVTab, c));

  return SQLITE_OK;
}


/* Free all of the dynamically allocated memory held by *q
*/
static void queryClear(Query *q){
  int i;
  for(i = 0; i < q->nTerms; ++i){
    free(q->pTerms[i].pTerm);
  }
  free(q->pTerms);
  memset(q, 0, sizeof(*q));
}

/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  TRACE(("FTS1 Close %p\n", c));
  sqlite3_finalize(c->pStmt);
  queryClear(&c->q);
  if( c->result.pDoclist!=NULL ){
    docListDelete(c->result.pDoclist);
  }
  free(c);
  return SQLITE_OK;
}

1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
      return SQLITE_OK;
    }
    /* an error occurred; abort */
    return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
  }
}

/* A single term in a query is represented by an instances of
** the following structure.
*/
typedef struct QueryTerm {
  short int nPhrase; /* How many following terms are part of the same phrase */
  short int iColumn; /* Column of the index that must match this term */
  signed char isOr;  /* this term is preceded by "OR" */
  signed char isNot; /* this term is preceded by "-" */
  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
  int nTerm;         /* Number of bytes in pTerm[] */
} QueryTerm;


/* Return a DocList corresponding to the query term *pTerm.  If *pTerm
** is the first term of a phrase query, go ahead and evaluate the phrase
** query and return the doclist for the entire phrase query.
**
** The result is stored in pTerm->doclist.
*/







<
<
<
<
<
<
<
<
<
<
<
<







2030
2031
2032
2033
2034
2035
2036












2037
2038
2039
2040
2041
2042
2043
      return SQLITE_OK;
    }
    /* an error occurred; abort */
    return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
  }
}














/* Return a DocList corresponding to the query term *pTerm.  If *pTerm
** is the first term of a phrase query, go ahead and evaluate the phrase
** query and return the doclist for the entire phrase query.
**
** The result is stored in pTerm->doclist.
*/
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
    docListDelete(pRight);
    pLeft = pNew;
  }
  *ppResult = pLeft;
  return SQLITE_OK;
}

/* Parse a query string into a Query structure.
 *
 * We could, in theory, allow query strings to be complicated
 * nested expressions with precedence determined by parentheses.
 * But none of the major search engines do this.  (Perhaps the
 * feeling is that an parenthesized expression is two complex of
 * an idea for the average user to grasp.)  Taking our lead from
 * the major search engines, we will allow queries to be a list
 * of terms (with an implied AND operator) or phrases in double-quotes,
 * with a single optional "-" before each non-phrase term to designate
 * negation and an optional OR connector.
 *
 * OR binds more tightly than the implied AND, which is what the
 * major search engines seem to do.  So, for example:
 * 
 *    [one two OR three]     ==>    one AND (two OR three)
 *    [one OR two three]     ==>    (one OR two) AND three
 *
 * A "-" before a term matches all entries that lack that term.
 * The "-" must occur immediately before the term with in intervening
 * space.  This is how the search engines do it.
 *
 * A NOT term cannot be the right-hand operand of an OR.  If this
 * occurs in the query string, the NOT is ignored:
 *
 *    [one OR -two]          ==>    one OR two
 *
 */
typedef struct Query {
  fulltext_vtab *pFts;  /* The full text index */
  int nTerms;           /* Number of terms in the query */
  QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
  int nextIsOr;         /* Set the isOr flag on the next inserted term */
  int nextColumn;       /* Next word parsed must be in this column */
  int dfltColumn;       /* The default column */
} Query;

/* Add a new term pTerm[0..nTerm-1] to the query *q.
*/
static void queryAdd(Query *q, const char *pTerm, int nTerm){
  QueryTerm *t;
  ++q->nTerms;
  q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
  if( q->pTerms==0 ){







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







2066
2067
2068
2069
2070
2071
2072





































2073
2074
2075
2076
2077
2078
2079
    docListDelete(pRight);
    pLeft = pNew;
  }
  *ppResult = pLeft;
  return SQLITE_OK;
}






































/* Add a new term pTerm[0..nTerm-1] to the query *q.
*/
static void queryAdd(Query *q, const char *pTerm, int nTerm){
  QueryTerm *t;
  ++q->nTerms;
  q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
  if( q->pTerms==0 ){
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
  t->nTerm = nTerm;
  t->isOr = q->nextIsOr;
  q->nextIsOr = 0;
  t->iColumn = q->nextColumn;
  q->nextColumn = q->dfltColumn;
}

/* Free all of the dynamically allocated memory held by *q
*/
static void queryClear(Query *q){
  int i;
  for(i = 0; i < q->nTerms; ++i){
    free(q->pTerms[i].pTerm);
  }
  free(q->pTerms);
}

/*
** Check to see if the string zToken[0...nToken-1] matches any
** column name in the virtual table.   If it does,
** return the zero-indexed column number.  If not, return -1.
*/
static int checkColumnSpecifier(
  fulltext_vtab *pVtab,    /* The virtual table */







<
<
<
<
<
<
<
<
<
<







2088
2089
2090
2091
2092
2093
2094










2095
2096
2097
2098
2099
2100
2101
  t->nTerm = nTerm;
  t->isOr = q->nextIsOr;
  q->nextIsOr = 0;
  t->iColumn = q->nextColumn;
  q->nextColumn = q->dfltColumn;
}











/*
** Check to see if the string zToken[0...nToken-1] matches any
** column name in the virtual table.   If it does,
** return the zero-indexed column number.  If not, return -1.
*/
static int checkColumnSpecifier(
  fulltext_vtab *pVtab,    /* The virtual table */
2198
2199
2200
2201
2202
2203
2204
2205

2206
2207
2208
2209
2210
2211

2212
2213
2214
2215
2216

2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
** they are allowed to match against any column.
*/
static int fulltextQuery(
  fulltext_vtab *v,      /* The full text index */
  int iColumn,           /* Match against this column by default */
  const char *zInput,    /* The query string */
  int nInput,            /* Number of bytes in zInput[] */
  DocList **pResult      /* Write the result doclist here */

){
  Query q;
  int i, rc;
  DocList *pLeft = NULL;
  DocList *pRight, *pNew;
  int nNot = 0;


  rc = parseQuery(v, zInput, nInput, iColumn, &q);
  if( rc!=SQLITE_OK ) return rc;

  /* Merge AND terms. */

  for(i = 0 ; i < q.nTerms; i += q.pTerms[i].nPhrase + 1){

    if( q.pTerms[i].isNot ){
      /* Handle all NOT terms in a separate pass */
      nNot++;
      continue;
    }

    rc = docListOfTerm(v, q.pTerms[i].iColumn, &q.pTerms[i], &pRight);
    if( rc ){
      queryClear(&q);
      return rc;
    }
    if( pLeft==0 ){
      pLeft = pRight;
    }else{
      pNew = docListNew(DL_DOCIDS);
      if( q.pTerms[i].isOr ){
        docListOrMerge(pLeft, pRight, pNew);
      }else{
        docListAndMerge(pLeft, pRight, pNew);
      }
      docListDelete(pRight);
      docListDelete(pLeft);
      pLeft = pNew;
    }
  }

  if( nNot && pLeft==0 ){
    /* We do not yet know how to handle a query of only NOT terms */
    return SQLITE_ERROR;
  }

  /* Do the EXCEPT terms */
  for(i=0; i<q.nTerms;  i += q.pTerms[i].nPhrase + 1){
    if( !q.pTerms[i].isNot ) continue;
    rc = docListOfTerm(v, q.pTerms[i].iColumn, &q.pTerms[i], &pRight);
    if( rc ){
      queryClear(&q);
      docListDelete(pLeft);
      return rc;
    }
    pNew = docListNew(DL_DOCIDS);
    docListExceptMerge(pLeft, pRight, pNew);
    docListDelete(pRight);
    docListDelete(pLeft);
    pLeft = pNew;
  }

  queryClear(&q);
  *pResult = pLeft;
  return rc;
}

/*
** This is the xFilter interface for the virtual table.  See
** the virtual table xFilter method documentation for additional







|
>

<




>

|



>
|

|





|

|






|
















|
|
|

|










<







2221
2222
2223
2224
2225
2226
2227
2228
2229
2230

2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290

2291
2292
2293
2294
2295
2296
2297
** they are allowed to match against any column.
*/
static int fulltextQuery(
  fulltext_vtab *v,      /* The full text index */
  int iColumn,           /* Match against this column by default */
  const char *zInput,    /* The query string */
  int nInput,            /* Number of bytes in zInput[] */
  DocList **pResult,     /* Write the result doclist here */
  Query *pQuery          /* Put parsed query string here */
){

  int i, rc;
  DocList *pLeft = NULL;
  DocList *pRight, *pNew;
  int nNot = 0;
  QueryTerm *aTerm;

  rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
  if( rc!=SQLITE_OK ) return rc;

  /* Merge AND terms. */
  aTerm = pQuery->pTerms;
  for(i = 0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){

    if( aTerm[i].isNot ){
      /* Handle all NOT terms in a separate pass */
      nNot++;
      continue;
    }

    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
    if( rc ){
      queryClear(pQuery);
      return rc;
    }
    if( pLeft==0 ){
      pLeft = pRight;
    }else{
      pNew = docListNew(DL_DOCIDS);
      if( aTerm[i].isOr ){
        docListOrMerge(pLeft, pRight, pNew);
      }else{
        docListAndMerge(pLeft, pRight, pNew);
      }
      docListDelete(pRight);
      docListDelete(pLeft);
      pLeft = pNew;
    }
  }

  if( nNot && pLeft==0 ){
    /* We do not yet know how to handle a query of only NOT terms */
    return SQLITE_ERROR;
  }

  /* Do the EXCEPT terms */
  for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){
    if( !aTerm[i].isNot ) continue;
    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
    if( rc ){
      queryClear(pQuery);
      docListDelete(pLeft);
      return rc;
    }
    pNew = docListNew(DL_DOCIDS);
    docListExceptMerge(pLeft, pRight, pNew);
    docListDelete(pRight);
    docListDelete(pLeft);
    pLeft = pNew;
  }


  *pResult = pLeft;
  return rc;
}

/*
** This is the xFilter interface for the virtual table.  See
** the virtual table xFilter method documentation for additional
2314
2315
2316
2317
2318
2319
2320

2321
2322
2323
2324
2325
2326
2327
2328

    default:   /* full-text search */
    {
      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
      DocList *pResult;
      assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
      assert( argc==1 );

      rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult);
      if( rc!=SQLITE_OK ) goto out;
      readerInit(&c->result, pResult);
      break;
    }
  }

  rc = fulltextNext(pCursor);







>
|







2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353

    default:   /* full-text search */
    {
      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
      DocList *pResult;
      assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
      assert( argc==1 );
      queryClear(&c->q);
      rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q);
      if( rc!=SQLITE_OK ) goto out;
      readerInit(&c->result, pResult);
      break;
    }
  }

  rc = fulltextNext(pCursor);