/ Check-in [b0d8e0d3]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:A first implementation of a full-text search module for SQLite. (CVS 3363)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: b0d8e0d314d6f77b7d4b5dd00c694a1323f7a8e4
User & Date: adamd 2006-08-23 23:58:50
Context
2006-08-24
02:42
Tcl interface does filename translation prior to calling sqlite3_open(). Ticket #1937. (CVS 3364) check-in: 5696e0cb user: drh tags: trunk
2006-08-23
23:58
A first implementation of a full-text search module for SQLite. (CVS 3363) check-in: b0d8e0d3 user: adamd tags: trunk
20:07
Add the new experimental sqlite3_auto_extension() API. (CVS 3362) check-in: a85fc877 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Added ext/fts1/ft_hash.c.

            1  +/*
            2  +** 2001 September 22
            3  +**
            4  +** The author disclaims copyright to this source code.  In place of
            5  +** a legal notice, here is a blessing:
            6  +**
            7  +**    May you do good and not evil.
            8  +**    May you find forgiveness for yourself and forgive others.
            9  +**    May you share freely, never taking more than you give.
           10  +**
           11  +*************************************************************************
           12  +** This is the implementation of generic hash-tables used in SQLite.
           13  +** We've modified it slightly to serve as a standalone hash table
           14  +** implementation for the full-text indexing module.
           15  +*/
           16  +#include <assert.h>
           17  +#include <stdlib.h>
           18  +#include <string.h>
           19  +
           20  +#include "ft_hash.h"
           21  +
           22  +void *malloc_and_zero(int n){
           23  +  void *p = malloc(n);
           24  +  if( p ){
           25  +    memset(p, 0, n);
           26  +  }
           27  +  return p;
           28  +}
           29  +
           30  +/* Turn bulk memory into a hash table object by initializing the
           31  +** fields of the Hash structure.
           32  +**
           33  +** "pNew" is a pointer to the hash table that is to be initialized.
           34  +** keyClass is one of the constants HASH_INT, HASH_POINTER,
           35  +** HASH_BINARY, or HASH_STRING.  The value of keyClass 
           36  +** determines what kind of key the hash table will use.  "copyKey" is
           37  +** true if the hash table should make its own private copy of keys and
           38  +** false if it should just use the supplied pointer.  CopyKey only makes
           39  +** sense for HASH_STRING and HASH_BINARY and is ignored
           40  +** for other key classes.
           41  +*/
           42  +void HashInit(Hash *pNew, int keyClass, int copyKey){
           43  +  assert( pNew!=0 );
           44  +  assert( keyClass>=HASH_STRING && keyClass<=HASH_BINARY );
           45  +  pNew->keyClass = keyClass;
           46  +#if 0
           47  +  if( keyClass==HASH_POINTER || keyClass==HASH_INT ) copyKey = 0;
           48  +#endif
           49  +  pNew->copyKey = copyKey;
           50  +  pNew->first = 0;
           51  +  pNew->count = 0;
           52  +  pNew->htsize = 0;
           53  +  pNew->ht = 0;
           54  +  pNew->xMalloc = malloc_and_zero;
           55  +  pNew->xFree = free;
           56  +}
           57  +
           58  +/* Remove all entries from a hash table.  Reclaim all memory.
           59  +** Call this routine to delete a hash table or to reset a hash table
           60  +** to the empty state.
           61  +*/
           62  +void HashClear(Hash *pH){
           63  +  HashElem *elem;         /* For looping over all elements of the table */
           64  +
           65  +  assert( pH!=0 );
           66  +  elem = pH->first;
           67  +  pH->first = 0;
           68  +  if( pH->ht ) pH->xFree(pH->ht);
           69  +  pH->ht = 0;
           70  +  pH->htsize = 0;
           71  +  while( elem ){
           72  +    HashElem *next_elem = elem->next;
           73  +    if( pH->copyKey && elem->pKey ){
           74  +      pH->xFree(elem->pKey);
           75  +    }
           76  +    pH->xFree(elem);
           77  +    elem = next_elem;
           78  +  }
           79  +  pH->count = 0;
           80  +}
           81  +
           82  +#if 0 /* NOT USED */
           83  +/*
           84  +** Hash and comparison functions when the mode is HASH_INT
           85  +*/
           86  +static int intHash(const void *pKey, int nKey){
           87  +  return nKey ^ (nKey<<8) ^ (nKey>>8);
           88  +}
           89  +static int intCompare(const void *pKey1, int n1, const void *pKey2, int n2){
           90  +  return n2 - n1;
           91  +}
           92  +#endif
           93  +
           94  +#if 0 /* NOT USED */
           95  +/*
           96  +** Hash and comparison functions when the mode is HASH_POINTER
           97  +*/
           98  +static int ptrHash(const void *pKey, int nKey){
           99  +  uptr x = Addr(pKey);
          100  +  return x ^ (x<<8) ^ (x>>8);
          101  +}
          102  +static int ptrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
          103  +  if( pKey1==pKey2 ) return 0;
          104  +  if( pKey1<pKey2 ) return -1;
          105  +  return 1;
          106  +}
          107  +#endif
          108  +
          109  +/*
          110  +** Hash and comparison functions when the mode is HASH_STRING
          111  +*/
          112  +static int strHash(const void *pKey, int nKey){
          113  +  const char *z = (const char *)pKey;
          114  +  int h = 0;
          115  +  if( nKey<=0 ) nKey = (int) strlen(z);
          116  +  while( nKey > 0  ){
          117  +    h = (h<<3) ^ h ^ *z++;
          118  +    nKey--;
          119  +  }
          120  +  return h & 0x7fffffff;
          121  +}
          122  +static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
          123  +  if( n1!=n2 ) return 1;
          124  +  return strncmp((const char*)pKey1,(const char*)pKey2,n1);
          125  +}
          126  +
          127  +/*
          128  +** Hash and comparison functions when the mode is HASH_BINARY
          129  +*/
          130  +static int binHash(const void *pKey, int nKey){
          131  +  int h = 0;
          132  +  const char *z = (const char *)pKey;
          133  +  while( nKey-- > 0 ){
          134  +    h = (h<<3) ^ h ^ *(z++);
          135  +  }
          136  +  return h & 0x7fffffff;
          137  +}
          138  +static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
          139  +  if( n1!=n2 ) return 1;
          140  +  return memcmp(pKey1,pKey2,n1);
          141  +}
          142  +
          143  +/*
          144  +** Return a pointer to the appropriate hash function given the key class.
          145  +**
          146  +** The C syntax in this function definition may be unfamilar to some 
          147  +** programmers, so we provide the following additional explanation:
          148  +**
          149  +** The name of the function is "hashFunction".  The function takes a
          150  +** single parameter "keyClass".  The return value of hashFunction()
          151  +** is a pointer to another function.  Specifically, the return value
          152  +** of hashFunction() is a pointer to a function that takes two parameters
          153  +** with types "const void*" and "int" and returns an "int".
          154  +*/
          155  +static int (*hashFunction(int keyClass))(const void*,int){
          156  +#if 0  /* HASH_INT and HASH_POINTER are never used */
          157  +  switch( keyClass ){
          158  +    case HASH_INT:     return &intHash;
          159  +    case HASH_POINTER: return &ptrHash;
          160  +    case HASH_STRING:  return &strHash;
          161  +    case HASH_BINARY:  return &binHash;;
          162  +    default: break;
          163  +  }
          164  +  return 0;
          165  +#else
          166  +  if( keyClass==HASH_STRING ){
          167  +    return &strHash;
          168  +  }else{
          169  +    assert( keyClass==HASH_BINARY );
          170  +    return &binHash;
          171  +  }
          172  +#endif
          173  +}
          174  +
          175  +/*
          176  +** Return a pointer to the appropriate hash function given the key class.
          177  +**
          178  +** For help in interpreted the obscure C code in the function definition,
          179  +** see the header comment on the previous function.
          180  +*/
          181  +static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
          182  +#if 0 /* HASH_INT and HASH_POINTER are never used */
          183  +  switch( keyClass ){
          184  +    case HASH_INT:     return &intCompare;
          185  +    case HASH_POINTER: return &ptrCompare;
          186  +    case HASH_STRING:  return &strCompare;
          187  +    case HASH_BINARY:  return &binCompare;
          188  +    default: break;
          189  +  }
          190  +  return 0;
          191  +#else
          192  +  if( keyClass==HASH_STRING ){
          193  +    return &strCompare;
          194  +  }else{
          195  +    assert( keyClass==HASH_BINARY );
          196  +    return &binCompare;
          197  +  }
          198  +#endif
          199  +}
          200  +
          201  +/* Link an element into the hash table
          202  +*/
          203  +static void insertElement(
          204  +  Hash *pH,              /* The complete hash table */
          205  +  struct _ht *pEntry,    /* The entry into which pNew is inserted */
          206  +  HashElem *pNew         /* The element to be inserted */
          207  +){
          208  +  HashElem *pHead;       /* First element already in pEntry */
          209  +  pHead = pEntry->chain;
          210  +  if( pHead ){
          211  +    pNew->next = pHead;
          212  +    pNew->prev = pHead->prev;
          213  +    if( pHead->prev ){ pHead->prev->next = pNew; }
          214  +    else             { pH->first = pNew; }
          215  +    pHead->prev = pNew;
          216  +  }else{
          217  +    pNew->next = pH->first;
          218  +    if( pH->first ){ pH->first->prev = pNew; }
          219  +    pNew->prev = 0;
          220  +    pH->first = pNew;
          221  +  }
          222  +  pEntry->count++;
          223  +  pEntry->chain = pNew;
          224  +}
          225  +
          226  +
          227  +/* Resize the hash table so that it cantains "new_size" buckets.
          228  +** "new_size" must be a power of 2.  The hash table might fail 
          229  +** to resize if sqliteMalloc() fails.
          230  +*/
          231  +static void rehash(Hash *pH, int new_size){
          232  +  struct _ht *new_ht;            /* The new hash table */
          233  +  HashElem *elem, *next_elem;    /* For looping over existing elements */
          234  +  int (*xHash)(const void*,int); /* The hash function */
          235  +
          236  +  assert( (new_size & (new_size-1))==0 );
          237  +  new_ht = (struct _ht *)pH->xMalloc( new_size*sizeof(struct _ht) );
          238  +  if( new_ht==0 ) return;
          239  +  if( pH->ht ) pH->xFree(pH->ht);
          240  +  pH->ht = new_ht;
          241  +  pH->htsize = new_size;
          242  +  xHash = hashFunction(pH->keyClass);
          243  +  for(elem=pH->first, pH->first=0; elem; elem = next_elem){
          244  +    int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
          245  +    next_elem = elem->next;
          246  +    insertElement(pH, &new_ht[h], elem);
          247  +  }
          248  +}
          249  +
          250  +/* This function (for internal use only) locates an element in an
          251  +** hash table that matches the given key.  The hash for this key has
          252  +** already been computed and is passed as the 4th parameter.
          253  +*/
          254  +static HashElem *findElementGivenHash(
          255  +  const Hash *pH,     /* The pH to be searched */
          256  +  const void *pKey,   /* The key we are searching for */
          257  +  int nKey,
          258  +  int h               /* The hash for this key. */
          259  +){
          260  +  HashElem *elem;                /* Used to loop thru the element list */
          261  +  int count;                     /* Number of elements left to test */
          262  +  int (*xCompare)(const void*,int,const void*,int);  /* comparison function */
          263  +
          264  +  if( pH->ht ){
          265  +    struct _ht *pEntry = &pH->ht[h];
          266  +    elem = pEntry->chain;
          267  +    count = pEntry->count;
          268  +    xCompare = compareFunction(pH->keyClass);
          269  +    while( count-- && elem ){
          270  +      if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ 
          271  +        return elem;
          272  +      }
          273  +      elem = elem->next;
          274  +    }
          275  +  }
          276  +  return 0;
          277  +}
          278  +
          279  +/* Remove a single entry from the hash table given a pointer to that
          280  +** element and a hash on the element's key.
          281  +*/
          282  +static void removeElementGivenHash(
          283  +  Hash *pH,         /* The pH containing "elem" */
          284  +  HashElem* elem,   /* The element to be removed from the pH */
          285  +  int h             /* Hash value for the element */
          286  +){
          287  +  struct _ht *pEntry;
          288  +  if( elem->prev ){
          289  +    elem->prev->next = elem->next; 
          290  +  }else{
          291  +    pH->first = elem->next;
          292  +  }
          293  +  if( elem->next ){
          294  +    elem->next->prev = elem->prev;
          295  +  }
          296  +  pEntry = &pH->ht[h];
          297  +  if( pEntry->chain==elem ){
          298  +    pEntry->chain = elem->next;
          299  +  }
          300  +  pEntry->count--;
          301  +  if( pEntry->count<=0 ){
          302  +    pEntry->chain = 0;
          303  +  }
          304  +  if( pH->copyKey && elem->pKey ){
          305  +    pH->xFree(elem->pKey);
          306  +  }
          307  +  pH->xFree( elem );
          308  +  pH->count--;
          309  +  if( pH->count<=0 ){
          310  +    assert( pH->first==0 );
          311  +    assert( pH->count==0 );
          312  +    HashClear(pH);
          313  +  }
          314  +}
          315  +
          316  +/* Attempt to locate an element of the hash table pH with a key
          317  +** that matches pKey,nKey.  Return the data for this element if it is
          318  +** found, or NULL if there is no match.
          319  +*/
          320  +void *HashFind(const Hash *pH, const void *pKey, int nKey){
          321  +  int h;             /* A hash on key */
          322  +  HashElem *elem;    /* The element that matches key */
          323  +  int (*xHash)(const void*,int);  /* The hash function */
          324  +
          325  +  if( pH==0 || pH->ht==0 ) return 0;
          326  +  xHash = hashFunction(pH->keyClass);
          327  +  assert( xHash!=0 );
          328  +  h = (*xHash)(pKey,nKey);
          329  +  assert( (pH->htsize & (pH->htsize-1))==0 );
          330  +  elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
          331  +  return elem ? elem->data : 0;
          332  +}
          333  +
          334  +/* Insert an element into the hash table pH.  The key is pKey,nKey
          335  +** and the data is "data".
          336  +**
          337  +** If no element exists with a matching key, then a new
          338  +** element is created.  A copy of the key is made if the copyKey
          339  +** flag is set.  NULL is returned.
          340  +**
          341  +** If another element already exists with the same key, then the
          342  +** new data replaces the old data and the old data is returned.
          343  +** The key is not copied in this instance.  If a malloc fails, then
          344  +** the new data is returned and the hash table is unchanged.
          345  +**
          346  +** If the "data" parameter to this function is NULL, then the
          347  +** element corresponding to "key" is removed from the hash table.
          348  +*/
          349  +void *HashInsert(Hash *pH, const void *pKey, int nKey, void *data){
          350  +  int hraw;             /* Raw hash value of the key */
          351  +  int h;                /* the hash of the key modulo hash table size */
          352  +  HashElem *elem;       /* Used to loop thru the element list */
          353  +  HashElem *new_elem;   /* New element added to the pH */
          354  +  int (*xHash)(const void*,int);  /* The hash function */
          355  +
          356  +  assert( pH!=0 );
          357  +  xHash = hashFunction(pH->keyClass);
          358  +  assert( xHash!=0 );
          359  +  hraw = (*xHash)(pKey, nKey);
          360  +  assert( (pH->htsize & (pH->htsize-1))==0 );
          361  +  h = hraw & (pH->htsize-1);
          362  +  elem = findElementGivenHash(pH,pKey,nKey,h);
          363  +  if( elem ){
          364  +    void *old_data = elem->data;
          365  +    if( data==0 ){
          366  +      removeElementGivenHash(pH,elem,h);
          367  +    }else{
          368  +      elem->data = data;
          369  +    }
          370  +    return old_data;
          371  +  }
          372  +  if( data==0 ) return 0;
          373  +  new_elem = (HashElem*)pH->xMalloc( sizeof(HashElem) );
          374  +  if( new_elem==0 ) return data;
          375  +  if( pH->copyKey && pKey!=0 ){
          376  +    new_elem->pKey = pH->xMalloc( nKey );
          377  +    if( new_elem->pKey==0 ){
          378  +      pH->xFree(new_elem);
          379  +      return data;
          380  +    }
          381  +    memcpy((void*)new_elem->pKey, pKey, nKey);
          382  +  }else{
          383  +    new_elem->pKey = (void*)pKey;
          384  +  }
          385  +  new_elem->nKey = nKey;
          386  +  pH->count++;
          387  +  if( pH->htsize==0 ){
          388  +    rehash(pH,8);
          389  +    if( pH->htsize==0 ){
          390  +      pH->count = 0;
          391  +      pH->xFree(new_elem);
          392  +      return data;
          393  +    }
          394  +  }
          395  +  if( pH->count > pH->htsize ){
          396  +    rehash(pH,pH->htsize*2);
          397  +  }
          398  +  assert( pH->htsize>0 );
          399  +  assert( (pH->htsize & (pH->htsize-1))==0 );
          400  +  h = hraw & (pH->htsize-1);
          401  +  insertElement(pH, &pH->ht[h], new_elem);
          402  +  new_elem->data = data;
          403  +  return 0;
          404  +}

Added ext/fts1/ft_hash.h.

            1  +/*
            2  +** 2001 September 22
            3  +**
            4  +** The author disclaims copyright to this source code.  In place of
            5  +** a legal notice, here is a blessing:
            6  +**
            7  +**    May you do good and not evil.
            8  +**    May you find forgiveness for yourself and forgive others.
            9  +**    May you share freely, never taking more than you give.
           10  +**
           11  +*************************************************************************
           12  +** This is the header file for the generic hash-table implemenation
           13  +** used in SQLite.  We've modified it slightly to serve as a standalone
           14  +** hash table implementation for the full-text indexing module.
           15  +**
           16  +*/
           17  +#ifndef _HASH_H_
           18  +#define _HASH_H_
           19  +
           20  +/* Forward declarations of structures. */
           21  +typedef struct Hash Hash;
           22  +typedef struct HashElem HashElem;
           23  +
           24  +/* A complete hash table is an instance of the following structure.
           25  +** The internals of this structure are intended to be opaque -- client
           26  +** code should not attempt to access or modify the fields of this structure
           27  +** directly.  Change this structure only by using the routines below.
           28  +** However, many of the "procedures" and "functions" for modifying and
           29  +** accessing this structure are really macros, so we can't really make
           30  +** this structure opaque.
           31  +*/
           32  +struct Hash {
           33  +  char keyClass;          /* HASH_INT, _POINTER, _STRING, _BINARY */
           34  +  char copyKey;           /* True if copy of key made on insert */
           35  +  int count;              /* Number of entries in this table */
           36  +  HashElem *first;        /* The first element of the array */
           37  +  void *(*xMalloc)(int);  /* malloc() function to use */
           38  +  void (*xFree)(void *);  /* free() function to use */
           39  +  int htsize;             /* Number of buckets in the hash table */
           40  +  struct _ht {            /* the hash table */
           41  +    int count;               /* Number of entries with this hash */
           42  +    HashElem *chain;         /* Pointer to first entry with this hash */
           43  +  } *ht;
           44  +};
           45  +
           46  +/* Each element in the hash table is an instance of the following 
           47  +** structure.  All elements are stored on a single doubly-linked list.
           48  +**
           49  +** Again, this structure is intended to be opaque, but it can't really
           50  +** be opaque because it is used by macros.
           51  +*/
           52  +struct HashElem {
           53  +  HashElem *next, *prev;   /* Next and previous elements in the table */
           54  +  void *data;              /* Data associated with this element */
           55  +  void *pKey; int nKey;    /* Key associated with this element */
           56  +};
           57  +
           58  +/*
           59  +** There are 4 different modes of operation for a hash table:
           60  +**
           61  +**   HASH_INT         nKey is used as the key and pKey is ignored.
           62  +**
           63  +**   HASH_POINTER     pKey is used as the key and nKey is ignored.
           64  +**
           65  +**   HASH_STRING      pKey points to a string that is nKey bytes long
           66  +**                           (including the null-terminator, if any).  Case
           67  +**                           is respected in comparisons.
           68  +**
           69  +**   HASH_BINARY      pKey points to binary data nKey bytes long. 
           70  +**                           memcmp() is used to compare keys.
           71  +**
           72  +** A copy of the key is made for HASH_STRING and HASH_BINARY
           73  +** if the copyKey parameter to HashInit is 1.  
           74  +*/
           75  +/* #define HASH_INT       1 // NOT USED */
           76  +/* #define HASH_POINTER   2 // NOT USED */
           77  +#define HASH_STRING    3
           78  +#define HASH_BINARY    4
           79  +
           80  +/*
           81  +** Access routines.  To delete, insert a NULL pointer.
           82  +*/
           83  +void HashInit(Hash*, int keytype, int copyKey);
           84  +void *HashInsert(Hash*, const void *pKey, int nKey, void *pData);
           85  +void *HashFind(const Hash*, const void *pKey, int nKey);
           86  +void HashClear(Hash*);
           87  +
           88  +/*
           89  +** Macros for looping over all elements of a hash table.  The idiom is
           90  +** like this:
           91  +**
           92  +**   Hash h;
           93  +**   HashElem *p;
           94  +**   ...
           95  +**   for(p=HashFirst(&h); p; p=HashNext(p)){
           96  +**     SomeStructure *pData = HashData(p);
           97  +**     // do something with pData
           98  +**   }
           99  +*/
          100  +#define HashFirst(H)  ((H)->first)
          101  +#define HashNext(E)   ((E)->next)
          102  +#define HashData(E)   ((E)->data)
          103  +#define HashKey(E)    ((E)->pKey)
          104  +#define HashKeysize(E) ((E)->nKey)
          105  +
          106  +/*
          107  +** Number of entries in a hash table
          108  +*/
          109  +#define HashCount(H)  ((H)->count)
          110  +
          111  +#endif /* _HASH_H_ */

Added ext/fts1/fulltext.c.

            1  +/* The author disclaims copyright to this source code.
            2  + *
            3  + * This is an SQLite module implementing full-text search.
            4  + */
            5  +
            6  +#include <assert.h>
            7  +#if !defined(__APPLE__)
            8  +#include <malloc.h>
            9  +#else
           10  +#include <stdlib.h>
           11  +#endif
           12  +#include <stdio.h>
           13  +#include <string.h>
           14  +#include <ctype.h>
           15  +
           16  +#include "fulltext.h"
           17  +#include "ft_hash.h"
           18  +#include "tokenizer.h"
           19  +#include "sqlite3.h"
           20  +#include "sqlite3ext.h"
           21  +SQLITE_EXTENSION_INIT1
           22  +
           23  +/* utility functions */
           24  +
           25  +/* We encode variable-length integers in little-endian order using seven bits
           26  + * per byte as follows:
           27  +**
           28  +** KEY:
           29  +**         A = 0xxxxxxx    7 bits of data and one flag bit
           30  +**         B = 1xxxxxxx    7 bits of data and one flag bit
           31  +**
           32  +**  7 bits - A
           33  +** 14 bits - BA
           34  +** 21 bits - BBA
           35  +** and so on.
           36  +*/
           37  +
           38  +/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
           39  +#define VARINT_MAX 10
           40  +
           41  +/* Write a 64-bit variable-length integer to memory starting at p[0].
           42  + * The length of data written will be between 1 and VARINT_MAX bytes.
           43  + * The number of bytes written is returned. */
           44  +int putVarint(char *p, sqlite_int64 v){
           45  +  unsigned char *q = (unsigned char *) p;
           46  +  sqlite_uint64 vu = v;
           47  +  do{
           48  +    *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
           49  +    vu >>= 7;
           50  +  }while( vu!=0 );
           51  +  q[-1] &= 0x7f;  /* turn off high bit in final byte */
           52  +  assert( q - (unsigned char *)p <= VARINT_MAX );
           53  +  return (int) (q - (unsigned char *)p);
           54  +}
           55  +
           56  +/* Read a 64-bit variable-length integer from memory starting at p[0].
           57  + * Return the number of bytes read, or 0 on error.
           58  + * The value is stored in *v. */
           59  +int getVarint(const char *p, sqlite_int64 *v){
           60  +  const unsigned char *q = (const unsigned char *) p;
           61  +  sqlite_uint64 x = 0, y = 1;
           62  +  while( (*q & 0x80) == 0x80 ){
           63  +    x += y * (*q++ & 0x7f);
           64  +    y <<= 7;
           65  +    if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
           66  +      assert( 0 );
           67  +      return 0;
           68  +    }
           69  +  }
           70  +  x += y * (*q++);
           71  +  *v = (sqlite_int64) x;
           72  +  return (int) (q - (unsigned char *)p);
           73  +}
           74  +
           75  +int getVarint32(const char *p, int *pi){
           76  + sqlite_int64 i;
           77  + int ret = getVarint(p, &i);
           78  + *pi = (int) i;
           79  + assert( *pi==i );
           80  + return ret;
           81  +}
           82  +
           83  +/*** Document lists ***
           84  + *
           85  + * A document list holds a sorted list of varint-encoded document IDs.
           86  + *
           87  + * A doclist with type DL_POSITIONS_OFFSETS is stored like this:
           88  + *
           89  + * array {
           90  + *   varint docid;
           91  + *   array {
           92  + *     varint position;     (delta from previous position plus 1, or 0 for end)
           93  + *     varint startOffset;  (delta from previous startOffset)
           94  + *     varint endOffset;    (delta from startOffset)
           95  + *   }
           96  + * }
           97  + *
           98  + * Here, array { X } means zero or more occurrences of X, adjacent in memory.
           99  + *
          100  + * A doclist with type DL_POSITIONS is like the above, but holds only docids
          101  + * and positions without offset information.
          102  + *
          103  + * A doclist with type DL_DOCIDS is like the above, but holds only docids
          104  + * without positions or offset information.
          105  + *
          106  + * On disk, every document list has positions and offsets, so we don't bother
          107  + * to serialize a doclist's type.
          108  + * 
          109  + * We don't yet delta-encode document IDs; doing so will probably be a
          110  + * modest win.
          111  + *
          112  + * NOTE(shess) I've thought of a slightly (1%) better offset encoding.
          113  + * After the first offset, estimate the next offset by using the
          114  + * current token position and the previous token position and offset,
          115  + * offset to handle some variance.  So the estimate would be
          116  + * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded
          117  + * as normal.  Offsets more than 64 chars from the estimate are
          118  + * encoded as the delta to the previous start offset + 128.  An
          119  + * additional tiny increment can be gained by using the end offset of
          120  + * the previous token to make the estimate a tiny bit more precise.
          121  +*/
          122  +
          123  +typedef enum DocListType {
          124  +  DL_DOCIDS,              /* docids only */
          125  +  DL_POSITIONS,           /* docids + positions */
          126  +  DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
          127  +} DocListType;
          128  +
          129  +typedef struct DocList {
          130  +  char *pData;
          131  +  int nData;
          132  +  DocListType iType;
          133  +  int iLastPos;       /* the last position written */
          134  +  int iLastOffset;    /* the last start offset written */
          135  +} DocList;
          136  +
          137  +/* Initialize a new DocList to hold the given data. */
          138  +static void docListInit(DocList *d, DocListType iType,
          139  +                        const char *pData, int nData){
          140  +  d->nData = nData;
          141  +  if( nData>0 ){
          142  +    d->pData = malloc(nData);
          143  +    memcpy(d->pData, pData, nData);
          144  +  } else {
          145  +    d->pData = NULL;
          146  +  }
          147  +  d->iType = iType;
          148  +  d->iLastPos = 0;
          149  +  d->iLastOffset = 0;
          150  +}
          151  +
          152  +/* Create a new dynamically-allocated DocList. */
          153  +static DocList *docListNew(DocListType iType){
          154  +  DocList *d = (DocList *) malloc(sizeof(DocList));
          155  +  docListInit(d, iType, 0, 0);
          156  +  return d;
          157  +}
          158  +
          159  +static void docListDestroy(DocList *d){
          160  +  free(d->pData);
          161  +#ifndef NDEBUG
          162  +  memset(d, 0x55, sizeof(*d));
          163  +#endif
          164  +}
          165  +
          166  +static void docListDelete(DocList *d){
          167  +  docListDestroy(d);
          168  +  free(d);
          169  +}
          170  +
          171  +static char *docListEnd(DocList *d){
          172  +  return d->pData + d->nData;
          173  +}
          174  +
          175  +/* Append a varint to a DocList's data. */
          176  +static void appendVarint(DocList *d, sqlite_int64 i){
          177  +  char c[VARINT_MAX];
          178  +  int n = putVarint(c, i);
          179  +  d->pData = realloc(d->pData, d->nData + n);
          180  +  memcpy(d->pData + d->nData, c, n);
          181  +  d->nData += n;
          182  +}
          183  +
          184  +static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
          185  +  appendVarint(d, iDocid);
          186  +  d->iLastPos = 0;
          187  +}
          188  +
          189  +/* Add a position to the last position list in a doclist. */
          190  +static void docListAddPos(DocList *d, int iPos){
          191  +  assert( d->iType>=DL_POSITIONS );
          192  +  appendVarint(d, iPos-d->iLastPos+1);
          193  +  d->iLastPos = iPos;
          194  +}
          195  +
          196  +static void docListAddPosOffset(DocList *d, int iPos,
          197  +                                int iStartOffset, int iEndOffset){
          198  +  assert( d->iType==DL_POSITIONS_OFFSETS );
          199  +  docListAddPos(d, iPos);
          200  +  appendVarint(d, iStartOffset-d->iLastOffset);
          201  +  d->iLastOffset = iStartOffset;
          202  +  appendVarint(d, iEndOffset-iStartOffset);
          203  +}
          204  +
          205  +/* Terminate the last position list in the given doclist. */
          206  +static void docListAddEndPos(DocList *d){
          207  +  appendVarint(d, 0);
          208  +}
          209  +
          210  +typedef struct DocListReader {
          211  +  DocList *pDoclist;
          212  +  char *p;
          213  +  int iLastPos;    /* the last position read */
          214  +} DocListReader;
          215  +
          216  +static void readerInit(DocListReader *r, DocList *pDoclist){
          217  +  r->pDoclist = pDoclist;
          218  +  if( pDoclist!=NULL ){
          219  +    r->p = pDoclist->pData;
          220  +  }
          221  +  r->iLastPos = 0;
          222  +}
          223  +
          224  +static int readerAtEnd(DocListReader *pReader){
          225  +  return pReader->p >= docListEnd(pReader->pDoclist);
          226  +}
          227  +
          228  +/* Peek at the next docid without advancing the read pointer. */
          229  +static sqlite_int64 peekDocid(DocListReader *pReader){
          230  +  sqlite_int64 ret;
          231  +  assert( !readerAtEnd(pReader) );
          232  +  getVarint(pReader->p, &ret);
          233  +  return ret;
          234  +}
          235  +
          236  +/* Read the next docid. */
          237  +static sqlite_int64 readDocid(DocListReader *pReader){
          238  +  sqlite_int64 ret;
          239  +  assert( !readerAtEnd(pReader) );
          240  +  pReader->p += getVarint(pReader->p, &ret);
          241  +  pReader->iLastPos = 0;
          242  +  return ret;
          243  +}
          244  +
          245  +/* Read the next position from a position list.
          246  + * Returns the position, or -1 at the end of the list. */
          247  +static int readPosition(DocListReader *pReader){
          248  +  int i;
          249  +  int iType = pReader->pDoclist->iType;
          250  +  assert( iType>=DL_POSITIONS );
          251  +  assert( !readerAtEnd(pReader) );
          252  +
          253  +  pReader->p += getVarint32(pReader->p, &i);
          254  +  if( i==0 ){
          255  +    pReader->iLastPos = -1;
          256  +    return -1;
          257  +  }
          258  +  pReader->iLastPos += ((int) i)-1;
          259  +  if( iType>=DL_POSITIONS_OFFSETS ){
          260  +    /* Skip over offsets, ignoring them for now. */
          261  +    int iStart, iEnd;
          262  +    pReader->p += getVarint32(pReader->p, &iStart);
          263  +    pReader->p += getVarint32(pReader->p, &iEnd);
          264  +  }
          265  +  return pReader->iLastPos;
          266  +}
          267  +
          268  +/* Skip past the end of a position list. */
          269  +static void skipPositionList(DocListReader *pReader){
          270  +  while( readPosition(pReader)!=-1 )
          271  +    ;
          272  +}
          273  +
          274  +/* Skip over a docid, including its position list if the doclist has
          275  + * positions. */
          276  +static void skipDocument(DocListReader *pReader){
          277  +  readDocid(pReader);
          278  +  if( pReader->pDoclist->iType >= DL_POSITIONS ){
          279  +    skipPositionList(pReader);
          280  +  }
          281  +}
          282  +
          283  +static sqlite_int64 firstDocid(DocList *d){
          284  +  DocListReader r;
          285  +  readerInit(&r, d);
          286  +  return readDocid(&r);
          287  +}
          288  +
          289  +/* Doclist multi-tool.  Pass pUpdate==NULL to delete the indicated docid;
          290  + * otherwise pUpdate, which must contain only the single docid [iDocid], is
          291  + * inserted (if not present) or updated (if already present). */
          292  +static int docListUpdate(DocList *d, sqlite_int64 iDocid, DocList *pUpdate){
          293  +  int modified = 0;
          294  +  DocListReader reader;
          295  +  char *p;
          296  +
          297  +  assert( d->iType==pUpdate->iType);
          298  +  if( pUpdate!=NULL ){
          299  +    assert( iDocid==firstDocid(pUpdate) );
          300  +  }
          301  +
          302  +  readerInit(&reader, d);
          303  +  while( !readerAtEnd(&reader) ){
          304  +    if( peekDocid(&reader) >= iDocid ) break;
          305  +    skipDocument(&reader);
          306  +  }
          307  +
          308  +  p = reader.p;
          309  +  /* Delete if there is a matching element. */
          310  +  if( !readerAtEnd(&reader) && iDocid==peekDocid(&reader) ){
          311  +    skipDocument(&reader);
          312  +    memmove(p, reader.p, docListEnd(d) - reader.p);
          313  +    d->nData -= (reader.p - p);
          314  +    modified = 1;
          315  +  }
          316  +
          317  +  /* Insert if indicated. */
          318  +  if( pUpdate!=NULL ){
          319  +    int iDoclist;
          320  +    int nNeed;
          321  +    docListAddEndPos(pUpdate);
          322  +    nNeed = pUpdate->nData;
          323  +
          324  +    iDoclist = p - d->pData;
          325  +    d->pData = realloc(d->pData, d->nData+nNeed);
          326  +    p = d->pData + iDoclist;
          327  +
          328  +    memmove(p+nNeed, p, docListEnd(d) - p);
          329  +    memcpy(p, pUpdate->pData, nNeed);
          330  +    p += nNeed;
          331  +    d->nData += nNeed;
          332  +    modified = 1;
          333  +  }
          334  +
          335  +  return modified;
          336  +}
          337  +
          338  +/* Split the second half of doclist d into a separate doclist d2.  Returns 1
          339  + * if successful, or 0 if d contains a single document and hence can't be
          340  + * split. */
          341  +static int docListSplit(DocList *d, DocList *d2){
          342  +  const char *pSplitPoint = d->pData + d->nData / 2;
          343  +  DocListReader reader;
          344  +
          345  +  readerInit(&reader, d);
          346  +  while( reader.p<pSplitPoint ){
          347  +    skipDocument(&reader);
          348  +  }
          349  +  if( readerAtEnd(&reader) ) return 0;
          350  +  docListInit(d2, d->iType, reader.p, docListEnd(d) - reader.p);
          351  +  d->nData = reader.p - d->pData;
          352  +  d->pData = realloc(d->pData, d->nData);
          353  +  return 1;
          354  +}
          355  +
          356  +/* A DocListMerge computes the AND of an in-memory DocList [in] and a chunked
          357  + * on-disk doclist, resulting in another in-memory DocList [out].  [in]
          358  + * and [out] may or may not store position information according to the
          359  + * caller's wishes.  The on-disk doclist always comes with positions.
          360  + *
          361  + * The caller must read each chunk of the on-disk doclist in succession and
          362  + * pass it to mergeBlock().
          363  + *
          364  + * If [in] has positions, then the merge output contains only documents with
          365  + * matching positions in the two input doclists.  If [in] does not have
          366  + * positions, then the merge output contains all documents common to the two
          367  + * input doclists.
          368  + *
          369  + * If [in] is NULL, then the on-disk doclist is copied to [out] directly.
          370  + *
          371  + * A merge is performed using an integer [iOffset] provided by the caller.
          372  + * [iOffset] is subtracted from each position in the on-disk doclist for the
          373  + * purpose of position comparison; this is helpful in implementing phrase
          374  + * searches.
          375  + *
          376  + * A DocListMerge is not yet able to propagate offsets through query
          377  + * processing; we should add that capability soon.
          378  +*/
          379  +typedef struct DocListMerge {
          380  +  DocListReader in;
          381  +  DocList *pOut;
          382  +  int iOffset;
          383  +} DocListMerge;
          384  +
          385  +static void mergeInit(DocListMerge *m,
          386  +                      DocList *pIn, int iOffset, DocList *pOut){
          387  +  readerInit(&m->in, pIn);
          388  +  m->pOut = pOut;
          389  +  m->iOffset = iOffset;
          390  +
          391  +  /* can't handle offsets yet */
          392  +  assert( pIn==NULL || pIn->iType <= DL_POSITIONS );
          393  +  assert( pOut->iType <= DL_POSITIONS );
          394  +}
          395  +
          396  +/* A helper function for mergeBlock(), below.  Merge the position lists
          397  + * pointed to by m->in and pBlockReader.
          398  + * If the merge matches, write [iDocid] to m->pOut; if m->pOut
          399  + * has positions then write all matching positions as well. */
          400  +static void mergePosList(DocListMerge *m, sqlite_int64 iDocid,
          401  +                  DocListReader *pBlockReader){
          402  +  int block_pos = readPosition(pBlockReader);
          403  +  int in_pos = readPosition(&m->in);
          404  +  int match = 0;
          405  +  while( block_pos!=-1 || in_pos!=-1 ){
          406  +    if( block_pos-m->iOffset==in_pos ){
          407  +      if( !match ){
          408  +        docListAddDocid(m->pOut, iDocid);
          409  +        match = 1;
          410  +      }
          411  +      if( m->pOut->iType >= DL_POSITIONS ){
          412  +        docListAddPos(m->pOut, in_pos);
          413  +      }
          414  +      block_pos = readPosition(pBlockReader);
          415  +      in_pos = readPosition(&m->in);
          416  +    } else if( in_pos==-1 || block_pos!=-1 && block_pos-m->iOffset<in_pos ){
          417  +      block_pos = readPosition(pBlockReader);
          418  +    } else {
          419  +      in_pos = readPosition(&m->in);
          420  +    }
          421  +  }
          422  +  if( m->pOut->iType >= DL_POSITIONS && match ){
          423  +    docListAddEndPos(m->pOut);
          424  +  }
          425  +}
          426  +
          427  +/* Merge one block of an on-disk doclist into a DocListMerge. */
          428  +static void mergeBlock(DocListMerge *m, DocList *pBlock){
          429  +  DocListReader blockReader;
          430  +  assert( pBlock->iType >= DL_POSITIONS );
          431  +  readerInit(&blockReader, pBlock);
          432  +  while( !readerAtEnd(&blockReader) ){
          433  +    sqlite_int64 iDocid = readDocid(&blockReader);
          434  +    if( m->in.pDoclist!=NULL ){
          435  +      while( 1 ){
          436  +        if( readerAtEnd(&m->in) ) return;  /* nothing more to merge */
          437  +        if( peekDocid(&m->in)>=iDocid ) break;
          438  +        skipDocument(&m->in);
          439  +      }
          440  +      if( peekDocid(&m->in)>iDocid ){  /* [pIn] has no match with iDocid */
          441  +        skipPositionList(&blockReader);  /* skip this docid in the block */
          442  +        continue;
          443  +      }
          444  +      readDocid(&m->in);
          445  +    }
          446  +    /* We have a document match. */
          447  +    if( m->in.pDoclist==NULL || m->in.pDoclist->iType < DL_POSITIONS ){
          448  +      /* We don't need to do a poslist merge. */
          449  +      docListAddDocid(m->pOut, iDocid);
          450  +      if( m->pOut->iType >= DL_POSITIONS ){
          451  +        /* Copy all positions to the output doclist. */
          452  +        while( 1 ){
          453  +          int pos = readPosition(&blockReader);
          454  +          if( pos==-1 ) break;
          455  +          docListAddPos(m->pOut, pos);
          456  +        }
          457  +        docListAddEndPos(m->pOut);
          458  +      } else skipPositionList(&blockReader);
          459  +      continue;
          460  +    }
          461  +    mergePosList(m, iDocid, &blockReader);
          462  +  }
          463  +}
          464  +
          465  +static char *string_dup_n(const char *s, int n){
          466  +  char *str = malloc(n + 1);
          467  +  memcpy(str, s, n);
          468  +  str[n] = '\0';
          469  +  return str;
          470  +}
          471  +
          472  +/* Duplicate a string; the caller must free() the returned string.
          473  + * (We don't use strdup() since it's not part of the standard C library and
          474  + * may not be available everywhere.) */
          475  +static char *string_dup(const char *s){
          476  +  return string_dup_n(s, strlen(s));
          477  +}
          478  +
          479  +/* Format a string, replacing each occurrence of the % character with
          480  + * zName.  This may be more convenient than sqlite_mprintf()
          481  + * when one string is used repeatedly in a format string.
          482  + * The caller must free() the returned string. */
          483  +static char *string_format(const char *zFormat, const char *zName){
          484  +  const char *p;
          485  +  size_t len = 0;
          486  +  size_t nName = strlen(zName);
          487  +  char *result;
          488  +  char *r;
          489  +
          490  +  /* first compute length needed */
          491  +  for(p = zFormat ; *p ; ++p){
          492  +    len += (*p=='%' ? nName : 1);
          493  +  }
          494  +  len += 1;  /* for null terminator */
          495  +
          496  +  r = result = malloc(len);
          497  +  for(p = zFormat; *p; ++p){
          498  +    if( *p=='%' ){
          499  +      memcpy(r, zName, nName);
          500  +      r += nName;
          501  +    } else {
          502  +      *r++ = *p;
          503  +    }
          504  +  }
          505  +  *r++ = '\0';
          506  +  assert( r == result + len );
          507  +  return result;
          508  +}
          509  +
          510  +static int sql_exec(sqlite3 *db, const char *zName, const char *zFormat){
          511  +  char *zCommand = string_format(zFormat, zName);
          512  +  int rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
          513  +  free(zCommand);
          514  +  return rc;
          515  +}
          516  +
          517  +static int sql_prepare(sqlite3 *db, const char *zName, sqlite3_stmt **ppStmt,
          518  +                const char *zFormat){
          519  +  char *zCommand = string_format(zFormat, zName);
          520  +  int rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
          521  +  free(zCommand);
          522  +  return rc;
          523  +}
          524  +
          525  +/* end utility functions */
          526  +
          527  +#define QUERY_GENERIC 0
          528  +#define QUERY_FULLTEXT 1
          529  +
          530  +#define CHUNK_MAX 1024
          531  +
          532  +typedef enum fulltext_statement {
          533  +  CONTENT_INSERT_STMT,
          534  +  CONTENT_SELECT_STMT,
          535  +  CONTENT_DELETE_STMT,
          536  +
          537  +  TERM_SELECT_STMT,
          538  +  TERM_CHUNK_SELECT_STMT,
          539  +  TERM_INSERT_STMT,
          540  +  TERM_UPDATE_STMT,
          541  +  TERM_DELETE_STMT,
          542  +
          543  +  MAX_STMT                     /* Always at end! */
          544  +} fulltext_statement;
          545  +
          546  +/* These must exactly match the enum above. */
          547  +/* TODO(adam): Is there some risk that a statement (in particular,
          548  +** pTermSelectStmt) will be used in two cursors at once, e.g.  if a
          549  +** query joins a virtual table to itself?  If so perhaps we should
          550  +** move some of these to the cursor object.
          551  +*/
          552  +const char *fulltext_zStatement[MAX_STMT] = {
          553  +  /* CONTENT_INSERT */ "insert into %_content (rowid, content) values (?, ?)",
          554  +  /* CONTENT_SELECT */ "select content from %_content where rowid = ?",
          555  +  /* CONTENT_DELETE */ "delete from %_content where rowid = ?",
          556  +
          557  +  /* TERM_SELECT */
          558  +  "select rowid, doclist from %_term where term = ? and first = ?",
          559  +  /* TERM_CHUNK_SELECT */
          560  +  "select max(first) from %_term where term = ? and first <= ?",
          561  +  /* TERM_INSERT */
          562  +  "insert into %_term (term, first, doclist) values (?, ?, ?)",
          563  +  /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
          564  +  /* TERM_DELETE */ "delete from %_term where rowid = ?",
          565  +};
          566  +
          567  +typedef struct fulltext_vtab {
          568  +  sqlite3_vtab base;
          569  +  sqlite3 *db;
          570  +  const char *zName;               /* virtual table name */
          571  +  sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */
          572  +
          573  +  /* Precompiled statements which we keep as long as the table is
          574  +  ** open.
          575  +  */
          576  +  sqlite3_stmt *pFulltextStatements[MAX_STMT];
          577  +} fulltext_vtab;
          578  +
          579  +typedef struct fulltext_cursor {
          580  +  sqlite3_vtab_cursor base;
          581  +  int iCursorType;  /* QUERY_GENERIC or QUERY_FULLTEXT */
          582  +
          583  +  sqlite3_stmt *pStmt;
          584  +
          585  +  int eof;
          586  +
          587  +  /* The following is used only when iCursorType == QUERY_FULLTEXT. */
          588  +  DocListReader result;
          589  +} fulltext_cursor;
          590  +
          591  +static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
          592  +  return (fulltext_vtab *) c->base.pVtab;
          593  +}
          594  +
          595  +static sqlite3_module fulltextModule;   /* forward declaration */
          596  +
          597  +/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
          598  +** If the indicated statement has never been prepared, it is prepared
          599  +** and cached, otherwise the cached version is reset.
          600  +*/
          601  +static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
          602  +                             sqlite3_stmt **ppStmt){
          603  +  assert( iStmt<MAX_STMT );
          604  +  if( v->pFulltextStatements[iStmt]==NULL ){
          605  +    int rc = sql_prepare(v->db, v->zName, &v->pFulltextStatements[iStmt],
          606  +                         fulltext_zStatement[iStmt]);
          607  +    if( rc!=SQLITE_OK ) return rc;
          608  +  } else {
          609  +    int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
          610  +    if( rc!=SQLITE_OK ) return rc;
          611  +  }
          612  +
          613  +  *ppStmt = v->pFulltextStatements[iStmt];
          614  +  return SQLITE_OK;
          615  +}
          616  +
          617  +/* Step the indicated statement, handling errors SQLITE_BUSY (by
          618  +** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
          619  +** bindings to the new statement).
          620  +** TODO(adam): We should extend this function so that it can work with
          621  +** statements declared locally, not only globally cached statements.
          622  +*/
          623  +static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
          624  +                              sqlite3_stmt **ppStmt){
          625  +  int rc;
          626  +  sqlite3_stmt *s = *ppStmt;
          627  +  assert( iStmt<MAX_STMT );
          628  +  assert( s==v->pFulltextStatements[iStmt] );
          629  +
          630  +  while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
          631  +    sqlite3_stmt *pNewStmt;
          632  +
          633  +    if( rc==SQLITE_BUSY ) continue;
          634  +    if( rc!=SQLITE_ERROR ) return rc;
          635  +
          636  +    rc = sqlite3_reset(s);
          637  +    if( rc!=SQLITE_SCHEMA ) return SQLITE_ERROR;
          638  +
          639  +    v->pFulltextStatements[iStmt] = NULL;   /* Still in s */
          640  +    rc = sql_get_statement(v, iStmt, &pNewStmt);
          641  +    if( rc!=SQLITE_OK ) goto err;
          642  +    *ppStmt = pNewStmt;
          643  +
          644  +    rc = sqlite3_transfer_bindings(s, pNewStmt);
          645  +    if( rc!=SQLITE_OK ) goto err;
          646  +
          647  +    rc = sqlite3_finalize(s);
          648  +    if( rc!=SQLITE_OK ) return rc;
          649  +    s = pNewStmt;
          650  +  }
          651  +  return rc;
          652  +
          653  + err:
          654  +  sqlite3_finalize(s);
          655  +  return rc;
          656  +}
          657  +
          658  +/* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.
          659  +** Useful for statements like UPDATE, where we expect no results.
          660  +*/
          661  +static int sql_single_step_statement(fulltext_vtab *v,
          662  +                                     fulltext_statement iStmt,
          663  +                                     sqlite3_stmt **ppStmt){
          664  +  int rc = sql_step_statement(v, iStmt, ppStmt);
          665  +  return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
          666  +}
          667  +
          668  +/* insert into %_content (rowid, content) values ([rowid], [zContent]) */
          669  +static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
          670  +                          const char *zContent, int nContent){
          671  +  sqlite3_stmt *s;
          672  +  int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
          673  +  if( rc!=SQLITE_OK ) return rc;
          674  +
          675  +  rc = sqlite3_bind_value(s, 1, rowid);
          676  +  if( rc!=SQLITE_OK ) return rc;
          677  +
          678  +  rc = sqlite3_bind_text(s, 2, zContent, nContent, SQLITE_STATIC);
          679  +  if( rc!=SQLITE_OK ) return rc;
          680  +
          681  +  return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
          682  +}
          683  +
          684  +/* select content from %_content where rowid = [iRow]
          685  + * The caller must delete the returned string. */
          686  +static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
          687  +                          char **pzContent){
          688  +  sqlite3_stmt *s;
          689  +  int rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
          690  +  if( rc!=SQLITE_OK ) return rc;
          691  +
          692  +  rc = sqlite3_bind_int64(s, 1, iRow);
          693  +  if( rc!=SQLITE_OK ) return rc;
          694  +
          695  +  rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s);
          696  +  if( rc!=SQLITE_ROW ) return rc;
          697  +
          698  +  *pzContent = string_dup((const char *)sqlite3_column_text(s, 0));
          699  +
          700  +  /* We expect only one row.  We must execute another sqlite3_step()
          701  +   * to complete the iteration; otherwise the table will remain locked. */
          702  +  rc = sqlite3_step(s);
          703  +  if( rc==SQLITE_DONE ) return SQLITE_OK;
          704  +
          705  +  free(*pzContent);
          706  +  return rc;
          707  +}
          708  +
          709  +/* delete from %_content where rowid = [iRow ] */
          710  +static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
          711  +  sqlite3_stmt *s;
          712  +  int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
          713  +  if( rc!=SQLITE_OK ) return rc;
          714  +
          715  +  rc = sqlite3_bind_int64(s, 1, iRow);
          716  +  if( rc!=SQLITE_OK ) return rc;
          717  +
          718  +  return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);
          719  +}
          720  +
          721  +/* select rowid, doclist from %_term where term = [zTerm] and first = [iFirst]
          722  + * If found, returns SQLITE_OK; the caller must free the returned doclist.
          723  + * If no rows found, returns SQLITE_ERROR. */
          724  +static int term_select(fulltext_vtab *v, const char *zTerm, int nTerm,
          725  +                       sqlite_int64 iFirst,
          726  +                       sqlite_int64 *rowid,
          727  +                       DocList *out){
          728  +  sqlite3_stmt *s;
          729  +  int rc = sql_get_statement(v, TERM_SELECT_STMT, &s);
          730  +  if( rc!=SQLITE_OK ) return rc;
          731  +
          732  +  rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_TRANSIENT);
          733  +  if( rc!=SQLITE_OK ) return rc;
          734  +
          735  +  rc = sqlite3_bind_int64(s, 2, iFirst);
          736  +  if( rc!=SQLITE_OK ) return rc;
          737  +
          738  +  rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
          739  +  if( rc!=SQLITE_ROW ) return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
          740  +
          741  +  *rowid = sqlite3_column_int64(s, 0);
          742  +  docListInit(out, DL_POSITIONS_OFFSETS,
          743  +              sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
          744  +
          745  +  /* We expect only one row.  We must execute another sqlite3_step()
          746  +   * to complete the iteration; otherwise the table will remain locked. */
          747  +  rc = sqlite3_step(s);
          748  +  return rc==SQLITE_DONE ? SQLITE_OK : rc;
          749  +}
          750  +
          751  +/* select max(first) from %_term where term = [zTerm] and first <= [iFirst]
          752  + * If found, returns SQLITE_ROW and result in *piResult; if the query returns
          753  + * NULL (meaning no row found) returns SQLITE_DONE.
          754  + */
          755  +static int term_chunk_select(fulltext_vtab *v, const char *zTerm, int nTerm,
          756  +                           sqlite_int64 iFirst, sqlite_int64 *piResult){
          757  +  sqlite3_stmt *s;
          758  +  int rc = sql_get_statement(v, TERM_CHUNK_SELECT_STMT, &s);
          759  +  if( rc!=SQLITE_OK ) return rc;
          760  +
          761  +  rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_STATIC);
          762  +  if( rc!=SQLITE_OK ) return rc;
          763  +
          764  +  rc = sqlite3_bind_int64(s, 2, iFirst);
          765  +  if( rc!=SQLITE_OK ) return rc;
          766  +
          767  +  rc = sql_step_statement(v, TERM_CHUNK_SELECT_STMT, &s);
          768  +  if( rc!=SQLITE_ROW ) return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
          769  +
          770  +  switch( sqlite3_column_type(s, 0) ){
          771  +    case SQLITE_NULL:
          772  +      rc = SQLITE_DONE;
          773  +      break;
          774  +    case SQLITE_INTEGER:
          775  +     *piResult = sqlite3_column_int64(s, 0);
          776  +     break;
          777  +    default:
          778  +      return SQLITE_ERROR;
          779  +  }
          780  +  /* We expect only one row.  We must execute another sqlite3_step()
          781  +   * to complete the iteration; otherwise the table will remain locked. */
          782  +  if( sqlite3_step(s) != SQLITE_DONE ) return SQLITE_ERROR;
          783  +  return rc;
          784  +}
          785  +
          786  +/* insert into %_term (term, first, doclist)
          787  +               values ([zTerm], [iFirst], [doclist]) */
          788  +static int term_insert(fulltext_vtab *v, const char *zTerm, int nTerm,
          789  +                       sqlite_int64 iFirst, DocList *doclist){
          790  +  sqlite3_stmt *s;
          791  +  int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
          792  +  if( rc!=SQLITE_OK ) return rc;
          793  +
          794  +  rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_STATIC);
          795  +  if( rc!=SQLITE_OK ) return rc;
          796  +
          797  +  rc = sqlite3_bind_int64(s, 2, iFirst);
          798  +  if( rc!=SQLITE_OK ) return rc;
          799  +
          800  +  rc = sqlite3_bind_blob(s, 3, doclist->pData, doclist->nData, SQLITE_STATIC);
          801  +  if( rc!=SQLITE_OK ) return rc;
          802  +
          803  +  return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
          804  +}
          805  +
          806  +/* update %_term set doclist = [doclist] where rowid = [rowid] */
          807  +static int term_update(fulltext_vtab *v, sqlite_int64 rowid,
          808  +                       DocList *doclist){
          809  +  sqlite3_stmt *s;
          810  +  int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s);
          811  +  if( rc!=SQLITE_OK ) return rc;
          812  +
          813  +  rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData,
          814  +                         SQLITE_STATIC);
          815  +  if( rc!=SQLITE_OK ) return rc;
          816  +
          817  +  rc = sqlite3_bind_int64(s, 2, rowid);
          818  +  if( rc!=SQLITE_OK ) return rc;
          819  +
          820  +  return sql_single_step_statement(v, TERM_UPDATE_STMT, &s);
          821  +}
          822  +
          823  +static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){
          824  +  sqlite3_stmt *s;
          825  +  int rc = sql_get_statement(v, TERM_DELETE_STMT, &s);
          826  +  if( rc!=SQLITE_OK ) return rc;
          827  +
          828  +  rc = sqlite3_bind_int64(s, 1, rowid);
          829  +  if( rc!=SQLITE_OK ) return rc;
          830  +
          831  +  return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
          832  +}
          833  +
          834  +static void fulltext_vtab_destroy(fulltext_vtab *v){
          835  +  int iStmt;
          836  +
          837  +  for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
          838  +    if( v->pFulltextStatements[iStmt]!=NULL ){
          839  +      sqlite3_finalize(v->pFulltextStatements[iStmt]);
          840  +      v->pFulltextStatements[iStmt] = NULL;
          841  +    }
          842  +  }
          843  +
          844  +  if( v->pTokenizer!=NULL ){
          845  +    v->pTokenizer->pModule->xDestroy(v->pTokenizer);
          846  +    v->pTokenizer = NULL;
          847  +  }
          848  +
          849  +  free((void *) v->zName);
          850  +  free(v);
          851  +}
          852  +
          853  +/* Current interface:
          854  +** argv[0] - module name
          855  +** argv[1] - database name
          856  +** argv[2] - table name
          857  +** argv[3] - tokenizer name (optional, a sensible default is provided)
          858  +** argv[4..] - passed to tokenizer (optional based on tokenizer)
          859  +**/
          860  +static int fulltextConnect(sqlite3 *db, void *pAux, int argc, char **argv,
          861  +                           sqlite3_vtab **ppVTab){
          862  +  int rc;
          863  +  fulltext_vtab *v;
          864  +  sqlite3_tokenizer_module *m = NULL;
          865  +
          866  +  assert( argc>=3 );
          867  +  v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
          868  +  /* sqlite will initialize v->base */
          869  +  v->db = db;
          870  +  v->zName = string_dup(argv[2]);
          871  +  v->pTokenizer = NULL;
          872  +
          873  +  if( argc==3 ){
          874  +    get_simple_tokenizer_module(&m);
          875  +  } else {
          876  +    /* TODO(shess) For now, add new tokenizers as else if clauses. */
          877  +    if( !strcmp(argv[3], "simple") ){
          878  +      get_simple_tokenizer_module(&m);
          879  +    } else {
          880  +      assert( "unrecognized tokenizer"==NULL );
          881  +    }
          882  +  }
          883  +
          884  +  /* TODO(shess) Since tokenization impacts the index, the parameters
          885  +  ** to the tokenizer need to be identical when a persistent virtual
          886  +  ** table is re-created.  One solution would be a meta-table to track
          887  +  ** such information in the database.  Then we could verify that the
          888  +  ** information is identical on subsequent creates.
          889  +  */
          890  +  /* TODO(shess) Why isn't argv already (const char **)? */
          891  +  rc = m->xCreate(argc-3, (const char **) (argv+3), &v->pTokenizer);
          892  +  if( rc!=SQLITE_OK ) return rc;
          893  +  v->pTokenizer->pModule = m;
          894  +
          895  +  /* TODO: verify the existence of backing tables foo_content, foo_term */
          896  +
          897  +  rc = sqlite3_declare_vtab(db, "create table x(content text)");
          898  +  if( rc!=SQLITE_OK ) return rc;
          899  +
          900  +  memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
          901  +
          902  +  *ppVTab = &v->base;
          903  +  return SQLITE_OK;
          904  +}
          905  +
          906  +static int fulltextCreate(sqlite3 *db, void *pAux, int argc, char **argv,
          907  +                          sqlite3_vtab **ppVTab){
          908  +  int rc;
          909  +  assert( argc>=3 );
          910  +
          911  +  /* The %_content table holds the text of each full-text item, with
          912  +  ** the rowid used as the docid.
          913  +  **
          914  +  ** The %_term table maps each term to a document list blob
          915  +  ** containing elements sorted by ascending docid, each element
          916  +  ** encoded as:
          917  +  **
          918  +  **   docid varint-encoded
          919  +  **   token count varint-encoded
          920  +  **   "count" token elements (poslist):
          921  +  **     position varint-encoded as delta from previous position
          922  +  **     start offset varint-encoded as delta from previous start offset
          923  +  **     end offset varint-encoded as delta from start offset
          924  +  **
          925  +  ** Additionally, doclist blobs can be chunked into multiple rows,
          926  +  ** using "first" to order the blobs.  "first" is simply the first
          927  +  ** docid in the blob.
          928  +  */
          929  +  /*
          930  +  ** NOTE(shess) That last sentence is incorrect in the face of
          931  +  ** deletion, which can leave a doclist that doesn't contain the
          932  +  ** first from that row.  I _believe_ this does not matter to the
          933  +  ** operation of the system, but it might be reasonable to update
          934  +  ** appropriately in case this assumption becomes more important.
          935  +  */
          936  +  rc = sql_exec(db, argv[2],
          937  +    "create table %_content(content text);"
          938  +    "create table %_term(term text, first integer, doclist blob);"
          939  +    "create index %_index on %_term(term, first)");
          940  +  if( rc!=SQLITE_OK ) return rc;
          941  +
          942  +  return fulltextConnect(db, pAux, argc, argv, ppVTab);
          943  +}
          944  +
          945  +/* Decide how to handle an SQL query.
          946  + * At the moment, MATCH queries can include implicit boolean ANDs; we
          947  + * haven't implemented phrase searches or OR yet. */
          948  +static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
          949  +  int i;
          950  +
          951  +  for(i=0; i<pInfo->nConstraint; ++i){
          952  +    const struct sqlite3_index_constraint *pConstraint;
          953  +    pConstraint = &pInfo->aConstraint[i];
          954  +    if( pConstraint->iColumn==0 &&
          955  +        pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH &&
          956  +        pConstraint->usable ){   /* a full-text search */
          957  +      pInfo->aConstraintUsage[i].argvIndex = 1;
          958  +      pInfo->aConstraintUsage[i].omit = 1;
          959  +      pInfo->idxNum = QUERY_FULLTEXT;
          960  +      pInfo->estimatedCost = 1.0;   /* an arbitrary value for now */
          961  +      return SQLITE_OK;
          962  +    }
          963  +  }
          964  +  pInfo->idxNum = QUERY_GENERIC;
          965  +  return SQLITE_OK;
          966  +}
          967  +
          968  +static int fulltextDisconnect(sqlite3_vtab *pVTab){
          969  +  fulltext_vtab_destroy((fulltext_vtab *)pVTab);
          970  +  return SQLITE_OK;
          971  +}
          972  +
          973  +static int fulltextDestroy(sqlite3_vtab *pVTab){
          974  +  fulltext_vtab *v = (fulltext_vtab *)pVTab;
          975  +
          976  +  int rc = sql_exec(v->db, v->zName,
          977  +                    "drop table %_content; drop table %_term");
          978  +  if( rc!=SQLITE_OK ) return rc;
          979  +
          980  +  fulltext_vtab_destroy((fulltext_vtab *)pVTab);
          981  +  return SQLITE_OK;
          982  +}
          983  +
          984  +static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
          985  +  fulltext_cursor *c;
          986  +
          987  +  c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1);
          988  +  /* sqlite will initialize c->base */
          989  +  *ppCursor = &c->base;
          990  +
          991  +  return SQLITE_OK;
          992  +}
          993  +
          994  +static int fulltextClose(sqlite3_vtab_cursor *pCursor){
          995  +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
          996  +  sqlite3_finalize(c->pStmt);
          997  +  if( c->result.pDoclist!=NULL ){
          998  +    docListDelete(c->result.pDoclist);
          999  +  }
         1000  +  free(c);
         1001  +  return SQLITE_OK;
         1002  +}
         1003  +
         1004  +static int fulltextNext(sqlite3_vtab_cursor *pCursor){
         1005  +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
         1006  +  sqlite_int64 iDocid;
         1007  +  int rc;
         1008  +
         1009  +  switch( c->iCursorType ){
         1010  +    case QUERY_GENERIC:
         1011  +      /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
         1012  +      rc = sqlite3_step(c->pStmt);
         1013  +      switch( rc ){
         1014  +        case SQLITE_ROW:
         1015  +          c->eof = 0;
         1016  +          return SQLITE_OK;
         1017  +        case SQLITE_DONE:
         1018  +          c->eof = 1;
         1019  +          return SQLITE_OK;
         1020  +        default:
         1021  +          c->eof = 1;
         1022  +          return rc;
         1023  +      }
         1024  +    case QUERY_FULLTEXT:
         1025  +      rc = sqlite3_reset(c->pStmt);
         1026  +      if( rc!=SQLITE_OK ) return rc;
         1027  +
         1028  +      if( readerAtEnd(&c->result)){
         1029  +        c->eof = 1;
         1030  +        return SQLITE_OK;
         1031  +      }
         1032  +      iDocid = readDocid(&c->result);
         1033  +      rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
         1034  +      if( rc!=SQLITE_OK ) return rc;
         1035  +      /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
         1036  +      rc = sqlite3_step(c->pStmt);
         1037  +      if( rc==SQLITE_ROW ){   /* the case we expect */
         1038  +        c->eof = 0;
         1039  +        return SQLITE_OK;
         1040  +      }
         1041  +      /* an error occurred; abort */
         1042  +      return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
         1043  +    default:
         1044  +      assert( 0 );
         1045  +      return SQLITE_ERROR;  /* not reached */
         1046  +  }
         1047  +}
         1048  +
         1049  +static int term_select_doclist(fulltext_vtab *v, const char *pTerm, int nTerm,
         1050  +                               sqlite3_stmt **ppStmt){
         1051  +  int rc;
         1052  +  if( *ppStmt ){
         1053  +    rc = sqlite3_reset(*ppStmt);
         1054  +  } else {
         1055  +    rc = sql_prepare(v->db, v->zName, ppStmt,
         1056  +      "select doclist from %_term where term = ? order by first");
         1057  +  }
         1058  +  if( rc!=SQLITE_OK ) return rc;
         1059  +
         1060  +  rc = sqlite3_bind_text(*ppStmt, 1, pTerm, nTerm, SQLITE_TRANSIENT);
         1061  +  if( rc!=SQLITE_OK ) return rc;
         1062  +
         1063  +  return sqlite3_step(*ppStmt);   /* TODO(adamd): handle schema error */
         1064  +}
         1065  +
         1066  +/* Read the posting list for [zTerm]; AND it with the doclist [in] to
         1067  + * produce the doclist [out], using the given offset [iOffset] for phrase
         1068  + * matching.
         1069  + * (*pSelect) is used to hold an SQLite statement used inside this function;
         1070  + * the caller should initialize *pSelect to NULL before the first call.
         1071  + */
         1072  +static int query_merge(fulltext_vtab *v, sqlite3_stmt **pSelect,
         1073  +                       const char *zTerm,
         1074  +                       DocList *pIn, int iOffset, DocList *out){
         1075  +  int rc;
         1076  +  DocListMerge merge;
         1077  +
         1078  +  if( pIn!=NULL && !pIn->nData ){
         1079  +    /* If [pIn] is already empty, there's no point in reading the
         1080  +     * posting list to AND it in; return immediately. */
         1081  +      return SQLITE_OK;
         1082  +  }
         1083  +
         1084  +  rc = term_select_doclist(v, zTerm, -1, pSelect);
         1085  +  if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc;
         1086  +
         1087  +  mergeInit(&merge, pIn, iOffset, out);
         1088  +  while( rc==SQLITE_ROW ){
         1089  +    DocList block;
         1090  +    docListInit(&block, DL_POSITIONS_OFFSETS,
         1091  +                sqlite3_column_blob(*pSelect, 0),
         1092  +                sqlite3_column_bytes(*pSelect, 0));
         1093  +    mergeBlock(&merge, &block);
         1094  +    docListDestroy(&block);
         1095  +
         1096  +    rc = sqlite3_step(*pSelect);
         1097  +    if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ){
         1098  +      return rc;
         1099  +    }
         1100  +  }
         1101  +  
         1102  +  return SQLITE_OK;
         1103  +}
         1104  +
         1105  +typedef struct QueryTerm {
         1106  +  int is_phrase;    /* true if this term begins a new phrase */
         1107  +  const char *zTerm;
         1108  +} QueryTerm;
         1109  +
         1110  +/* A parsed query.
         1111  + *
         1112  + * As an example, parsing the query ["four score" years "new nation"] will
         1113  + * yield a Query with 5 terms:
         1114  + *   "four",   is_phrase = 1
         1115  + *   "score",  is_phrase = 0
         1116  + *   "years",  is_phrase = 1
         1117  + *   "new",    is_phrase = 1
         1118  + *   "nation", is_phrase = 0
         1119  + */
         1120  +typedef struct Query {
         1121  +  int nTerms;
         1122  +  QueryTerm *pTerm;
         1123  +} Query;
         1124  +
         1125  +void query_add(Query *q, int is_phrase, const char *zTerm){
         1126  +  QueryTerm *t;
         1127  +  ++q->nTerms;
         1128  +  q->pTerm = realloc(q->pTerm, q->nTerms * sizeof(q->pTerm[0]));
         1129  +  t = &q->pTerm[q->nTerms - 1];
         1130  +  t->is_phrase = is_phrase;
         1131  +  t->zTerm = zTerm;
         1132  +}
         1133  +    
         1134  +void query_free(Query *q){
         1135  +  int i;
         1136  +  for(i = 0; i < q->nTerms; ++i){
         1137  +    free((void *) q->pTerm[i].zTerm);
         1138  +  }
         1139  +  free(q->pTerm);
         1140  +}
         1141  +
         1142  +int tokenize_segment(sqlite3_tokenizer *pTokenizer,
         1143  +                     const char *zQuery, int in_phrase,
         1144  +                     Query *pQuery){
         1145  +  sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
         1146  +  sqlite3_tokenizer_cursor *pCursor;
         1147  +  int is_first = 1;
         1148  +  
         1149  +  int rc = pModule->xOpen(pTokenizer, zQuery, -1, &pCursor);
         1150  +  if( rc!=SQLITE_OK ) return rc;
         1151  +  pCursor->pTokenizer = pTokenizer;
         1152  +
         1153  +  while( 1 ){
         1154  +    const char *zToken;
         1155  +    int nToken, iStartOffset, iEndOffset, dummy_pos;
         1156  +
         1157  +    rc = pModule->xNext(pCursor,
         1158  +                        &zToken, &nToken,
         1159  +                        &iStartOffset, &iEndOffset,
         1160  +                        &dummy_pos);
         1161  +    if( rc!=SQLITE_OK ) break;
         1162  +    query_add(pQuery, !in_phrase || is_first, string_dup_n(zToken, nToken));
         1163  +    is_first = 0;
         1164  +  }
         1165  +
         1166  +  return pModule->xClose(pCursor);
         1167  +}
         1168  +
         1169  +/* Parse a query string, yielding a Query object. */
         1170  +int parse_query(fulltext_vtab *v, const char *zQuery, Query *pQuery){
         1171  +  char *zQuery1 = string_dup(zQuery);
         1172  +  int in_phrase = 0;
         1173  +  char *s = zQuery1;
         1174  +  pQuery->nTerms = 0;
         1175  +  pQuery->pTerm = NULL;
         1176  +
         1177  +  while( *s ){
         1178  +    char *t = s;
         1179  +    while( *t ){
         1180  +      if( *t=='"' ){
         1181  +        *t++ = '\0';
         1182  +        break;
         1183  +      }
         1184  +      ++t;
         1185  +    }
         1186  +    if( *s ){
         1187  +      tokenize_segment(v->pTokenizer, s, in_phrase, pQuery);
         1188  +    }
         1189  +    s = t;
         1190  +    in_phrase = !in_phrase;
         1191  +  }
         1192  +  
         1193  +  free(zQuery1);
         1194  +  return SQLITE_OK;
         1195  +}
         1196  +
         1197  +/* Perform a full-text query; return a list of documents in [pResult]. */
         1198  +static int fulltext_query(fulltext_vtab *v, const char *zQuery,
         1199  +                          DocList **pResult){
         1200  +  Query q;
         1201  +  int phrase_start = -1;
         1202  +  int i;
         1203  +  sqlite3_stmt *pSelect = NULL;
         1204  +  DocList *d = NULL;
         1205  +
         1206  +  int rc = parse_query(v, zQuery, &q);
         1207  +  if( rc!=SQLITE_OK ) return rc;
         1208  +
         1209  +  /* Merge terms. */
         1210  +  for(i = 0 ; i < q.nTerms ; ++i){
         1211  +    /* In each merge step, we need to generate positions whenever we're
         1212  +     * processing a phrase which hasn't ended yet. */
         1213  +    int need_positions = i<q.nTerms-1 && !q.pTerm[i+1].is_phrase;
         1214  +    DocList *next = docListNew(need_positions ? DL_POSITIONS : DL_DOCIDS);
         1215  +    if( q.pTerm[i].is_phrase ){
         1216  +      phrase_start = i;
         1217  +    }
         1218  +    rc = query_merge(v, &pSelect, q.pTerm[i].zTerm, d, i - phrase_start, next);
         1219  +    if( rc!=SQLITE_OK ) break;
         1220  +    if( d!=NULL ){
         1221  +      docListDelete(d);
         1222  +    }
         1223  +    d = next;
         1224  +  }
         1225  +
         1226  +  sqlite3_finalize(pSelect);
         1227  +  query_free(&q);
         1228  +  *pResult = d;
         1229  +  return rc;
         1230  +}
         1231  +
         1232  +static int fulltextFilter(sqlite3_vtab_cursor *pCursor,
         1233  +                          int idxNum, const char *idxStr,
         1234  +                          int argc, sqlite3_value **argv){
         1235  +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
         1236  +  fulltext_vtab *v = cursor_vtab(c);
         1237  +  int rc;
         1238  +  const char *zStatement;
         1239  +
         1240  +  c->iCursorType = idxNum;
         1241  +  switch( idxNum ){
         1242  +    case QUERY_GENERIC:
         1243  +      zStatement = "select rowid, content from %_content";
         1244  +      break;
         1245  +
         1246  +    case QUERY_FULLTEXT:   /* full-text search */
         1247  +    {
         1248  +      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
         1249  +      DocList *pResult;
         1250  +      assert( argc==1 );
         1251  +      rc = fulltext_query(v, zQuery, &pResult);
         1252  +      if( rc!=SQLITE_OK ) return rc;
         1253  +      readerInit(&c->result, pResult);
         1254  +      zStatement = "select rowid, content from %_content where rowid = ?";
         1255  +      break;
         1256  +    }
         1257  +
         1258  +    default:
         1259  +      assert( 0 );
         1260  +  }
         1261  +
         1262  +  rc = sql_prepare(v->db, v->zName, &c->pStmt, zStatement);
         1263  +  if( rc!=SQLITE_OK ) return rc;
         1264  +
         1265  +  return fulltextNext(pCursor);
         1266  +}
         1267  +
         1268  +static int fulltextEof(sqlite3_vtab_cursor *pCursor){
         1269  +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
         1270  +  return c->eof;
         1271  +}
         1272  +
         1273  +static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
         1274  +                          sqlite3_context *pContext, int idxCol){
         1275  +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
         1276  +  const char *s;
         1277  +
         1278  +  assert( idxCol==0 );
         1279  +  s = (const char *) sqlite3_column_text(c->pStmt, 1);
         1280  +  sqlite3_result_text(pContext, s, -1, SQLITE_TRANSIENT);
         1281  +
         1282  +  return SQLITE_OK;
         1283  +}
         1284  +
         1285  +static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
         1286  +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
         1287  +
         1288  +  *pRowid = sqlite3_column_int64(c->pStmt, 0);
         1289  +  return SQLITE_OK;
         1290  +}
         1291  +
         1292  +/* Build a hash table containing all terms in zText. */
         1293  +static int build_terms(Hash *terms, sqlite3_tokenizer *pTokenizer,
         1294  +                       const char *zText, sqlite_int64 iDocid){
         1295  +  sqlite3_tokenizer_cursor *pCursor;
         1296  +  const char *pToken;
         1297  +  int nTokenBytes;
         1298  +  int iStartOffset, iEndOffset, iPosition;
         1299  +
         1300  +  int rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
         1301  +  if( rc!=SQLITE_OK ) return rc;
         1302  +
         1303  +  pCursor->pTokenizer = pTokenizer;
         1304  +  HashInit(terms, HASH_STRING, 1);
         1305  +  while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
         1306  +                                               &pToken, &nTokenBytes,
         1307  +                                               &iStartOffset, &iEndOffset,
         1308  +                                               &iPosition) ){
         1309  +    DocList *p;
         1310  +
         1311  +    /* Positions can't be negative; we use -1 as a terminator internally. */
         1312  +    if( iPosition<0 ) {
         1313  +      rc = SQLITE_ERROR;  
         1314  +      goto err;
         1315  +    }
         1316  +
         1317  +    p = HashFind(terms, pToken, nTokenBytes);
         1318  +    if( p==NULL ){
         1319  +      p = docListNew(DL_POSITIONS_OFFSETS);
         1320  +      docListAddDocid(p, iDocid);
         1321  +      HashInsert(terms, pToken, nTokenBytes, p);
         1322  +    }
         1323  +    docListAddPosOffset(p, iPosition, iStartOffset, iEndOffset);
         1324  +  }
         1325  +
         1326  +err:
         1327  +  /* TODO(shess) Check return?  Should this be able to cause errors at
         1328  +  ** this point?  Actually, same question about sqlite3_finalize(),
         1329  +  ** though one could argue that failure there means that the data is
         1330  +  ** not durable.  *ponder*
         1331  +  */
         1332  +  pTokenizer->pModule->xClose(pCursor);
         1333  +  return rc;
         1334  +}
         1335  +/* Update the %_terms table to map the term [zTerm] to the given rowid. */
         1336  +static int index_insert_term(fulltext_vtab *v, const char *zTerm, int nTerm,
         1337  +                             sqlite_int64 iDocid, DocList *p){
         1338  +  sqlite_int64 iFirst;
         1339  +  sqlite_int64 iIndexRow;
         1340  +  DocList doclist;
         1341  +  char *pBlob = NULL;
         1342  +  int nBlob = 0;
         1343  +
         1344  +  int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst);
         1345  +  if( rc==SQLITE_DONE ){
         1346  +    docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0);
         1347  +    if( docListUpdate(&doclist, iDocid, p) ){
         1348  +      rc = term_insert(v, zTerm, nTerm, iDocid, &doclist);
         1349  +      docListDestroy(&doclist);
         1350  +      return rc;
         1351  +    }
         1352  +    return SQLITE_OK;
         1353  +  }
         1354  +  if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
         1355  +
         1356  +  /* This word is in the index; add this document ID to its blob. */
         1357  +
         1358  +  rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist);
         1359  +  if( rc!=SQLITE_OK ) return rc;
         1360  +
         1361  +  if( docListUpdate(&doclist, iDocid, p) ){
         1362  +    /* If the blob is too big, split it in half. */
         1363  +    if( doclist.nData>CHUNK_MAX ){
         1364  +      DocList half;
         1365  +      if( docListSplit(&doclist, &half) ){
         1366  +        rc = term_insert(v, zTerm, nTerm, firstDocid(&half), &half);
         1367  +        docListDestroy(&half);
         1368  +        if( rc!=SQLITE_OK ) goto err;
         1369  +      }
         1370  +    }
         1371  +    rc = term_update(v, iIndexRow, &doclist);
         1372  +  }
         1373  +
         1374  +err:
         1375  +  docListDestroy(&doclist);
         1376  +  return rc;
         1377  +}
         1378  +
         1379  +/* Insert a row into the full-text index; set *piRowid to be the ID of the
         1380  + * new row. */
         1381  +static int index_insert(fulltext_vtab *v,
         1382  +                        sqlite3_value *pRequestRowid, const char *zText,
         1383  +                        sqlite_int64 *piRowid){
         1384  +  Hash terms;  /* maps term string -> PosList */
         1385  +  HashElem *e;
         1386  +
         1387  +  int rc = content_insert(v, pRequestRowid, zText, -1);
         1388  +  if( rc!=SQLITE_OK ) return rc;
         1389  +  *piRowid = sqlite3_last_insert_rowid(v->db);
         1390  +
         1391  +  if( !zText ) return SQLITE_OK;   /* nothing to index */
         1392  +
         1393  +  rc = build_terms(&terms, v->pTokenizer, zText, *piRowid);
         1394  +  if( rc!=SQLITE_OK ) return rc;
         1395  +
         1396  +  for(e=HashFirst(&terms); e; e=HashNext(e)){
         1397  +    DocList *p = HashData(e);
         1398  +    rc = index_insert_term(v, HashKey(e), HashKeysize(e), *piRowid, p);
         1399  +    if( rc!=SQLITE_OK ) break;
         1400  +  }
         1401  +
         1402  +  for(e=HashFirst(&terms); e; e=HashNext(e)){
         1403  +    DocList *p = HashData(e);
         1404  +    docListDelete(p);
         1405  +  }
         1406  +  HashClear(&terms);
         1407  +  return rc;
         1408  +}
         1409  +
         1410  +static int index_delete_term(fulltext_vtab *v, const char *zTerm, int nTerm,
         1411  +                             sqlite_int64 iDocid){
         1412  +  sqlite_int64 iFirst;
         1413  +  sqlite_int64 iIndexRow;
         1414  +  DocList doclist;
         1415  +
         1416  +  int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst);
         1417  +  if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
         1418  +
         1419  +  rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist);
         1420  +  if( rc!=SQLITE_OK ) return rc;
         1421  +
         1422  +  if( docListUpdate(&doclist, iDocid, NULL) ){
         1423  +    if( doclist.nData>0 ){
         1424  +      rc = term_update(v, iIndexRow, &doclist);
         1425  +    } else {  /* empty posting list */
         1426  +      rc = term_delete(v, iIndexRow);
         1427  +    }
         1428  +  }
         1429  +  docListDestroy(&doclist);
         1430  +  return rc;
         1431  +}
         1432  +
         1433  +/* Delete a row from the full-text index. */
         1434  +static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){
         1435  +  char *zText;
         1436  +  Hash terms;
         1437  +  HashElem *e;
         1438  +
         1439  +  int rc = content_select(v, iRow, &zText);
         1440  +  if( rc!=SQLITE_OK ) return rc;
         1441  +
         1442  +  rc = build_terms(&terms, v->pTokenizer, zText, iRow);
         1443  +  free(zText);
         1444  +  if( rc!=SQLITE_OK ) return rc;
         1445  +
         1446  +  for(e=HashFirst(&terms); e; e=HashNext(e)){
         1447  +    rc = index_delete_term(v, HashKey(e), HashKeysize(e), iRow);
         1448  +    if( rc!=SQLITE_OK ) break;
         1449  +  }
         1450  +  for(e=HashFirst(&terms); e; e=HashNext(e)){
         1451  +    DocList *p = HashData(e);
         1452  +    docListDelete(p);
         1453  +  }
         1454  +  HashClear(&terms);
         1455  +
         1456  +  return content_delete(v, iRow);
         1457  +}
         1458  +
         1459  +static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
         1460  +                   sqlite_int64 *pRowid){
         1461  +  fulltext_vtab *v = (fulltext_vtab *) pVtab;
         1462  +
         1463  +  if( nArg<2 ){
         1464  +    return index_delete(v, sqlite3_value_int64(ppArg[0]));
         1465  +  }
         1466  +
         1467  +  if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
         1468  +    return SQLITE_ERROR;   /* an update; not yet supported */
         1469  +  }
         1470  +
         1471  +  assert( nArg==3 );    /* ppArg[1] = rowid, ppArg[2] = content */
         1472  +  return index_insert(v, ppArg[1],
         1473  +                      (const char *)sqlite3_value_text(ppArg[2]), pRowid);
         1474  +}
         1475  +
         1476  +static sqlite3_module fulltextModule = {
         1477  +  0,
         1478  +  fulltextCreate,
         1479  +  fulltextConnect,
         1480  +  fulltextBestIndex,
         1481  +  fulltextDisconnect,
         1482  +  fulltextDestroy,
         1483  +  fulltextOpen,
         1484  +  fulltextClose,
         1485  +  fulltextFilter,
         1486  +  fulltextNext,
         1487  +  fulltextEof,
         1488  +  fulltextColumn,
         1489  +  fulltextRowid,
         1490  +  fulltextUpdate
         1491  +};
         1492  +
         1493  +int fulltext_init(sqlite3 *db){
         1494  + return sqlite3_create_module(db, "fulltext", &fulltextModule, 0);
         1495  +}
         1496  +
         1497  +#if !SQLITE_CORE
         1498  +int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
         1499  +                           const sqlite3_api_routines *pApi){
         1500  + SQLITE_EXTENSION_INIT2(pApi)
         1501  + return fulltext_init(db);
         1502  +}
         1503  +#endif

Added ext/fts1/fulltext.h.

            1  +#include "sqlite3.h"
            2  +
            3  +#ifdef __cplusplus
            4  +extern "C" {
            5  +#endif  /* __cplusplus */
            6  +
            7  +int fulltext_init(sqlite3 *db);
            8  +
            9  +#ifdef __cplusplus
           10  +}  /* extern "C" */
           11  +#endif  /* __cplusplus */

Added ext/fts1/simple_tokenizer.c.

            1  +/*
            2  +** The author disclaims copyright to this source code.
            3  +**
            4  +*************************************************************************
            5  +** Implementation of the "simple" full-text-search tokenizer.
            6  +*/
            7  +
            8  +#include <assert.h>
            9  +#if !defined(__APPLE__)
           10  +#include <malloc.h>
           11  +#else
           12  +#include <stdlib.h>
           13  +#endif
           14  +#include <stdio.h>
           15  +#include <string.h>
           16  +#include <ctype.h>
           17  +
           18  +#include "tokenizer.h"
           19  +
           20  +/* Duplicate a string; the caller must free() the returned string.
           21  + * (We don't use strdup() since it's not part of the standard C library and
           22  + * may not be available everywhere.) */
           23  +/* TODO(shess) Copied from fulltext.c, consider util.c for such
           24  +** things. */
           25  +static char *string_dup(const char *s){
           26  +  char *str = malloc(strlen(s) + 1);
           27  +  strcpy(str, s);
           28  +  return str;
           29  +}
           30  +
           31  +typedef struct simple_tokenizer {
           32  +  sqlite3_tokenizer base;
           33  +  const char *zDelim;          /* token delimiters */
           34  +} simple_tokenizer;
           35  +
           36  +typedef struct simple_tokenizer_cursor {
           37  +  sqlite3_tokenizer_cursor base;
           38  +  const char *pInput;          /* input we are tokenizing */
           39  +  int nBytes;                  /* size of the input */
           40  +  const char *pCurrent;        /* current position in pInput */
           41  +  int iToken;                  /* index of next token to be returned */
           42  +  char *zToken;                /* storage for current token */
           43  +  int nTokenBytes;             /* actual size of current token */
           44  +  int nTokenAllocated;         /* space allocated to zToken buffer */
           45  +} simple_tokenizer_cursor;
           46  +
           47  +static sqlite3_tokenizer_module simpleTokenizerModule;/* forward declaration */
           48  +
           49  +static int simpleCreate(
           50  +  int argc, const char **argv,
           51  +  sqlite3_tokenizer **ppTokenizer
           52  +){
           53  +  simple_tokenizer *t;
           54  +
           55  +  t = (simple_tokenizer *) malloc(sizeof(simple_tokenizer));
           56  +  /* TODO(shess) Delimiters need to remain the same from run to run,
           57  +  ** else we need to reindex.  One solution would be a meta-table to
           58  +  ** track such information in the database, then we'd only want this
           59  +  ** information on the initial create.
           60  +  */
           61  +  if( argc>1 ){
           62  +    t->zDelim = string_dup(argv[1]);
           63  +  } else {
           64  +    /* Build a string of non-alphanumeric ASCII characters */
           65  +    char zDelim[128];               /* nul-terminated, so nul not a member */
           66  +    int i, j;
           67  +    for(i=1, j=0; i<0x80; i++){
           68  +      if( !isalnum(i) ){
           69  +        zDelim[j++] = i;
           70  +      }
           71  +    }
           72  +    zDelim[j++] = '\0';
           73  +    assert( j<=sizeof(zDelim) );
           74  +    t->zDelim = string_dup(zDelim);
           75  +  }
           76  +
           77  +  *ppTokenizer = &t->base;
           78  +  return SQLITE_OK;
           79  +}
           80  +
           81  +static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
           82  +  simple_tokenizer *t = (simple_tokenizer *) pTokenizer;
           83  +
           84  +  free((void *) t->zDelim);
           85  +  free(t);
           86  +
           87  +  return SQLITE_OK;
           88  +}
           89  +
           90  +static int simpleOpen(
           91  +  sqlite3_tokenizer *pTokenizer,
           92  +  const char *pInput, int nBytes,
           93  +  sqlite3_tokenizer_cursor **ppCursor
           94  +){
           95  +  simple_tokenizer_cursor *c;
           96  +
           97  +  c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor));
           98  +  c->pInput = pInput;
           99  +  c->nBytes = nBytes<0 ? (int) strlen(pInput) : nBytes;
          100  +  c->pCurrent = c->pInput;        /* start tokenizing at the beginning */
          101  +  c->iToken = 0;
          102  +  c->zToken = NULL;               /* no space allocated, yet. */
          103  +  c->nTokenBytes = 0;
          104  +  c->nTokenAllocated = 0;
          105  +
          106  +  *ppCursor = &c->base;
          107  +  return SQLITE_OK;
          108  +}
          109  +
          110  +static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
          111  +  simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
          112  +
          113  +  if( NULL!=c->zToken ){
          114  +    free(c->zToken);
          115  +  }
          116  +  free(c);
          117  +
          118  +  return SQLITE_OK;
          119  +}
          120  +
          121  +static int simpleNext(
          122  +  sqlite3_tokenizer_cursor *pCursor,
          123  +  const char **ppToken, int *pnBytes,
          124  +  int *piStartOffset, int *piEndOffset, int *piPosition
          125  +){
          126  +  simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
          127  +  simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
          128  +  int ii;
          129  +
          130  +  while( c->pCurrent-c->pInput<c->nBytes ){
          131  +    int n = (int) strcspn(c->pCurrent, t->zDelim);
          132  +    if( n>0 ){
          133  +      if( n+1>c->nTokenAllocated ){
          134  +        c->zToken = realloc(c->zToken, n+1);
          135  +      }
          136  +      for(ii=0; ii<n; ii++){
          137  +        c->zToken[ii] = tolower(c->pCurrent[ii]);
          138  +      }
          139  +      c->zToken[n] = '\0';
          140  +      *ppToken = c->zToken;
          141  +      *pnBytes = n;
          142  +      *piStartOffset = (int) (c->pCurrent-c->pInput);
          143  +      *piEndOffset = *piStartOffset+n;
          144  +      *piPosition = c->iToken++;
          145  +      c->pCurrent += n + 1;
          146  +
          147  +      return SQLITE_OK;
          148  +    }
          149  +    c->pCurrent += n + 1;
          150  +    /* TODO(shess) could strspn() to skip delimiters en masse.  Needs
          151  +    ** to happen in two places, though, which is annoying.
          152  +    */
          153  +  }
          154  +  return SQLITE_DONE;
          155  +}
          156  +
          157  +static sqlite3_tokenizer_module simpleTokenizerModule = {
          158  +  0,
          159  +  simpleCreate,
          160  +  simpleDestroy,
          161  +  simpleOpen,
          162  +  simpleClose,
          163  +  simpleNext,
          164  +};
          165  +
          166  +void get_simple_tokenizer_module(
          167  +  sqlite3_tokenizer_module **ppModule
          168  +){
          169  +  *ppModule = &simpleTokenizerModule;
          170  +}

Added ext/fts1/tokenizer.h.

            1  +/*
            2  +** 2006 July 10
            3  +**
            4  +** The author disclaims copyright to this source code.
            5  +**
            6  +*************************************************************************
            7  +** Defines the interface to tokenizers used by fulltext-search.  There
            8  +** are three basic components:
            9  +**
           10  +** sqlite3_tokenizer_module is a singleton defining the tokenizer
           11  +** interface functions.  This is essentially the class structure for
           12  +** tokenizers.
           13  +**
           14  +** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
           15  +** including customization information defined at creation time.
           16  +**
           17  +** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
           18  +** tokens from a particular input.
           19  +*/
           20  +#ifndef _TOKENIZER_H_
           21  +#define _TOKENIZER_H_
           22  +
           23  +/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
           24  +** If tokenizers are to be allowed to call sqlite3_*() functions, then
           25  +** we will need a way to register the API consistently.
           26  +*/
           27  +#include "sqlite3.h"
           28  +
           29  +/*
           30  +** Structures used by the tokenizer interface.
           31  +*/
           32  +typedef struct sqlite3_tokenizer sqlite3_tokenizer;
           33  +typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
           34  +typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
           35  +
           36  +struct sqlite3_tokenizer_module {
           37  +  int iVersion;                  /* currently 0 */
           38  +
           39  +  /*
           40  +  ** Create and destroy a tokenizer.  argc/argv are passed down from
           41  +  ** the fulltext virtual table creation to allow customization.
           42  +  */
           43  +  int (*xCreate)(int argc, const char **argv,
           44  +                 sqlite3_tokenizer **ppTokenizer);
           45  +  int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
           46  +
           47  +  /*
           48  +  ** Tokenize a particular input.  Call xOpen() to prepare to
           49  +  ** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
           50  +  ** xClose() to free any internal state.  The pInput passed to
           51  +  ** xOpen() must exist until the cursor is closed.  The ppToken
           52  +  ** result from xNext() is only valid until the next call to xNext()
           53  +  ** or until xClose() is called.
           54  +  */
           55  +  /* TODO(shess) current implementation requires pInput to be
           56  +  ** nul-terminated.  This should either be fixed, or pInput/nBytes
           57  +  ** should be converted to zInput.
           58  +  */
           59  +  int (*xOpen)(sqlite3_tokenizer *pTokenizer,
           60  +               const char *pInput, int nBytes,
           61  +               sqlite3_tokenizer_cursor **ppCursor);
           62  +  int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
           63  +  int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
           64  +               const char **ppToken, int *pnBytes,
           65  +               int *piStartOffset, int *piEndOffset, int *piPosition);
           66  +};
           67  +
           68  +struct sqlite3_tokenizer {
           69  +  sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
           70  +  /* Tokenizer implementations will typically add additional fields */
           71  +};
           72  +
           73  +struct sqlite3_tokenizer_cursor {
           74  +  sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */
           75  +  /* Tokenizer implementations will typically add additional fields */
           76  +};
           77  +
           78  +/*
           79  +** Get the module for a tokenizer which generates tokens based on a
           80  +** set of non-token characters.  The default is to break tokens at any
           81  +** non-alnum character, though the set of delimiters can also be
           82  +** specified by the first argv argument to xCreate().
           83  +*/
           84  +/* TODO(shess) This doesn't belong here.  Need some sort of
           85  +** registration process.
           86  +*/
           87  +void get_simple_tokenizer_module(sqlite3_tokenizer_module **ppModule);
           88  +
           89  +#endif /* _TOKENIZER_H_ */