/ Check-in [551dbe30]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge fts3-refactor into the main trunk.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 551dbe30eab5855756e6356f6ba050f5d1791ef8
User & Date: dan 2009-11-19 15:26:54
References
2011-09-14
11:13 Ticket [56be9768] Buffer overrun in FTS prefix queries status still Open with 3 other changes artifact: bf07d265 user: dan
Context
2009-11-19
18:28
Fix a performance regression introduced while reworking the fts3 code. check-in: 7cd178a7 user: dan tags: trunk
15:44
Remove an unreachable branch from the OP_Affinity suppression logic. check-in: 598727e6 user: drh tags: trunk
15:26
Merge fts3-refactor into the main trunk. check-in: 551dbe30 user: dan tags: trunk
15:25
Fix some fts3 related issues with the autoconf and amalgamation build systems. Closed-Leaf check-in: 3b179247 user: dan
14:48
Fix a bug introduced with recent optimizations: The unary minus operator is TK_UMINUS, not TK_MINUS. check-in: 4bd43307 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to Makefile.in.

   307    307     $(TOP)/ext/fts2/fts2_porter.c \
   308    308     $(TOP)/ext/fts2/fts2_tokenizer.h \
   309    309     $(TOP)/ext/fts2/fts2_tokenizer.c \
   310    310     $(TOP)/ext/fts2/fts2_tokenizer1.c
   311    311   SRC += \
   312    312     $(TOP)/ext/fts3/fts3.c \
   313    313     $(TOP)/ext/fts3/fts3.h \
          314  +  $(TOP)/ext/fts3/fts3Int.h \
   314    315     $(TOP)/ext/fts3/fts3_expr.c \
   315         -  $(TOP)/ext/fts3/fts3_expr.h \
   316    316     $(TOP)/ext/fts3/fts3_hash.c \
   317    317     $(TOP)/ext/fts3/fts3_hash.h \
   318    318     $(TOP)/ext/fts3/fts3_icu.c \
   319    319     $(TOP)/ext/fts3/fts3_porter.c \
          320  +  $(TOP)/ext/fts3/fts3_snippet.c \
   320    321     $(TOP)/ext/fts3/fts3_tokenizer.h \
   321    322     $(TOP)/ext/fts3/fts3_tokenizer.c \
   322         -  $(TOP)/ext/fts3/fts3_tokenizer1.c
          323  +  $(TOP)/ext/fts3/fts3_tokenizer1.c \
          324  +  $(TOP)/ext/fts3/fts3_write.c
   323    325   SRC += \
   324    326     $(TOP)/ext/icu/sqliteicu.h \
   325    327     $(TOP)/ext/icu/icu.c
   326    328   SRC += \
   327    329     $(TOP)/ext/rtree/rtree.h \
   328    330     $(TOP)/ext/rtree/rtree.c
   329    331   
................................................................................
   424    426     $(TOP)/ext/fts1/fts1_tokenizer.h
   425    427   HDR += \
   426    428     $(TOP)/ext/fts2/fts2.h \
   427    429     $(TOP)/ext/fts2/fts2_hash.h \
   428    430     $(TOP)/ext/fts2/fts2_tokenizer.h
   429    431   HDR += \
   430    432     $(TOP)/ext/fts3/fts3.h \
   431         -  $(TOP)/ext/fts3/fts3_expr.h \
          433  +  $(TOP)/ext/fts3/fts3Int.h \
   432    434     $(TOP)/ext/fts3/fts3_hash.h \
   433    435     $(TOP)/ext/fts3/fts3_tokenizer.h
   434    436   HDR += \
   435    437     $(TOP)/ext/rtree/rtree.h
   436    438   HDR += \
   437    439     $(TOP)/ext/icu/sqliteicu.h
   438    440   

Changes to ext/fts3/fts3.c.

   274    274   
   275    275   #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
   276    276   
   277    277   #if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE)
   278    278   # define SQLITE_CORE 1
   279    279   #endif
   280    280   
          281  +#include "fts3Int.h"
          282  +
   281    283   #include <assert.h>
   282    284   #include <stdlib.h>
          285  +#include <stddef.h>
   283    286   #include <stdio.h>
   284    287   #include <string.h>
   285         -#include <ctype.h>
   286    288   
   287    289   #include "fts3.h"
   288         -#include "fts3_expr.h"
   289         -#include "fts3_hash.h"
   290         -#include "fts3_tokenizer.h"
   291    290   #ifndef SQLITE_CORE 
   292    291   # include "sqlite3ext.h"
   293    292     SQLITE_EXTENSION_INIT1
   294    293   #endif
          294  +
   295    295   
   296    296   
   297    297   /* TODO(shess) MAN, this thing needs some refactoring.  At minimum, it
   298    298   ** would be nice to order the file better, perhaps something along the
   299    299   ** lines of:
   300    300   **
   301    301   **  - utility functions
................................................................................
   309    309   
   310    310   #if 0
   311    311   # define FTSTRACE(A)  printf A; fflush(stdout)
   312    312   #else
   313    313   # define FTSTRACE(A)
   314    314   #endif
   315    315   
   316         -/* It is not safe to call isspace(), tolower(), or isalnum() on
   317         -** hi-bit-set characters.  This is the same solution used in the
   318         -** tokenizer.
   319         -*/
   320         -/* TODO(shess) The snippet-generation code should be using the
   321         -** tokenizer-generated tokens rather than doing its own local
   322         -** tokenization.
   323         -*/
   324         -/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
   325         -static int safe_isspace(char c){
   326         -  return (c&0x80)==0 ? isspace(c) : 0;
   327         -}
   328         -static int safe_tolower(char c){
   329         -  return (c&0x80)==0 ? tolower(c) : c;
   330         -}
   331         -static int safe_isalnum(char c){
   332         -  return (c&0x80)==0 ? isalnum(c) : 0;
   333         -}
   334         -
   335    316   typedef enum DocListType {
   336    317     DL_DOCIDS,              /* docids only */
   337    318     DL_POSITIONS,           /* docids + positions */
   338    319     DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
   339    320   } DocListType;
   340    321   
   341    322   /*
................................................................................
   353    334   
   354    335   enum {
   355    336     POS_END = 0,        /* end of this position list */
   356    337     POS_COLUMN,         /* followed by new column number */
   357    338     POS_BASE
   358    339   };
   359    340   
   360         -/* MERGE_COUNT controls how often we merge segments (see comment at
   361         -** top of file).
   362         -*/
   363         -#define MERGE_COUNT 16
   364         -
   365    341   /* utility functions */
   366    342   
   367    343   /* CLEAR() and SCRAMBLE() abstract memset() on a pointer to a single
   368    344   ** record to prevent errors of the form:
   369    345   **
   370    346   ** my_function(SomeType *b){
   371    347   **   memset(b, '\0', sizeof(b));  // sizeof(b)!=sizeof(*b)
................................................................................
   376    352   
   377    353   #ifndef NDEBUG
   378    354   #  define SCRAMBLE(b) memset(b, 0x55, sizeof(*(b)))
   379    355   #else
   380    356   #  define SCRAMBLE(b)
   381    357   #endif
   382    358   
   383         -/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
   384         -#define VARINT_MAX 10
   385         -
   386         -/* Write a 64-bit variable-length integer to memory starting at p[0].
   387         - * The length of data written will be between 1 and VARINT_MAX bytes.
   388         - * The number of bytes written is returned. */
   389         -static int fts3PutVarint(char *p, sqlite_int64 v){
          359  +/* 
          360  +** Write a 64-bit variable-length integer to memory starting at p[0].
          361  +** The length of data written will be between 1 and FTS3_VARINT_MAX bytes.
          362  +** The number of bytes written is returned.
          363  +*/
          364  +int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){
   390    365     unsigned char *q = (unsigned char *) p;
   391    366     sqlite_uint64 vu = v;
   392    367     do{
   393    368       *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
   394    369       vu >>= 7;
   395    370     }while( vu!=0 );
   396    371     q[-1] &= 0x7f;  /* turn off high bit in final byte */
   397         -  assert( q - (unsigned char *)p <= VARINT_MAX );
          372  +  assert( q - (unsigned char *)p <= FTS3_VARINT_MAX );
   398    373     return (int) (q - (unsigned char *)p);
   399    374   }
   400    375   
   401         -/* Read a 64-bit variable-length integer from memory starting at p[0].
   402         - * Return the number of bytes read, or 0 on error.
   403         - * The value is stored in *v. */
   404         -static int fts3GetVarint(const char *p, sqlite_int64 *v){
          376  +/* 
          377  +** Read a 64-bit variable-length integer from memory starting at p[0].
          378  +** Return the number of bytes read, or 0 on error.
          379  +** The value is stored in *v.
          380  +*/
          381  +int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){
   405    382     const unsigned char *q = (const unsigned char *) p;
   406    383     sqlite_uint64 x = 0, y = 1;
   407    384     while( (*q & 0x80) == 0x80 ){
   408    385       x += y * (*q++ & 0x7f);
   409    386       y <<= 7;
   410         -    if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
          387  +    if( q - (unsigned char *)p >= FTS3_VARINT_MAX ){  /* bad data */
   411    388         assert( 0 );
   412    389         return 0;
   413    390       }
   414    391     }
   415    392     x += y * (*q++);
   416    393     *v = (sqlite_int64) x;
   417    394     return (int) (q - (unsigned char *)p);
   418    395   }
   419    396   
   420         -static int fts3GetVarint32(const char *p, int *pi){
          397  +/*
          398  +** Similar to sqlite3Fts3GetVarint(), except that the output is truncated to a
          399  +** 32-bit integer before it is returned.
          400  +*/
          401  +int sqlite3Fts3GetVarint32(const char *p, int *pi){
   421    402    sqlite_int64 i;
   422         - int ret = fts3GetVarint(p, &i);
          403  + int ret = sqlite3Fts3GetVarint(p, &i);
   423    404    *pi = (int) i;
   424    405    assert( *pi==i );
   425    406    return ret;
   426    407   }
   427    408   
   428         -/*******************************************************************/
   429         -/* DataBuffer is used to collect data into a buffer in piecemeal
   430         -** fashion.  It implements the usual distinction between amount of
   431         -** data currently stored (nData) and buffer capacity (nCapacity).
   432         -**
   433         -** dataBufferInit - create a buffer with given initial capacity.
   434         -** dataBufferReset - forget buffer's data, retaining capacity.
   435         -** dataBufferDestroy - free buffer's data.
   436         -** dataBufferSwap - swap contents of two buffers.
   437         -** dataBufferExpand - expand capacity without adding data.
   438         -** dataBufferAppend - append data.
   439         -** dataBufferAppend2 - append two pieces of data at once.
   440         -** dataBufferReplace - replace buffer's data.
   441         -*/
   442         -typedef struct DataBuffer {
   443         -  char *pData;          /* Pointer to malloc'ed buffer. */
   444         -  int nCapacity;        /* Size of pData buffer. */
   445         -  int nData;            /* End of data loaded into pData. */
   446         -} DataBuffer;
   447         -
   448         -static void dataBufferInit(DataBuffer *pBuffer, int nCapacity){
   449         -  assert( nCapacity>=0 );
   450         -  pBuffer->nData = 0;
   451         -  pBuffer->nCapacity = nCapacity;
   452         -  pBuffer->pData = nCapacity==0 ? NULL : sqlite3_malloc(nCapacity);
   453         -}
   454         -static void dataBufferReset(DataBuffer *pBuffer){
   455         -  pBuffer->nData = 0;
   456         -}
   457         -static void dataBufferDestroy(DataBuffer *pBuffer){
   458         -  if( pBuffer->pData!=NULL ) sqlite3_free(pBuffer->pData);
   459         -  SCRAMBLE(pBuffer);
   460         -}
   461         -static void dataBufferSwap(DataBuffer *pBuffer1, DataBuffer *pBuffer2){
   462         -  DataBuffer tmp = *pBuffer1;
   463         -  *pBuffer1 = *pBuffer2;
   464         -  *pBuffer2 = tmp;
   465         -}
   466         -static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){
   467         -  assert( nAddCapacity>0 );
   468         -  /* TODO(shess) Consider expanding more aggressively.  Note that the
   469         -  ** underlying malloc implementation may take care of such things for
   470         -  ** us already.
   471         -  */
   472         -  if( pBuffer->nData+nAddCapacity>pBuffer->nCapacity ){
   473         -    pBuffer->nCapacity = pBuffer->nData+nAddCapacity;
   474         -    pBuffer->pData = sqlite3_realloc(pBuffer->pData, pBuffer->nCapacity);
   475         -  }
   476         -}
   477         -static void dataBufferAppend(DataBuffer *pBuffer,
   478         -                             const char *pSource, int nSource){
   479         -  assert( nSource>0 && pSource!=NULL );
   480         -  dataBufferExpand(pBuffer, nSource);
   481         -  memcpy(pBuffer->pData+pBuffer->nData, pSource, nSource);
   482         -  pBuffer->nData += nSource;
   483         -}
   484         -static void dataBufferAppend2(DataBuffer *pBuffer,
   485         -                              const char *pSource1, int nSource1,
   486         -                              const char *pSource2, int nSource2){
   487         -  assert( nSource1>0 && pSource1!=NULL );
   488         -  assert( nSource2>0 && pSource2!=NULL );
   489         -  dataBufferExpand(pBuffer, nSource1+nSource2);
   490         -  memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1);
   491         -  memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2);
   492         -  pBuffer->nData += nSource1+nSource2;
   493         -}
   494         -static void dataBufferReplace(DataBuffer *pBuffer,
   495         -                              const char *pSource, int nSource){
   496         -  dataBufferReset(pBuffer);
   497         -  dataBufferAppend(pBuffer, pSource, nSource);
   498         -}
   499         -
   500         -/* StringBuffer is a null-terminated version of DataBuffer. */
   501         -typedef struct StringBuffer {
   502         -  DataBuffer b;            /* Includes null terminator. */
   503         -} StringBuffer;
   504         -
   505         -static void initStringBuffer(StringBuffer *sb){
   506         -  dataBufferInit(&sb->b, 100);
   507         -  dataBufferReplace(&sb->b, "", 1);
   508         -}
   509         -static int stringBufferLength(StringBuffer *sb){
   510         -  return sb->b.nData-1;
   511         -}
   512         -static char *stringBufferData(StringBuffer *sb){
   513         -  return sb->b.pData;
   514         -}
   515         -static void stringBufferDestroy(StringBuffer *sb){
   516         -  dataBufferDestroy(&sb->b);
   517         -}
   518         -
   519         -static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
   520         -  assert( sb->b.nData>0 );
   521         -  if( nFrom>0 ){
   522         -    sb->b.nData--;
   523         -    dataBufferAppend2(&sb->b, zFrom, nFrom, "", 1);
   524         -  }
   525         -}
   526         -static void append(StringBuffer *sb, const char *zFrom){
   527         -  nappend(sb, zFrom, strlen(zFrom));
   528         -}
   529         -
   530         -/* Append a list of strings separated by commas. */
   531         -static void appendList(StringBuffer *sb, int nString, char **azString){
   532         -  int i;
   533         -  for(i=0; i<nString; ++i){
   534         -    if( i>0 ) append(sb, ", ");
   535         -    append(sb, azString[i]);
   536         -  }
   537         -}
   538         -
   539         -static int endsInWhiteSpace(StringBuffer *p){
   540         -  return stringBufferLength(p)>0 &&
   541         -    safe_isspace(stringBufferData(p)[stringBufferLength(p)-1]);
   542         -}
   543         -
   544         -/* If the StringBuffer ends in something other than white space, add a
   545         -** single space character to the end.
   546         -*/
   547         -static void appendWhiteSpace(StringBuffer *p){
   548         -  if( stringBufferLength(p)==0 ) return;
   549         -  if( !endsInWhiteSpace(p) ) append(p, " ");
   550         -}
   551         -
   552         -/* Remove white space from the end of the StringBuffer */
   553         -static void trimWhiteSpace(StringBuffer *p){
   554         -  while( endsInWhiteSpace(p) ){
   555         -    p->b.pData[--p->b.nData-1] = '\0';
   556         -  }
   557         -}
   558         -
   559         -/*******************************************************************/
   560         -/* DLReader is used to read document elements from a doclist.  The
   561         -** current docid is cached, so dlrDocid() is fast.  DLReader does not
   562         -** own the doclist buffer.
   563         -**
   564         -** dlrAtEnd - true if there's no more data to read.
   565         -** dlrDocid - docid of current document.
   566         -** dlrDocData - doclist data for current document (including docid).
   567         -** dlrDocDataBytes - length of same.
   568         -** dlrAllDataBytes - length of all remaining data.
   569         -** dlrPosData - position data for current document.
   570         -** dlrPosDataLen - length of pos data for current document (incl POS_END).
   571         -** dlrStep - step to current document.
   572         -** dlrInit - initialize for doclist of given type against given data.
   573         -** dlrDestroy - clean up.
   574         -**
   575         -** Expected usage is something like:
   576         -**
   577         -**   DLReader reader;
   578         -**   dlrInit(&reader, pData, nData);
   579         -**   while( !dlrAtEnd(&reader) ){
   580         -**     // calls to dlrDocid() and kin.
   581         -**     dlrStep(&reader);
   582         -**   }
   583         -**   dlrDestroy(&reader);
   584         -*/
   585         -typedef struct DLReader {
   586         -  DocListType iType;
   587         -  const char *pData;
   588         -  int nData;
   589         -
   590         -  sqlite_int64 iDocid;
   591         -  int nElement;
   592         -} DLReader;
   593         -
   594         -static int dlrAtEnd(DLReader *pReader){
   595         -  assert( pReader->nData>=0 );
   596         -  return pReader->nData==0;
   597         -}
   598         -static sqlite_int64 dlrDocid(DLReader *pReader){
   599         -  assert( !dlrAtEnd(pReader) );
   600         -  return pReader->iDocid;
   601         -}
   602         -static const char *dlrDocData(DLReader *pReader){
   603         -  assert( !dlrAtEnd(pReader) );
   604         -  return pReader->pData;
   605         -}
   606         -static int dlrDocDataBytes(DLReader *pReader){
   607         -  assert( !dlrAtEnd(pReader) );
   608         -  return pReader->nElement;
   609         -}
   610         -static int dlrAllDataBytes(DLReader *pReader){
   611         -  assert( !dlrAtEnd(pReader) );
   612         -  return pReader->nData;
   613         -}
   614         -/* TODO(shess) Consider adding a field to track iDocid varint length
   615         -** to make these two functions faster.  This might matter (a tiny bit)
   616         -** for queries.
   617         -*/
   618         -static const char *dlrPosData(DLReader *pReader){
   619         -  sqlite_int64 iDummy;
   620         -  int n = fts3GetVarint(pReader->pData, &iDummy);
   621         -  assert( !dlrAtEnd(pReader) );
   622         -  return pReader->pData+n;
   623         -}
   624         -static int dlrPosDataLen(DLReader *pReader){
   625         -  sqlite_int64 iDummy;
   626         -  int n = fts3GetVarint(pReader->pData, &iDummy);
   627         -  assert( !dlrAtEnd(pReader) );
   628         -  return pReader->nElement-n;
   629         -}
   630         -static void dlrStep(DLReader *pReader){
   631         -  assert( !dlrAtEnd(pReader) );
   632         -
   633         -  /* Skip past current doclist element. */
   634         -  assert( pReader->nElement<=pReader->nData );
   635         -  pReader->pData += pReader->nElement;
   636         -  pReader->nData -= pReader->nElement;
   637         -
   638         -  /* If there is more data, read the next doclist element. */
   639         -  if( pReader->nData!=0 ){
   640         -    sqlite_int64 iDocidDelta;
   641         -    int iDummy, n = fts3GetVarint(pReader->pData, &iDocidDelta);
   642         -    pReader->iDocid += iDocidDelta;
   643         -    if( pReader->iType>=DL_POSITIONS ){
   644         -      assert( n<pReader->nData );
   645         -      while( 1 ){
   646         -        n += fts3GetVarint32(pReader->pData+n, &iDummy);
   647         -        assert( n<=pReader->nData );
   648         -        if( iDummy==POS_END ) break;
   649         -        if( iDummy==POS_COLUMN ){
   650         -          n += fts3GetVarint32(pReader->pData+n, &iDummy);
   651         -          assert( n<pReader->nData );
   652         -        }else if( pReader->iType==DL_POSITIONS_OFFSETS ){
   653         -          n += fts3GetVarint32(pReader->pData+n, &iDummy);
   654         -          n += fts3GetVarint32(pReader->pData+n, &iDummy);
   655         -          assert( n<pReader->nData );
   656         -        }
   657         -      }
   658         -    }
   659         -    pReader->nElement = n;
   660         -    assert( pReader->nElement<=pReader->nData );
   661         -  }
   662         -}
   663         -static void dlrInit(DLReader *pReader, DocListType iType,
   664         -                    const char *pData, int nData){
   665         -  assert( pData!=NULL && nData!=0 );
   666         -  pReader->iType = iType;
   667         -  pReader->pData = pData;
   668         -  pReader->nData = nData;
   669         -  pReader->nElement = 0;
   670         -  pReader->iDocid = 0;
   671         -
   672         -  /* Load the first element's data.  There must be a first element. */
   673         -  dlrStep(pReader);
   674         -}
   675         -static void dlrDestroy(DLReader *pReader){
   676         -  SCRAMBLE(pReader);
   677         -}
   678         -
   679         -#ifndef NDEBUG
   680         -/* Verify that the doclist can be validly decoded.  Also returns the
   681         -** last docid found because it is convenient in other assertions for
   682         -** DLWriter.
   683         -*/
   684         -static void docListValidate(DocListType iType, const char *pData, int nData,
   685         -                            sqlite_int64 *pLastDocid){
   686         -  sqlite_int64 iPrevDocid = 0;
   687         -  assert( nData>0 );
   688         -  assert( pData!=0 );
   689         -  assert( pData+nData>pData );
   690         -  while( nData!=0 ){
   691         -    sqlite_int64 iDocidDelta;
   692         -    int n = fts3GetVarint(pData, &iDocidDelta);
   693         -    iPrevDocid += iDocidDelta;
   694         -    if( iType>DL_DOCIDS ){
   695         -      int iDummy;
   696         -      while( 1 ){
   697         -        n += fts3GetVarint32(pData+n, &iDummy);
   698         -        if( iDummy==POS_END ) break;
   699         -        if( iDummy==POS_COLUMN ){
   700         -          n += fts3GetVarint32(pData+n, &iDummy);
   701         -        }else if( iType>DL_POSITIONS ){
   702         -          n += fts3GetVarint32(pData+n, &iDummy);
   703         -          n += fts3GetVarint32(pData+n, &iDummy);
   704         -        }
   705         -        assert( n<=nData );
   706         -      }
   707         -    }
   708         -    assert( n<=nData );
   709         -    pData += n;
   710         -    nData -= n;
   711         -  }
   712         -  if( pLastDocid ) *pLastDocid = iPrevDocid;
   713         -}
   714         -#define ASSERT_VALID_DOCLIST(i, p, n, o) docListValidate(i, p, n, o)
   715         -#else
   716         -#define ASSERT_VALID_DOCLIST(i, p, n, o) assert( 1 )
   717         -#endif
   718         -
   719         -/*******************************************************************/
   720         -/* DLWriter is used to write doclist data to a DataBuffer.  DLWriter
   721         -** always appends to the buffer and does not own it.
   722         -**
   723         -** dlwInit - initialize to write a given type doclist to a buffer.
   724         -** dlwDestroy - clear the writer's memory.  Does not free buffer.
   725         -** dlwAppend - append raw doclist data to buffer.
   726         -** dlwCopy - copy next doclist from reader to writer.
   727         -** dlwAdd - construct doclist element and append to buffer.
   728         -**    Only apply dlwAdd() to DL_DOCIDS doclists (else use PLWriter).
   729         -*/
   730         -typedef struct DLWriter {
   731         -  DocListType iType;
   732         -  DataBuffer *b;
   733         -  sqlite_int64 iPrevDocid;
   734         -#ifndef NDEBUG
   735         -  int has_iPrevDocid;
   736         -#endif
   737         -} DLWriter;
   738         -
   739         -static void dlwInit(DLWriter *pWriter, DocListType iType, DataBuffer *b){
   740         -  pWriter->b = b;
   741         -  pWriter->iType = iType;
   742         -  pWriter->iPrevDocid = 0;
   743         -#ifndef NDEBUG
   744         -  pWriter->has_iPrevDocid = 0;
   745         -#endif
   746         -}
   747         -static void dlwDestroy(DLWriter *pWriter){
   748         -  SCRAMBLE(pWriter);
   749         -}
   750         -/* iFirstDocid is the first docid in the doclist in pData.  It is
   751         -** needed because pData may point within a larger doclist, in which
   752         -** case the first item would be delta-encoded.
   753         -**
   754         -** iLastDocid is the final docid in the doclist in pData.  It is
   755         -** needed to create the new iPrevDocid for future delta-encoding.  The
   756         -** code could decode the passed doclist to recreate iLastDocid, but
   757         -** the only current user (docListMerge) already has decoded this
   758         -** information.
   759         -*/
   760         -/* TODO(shess) This has become just a helper for docListMerge.
   761         -** Consider a refactor to make this cleaner.
   762         -*/
   763         -static void dlwAppend(DLWriter *pWriter,
   764         -                      const char *pData, int nData,
   765         -                      sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){
   766         -  sqlite_int64 iDocid = 0;
   767         -  char c[VARINT_MAX];
   768         -  int nFirstOld, nFirstNew;     /* Old and new varint len of first docid. */
   769         -#ifndef NDEBUG
   770         -  sqlite_int64 iLastDocidDelta;
   771         -#endif
   772         -
   773         -  /* Recode the initial docid as delta from iPrevDocid. */
   774         -  nFirstOld = fts3GetVarint(pData, &iDocid);
   775         -  assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) );
   776         -  nFirstNew = fts3PutVarint(c, iFirstDocid-pWriter->iPrevDocid);
   777         -
   778         -  /* Verify that the incoming doclist is valid AND that it ends with
   779         -  ** the expected docid.  This is essential because we'll trust this
   780         -  ** docid in future delta-encoding.
   781         -  */
   782         -  ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta);
   783         -  assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta );
   784         -
   785         -  /* Append recoded initial docid and everything else.  Rest of docids
   786         -  ** should have been delta-encoded from previous initial docid.
   787         -  */
   788         -  if( nFirstOld<nData ){
   789         -    dataBufferAppend2(pWriter->b, c, nFirstNew,
   790         -                      pData+nFirstOld, nData-nFirstOld);
   791         -  }else{
   792         -    dataBufferAppend(pWriter->b, c, nFirstNew);
   793         -  }
   794         -  pWriter->iPrevDocid = iLastDocid;
   795         -}
   796         -static void dlwCopy(DLWriter *pWriter, DLReader *pReader){
   797         -  dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader),
   798         -            dlrDocid(pReader), dlrDocid(pReader));
   799         -}
   800         -static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){
   801         -  char c[VARINT_MAX];
   802         -  int n = fts3PutVarint(c, iDocid-pWriter->iPrevDocid);
   803         -
   804         -  /* Docids must ascend. */
   805         -  assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid );
   806         -  assert( pWriter->iType==DL_DOCIDS );
   807         -
   808         -  dataBufferAppend(pWriter->b, c, n);
   809         -  pWriter->iPrevDocid = iDocid;
   810         -#ifndef NDEBUG
   811         -  pWriter->has_iPrevDocid = 1;
   812         -#endif
   813         -}
   814         -
   815         -/*******************************************************************/
   816         -/* PLReader is used to read data from a document's position list.  As
   817         -** the caller steps through the list, data is cached so that varints
   818         -** only need to be decoded once.
   819         -**
   820         -** plrInit, plrDestroy - create/destroy a reader.
   821         -** plrColumn, plrPosition, plrStartOffset, plrEndOffset - accessors
   822         -** plrAtEnd - at end of stream, only call plrDestroy once true.
   823         -** plrStep - step to the next element.
   824         -*/
   825         -typedef struct PLReader {
   826         -  /* These refer to the next position's data.  nData will reach 0 when
   827         -  ** reading the last position, so plrStep() signals EOF by setting
   828         -  ** pData to NULL.
   829         -  */
   830         -  const char *pData;
   831         -  int nData;
   832         -
   833         -  DocListType iType;
   834         -  int iColumn;         /* the last column read */
   835         -  int iPosition;       /* the last position read */
   836         -  int iStartOffset;    /* the last start offset read */
   837         -  int iEndOffset;      /* the last end offset read */
   838         -} PLReader;
   839         -
   840         -static int plrAtEnd(PLReader *pReader){
   841         -  return pReader->pData==NULL;
   842         -}
   843         -static int plrColumn(PLReader *pReader){
   844         -  assert( !plrAtEnd(pReader) );
   845         -  return pReader->iColumn;
   846         -}
   847         -static int plrPosition(PLReader *pReader){
   848         -  assert( !plrAtEnd(pReader) );
   849         -  return pReader->iPosition;
   850         -}
   851         -static int plrStartOffset(PLReader *pReader){
   852         -  assert( !plrAtEnd(pReader) );
   853         -  return pReader->iStartOffset;
   854         -}
   855         -static int plrEndOffset(PLReader *pReader){
   856         -  assert( !plrAtEnd(pReader) );
   857         -  return pReader->iEndOffset;
   858         -}
   859         -static void plrStep(PLReader *pReader){
   860         -  int i, n;
   861         -
   862         -  assert( !plrAtEnd(pReader) );
   863         -
   864         -  if( pReader->nData==0 ){
   865         -    pReader->pData = NULL;
   866         -    return;
   867         -  }
   868         -
   869         -  n = fts3GetVarint32(pReader->pData, &i);
   870         -  if( i==POS_COLUMN ){
   871         -    n += fts3GetVarint32(pReader->pData+n, &pReader->iColumn);
   872         -    pReader->iPosition = 0;
   873         -    pReader->iStartOffset = 0;
   874         -    n += fts3GetVarint32(pReader->pData+n, &i);
   875         -  }
   876         -  /* Should never see adjacent column changes. */
   877         -  assert( i!=POS_COLUMN );
   878         -
   879         -  if( i==POS_END ){
   880         -    pReader->nData = 0;
   881         -    pReader->pData = NULL;
   882         -    return;
   883         -  }
   884         -
   885         -  pReader->iPosition += i-POS_BASE;
   886         -  if( pReader->iType==DL_POSITIONS_OFFSETS ){
   887         -    n += fts3GetVarint32(pReader->pData+n, &i);
   888         -    pReader->iStartOffset += i;
   889         -    n += fts3GetVarint32(pReader->pData+n, &i);
   890         -    pReader->iEndOffset = pReader->iStartOffset+i;
   891         -  }
   892         -  assert( n<=pReader->nData );
   893         -  pReader->pData += n;
   894         -  pReader->nData -= n;
   895         -}
   896         -
   897         -static void plrInit(PLReader *pReader, DLReader *pDLReader){
   898         -  pReader->pData = dlrPosData(pDLReader);
   899         -  pReader->nData = dlrPosDataLen(pDLReader);
   900         -  pReader->iType = pDLReader->iType;
   901         -  pReader->iColumn = 0;
   902         -  pReader->iPosition = 0;
   903         -  pReader->iStartOffset = 0;
   904         -  pReader->iEndOffset = 0;
   905         -  plrStep(pReader);
   906         -}
   907         -static void plrDestroy(PLReader *pReader){
   908         -  SCRAMBLE(pReader);
   909         -}
   910         -
   911         -/*******************************************************************/
   912         -/* PLWriter is used in constructing a document's position list.  As a
   913         -** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op.
   914         -** PLWriter writes to the associated DLWriter's buffer.
   915         -**
   916         -** plwInit - init for writing a document's poslist.
   917         -** plwDestroy - clear a writer.
   918         -** plwAdd - append position and offset information.
   919         -** plwCopy - copy next position's data from reader to writer.
   920         -** plwTerminate - add any necessary doclist terminator.
   921         -**
   922         -** Calling plwAdd() after plwTerminate() may result in a corrupt
   923         -** doclist.
   924         -*/
   925         -/* TODO(shess) Until we've written the second item, we can cache the
   926         -** first item's information.  Then we'd have three states:
   927         -**
   928         -** - initialized with docid, no positions.
   929         -** - docid and one position.
   930         -** - docid and multiple positions.
   931         -**
   932         -** Only the last state needs to actually write to dlw->b, which would
   933         -** be an improvement in the DLCollector case.
   934         -*/
   935         -typedef struct PLWriter {
   936         -  DLWriter *dlw;
   937         -
   938         -  int iColumn;    /* the last column written */
   939         -  int iPos;       /* the last position written */
   940         -  int iOffset;    /* the last start offset written */
   941         -} PLWriter;
   942         -
   943         -/* TODO(shess) In the case where the parent is reading these values
   944         -** from a PLReader, we could optimize to a copy if that PLReader has
   945         -** the same type as pWriter.
   946         -*/
   947         -static void plwAdd(PLWriter *pWriter, int iColumn, int iPos,
   948         -                   int iStartOffset, int iEndOffset){
   949         -  /* Worst-case space for POS_COLUMN, iColumn, iPosDelta,
   950         -  ** iStartOffsetDelta, and iEndOffsetDelta.
   951         -  */
   952         -  char c[5*VARINT_MAX];
   953         -  int n = 0;
   954         -
   955         -  /* Ban plwAdd() after plwTerminate(). */
   956         -  assert( pWriter->iPos!=-1 );
   957         -
   958         -  if( pWriter->dlw->iType==DL_DOCIDS ) return;
   959         -
   960         -  if( iColumn!=pWriter->iColumn ){
   961         -    n += fts3PutVarint(c+n, POS_COLUMN);
   962         -    n += fts3PutVarint(c+n, iColumn);
   963         -    pWriter->iColumn = iColumn;
   964         -    pWriter->iPos = 0;
   965         -    pWriter->iOffset = 0;
   966         -  }
   967         -  assert( iPos>=pWriter->iPos );
   968         -  n += fts3PutVarint(c+n, POS_BASE+(iPos-pWriter->iPos));
   969         -  pWriter->iPos = iPos;
   970         -  if( pWriter->dlw->iType==DL_POSITIONS_OFFSETS ){
   971         -    assert( iStartOffset>=pWriter->iOffset );
   972         -    n += fts3PutVarint(c+n, iStartOffset-pWriter->iOffset);
   973         -    pWriter->iOffset = iStartOffset;
   974         -    assert( iEndOffset>=iStartOffset );
   975         -    n += fts3PutVarint(c+n, iEndOffset-iStartOffset);
   976         -  }
   977         -  dataBufferAppend(pWriter->dlw->b, c, n);
   978         -}
   979         -static void plwCopy(PLWriter *pWriter, PLReader *pReader){
   980         -  plwAdd(pWriter, plrColumn(pReader), plrPosition(pReader),
   981         -         plrStartOffset(pReader), plrEndOffset(pReader));
   982         -}
   983         -static void plwInit(PLWriter *pWriter, DLWriter *dlw, sqlite_int64 iDocid){
   984         -  char c[VARINT_MAX];
   985         -  int n;
   986         -
   987         -  pWriter->dlw = dlw;
   988         -
   989         -  /* Docids must ascend. */
   990         -  assert( !pWriter->dlw->has_iPrevDocid || iDocid>pWriter->dlw->iPrevDocid );
   991         -  n = fts3PutVarint(c, iDocid-pWriter->dlw->iPrevDocid);
   992         -  dataBufferAppend(pWriter->dlw->b, c, n);
   993         -  pWriter->dlw->iPrevDocid = iDocid;
   994         -#ifndef NDEBUG
   995         -  pWriter->dlw->has_iPrevDocid = 1;
   996         -#endif
   997         -
   998         -  pWriter->iColumn = 0;
   999         -  pWriter->iPos = 0;
  1000         -  pWriter->iOffset = 0;
  1001         -}
  1002         -/* TODO(shess) Should plwDestroy() also terminate the doclist?  But
  1003         -** then plwDestroy() would no longer be just a destructor, it would
  1004         -** also be doing work, which isn't consistent with the overall idiom.
  1005         -** Another option would be for plwAdd() to always append any necessary
  1006         -** terminator, so that the output is always correct.  But that would
  1007         -** add incremental work to the common case with the only benefit being
  1008         -** API elegance.  Punt for now.
  1009         -*/
  1010         -static void plwTerminate(PLWriter *pWriter){
  1011         -  if( pWriter->dlw->iType>DL_DOCIDS ){
  1012         -    char c[VARINT_MAX];
  1013         -    int n = fts3PutVarint(c, POS_END);
  1014         -    dataBufferAppend(pWriter->dlw->b, c, n);
  1015         -  }
  1016         -#ifndef NDEBUG
  1017         -  /* Mark as terminated for assert in plwAdd(). */
  1018         -  pWriter->iPos = -1;
  1019         -#endif
  1020         -}
  1021         -static void plwDestroy(PLWriter *pWriter){
  1022         -  SCRAMBLE(pWriter);
  1023         -}
  1024         -
  1025         -/*******************************************************************/
  1026         -/* DLCollector wraps PLWriter and DLWriter to provide a
  1027         -** dynamically-allocated doclist area to use during tokenization.
  1028         -**
  1029         -** dlcNew - malloc up and initialize a collector.
  1030         -** dlcDelete - destroy a collector and all contained items.
  1031         -** dlcAddPos - append position and offset information.
  1032         -** dlcAddDoclist - add the collected doclist to the given buffer.
  1033         -** dlcNext - terminate the current document and open another.
  1034         -*/
  1035         -typedef struct DLCollector {
  1036         -  DataBuffer b;
  1037         -  DLWriter dlw;
  1038         -  PLWriter plw;
  1039         -} DLCollector;
  1040         -
  1041         -/* TODO(shess) This could also be done by calling plwTerminate() and
  1042         -** dataBufferAppend().  I tried that, expecting nominal performance
  1043         -** differences, but it seemed to pretty reliably be worth 1% to code
  1044         -** it this way.  I suspect it is the incremental malloc overhead (some
  1045         -** percentage of the plwTerminate() calls will cause a realloc), so
  1046         -** this might be worth revisiting if the DataBuffer implementation
  1047         -** changes.
  1048         -*/
  1049         -static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){
  1050         -  if( pCollector->dlw.iType>DL_DOCIDS ){
  1051         -    char c[VARINT_MAX];
  1052         -    int n = fts3PutVarint(c, POS_END);
  1053         -    dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n);
  1054         -  }else{
  1055         -    dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData);
  1056         -  }
  1057         -}
  1058         -static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid){
  1059         -  plwTerminate(&pCollector->plw);
  1060         -  plwDestroy(&pCollector->plw);
  1061         -  plwInit(&pCollector->plw, &pCollector->dlw, iDocid);
  1062         -}
  1063         -static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos,
  1064         -                      int iStartOffset, int iEndOffset){
  1065         -  plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset);
  1066         -}
  1067         -
  1068         -static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){
  1069         -  DLCollector *pCollector = sqlite3_malloc(sizeof(DLCollector));
  1070         -  dataBufferInit(&pCollector->b, 0);
  1071         -  dlwInit(&pCollector->dlw, iType, &pCollector->b);
  1072         -  plwInit(&pCollector->plw, &pCollector->dlw, iDocid);
  1073         -  return pCollector;
  1074         -}
  1075         -static void dlcDelete(DLCollector *pCollector){
  1076         -  plwDestroy(&pCollector->plw);
  1077         -  dlwDestroy(&pCollector->dlw);
  1078         -  dataBufferDestroy(&pCollector->b);
  1079         -  SCRAMBLE(pCollector);
  1080         -  sqlite3_free(pCollector);
  1081         -}
  1082         -
  1083         -
  1084         -/* Copy the doclist data of iType in pData/nData into *out, trimming
  1085         -** unnecessary data as we go.  Only columns matching iColumn are
  1086         -** copied, all columns copied if iColumn is -1.  Elements with no
  1087         -** matching columns are dropped.  The output is an iOutType doclist.
  1088         -*/
  1089         -/* NOTE(shess) This code is only valid after all doclists are merged.
  1090         -** If this is run before merges, then doclist items which represent
  1091         -** deletion will be trimmed, and will thus not effect a deletion
  1092         -** during the merge.
  1093         -*/
  1094         -static void docListTrim(DocListType iType, const char *pData, int nData,
  1095         -                        int iColumn, DocListType iOutType, DataBuffer *out){
  1096         -  DLReader dlReader;
  1097         -  DLWriter dlWriter;
  1098         -
  1099         -  assert( iOutType<=iType );
  1100         -
  1101         -  dlrInit(&dlReader, iType, pData, nData);
  1102         -  dlwInit(&dlWriter, iOutType, out);
  1103         -
  1104         -  while( !dlrAtEnd(&dlReader) ){
  1105         -    PLReader plReader;
  1106         -    PLWriter plWriter;
  1107         -    int match = 0;
  1108         -
  1109         -    plrInit(&plReader, &dlReader);
  1110         -
  1111         -    while( !plrAtEnd(&plReader) ){
  1112         -      if( iColumn==-1 || plrColumn(&plReader)==iColumn ){
  1113         -        if( !match ){
  1114         -          plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader));
  1115         -          match = 1;
  1116         -        }
  1117         -        plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader),
  1118         -               plrStartOffset(&plReader), plrEndOffset(&plReader));
  1119         -      }
  1120         -      plrStep(&plReader);
  1121         -    }
  1122         -    if( match ){
  1123         -      plwTerminate(&plWriter);
  1124         -      plwDestroy(&plWriter);
  1125         -    }
  1126         -
  1127         -    plrDestroy(&plReader);
  1128         -    dlrStep(&dlReader);
  1129         -  }
  1130         -  dlwDestroy(&dlWriter);
  1131         -  dlrDestroy(&dlReader);
  1132         -}
  1133         -
  1134         -/* Used by docListMerge() to keep doclist readers in ascending order by
  1135         -** docid, then ascending order by age (so the newest comes first).
  1136         -*/
  1137         -typedef struct OrderedDLReader {
  1138         -  DLReader *pReader;
  1139         -
  1140         -  /* TODO(shess) If we assume that docListMerge pReaders is ordered by
  1141         -  ** age (which we do), then we could use pReader comparisons to break
  1142         -  ** ties.
  1143         -  */
  1144         -  int idx;
  1145         -} OrderedDLReader;
  1146         -
  1147         -/* Order eof to end, then by docid asc, idx desc. */
  1148         -static int orderedDLReaderCmp(OrderedDLReader *r1, OrderedDLReader *r2){
  1149         -  if( dlrAtEnd(r1->pReader) ){
  1150         -    if( dlrAtEnd(r2->pReader) ) return 0;  /* Both atEnd(). */
  1151         -    return 1;                              /* Only r1 atEnd(). */
  1152         -  }
  1153         -  if( dlrAtEnd(r2->pReader) ) return -1;   /* Only r2 atEnd(). */
  1154         -
  1155         -  if( dlrDocid(r1->pReader)<dlrDocid(r2->pReader) ) return -1;
  1156         -  if( dlrDocid(r1->pReader)>dlrDocid(r2->pReader) ) return 1;
  1157         -
  1158         -  /* Descending on idx. */
  1159         -  return r2->idx-r1->idx;
  1160         -}
  1161         -
  1162         -/* Bubble p[0] to appropriate place in p[1..n-1].  Assumes that
  1163         -** p[1..n-1] is already sorted.
  1164         -*/
  1165         -/* TODO(shess) Is this frequent enough to warrant a binary search?
  1166         -** Before implementing that, instrument the code to check.  In most
  1167         -** current usage, I expect that p[0] will be less than p[1] a very
  1168         -** high proportion of the time.
  1169         -*/
  1170         -static void orderedDLReaderReorder(OrderedDLReader *p, int n){
  1171         -  while( n>1 && orderedDLReaderCmp(p, p+1)>0 ){
  1172         -    OrderedDLReader tmp = p[0];
  1173         -    p[0] = p[1];
  1174         -    p[1] = tmp;
  1175         -    n--;
  1176         -    p++;
  1177         -  }
  1178         -}
  1179         -
  1180         -/* Given an array of doclist readers, merge their doclist elements
  1181         -** into out in sorted order (by docid), dropping elements from older
  1182         -** readers when there is a duplicate docid.  pReaders is assumed to be
  1183         -** ordered by age, oldest first.
  1184         -*/
  1185         -/* TODO(shess) nReaders must be <= MERGE_COUNT.  This should probably
  1186         -** be fixed.
  1187         -*/
  1188         -static void docListMerge(DataBuffer *out,
  1189         -                         DLReader *pReaders, int nReaders){
  1190         -  OrderedDLReader readers[MERGE_COUNT];
  1191         -  DLWriter writer;
  1192         -  int i, n;
  1193         -  const char *pStart = 0;
  1194         -  int nStart = 0;
  1195         -  sqlite_int64 iFirstDocid = 0, iLastDocid = 0;
  1196         -
  1197         -  assert( nReaders>0 );
  1198         -  if( nReaders==1 ){
  1199         -    dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders));
  1200         -    return;
  1201         -  }
  1202         -
  1203         -  assert( nReaders<=MERGE_COUNT );
  1204         -  n = 0;
  1205         -  for(i=0; i<nReaders; i++){
  1206         -    assert( pReaders[i].iType==pReaders[0].iType );
  1207         -    readers[i].pReader = pReaders+i;
  1208         -    readers[i].idx = i;
  1209         -    n += dlrAllDataBytes(&pReaders[i]);
  1210         -  }
  1211         -  /* Conservatively size output to sum of inputs.  Output should end
  1212         -  ** up strictly smaller than input.
  1213         -  */
  1214         -  dataBufferExpand(out, n);
  1215         -
  1216         -  /* Get the readers into sorted order. */
  1217         -  while( i-->0 ){
  1218         -    orderedDLReaderReorder(readers+i, nReaders-i);
  1219         -  }
  1220         -
  1221         -  dlwInit(&writer, pReaders[0].iType, out);
  1222         -  while( !dlrAtEnd(readers[0].pReader) ){
  1223         -    sqlite_int64 iDocid = dlrDocid(readers[0].pReader);
  1224         -
  1225         -    /* If this is a continuation of the current buffer to copy, extend
  1226         -    ** that buffer.  memcpy() seems to be more efficient if it has a
  1227         -    ** lot of data to copy.
  1228         -    */
  1229         -    if( dlrDocData(readers[0].pReader)==pStart+nStart ){
  1230         -      nStart += dlrDocDataBytes(readers[0].pReader);
  1231         -    }else{
  1232         -      if( pStart!=0 ){
  1233         -        dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);
  1234         -      }
  1235         -      pStart = dlrDocData(readers[0].pReader);
  1236         -      nStart = dlrDocDataBytes(readers[0].pReader);
  1237         -      iFirstDocid = iDocid;
  1238         -    }
  1239         -    iLastDocid = iDocid;
  1240         -    dlrStep(readers[0].pReader);
  1241         -
  1242         -    /* Drop all of the older elements with the same docid. */
  1243         -    for(i=1; i<nReaders &&
  1244         -             !dlrAtEnd(readers[i].pReader) &&
  1245         -             dlrDocid(readers[i].pReader)==iDocid; i++){
  1246         -      dlrStep(readers[i].pReader);
  1247         -    }
  1248         -
  1249         -    /* Get the readers back into order. */
  1250         -    while( i-->0 ){
  1251         -      orderedDLReaderReorder(readers+i, nReaders-i);
  1252         -    }
  1253         -  }
  1254         -
  1255         -  /* Copy over any remaining elements. */
  1256         -  if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);
  1257         -  dlwDestroy(&writer);
  1258         -}
  1259         -
  1260         -/* Helper function for posListUnion().  Compares the current position
  1261         -** between left and right, returning per the standard C idiom: <0 if
  1262         -** left<right, >0 if left>right, and 0 if left==right.  "End" always
  1263         -** compares greater.
  1264         -*/
  1265         -static int posListCmp(PLReader *pLeft, PLReader *pRight){
  1266         -  assert( pLeft->iType==pRight->iType );
  1267         -  if( pLeft->iType==DL_DOCIDS ) return 0;
  1268         -
  1269         -  if( plrAtEnd(pLeft) ) return plrAtEnd(pRight) ? 0 : 1;
  1270         -  if( plrAtEnd(pRight) ) return -1;
  1271         -
  1272         -  if( plrColumn(pLeft)<plrColumn(pRight) ) return -1;
  1273         -  if( plrColumn(pLeft)>plrColumn(pRight) ) return 1;
  1274         -
  1275         -  if( plrPosition(pLeft)<plrPosition(pRight) ) return -1;
  1276         -  if( plrPosition(pLeft)>plrPosition(pRight) ) return 1;
  1277         -  if( pLeft->iType==DL_POSITIONS ) return 0;
  1278         -
  1279         -  if( plrStartOffset(pLeft)<plrStartOffset(pRight) ) return -1;
  1280         -  if( plrStartOffset(pLeft)>plrStartOffset(pRight) ) return 1;
  1281         -
  1282         -  if( plrEndOffset(pLeft)<plrEndOffset(pRight) ) return -1;
  1283         -  if( plrEndOffset(pLeft)>plrEndOffset(pRight) ) return 1;
  1284         -
  1285         -  return 0;
  1286         -}
  1287         -
  1288         -/* Write the union of position lists in pLeft and pRight to pOut.
  1289         -** "Union" in this case meaning "All unique position tuples".  Should
  1290         -** work with any doclist type, though both inputs and the output
  1291         -** should be the same type.
  1292         -*/
  1293         -static void posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){
  1294         -  PLReader left, right;
  1295         -  PLWriter writer;
  1296         -
  1297         -  assert( dlrDocid(pLeft)==dlrDocid(pRight) );
  1298         -  assert( pLeft->iType==pRight->iType );
  1299         -  assert( pLeft->iType==pOut->iType );
  1300         -
  1301         -  plrInit(&left, pLeft);
  1302         -  plrInit(&right, pRight);
  1303         -  plwInit(&writer, pOut, dlrDocid(pLeft));
  1304         -
  1305         -  while( !plrAtEnd(&left) || !plrAtEnd(&right) ){
  1306         -    int c = posListCmp(&left, &right);
  1307         -    if( c<0 ){
  1308         -      plwCopy(&writer, &left);
  1309         -      plrStep(&left);
  1310         -    }else if( c>0 ){
  1311         -      plwCopy(&writer, &right);
  1312         -      plrStep(&right);
  1313         -    }else{
  1314         -      plwCopy(&writer, &left);
  1315         -      plrStep(&left);
  1316         -      plrStep(&right);
  1317         -    }
  1318         -  }
  1319         -
  1320         -  plwTerminate(&writer);
  1321         -  plwDestroy(&writer);
  1322         -  plrDestroy(&left);
  1323         -  plrDestroy(&right);
  1324         -}
  1325         -
  1326         -/* Write the union of doclists in pLeft and pRight to pOut.  For
  1327         -** docids in common between the inputs, the union of the position
  1328         -** lists is written.  Inputs and outputs are always type DL_DEFAULT.
  1329         -*/
  1330         -static void docListUnion(
  1331         -  const char *pLeft, int nLeft,
  1332         -  const char *pRight, int nRight,
  1333         -  DataBuffer *pOut      /* Write the combined doclist here */
  1334         -){
  1335         -  DLReader left, right;
  1336         -  DLWriter writer;
  1337         -
  1338         -  if( nLeft==0 ){
  1339         -    if( nRight!=0) dataBufferAppend(pOut, pRight, nRight);
  1340         -    return;
  1341         -  }
  1342         -  if( nRight==0 ){
  1343         -    dataBufferAppend(pOut, pLeft, nLeft);
  1344         -    return;
  1345         -  }
  1346         -
  1347         -  dlrInit(&left, DL_DEFAULT, pLeft, nLeft);
  1348         -  dlrInit(&right, DL_DEFAULT, pRight, nRight);
  1349         -  dlwInit(&writer, DL_DEFAULT, pOut);
  1350         -
  1351         -  while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
  1352         -    if( dlrAtEnd(&right) ){
  1353         -      dlwCopy(&writer, &left);
  1354         -      dlrStep(&left);
  1355         -    }else if( dlrAtEnd(&left) ){
  1356         -      dlwCopy(&writer, &right);
  1357         -      dlrStep(&right);
  1358         -    }else if( dlrDocid(&left)<dlrDocid(&right) ){
  1359         -      dlwCopy(&writer, &left);
  1360         -      dlrStep(&left);
  1361         -    }else if( dlrDocid(&left)>dlrDocid(&right) ){
  1362         -      dlwCopy(&writer, &right);
  1363         -      dlrStep(&right);
  1364         -    }else{
  1365         -      posListUnion(&left, &right, &writer);
  1366         -      dlrStep(&left);
  1367         -      dlrStep(&right);
  1368         -    }
  1369         -  }
  1370         -
  1371         -  dlrDestroy(&left);
  1372         -  dlrDestroy(&right);
  1373         -  dlwDestroy(&writer);
  1374         -}
  1375         -
  1376         -/* 
  1377         -** This function is used as part of the implementation of phrase and
  1378         -** NEAR matching.
  1379         -**
  1380         -** pLeft and pRight are DLReaders positioned to the same docid in
  1381         -** lists of type DL_POSITIONS. This function writes an entry to the
  1382         -** DLWriter pOut for each position in pRight that is at most
  1383         -** (nNear+1) greater (but not equal to or smaller) than a position 
  1384         -** in pLeft. For example, if nNear is 0, and the positions contained
  1385         -** by pLeft and pRight are:
  1386         -**
  1387         -**    pLeft:  5 10 15 20
  1388         -**    pRight: 6  9 17 21
  1389         -**
  1390         -** then the docid is added to pOut. If pOut is of type DL_POSITIONS,
  1391         -** then position ids "6" and "21" are also added to pOut.
  1392         -**
  1393         -** If boolean argument isSaveLeft is true, then positionids are copied
  1394         -** from pLeft instead of pRight. In the example above, the positions "5"
  1395         -** and "20" would be added instead of "6" and "21".
  1396         -*/
  1397         -static void posListPhraseMerge(
  1398         -  DLReader *pLeft, 
  1399         -  DLReader *pRight,
  1400         -  int nNear,
  1401         -  int isSaveLeft,
  1402         -  DLWriter *pOut
  1403         -){
  1404         -  PLReader left, right;
  1405         -  PLWriter writer;
  1406         -  int match = 0;
  1407         -
  1408         -  assert( dlrDocid(pLeft)==dlrDocid(pRight) );
  1409         -  assert( pOut->iType!=DL_POSITIONS_OFFSETS );
  1410         -
  1411         -  plrInit(&left, pLeft);
  1412         -  plrInit(&right, pRight);
  1413         -
  1414         -  while( !plrAtEnd(&left) && !plrAtEnd(&right) ){
  1415         -    if( plrColumn(&left)<plrColumn(&right) ){
  1416         -      plrStep(&left);
  1417         -    }else if( plrColumn(&left)>plrColumn(&right) ){
  1418         -      plrStep(&right);
  1419         -    }else if( plrPosition(&left)>=plrPosition(&right) ){
  1420         -      plrStep(&right);
  1421         -    }else{
  1422         -      if( (plrPosition(&right)-plrPosition(&left))<=(nNear+1) ){
  1423         -        if( !match ){
  1424         -          plwInit(&writer, pOut, dlrDocid(pLeft));
  1425         -          match = 1;
  1426         -        }
  1427         -        if( !isSaveLeft ){
  1428         -          plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0);
  1429         -        }else{
  1430         -          plwAdd(&writer, plrColumn(&left), plrPosition(&left), 0, 0);
  1431         -        }
  1432         -        plrStep(&right);
  1433         -      }else{
  1434         -        plrStep(&left);
  1435         -      }
  1436         -    }
  1437         -  }
  1438         -
  1439         -  if( match ){
  1440         -    plwTerminate(&writer);
  1441         -    plwDestroy(&writer);
  1442         -  }
  1443         -
  1444         -  plrDestroy(&left);
  1445         -  plrDestroy(&right);
  1446         -}
  1447         -
  1448         -/*
  1449         -** Compare the values pointed to by the PLReaders passed as arguments. 
  1450         -** Return -1 if the value pointed to by pLeft is considered less than
  1451         -** the value pointed to by pRight, +1 if it is considered greater
  1452         -** than it, or 0 if it is equal. i.e.
  1453         -**
  1454         -**     (*pLeft - *pRight)
  1455         -**
  1456         -** A PLReader that is in the EOF condition is considered greater than
  1457         -** any other. If neither argument is in EOF state, the return value of
  1458         -** plrColumn() is used. If the plrColumn() values are equal, the
  1459         -** comparison is on the basis of plrPosition().
  1460         -*/
  1461         -static int plrCompare(PLReader *pLeft, PLReader *pRight){
  1462         -  assert(!plrAtEnd(pLeft) || !plrAtEnd(pRight));
  1463         -
  1464         -  if( plrAtEnd(pRight) || plrAtEnd(pLeft) ){
  1465         -    return (plrAtEnd(pRight) ? -1 : 1);
  1466         -  }
  1467         -  if( plrColumn(pLeft)!=plrColumn(pRight) ){
  1468         -    return ((plrColumn(pLeft)<plrColumn(pRight)) ? -1 : 1);
  1469         -  }
  1470         -  if( plrPosition(pLeft)!=plrPosition(pRight) ){
  1471         -    return ((plrPosition(pLeft)<plrPosition(pRight)) ? -1 : 1);
  1472         -  }
  1473         -  return 0;
  1474         -}
  1475         -
  1476         -/* We have two doclists with positions:  pLeft and pRight. Depending
  1477         -** on the value of the nNear parameter, perform either a phrase
  1478         -** intersection (if nNear==0) or a NEAR intersection (if nNear>0)
  1479         -** and write the results into pOut.
  1480         -**
  1481         -** A phrase intersection means that two documents only match
  1482         -** if pLeft.iPos+1==pRight.iPos.
  1483         -**
  1484         -** A NEAR intersection means that two documents only match if 
  1485         -** (abs(pLeft.iPos-pRight.iPos)<nNear).
  1486         -**
  1487         -** If a NEAR intersection is requested, then the nPhrase argument should
  1488         -** be passed the number of tokens in the two operands to the NEAR operator
  1489         -** combined. For example:
  1490         -**
  1491         -**       Query syntax               nPhrase
  1492         -**      ------------------------------------
  1493         -**       "A B C" NEAR "D E"         5
  1494         -**       A NEAR B                   2
  1495         -**
  1496         -** iType controls the type of data written to pOut.  If iType is
  1497         -** DL_POSITIONS, the positions are those from pRight.
  1498         -*/
  1499         -static void docListPhraseMerge(
  1500         -  const char *pLeft, int nLeft,
  1501         -  const char *pRight, int nRight,
  1502         -  int nNear,            /* 0 for a phrase merge, non-zero for a NEAR merge */
  1503         -  int nPhrase,          /* Number of tokens in left+right operands to NEAR */
  1504         -  DocListType iType,    /* Type of doclist to write to pOut */
  1505         -  DataBuffer *pOut      /* Write the combined doclist here */
  1506         -){
  1507         -  DLReader left, right;
  1508         -  DLWriter writer;
  1509         -
  1510         -  if( nLeft==0 || nRight==0 ) return;
  1511         -
  1512         -  assert( iType!=DL_POSITIONS_OFFSETS );
  1513         -
  1514         -  dlrInit(&left, DL_POSITIONS, pLeft, nLeft);
  1515         -  dlrInit(&right, DL_POSITIONS, pRight, nRight);
  1516         -  dlwInit(&writer, iType, pOut);
  1517         -
  1518         -  while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
  1519         -    if( dlrDocid(&left)<dlrDocid(&right) ){
  1520         -      dlrStep(&left);
  1521         -    }else if( dlrDocid(&right)<dlrDocid(&left) ){
  1522         -      dlrStep(&right);
  1523         -    }else{
  1524         -      if( nNear==0 ){
  1525         -        posListPhraseMerge(&left, &right, 0, 0, &writer);
  1526         -      }else{
  1527         -        /* This case occurs when two terms (simple terms or phrases) are
  1528         -         * connected by a NEAR operator, span (nNear+1). i.e.
  1529         -         *
  1530         -         *     '"terrible company" NEAR widget'
  1531         -         */
  1532         -        DataBuffer one = {0, 0, 0};
  1533         -        DataBuffer two = {0, 0, 0};
  1534         -
  1535         -        DLWriter dlwriter2;
  1536         -        DLReader dr1 = {0, 0, 0, 0, 0}; 
  1537         -        DLReader dr2 = {0, 0, 0, 0, 0};
  1538         -
  1539         -        dlwInit(&dlwriter2, iType, &one);
  1540         -        posListPhraseMerge(&right, &left, nNear-3+nPhrase, 1, &dlwriter2);
  1541         -        dlwInit(&dlwriter2, iType, &two);
  1542         -        posListPhraseMerge(&left, &right, nNear-1, 0, &dlwriter2);
  1543         -
  1544         -        if( one.nData) dlrInit(&dr1, iType, one.pData, one.nData);
  1545         -        if( two.nData) dlrInit(&dr2, iType, two.pData, two.nData);
  1546         -
  1547         -        if( !dlrAtEnd(&dr1) || !dlrAtEnd(&dr2) ){
  1548         -          PLReader pr1 = {0};
  1549         -          PLReader pr2 = {0};
  1550         -
  1551         -          PLWriter plwriter;
  1552         -          plwInit(&plwriter, &writer, dlrDocid(dlrAtEnd(&dr1)?&dr2:&dr1));
  1553         -
  1554         -          if( one.nData ) plrInit(&pr1, &dr1);
  1555         -          if( two.nData ) plrInit(&pr2, &dr2);
  1556         -          while( !plrAtEnd(&pr1) || !plrAtEnd(&pr2) ){
  1557         -            int iCompare = plrCompare(&pr1, &pr2);
  1558         -            switch( iCompare ){
  1559         -              case -1:
  1560         -                plwCopy(&plwriter, &pr1);
  1561         -                plrStep(&pr1);
  1562         -                break;
  1563         -              case 1:
  1564         -                plwCopy(&plwriter, &pr2);
  1565         -                plrStep(&pr2);
  1566         -                break;
  1567         -              case 0:
  1568         -                plwCopy(&plwriter, &pr1);
  1569         -                plrStep(&pr1);
  1570         -                plrStep(&pr2);
  1571         -                break;
  1572         -            }
  1573         -          }
  1574         -          plwTerminate(&plwriter);
  1575         -        }
  1576         -        dataBufferDestroy(&one);
  1577         -        dataBufferDestroy(&two);
  1578         -      }
  1579         -      dlrStep(&left);
  1580         -      dlrStep(&right);
  1581         -    }
  1582         -  }
  1583         -
  1584         -  dlrDestroy(&left);
  1585         -  dlrDestroy(&right);
  1586         -  dlwDestroy(&writer);
  1587         -}
  1588         -
  1589         -/* We have two DL_DOCIDS doclists:  pLeft and pRight.
  1590         -** Write the intersection of these two doclists into pOut as a
  1591         -** DL_DOCIDS doclist.
  1592         -*/
  1593         -static void docListAndMerge(
  1594         -  const char *pLeft, int nLeft,
  1595         -  const char *pRight, int nRight,
  1596         -  DataBuffer *pOut      /* Write the combined doclist here */
  1597         -){
  1598         -  DLReader left, right;
  1599         -  DLWriter writer;
  1600         -
  1601         -  if( nLeft==0 || nRight==0 ) return;
  1602         -
  1603         -  dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
  1604         -  dlrInit(&right, DL_DOCIDS, pRight, nRight);
  1605         -  dlwInit(&writer, DL_DOCIDS, pOut);
  1606         -
  1607         -  while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
  1608         -    if( dlrDocid(&left)<dlrDocid(&right) ){
  1609         -      dlrStep(&left);
  1610         -    }else if( dlrDocid(&right)<dlrDocid(&left) ){
  1611         -      dlrStep(&right);
  1612         -    }else{
  1613         -      dlwAdd(&writer, dlrDocid(&left));
  1614         -      dlrStep(&left);
  1615         -      dlrStep(&right);
  1616         -    }
  1617         -  }
  1618         -
  1619         -  dlrDestroy(&left);
  1620         -  dlrDestroy(&right);
  1621         -  dlwDestroy(&writer);
  1622         -}
  1623         -
  1624         -/* We have two DL_DOCIDS doclists:  pLeft and pRight.
  1625         -** Write the union of these two doclists into pOut as a
  1626         -** DL_DOCIDS doclist.
  1627         -*/
  1628         -static void docListOrMerge(
  1629         -  const char *pLeft, int nLeft,
  1630         -  const char *pRight, int nRight,
  1631         -  DataBuffer *pOut      /* Write the combined doclist here */
  1632         -){
  1633         -  DLReader left, right;
  1634         -  DLWriter writer;
  1635         -
  1636         -  if( nLeft==0 ){
  1637         -    if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight);
  1638         -    return;
  1639         -  }
  1640         -  if( nRight==0 ){
  1641         -    dataBufferAppend(pOut, pLeft, nLeft);
  1642         -    return;
  1643         -  }
  1644         -
  1645         -  dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
  1646         -  dlrInit(&right, DL_DOCIDS, pRight, nRight);
  1647         -  dlwInit(&writer, DL_DOCIDS, pOut);
  1648         -
  1649         -  while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
  1650         -    if( dlrAtEnd(&right) ){
  1651         -      dlwAdd(&writer, dlrDocid(&left));
  1652         -      dlrStep(&left);
  1653         -    }else if( dlrAtEnd(&left) ){
  1654         -      dlwAdd(&writer, dlrDocid(&right));
  1655         -      dlrStep(&right);
  1656         -    }else if( dlrDocid(&left)<dlrDocid(&right) ){
  1657         -      dlwAdd(&writer, dlrDocid(&left));
  1658         -      dlrStep(&left);
  1659         -    }else if( dlrDocid(&right)<dlrDocid(&left) ){
  1660         -      dlwAdd(&writer, dlrDocid(&right));
  1661         -      dlrStep(&right);
  1662         -    }else{
  1663         -      dlwAdd(&writer, dlrDocid(&left));
  1664         -      dlrStep(&left);
  1665         -      dlrStep(&right);
  1666         -    }
  1667         -  }
  1668         -
  1669         -  dlrDestroy(&left);
  1670         -  dlrDestroy(&right);
  1671         -  dlwDestroy(&writer);
  1672         -}
  1673         -
  1674         -/* We have two DL_DOCIDS doclists:  pLeft and pRight.
  1675         -** Write into pOut as DL_DOCIDS doclist containing all documents that
  1676         -** occur in pLeft but not in pRight.
  1677         -*/
  1678         -static void docListExceptMerge(
  1679         -  const char *pLeft, int nLeft,
  1680         -  const char *pRight, int nRight,
  1681         -  DataBuffer *pOut      /* Write the combined doclist here */
  1682         -){
  1683         -  DLReader left, right;
  1684         -  DLWriter writer;
  1685         -
  1686         -  if( nLeft==0 ) return;
  1687         -  if( nRight==0 ){
  1688         -    dataBufferAppend(pOut, pLeft, nLeft);
  1689         -    return;
  1690         -  }
  1691         -
  1692         -  dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
  1693         -  dlrInit(&right, DL_DOCIDS, pRight, nRight);
  1694         -  dlwInit(&writer, DL_DOCIDS, pOut);
  1695         -
  1696         -  while( !dlrAtEnd(&left) ){
  1697         -    while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){
  1698         -      dlrStep(&right);
  1699         -    }
  1700         -    if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){
  1701         -      dlwAdd(&writer, dlrDocid(&left));
  1702         -    }
  1703         -    dlrStep(&left);
  1704         -  }
  1705         -
  1706         -  dlrDestroy(&left);
  1707         -  dlrDestroy(&right);
  1708         -  dlwDestroy(&writer);
  1709         -}
  1710         -
  1711         -static char *string_dup_n(const char *s, int n){
  1712         -  char *str = sqlite3_malloc(n + 1);
  1713         -  memcpy(str, s, n);
  1714         -  str[n] = '\0';
  1715         -  return str;
  1716         -}
  1717         -
  1718         -/* Duplicate a string; the caller must free() the returned string.
  1719         - * (We don't use strdup() since it is not part of the standard C library and
  1720         - * may not be available everywhere.) */
  1721         -static char *string_dup(const char *s){
  1722         -  return string_dup_n(s, strlen(s));
  1723         -}
  1724         -
  1725         -/* Format a string, replacing each occurrence of the % character with
  1726         - * zDb.zName.  This may be more convenient than sqlite_mprintf()
  1727         - * when one string is used repeatedly in a format string.
  1728         - * The caller must free() the returned string. */
  1729         -static char *string_format(const char *zFormat,
  1730         -                           const char *zDb, const char *zName){
  1731         -  const char *p;
  1732         -  size_t len = 0;
  1733         -  size_t nDb = strlen(zDb);
  1734         -  size_t nName = strlen(zName);
  1735         -  size_t nFullTableName = nDb+1+nName;
  1736         -  char *result;
  1737         -  char *r;
  1738         -
  1739         -  /* first compute length needed */
  1740         -  for(p = zFormat ; *p ; ++p){
  1741         -    len += (*p=='%' ? nFullTableName : 1);
  1742         -  }
  1743         -  len += 1;  /* for null terminator */
  1744         -
  1745         -  r = result = sqlite3_malloc(len);
  1746         -  for(p = zFormat; *p; ++p){
  1747         -    if( *p=='%' ){
  1748         -      memcpy(r, zDb, nDb);
  1749         -      r += nDb;
  1750         -      *r++ = '.';
  1751         -      memcpy(r, zName, nName);
  1752         -      r += nName;
  1753         -    } else {
  1754         -      *r++ = *p;
  1755         -    }
  1756         -  }
  1757         -  *r++ = '\0';
  1758         -  assert( r == result + len );
  1759         -  return result;
  1760         -}
  1761         -
  1762         -static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
  1763         -                    const char *zFormat){
  1764         -  char *zCommand = string_format(zFormat, zDb, zName);
  1765         -  int rc;
  1766         -  FTSTRACE(("FTS3 sql: %s\n", zCommand));
  1767         -  rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
  1768         -  sqlite3_free(zCommand);
  1769         -  return rc;
  1770         -}
  1771         -
  1772         -static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
  1773         -                       sqlite3_stmt **ppStmt, const char *zFormat){
  1774         -  char *zCommand = string_format(zFormat, zDb, zName);
  1775         -  int rc;
  1776         -  FTSTRACE(("FTS3 prepare: %s\n", zCommand));
  1777         -  rc = sqlite3_prepare_v2(db, zCommand, -1, ppStmt, NULL);
  1778         -  sqlite3_free(zCommand);
  1779         -  return rc;
  1780         -}
  1781         -
  1782         -/* end utility functions */
  1783         -
  1784         -/* Forward reference */
  1785         -typedef struct fulltext_vtab fulltext_vtab;
  1786         -
  1787         -/*
  1788         -** An instance of the following structure keeps track of generated
  1789         -** matching-word offset information and snippets.
  1790         -*/
  1791         -typedef struct Snippet {
  1792         -  int nMatch;     /* Total number of matches */
  1793         -  int nAlloc;     /* Space allocated for aMatch[] */
  1794         -  struct snippetMatch { /* One entry for each matching term */
  1795         -    char snStatus;       /* Status flag for use while constructing snippets */
  1796         -    short int iCol;      /* The column that contains the match */
  1797         -    short int iTerm;     /* The index in Query.pTerms[] of the matching term */
  1798         -    int iToken;          /* The index of the matching document token */
  1799         -    short int nByte;     /* Number of bytes in the term */
  1800         -    int iStart;          /* The offset to the first character of the term */
  1801         -  } *aMatch;      /* Points to space obtained from malloc */
  1802         -  char *zOffset;  /* Text rendering of aMatch[] */
  1803         -  int nOffset;    /* strlen(zOffset) */
  1804         -  char *zSnippet; /* Snippet text */
  1805         -  int nSnippet;   /* strlen(zSnippet) */
  1806         -} Snippet;
  1807         -
  1808         -
  1809         -typedef enum QueryType {
  1810         -  QUERY_GENERIC,   /* table scan */
  1811         -  QUERY_DOCID,     /* lookup by docid */
  1812         -  QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
  1813         -} QueryType;
  1814         -
  1815         -typedef enum fulltext_statement {
  1816         -  CONTENT_INSERT_STMT,
  1817         -  CONTENT_SELECT_STMT,
  1818         -  CONTENT_UPDATE_STMT,
  1819         -  CONTENT_DELETE_STMT,
  1820         -  CONTENT_EXISTS_STMT,
  1821         -
  1822         -  BLOCK_INSERT_STMT,
  1823         -  BLOCK_SELECT_STMT,
  1824         -  BLOCK_DELETE_STMT,
  1825         -  BLOCK_DELETE_ALL_STMT,
  1826         -
  1827         -  SEGDIR_MAX_INDEX_STMT,
  1828         -  SEGDIR_SET_STMT,
  1829         -  SEGDIR_SELECT_LEVEL_STMT,
  1830         -  SEGDIR_SPAN_STMT,
  1831         -  SEGDIR_DELETE_STMT,
  1832         -  SEGDIR_SELECT_SEGMENT_STMT,
  1833         -  SEGDIR_SELECT_ALL_STMT,
  1834         -  SEGDIR_DELETE_ALL_STMT,
  1835         -  SEGDIR_COUNT_STMT,
  1836         -
  1837         -  MAX_STMT                     /* Always at end! */
  1838         -} fulltext_statement;
  1839         -
  1840         -/* These must exactly match the enum above. */
  1841         -/* TODO(shess): Is there some risk that a statement will be used in two
  1842         -** cursors at once, e.g.  if a query joins a virtual table to itself?
  1843         -** If so perhaps we should move some of these to the cursor object.
  1844         -*/
  1845         -static const char *const fulltext_zStatement[MAX_STMT] = {
  1846         -  /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
  1847         -  /* CONTENT_SELECT */ NULL,  /* generated in contentSelectStatement() */
  1848         -  /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
  1849         -  /* CONTENT_DELETE */ "delete from %_content where docid = ?",
  1850         -  /* CONTENT_EXISTS */ "select docid from %_content limit 1",
  1851         -
  1852         -  /* BLOCK_INSERT */
  1853         -  "insert into %_segments (blockid, block) values (null, ?)",
  1854         -  /* BLOCK_SELECT */ "select block from %_segments where blockid = ?",
  1855         -  /* BLOCK_DELETE */ "delete from %_segments where blockid between ? and ?",
  1856         -  /* BLOCK_DELETE_ALL */ "delete from %_segments",
  1857         -
  1858         -  /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?",
  1859         -  /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)",
  1860         -  /* SEGDIR_SELECT_LEVEL */
  1861         -  "select start_block, leaves_end_block, root from %_segdir "
  1862         -  " where level = ? order by idx",
  1863         -  /* SEGDIR_SPAN */
  1864         -  "select min(start_block), max(end_block) from %_segdir "
  1865         -  " where level = ? and start_block <> 0",
  1866         -  /* SEGDIR_DELETE */ "delete from %_segdir where level = ?",
  1867         -
  1868         -  /* NOTE(shess): The first three results of the following two
  1869         -  ** statements must match.
  1870         -  */
  1871         -  /* SEGDIR_SELECT_SEGMENT */
  1872         -  "select start_block, leaves_end_block, root from %_segdir "
  1873         -  " where level = ? and idx = ?",
  1874         -  /* SEGDIR_SELECT_ALL */
  1875         -  "select start_block, leaves_end_block, root from %_segdir "
  1876         -  " order by level desc, idx asc",
  1877         -  /* SEGDIR_DELETE_ALL */ "delete from %_segdir",
  1878         -  /* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir",
  1879         -};
  1880         -
  1881         -/*
  1882         -** A connection to a fulltext index is an instance of the following
  1883         -** structure.  The xCreate and xConnect methods create an instance
  1884         -** of this structure and xDestroy and xDisconnect free that instance.
  1885         -** All other methods receive a pointer to the structure as one of their
  1886         -** arguments.
  1887         -*/
  1888         -struct fulltext_vtab {
  1889         -  sqlite3_vtab base;               /* Base class used by SQLite core */
  1890         -  sqlite3 *db;                     /* The database connection */
  1891         -  const char *zDb;                 /* logical database name */
  1892         -  const char *zName;               /* virtual table name */
  1893         -  int nColumn;                     /* number of columns in virtual table */
  1894         -  char **azColumn;                 /* column names.  malloced */
  1895         -  char **azContentColumn;          /* column names in content table; malloced */
  1896         -  sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */
  1897         -
  1898         -  /* Precompiled statements which we keep as long as the table is
  1899         -  ** open.
  1900         -  */
  1901         -  sqlite3_stmt *pFulltextStatements[MAX_STMT];
  1902         -
  1903         -  /* Precompiled statements used for segment merges.  We run a
  1904         -  ** separate select across the leaf level of each tree being merged.
  1905         -  */
  1906         -  sqlite3_stmt *pLeafSelectStmts[MERGE_COUNT];
  1907         -  /* The statement used to prepare pLeafSelectStmts. */
  1908         -#define LEAF_SELECT \
  1909         -  "select block from %_segments where blockid between ? and ? order by blockid"
  1910         -
  1911         -  /* These buffer pending index updates during transactions.
  1912         -  ** nPendingData estimates the memory size of the pending data.  It
  1913         -  ** doesn't include the hash-bucket overhead, nor any malloc
  1914         -  ** overhead.  When nPendingData exceeds kPendingThreshold, the
  1915         -  ** buffer is flushed even before the transaction closes.
  1916         -  ** pendingTerms stores the data, and is only valid when nPendingData
  1917         -  ** is >=0 (nPendingData<0 means pendingTerms has not been
  1918         -  ** initialized).  iPrevDocid is the last docid written, used to make
  1919         -  ** certain we're inserting in sorted order.
  1920         -  */
  1921         -  int nPendingData;
  1922         -#define kPendingThreshold (1*1024*1024)
  1923         -  sqlite_int64 iPrevDocid;
  1924         -  fts3Hash pendingTerms;
  1925         -};
  1926         -
  1927         -/*
  1928         -** When the core wants to do a query, it create a cursor using a
  1929         -** call to xOpen.  This structure is an instance of a cursor.  It
  1930         -** is destroyed by xClose.
  1931         -*/
  1932         -typedef struct fulltext_cursor {
  1933         -  sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
  1934         -  QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
  1935         -  sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
  1936         -  int eof;                         /* True if at End Of Results */
  1937         -  Fts3Expr *pExpr;                 /* Parsed MATCH query string */
  1938         -  Snippet snippet;                 /* Cached snippet for the current row */
  1939         -  int iColumn;                     /* Column being searched */
  1940         -  DataBuffer result;               /* Doclist results from fulltextQuery */
  1941         -  DLReader reader;                 /* Result reader if result not empty */
  1942         -} fulltext_cursor;
  1943         -
  1944         -static fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  1945         -  return (fulltext_vtab *) c->base.pVtab;
  1946         -}
  1947         -
  1948         -static const sqlite3_module fts3Module;   /* forward declaration */
  1949         -
  1950         -/* Return a dynamically generated statement of the form
  1951         - *   insert into %_content (docid, ...) values (?, ...)
  1952         - */
  1953         -static const char *contentInsertStatement(fulltext_vtab *v){
  1954         -  StringBuffer sb;
  1955         -  int i;
  1956         -
  1957         -  initStringBuffer(&sb);
  1958         -  append(&sb, "insert into %_content (docid, ");
  1959         -  appendList(&sb, v->nColumn, v->azContentColumn);
  1960         -  append(&sb, ") values (?");
  1961         -  for(i=0; i<v->nColumn; ++i)
  1962         -    append(&sb, ", ?");
  1963         -  append(&sb, ")");
  1964         -  return stringBufferData(&sb);
  1965         -}
  1966         -
  1967         -/* Return a dynamically generated statement of the form
  1968         - *   select <content columns> from %_content where docid = ?
  1969         - */
  1970         -static const char *contentSelectStatement(fulltext_vtab *v){
  1971         -  StringBuffer sb;
  1972         -  initStringBuffer(&sb);
  1973         -  append(&sb, "SELECT ");
  1974         -  appendList(&sb, v->nColumn, v->azContentColumn);
  1975         -  append(&sb, " FROM %_content WHERE docid = ?");
  1976         -  return stringBufferData(&sb);
  1977         -}
  1978         -
  1979         -/* Return a dynamically generated statement of the form
  1980         - *   update %_content set [col_0] = ?, [col_1] = ?, ...
  1981         - *                    where docid = ?
  1982         - */
  1983         -static const char *contentUpdateStatement(fulltext_vtab *v){
  1984         -  StringBuffer sb;
  1985         -  int i;
  1986         -
  1987         -  initStringBuffer(&sb);
  1988         -  append(&sb, "update %_content set ");
  1989         -  for(i=0; i<v->nColumn; ++i) {
  1990         -    if( i>0 ){
  1991         -      append(&sb, ", ");
  1992         -    }
  1993         -    append(&sb, v->azContentColumn[i]);
  1994         -    append(&sb, " = ?");
  1995         -  }
  1996         -  append(&sb, " where docid = ?");
  1997         -  return stringBufferData(&sb);
  1998         -}
  1999         -
  2000         -/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
  2001         -** If the indicated statement has never been prepared, it is prepared
  2002         -** and cached, otherwise the cached version is reset.
  2003         -*/
  2004         -static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
  2005         -                             sqlite3_stmt **ppStmt){
  2006         -  assert( iStmt<MAX_STMT );
  2007         -  if( v->pFulltextStatements[iStmt]==NULL ){
  2008         -    const char *zStmt;
  2009         -    int rc;
  2010         -    switch( iStmt ){
  2011         -      case CONTENT_INSERT_STMT:
  2012         -        zStmt = contentInsertStatement(v); break;
  2013         -      case CONTENT_SELECT_STMT:
  2014         -        zStmt = contentSelectStatement(v); break;
  2015         -      case CONTENT_UPDATE_STMT:
  2016         -        zStmt = contentUpdateStatement(v); break;
  2017         -      default:
  2018         -        zStmt = fulltext_zStatement[iStmt];
  2019         -    }
  2020         -    rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
  2021         -                         zStmt);
  2022         -    if( zStmt != fulltext_zStatement[iStmt]) sqlite3_free((void *) zStmt);
  2023         -    if( rc!=SQLITE_OK ) return rc;
  2024         -  } else {
  2025         -    int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
  2026         -    if( rc!=SQLITE_OK ) return rc;
  2027         -  }
  2028         -
  2029         -  *ppStmt = v->pFulltextStatements[iStmt];
  2030         -  return SQLITE_OK;
  2031         -}
  2032         -
  2033         -/* Like sqlite3_step(), but convert SQLITE_DONE to SQLITE_OK and
  2034         -** SQLITE_ROW to SQLITE_ERROR.  Useful for statements like UPDATE,
  2035         -** where we expect no results.
  2036         -*/
  2037         -static int sql_single_step(sqlite3_stmt *s){
  2038         -  int rc = sqlite3_step(s);
  2039         -  return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
  2040         -}
  2041         -
  2042         -/* Like sql_get_statement(), but for special replicated LEAF_SELECT
  2043         -** statements.  idx -1 is a special case for an uncached version of
  2044         -** the statement (used in the optimize implementation).
  2045         -*/
  2046         -/* TODO(shess) Write version for generic statements and then share
  2047         -** that between the cached-statement functions.
  2048         -*/
  2049         -static int sql_get_leaf_statement(fulltext_vtab *v, int idx,
  2050         -                                  sqlite3_stmt **ppStmt){
  2051         -  assert( idx>=-1 && idx<MERGE_COUNT );
  2052         -  if( idx==-1 ){
  2053         -    return sql_prepare(v->db, v->zDb, v->zName, ppStmt, LEAF_SELECT);
  2054         -  }else if( v->pLeafSelectStmts[idx]==NULL ){
  2055         -    int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx],
  2056         -                         LEAF_SELECT);
  2057         -    if( rc!=SQLITE_OK ) return rc;
  2058         -  }else{
  2059         -    int rc = sqlite3_reset(v->pLeafSelectStmts[idx]);
  2060         -    if( rc!=SQLITE_OK ) return rc;
  2061         -  }
  2062         -
  2063         -  *ppStmt = v->pLeafSelectStmts[idx];
  2064         -  return SQLITE_OK;
  2065         -}
  2066         -
  2067         -/* insert into %_content (docid, ...) values ([docid], [pValues])
  2068         -** If the docid contains SQL NULL, then a unique docid will be
  2069         -** generated.
  2070         -*/
  2071         -static int content_insert(fulltext_vtab *v, sqlite3_value *docid,
  2072         -                          sqlite3_value **pValues){
  2073         -  sqlite3_stmt *s;
  2074         -  int i;
  2075         -  int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
  2076         -  if( rc!=SQLITE_OK ) return rc;
  2077         -
  2078         -  rc = sqlite3_bind_value(s, 1, docid);
  2079         -  if( rc!=SQLITE_OK ) return rc;
  2080         -
  2081         -  for(i=0; i<v->nColumn; ++i){
  2082         -    rc = sqlite3_bind_value(s, 2+i, pValues[i]);
  2083         -    if( rc!=SQLITE_OK ) return rc;
  2084         -  }
  2085         -
  2086         -  return sql_single_step(s);
  2087         -}
  2088         -
  2089         -/* update %_content set col0 = pValues[0], col1 = pValues[1], ...
  2090         - *                  where docid = [iDocid] */
  2091         -static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
  2092         -                          sqlite_int64 iDocid){
  2093         -  sqlite3_stmt *s;
  2094         -  int i;
  2095         -  int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
  2096         -  if( rc!=SQLITE_OK ) return rc;
  2097         -
  2098         -  for(i=0; i<v->nColumn; ++i){
  2099         -    rc = sqlite3_bind_value(s, 1+i, pValues[i]);
  2100         -    if( rc!=SQLITE_OK ) return rc;
  2101         -  }
  2102         -
  2103         -  rc = sqlite3_bind_int64(s, 1+v->nColumn, iDocid);
  2104         -  if( rc!=SQLITE_OK ) return rc;
  2105         -
  2106         -  return sql_single_step(s);
  2107         -}
  2108         -
  2109         -static void freeStringArray(int nString, const char **pString){
  2110         -  int i;
  2111         -
  2112         -  for (i=0 ; i < nString ; ++i) {
  2113         -    if( pString[i]!=NULL ) sqlite3_free((void *) pString[i]);
  2114         -  }
  2115         -  sqlite3_free((void *) pString);
  2116         -}
  2117         -
  2118         -/* select * from %_content where docid = [iDocid]
  2119         - * The caller must delete the returned array and all strings in it.
  2120         - * null fields will be NULL in the returned array.
  2121         - *
  2122         - * TODO: Perhaps we should return pointer/length strings here for consistency
  2123         - * with other code which uses pointer/length. */
  2124         -static int content_select(fulltext_vtab *v, sqlite_int64 iDocid,
  2125         -                          const char ***pValues){
  2126         -  sqlite3_stmt *s;
  2127         -  const char **values;
  2128         -  int i;
  2129         -  int rc;
  2130         -
  2131         -  *pValues = NULL;
  2132         -
  2133         -  rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
  2134         -  if( rc!=SQLITE_OK ) return rc;
  2135         -
  2136         -  rc = sqlite3_bind_int64(s, 1, iDocid);
  2137         -  if( rc!=SQLITE_OK ) return rc;
  2138         -
  2139         -  rc = sqlite3_step(s);
  2140         -  if( rc!=SQLITE_ROW ) return rc;
  2141         -
  2142         -  values = (const char **) sqlite3_malloc(v->nColumn * sizeof(const char *));
  2143         -  for(i=0; i<v->nColumn; ++i){
  2144         -    if( sqlite3_column_type(s, i)==SQLITE_NULL ){
  2145         -      values[i] = NULL;
  2146         -    }else{
  2147         -      values[i] = string_dup((char*)sqlite3_column_text(s, i));
  2148         -    }
  2149         -  }
  2150         -
  2151         -  /* We expect only one row.  We must execute another sqlite3_step()
  2152         -   * to complete the iteration; otherwise the table will remain locked. */
  2153         -  rc = sqlite3_step(s);
  2154         -  if( rc==SQLITE_DONE ){
  2155         -    *pValues = values;
  2156         -    return SQLITE_OK;
  2157         -  }
  2158         -
  2159         -  freeStringArray(v->nColumn, values);
  2160         -  return rc;
  2161         -}
  2162         -
  2163         -/* delete from %_content where docid = [iDocid ] */
  2164         -static int content_delete(fulltext_vtab *v, sqlite_int64 iDocid){
  2165         -  sqlite3_stmt *s;
  2166         -  int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
  2167         -  if( rc!=SQLITE_OK ) return rc;
  2168         -
  2169         -  rc = sqlite3_bind_int64(s, 1, iDocid);
  2170         -  if( rc!=SQLITE_OK ) return rc;
  2171         -
  2172         -  return sql_single_step(s);
  2173         -}
  2174         -
  2175         -/* Returns SQLITE_ROW if any rows exist in %_content, SQLITE_DONE if
  2176         -** no rows exist, and any error in case of failure.
  2177         -*/
  2178         -static int content_exists(fulltext_vtab *v){
  2179         -  sqlite3_stmt *s;
  2180         -  int rc = sql_get_statement(v, CONTENT_EXISTS_STMT, &s);
  2181         -  if( rc!=SQLITE_OK ) return rc;
  2182         -
  2183         -  rc = sqlite3_step(s);
  2184         -  if( rc!=SQLITE_ROW ) return rc;
  2185         -
  2186         -  /* We expect only one row.  We must execute another sqlite3_step()
  2187         -   * to complete the iteration; otherwise the table will remain locked. */
  2188         -  rc = sqlite3_step(s);
  2189         -  if( rc==SQLITE_DONE ) return SQLITE_ROW;
  2190         -  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  2191         -  return rc;
  2192         -}
  2193         -
  2194         -/* insert into %_segments values ([pData])
  2195         -**   returns assigned blockid in *piBlockid
  2196         -*/
  2197         -static int block_insert(fulltext_vtab *v, const char *pData, int nData,
  2198         -                        sqlite_int64 *piBlockid){
  2199         -  sqlite3_stmt *s;
  2200         -  int rc = sql_get_statement(v, BLOCK_INSERT_STMT, &s);
  2201         -  if( rc!=SQLITE_OK ) return rc;
  2202         -
  2203         -  rc = sqlite3_bind_blob(s, 1, pData, nData, SQLITE_STATIC);
  2204         -  if( rc!=SQLITE_OK ) return rc;
  2205         -
  2206         -  rc = sqlite3_step(s);
  2207         -  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  2208         -  if( rc!=SQLITE_DONE ) return rc;
  2209         -
  2210         -  /* blockid column is an alias for rowid. */
  2211         -  *piBlockid = sqlite3_last_insert_rowid(v->db);
  2212         -  return SQLITE_OK;
  2213         -}
  2214         -
  2215         -/* delete from %_segments
  2216         -**   where blockid between [iStartBlockid] and [iEndBlockid]
  2217         -**
  2218         -** Deletes the range of blocks, inclusive, used to delete the blocks
  2219         -** which form a segment.
  2220         -*/
  2221         -static int block_delete(fulltext_vtab *v,
  2222         -                        sqlite_int64 iStartBlockid, sqlite_int64 iEndBlockid){
  2223         -  sqlite3_stmt *s;
  2224         -  int rc = sql_get_statement(v, BLOCK_DELETE_STMT, &s);
  2225         -  if( rc!=SQLITE_OK ) return rc;
  2226         -
  2227         -  rc = sqlite3_bind_int64(s, 1, iStartBlockid);
  2228         -  if( rc!=SQLITE_OK ) return rc;
  2229         -
  2230         -  rc = sqlite3_bind_int64(s, 2, iEndBlockid);
  2231         -  if( rc!=SQLITE_OK ) return rc;
  2232         -
  2233         -  return sql_single_step(s);
  2234         -}
  2235         -
  2236         -/* Returns SQLITE_ROW with *pidx set to the maximum segment idx found
  2237         -** at iLevel.  Returns SQLITE_DONE if there are no segments at
  2238         -** iLevel.  Otherwise returns an error.
  2239         -*/
  2240         -static int segdir_max_index(fulltext_vtab *v, int iLevel, int *pidx){
  2241         -  sqlite3_stmt *s;
  2242         -  int rc = sql_get_statement(v, SEGDIR_MAX_INDEX_STMT, &s);
  2243         -  if( rc!=SQLITE_OK ) return rc;
  2244         -
  2245         -  rc = sqlite3_bind_int(s, 1, iLevel);
  2246         -  if( rc!=SQLITE_OK ) return rc;
  2247         -
  2248         -  rc = sqlite3_step(s);
  2249         -  /* Should always get at least one row due to how max() works. */
  2250         -  if( rc==SQLITE_DONE ) return SQLITE_DONE;
  2251         -  if( rc!=SQLITE_ROW ) return rc;
  2252         -
  2253         -  /* NULL means that there were no inputs to max(). */
  2254         -  if( SQLITE_NULL==sqlite3_column_type(s, 0) ){
  2255         -    rc = sqlite3_step(s);
  2256         -    if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  2257         -    return rc;
  2258         -  }
  2259         -
  2260         -  *pidx = sqlite3_column_int(s, 0);
  2261         -
  2262         -  /* We expect only one row.  We must execute another sqlite3_step()
  2263         -   * to complete the iteration; otherwise the table will remain locked. */
  2264         -  rc = sqlite3_step(s);
  2265         -  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  2266         -  if( rc!=SQLITE_DONE ) return rc;
  2267         -  return SQLITE_ROW;
  2268         -}
  2269         -
  2270         -/* insert into %_segdir values (
  2271         -**   [iLevel], [idx],
  2272         -**   [iStartBlockid], [iLeavesEndBlockid], [iEndBlockid],
  2273         -**   [pRootData]
  2274         -** )
  2275         -*/
  2276         -static int segdir_set(fulltext_vtab *v, int iLevel, int idx,
  2277         -                      sqlite_int64 iStartBlockid,
  2278         -                      sqlite_int64 iLeavesEndBlockid,
  2279         -                      sqlite_int64 iEndBlockid,
  2280         -                      const char *pRootData, int nRootData){
  2281         -  sqlite3_stmt *s;
  2282         -  int rc = sql_get_statement(v, SEGDIR_SET_STMT, &s);
  2283         -  if( rc!=SQLITE_OK ) return rc;
  2284         -
  2285         -  rc = sqlite3_bind_int(s, 1, iLevel);
  2286         -  if( rc!=SQLITE_OK ) return rc;
  2287         -
  2288         -  rc = sqlite3_bind_int(s, 2, idx);
  2289         -  if( rc!=SQLITE_OK ) return rc;
  2290         -
  2291         -  rc = sqlite3_bind_int64(s, 3, iStartBlockid);
  2292         -  if( rc!=SQLITE_OK ) return rc;
  2293         -
  2294         -  rc = sqlite3_bind_int64(s, 4, iLeavesEndBlockid);
  2295         -  if( rc!=SQLITE_OK ) return rc;
  2296         -
  2297         -  rc = sqlite3_bind_int64(s, 5, iEndBlockid);
  2298         -  if( rc!=SQLITE_OK ) return rc;
  2299         -
  2300         -  rc = sqlite3_bind_blob(s, 6, pRootData, nRootData, SQLITE_STATIC);
  2301         -  if( rc!=SQLITE_OK ) return rc;
  2302         -
  2303         -  return sql_single_step(s);
  2304         -}
  2305         -
  2306         -/* Queries %_segdir for the block span of the segments in level
  2307         -** iLevel.  Returns SQLITE_DONE if there are no blocks for iLevel,
  2308         -** SQLITE_ROW if there are blocks, else an error.
  2309         -*/
  2310         -static int segdir_span(fulltext_vtab *v, int iLevel,
  2311         -                       sqlite_int64 *piStartBlockid,
  2312         -                       sqlite_int64 *piEndBlockid){
  2313         -  sqlite3_stmt *s;
  2314         -  int rc = sql_get_statement(v, SEGDIR_SPAN_STMT, &s);
  2315         -  if( rc!=SQLITE_OK ) return rc;
  2316         -
  2317         -  rc = sqlite3_bind_int(s, 1, iLevel);
  2318         -  if( rc!=SQLITE_OK ) return rc;
  2319         -
  2320         -  rc = sqlite3_step(s);
  2321         -  if( rc==SQLITE_DONE ) return SQLITE_DONE;  /* Should never happen */
  2322         -  if( rc!=SQLITE_ROW ) return rc;
  2323         -
  2324         -  /* This happens if all segments at this level are entirely inline. */
  2325         -  if( SQLITE_NULL==sqlite3_column_type(s, 0) ){
  2326         -    /* We expect only one row.  We must execute another sqlite3_step()
  2327         -     * to complete the iteration; otherwise the table will remain locked. */
  2328         -    int rc2 = sqlite3_step(s);
  2329         -    if( rc2==SQLITE_ROW ) return SQLITE_ERROR;
  2330         -    return rc2;
  2331         -  }
  2332         -
  2333         -  *piStartBlockid = sqlite3_column_int64(s, 0);
  2334         -  *piEndBlockid = sqlite3_column_int64(s, 1);
  2335         -
  2336         -  /* We expect only one row.  We must execute another sqlite3_step()
  2337         -   * to complete the iteration; otherwise the table will remain locked. */
  2338         -  rc = sqlite3_step(s);
  2339         -  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  2340         -  if( rc!=SQLITE_DONE ) return rc;
  2341         -  return SQLITE_ROW;
  2342         -}
  2343         -
  2344         -/* Delete the segment blocks and segment directory records for all
  2345         -** segments at iLevel.
  2346         -*/
  2347         -static int segdir_delete(fulltext_vtab *v, int iLevel){
  2348         -  sqlite3_stmt *s;
  2349         -  sqlite_int64 iStartBlockid, iEndBlockid;
  2350         -  int rc = segdir_span(v, iLevel, &iStartBlockid, &iEndBlockid);
  2351         -  if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc;
  2352         -
  2353         -  if( rc==SQLITE_ROW ){
  2354         -    rc = block_delete(v, iStartBlockid, iEndBlockid);
  2355         -    if( rc!=SQLITE_OK ) return rc;
  2356         -  }
  2357         -
  2358         -  /* Delete the segment directory itself. */
  2359         -  rc = sql_get_statement(v, SEGDIR_DELETE_STMT, &s);
  2360         -  if( rc!=SQLITE_OK ) return rc;
  2361         -
  2362         -  rc = sqlite3_bind_int64(s, 1, iLevel);
  2363         -  if( rc!=SQLITE_OK ) return rc;
  2364         -
  2365         -  return sql_single_step(s);
  2366         -}
  2367         -
  2368         -/* Delete entire fts index, SQLITE_OK on success, relevant error on
  2369         -** failure.
  2370         -*/
  2371         -static int segdir_delete_all(fulltext_vtab *v){
  2372         -  sqlite3_stmt *s;
  2373         -  int rc = sql_get_statement(v, SEGDIR_DELETE_ALL_STMT, &s);
  2374         -  if( rc!=SQLITE_OK ) return rc;
  2375         -
  2376         -  rc = sql_single_step(s);
  2377         -  if( rc!=SQLITE_OK ) return rc;
  2378         -
  2379         -  rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s);
  2380         -  if( rc!=SQLITE_OK ) return rc;
  2381         -
  2382         -  return sql_single_step(s);
  2383         -}
  2384         -
  2385         -/* Returns SQLITE_OK with *pnSegments set to the number of entries in
  2386         -** %_segdir and *piMaxLevel set to the highest level which has a
  2387         -** segment.  Otherwise returns the SQLite error which caused failure.
  2388         -*/
  2389         -static int segdir_count(fulltext_vtab *v, int *pnSegments, int *piMaxLevel){
  2390         -  sqlite3_stmt *s;
  2391         -  int rc = sql_get_statement(v, SEGDIR_COUNT_STMT, &s);
  2392         -  if( rc!=SQLITE_OK ) return rc;
  2393         -
  2394         -  rc = sqlite3_step(s);
  2395         -  /* TODO(shess): This case should not be possible?  Should stronger
  2396         -  ** measures be taken if it happens?
  2397         -  */
  2398         -  if( rc==SQLITE_DONE ){
  2399         -    *pnSegments = 0;
  2400         -    *piMaxLevel = 0;
  2401         -    return SQLITE_OK;
  2402         -  }
  2403         -  if( rc!=SQLITE_ROW ) return rc;
  2404         -
  2405         -  *pnSegments = sqlite3_column_int(s, 0);
  2406         -  *piMaxLevel = sqlite3_column_int(s, 1);
  2407         -
  2408         -  /* We expect only one row.  We must execute another sqlite3_step()
  2409         -   * to complete the iteration; otherwise the table will remain locked. */
  2410         -  rc = sqlite3_step(s);
  2411         -  if( rc==SQLITE_DONE ) return SQLITE_OK;
  2412         -  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  2413         -  return rc;
  2414         -}
  2415         -
  2416         -/* TODO(shess) clearPendingTerms() is far down the file because
  2417         -** writeZeroSegment() is far down the file because LeafWriter is far
  2418         -** down the file.  Consider refactoring the code to move the non-vtab
  2419         -** code above the vtab code so that we don't need this forward
  2420         -** reference.
  2421         -*/
  2422         -static int clearPendingTerms(fulltext_vtab *v);
  2423         -
  2424         -/*
  2425         -** Free the memory used to contain a fulltext_vtab structure.
  2426         -*/
  2427         -static void fulltext_vtab_destroy(fulltext_vtab *v){
  2428         -  int iStmt, i;
  2429         -
  2430         -  FTSTRACE(("FTS3 Destroy %p\n", v));
  2431         -  for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
  2432         -    if( v->pFulltextStatements[iStmt]!=NULL ){
  2433         -      sqlite3_finalize(v->pFulltextStatements[iStmt]);
  2434         -      v->pFulltextStatements[iStmt] = NULL;
  2435         -    }
  2436         -  }
  2437         -
  2438         -  for( i=0; i<MERGE_COUNT; i++ ){
  2439         -    if( v->pLeafSelectStmts[i]!=NULL ){
  2440         -      sqlite3_finalize(v->pLeafSelectStmts[i]);
  2441         -      v->pLeafSelectStmts[i] = NULL;
  2442         -    }
  2443         -  }
  2444         -
  2445         -  if( v->pTokenizer!=NULL ){
  2446         -    v->pTokenizer->pModule->xDestroy(v->pTokenizer);
  2447         -    v->pTokenizer = NULL;
  2448         -  }
  2449         -
  2450         -  clearPendingTerms(v);
  2451         -
  2452         -  sqlite3_free(v->azColumn);
  2453         -  for(i = 0; i < v->nColumn; ++i) {
  2454         -    sqlite3_free(v->azContentColumn[i]);
  2455         -  }
  2456         -  sqlite3_free(v->azContentColumn);
  2457         -  sqlite3_free(v);
  2458         -}
  2459         -
  2460         -/*
  2461         -** Token types for parsing the arguments to xConnect or xCreate.
  2462         -*/
  2463         -#define TOKEN_EOF         0    /* End of file */
  2464         -#define TOKEN_SPACE       1    /* Any kind of whitespace */
  2465         -#define TOKEN_ID          2    /* An identifier */
  2466         -#define TOKEN_STRING      3    /* A string literal */
  2467         -#define TOKEN_PUNCT       4    /* A single punctuation character */
  2468         -
  2469         -/*
  2470         -** If X is a character that can be used in an identifier then
  2471         -** ftsIdChar(X) will be true.  Otherwise it is false.
  2472         -**
  2473         -** For ASCII, any character with the high-order bit set is
  2474         -** allowed in an identifier.  For 7-bit characters, 
  2475         -** isFtsIdChar[X] must be 1.
  2476         -**
  2477         -** Ticket #1066.  The SQL standard does not allow '$' in the
  2478         -** middle of identifiers.  But many SQL implementations do.
  2479         -** SQLite will allow '$' in identifiers for compatibility.
  2480         -** But the feature is undocumented.
  2481         -*/
  2482         -static const char isFtsIdChar[] = {
  2483         -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
  2484         -    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
  2485         -    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
  2486         -    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
  2487         -    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
  2488         -    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
  2489         -    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
  2490         -};
  2491         -#define ftsIdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isFtsIdChar[c-0x20]))
  2492         -
  2493         -
  2494         -/*
  2495         -** Return the length of the token that begins at z[0]. 
  2496         -** Store the token type in *tokenType before returning.
  2497         -*/
  2498         -static int ftsGetToken(const char *z, int *tokenType){
  2499         -  int i, c;
  2500         -  switch( *z ){
  2501         -    case 0: {
  2502         -      *tokenType = TOKEN_EOF;
  2503         -      return 0;
  2504         -    }
  2505         -    case ' ': case '\t': case '\n': case '\f': case '\r': {
  2506         -      for(i=1; safe_isspace(z[i]); i++){}
  2507         -      *tokenType = TOKEN_SPACE;
  2508         -      return i;
  2509         -    }
  2510         -    case '`':
  2511         -    case '\'':
  2512         -    case '"': {
  2513         -      int delim = z[0];
  2514         -      for(i=1; (c=z[i])!=0; i++){
  2515         -        if( c==delim ){
  2516         -          if( z[i+1]==delim ){
  2517         -            i++;
  2518         -          }else{
  2519         -            break;
  2520         -          }
  2521         -        }
  2522         -      }
  2523         -      *tokenType = TOKEN_STRING;
  2524         -      return i + (c!=0);
  2525         -    }
  2526         -    case '[': {
  2527         -      for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
  2528         -      *tokenType = TOKEN_ID;
  2529         -      return i;
  2530         -    }
  2531         -    default: {
  2532         -      if( !ftsIdChar(*z) ){
  2533         -        break;
  2534         -      }
  2535         -      for(i=1; ftsIdChar(z[i]); i++){}
  2536         -      *tokenType = TOKEN_ID;
  2537         -      return i;
  2538         -    }
  2539         -  }
  2540         -  *tokenType = TOKEN_PUNCT;
  2541         -  return 1;
  2542         -}
  2543         -
  2544         -/*
  2545         -** A token extracted from a string is an instance of the following
  2546         -** structure.
  2547         -*/
  2548         -typedef struct FtsToken {
  2549         -  const char *z;       /* Pointer to token text.  Not '\000' terminated */
  2550         -  short int n;         /* Length of the token text in bytes. */
  2551         -} FtsToken;
  2552         -
  2553         -/*
  2554         -** Given an input string (which is really one of the argv[] parameters
  2555         -** passed into xConnect or xCreate) split the string up into tokens.
  2556         -** Return an array of pointers to '\000' terminated strings, one string
  2557         -** for each non-whitespace token.
  2558         -**
  2559         -** The returned array is terminated by a single NULL pointer.
  2560         -**
  2561         -** Space to hold the returned array is obtained from a single
  2562         -** sqlite3_malloc() and should be freed by passing it to sqlite3_free().
  2563         -** The individual strings within the token list are all a part of
  2564         -** the single memory allocation and will all be freed at once.
  2565         -*/
  2566         -static char **tokenizeString(const char *z, int *pnToken){
  2567         -  int nToken = 0;
  2568         -  FtsToken *aToken = sqlite3_malloc( strlen(z) * sizeof(aToken[0]) );
  2569         -  int n = 1;
  2570         -  int e, i;
  2571         -  int totalSize = 0;
  2572         -  char **azToken;
  2573         -  char *zCopy;
  2574         -  while( n>0 ){
  2575         -    n = ftsGetToken(z, &e);
  2576         -    if( e!=TOKEN_SPACE ){
  2577         -      aToken[nToken].z = z;
  2578         -      aToken[nToken].n = n;
  2579         -      nToken++;
  2580         -      totalSize += n+1;
  2581         -    }
  2582         -    z += n;
  2583         -  }
  2584         -  azToken = (char**)sqlite3_malloc( nToken*sizeof(char*) + totalSize );
  2585         -  zCopy = (char*)&azToken[nToken];
  2586         -  nToken--;
  2587         -  for(i=0; i<nToken; i++){
  2588         -    azToken[i] = zCopy;
  2589         -    n = aToken[i].n;
  2590         -    memcpy(zCopy, aToken[i].z, n);
  2591         -    zCopy[n] = 0;
  2592         -    zCopy += n+1;
  2593         -  }
  2594         -  azToken[nToken] = 0;
  2595         -  sqlite3_free(aToken);
  2596         -  *pnToken = nToken;
  2597         -  return azToken;
          409  +/*
          410  +** Return the number of bytes required to store the value passed as the
          411  +** first argument in varint form.
          412  +*/
          413  +int sqlite3Fts3VarintLen(sqlite3_uint64 v){
          414  +  int i = 0;
          415  +  do{
          416  +    i++;
          417  +    v >>= 7;
          418  +  }while( v!=0 );
          419  +  return i;
  2598    420   }
  2599    421   
  2600    422   /*
  2601    423   ** Convert an SQL-style quoted string into a normal string by removing
  2602    424   ** the quote characters.  The conversion is done in-place.  If the
  2603    425   ** input does not begin with a quote character, then this routine
  2604    426   ** is a no-op.
................................................................................
  2606    428   ** Examples:
  2607    429   **
  2608    430   **     "abc"   becomes   abc
  2609    431   **     'xyz'   becomes   xyz
  2610    432   **     [pqr]   becomes   pqr
  2611    433   **     `mno`   becomes   mno
  2612    434   */
  2613         -static void dequoteString(char *z){
          435  +void sqlite3Fts3Dequote(char *z){
  2614    436     int quote;
  2615    437     int i, j;
  2616         -  if( z==0 ) return;
          438  +
  2617    439     quote = z[0];
  2618    440     switch( quote ){
  2619    441       case '\'':  break;
  2620    442       case '"':   break;
  2621    443       case '`':   break;                /* For MySQL compatibility */
  2622    444       case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */
  2623    445       default:    return;
................................................................................
  2633    455         }
  2634    456       }else{
  2635    457         z[j++] = z[i];
  2636    458       }
  2637    459     }
  2638    460   }
  2639    461   
  2640         -/*
  2641         -** The input azIn is a NULL-terminated list of tokens.  Remove the first
  2642         -** token and all punctuation tokens.  Remove the quotes from
  2643         -** around string literal tokens.
  2644         -**
  2645         -** Example:
  2646         -**
  2647         -**     input:      tokenize chinese ( 'simplified' , 'mixed' )
  2648         -**     output:     chinese simplified mixed
  2649         -**
  2650         -** Another example:
  2651         -**
  2652         -**     input:      delimiters ( '[' , ']' , '...' )
  2653         -**     output:     [ ] ...
  2654         -*/
  2655         -static void tokenListToIdList(char **azIn){
  2656         -  int i, j;
  2657         -  if( azIn ){
  2658         -    for(i=0, j=-1; azIn[i]; i++){
  2659         -      if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
  2660         -        dequoteString(azIn[i]);
  2661         -        if( j>=0 ){
  2662         -          azIn[j] = azIn[i];
  2663         -        }
  2664         -        j++;
  2665         -      }
  2666         -    }
  2667         -    azIn[j] = 0;
  2668         -  }
  2669         -}
  2670         -
  2671         -
  2672         -/*
  2673         -** Find the first alphanumeric token in the string zIn.  Null-terminate
  2674         -** this token.  Remove any quotation marks.  And return a pointer to
  2675         -** the result.
  2676         -*/
  2677         -static char *firstToken(char *zIn, char **pzTail){
  2678         -  int n, ttype;
  2679         -  while(1){
  2680         -    n = ftsGetToken(zIn, &ttype);
  2681         -    if( ttype==TOKEN_SPACE ){
  2682         -      zIn += n;
  2683         -    }else if( ttype==TOKEN_EOF ){
  2684         -      *pzTail = zIn;
  2685         -      return 0;
  2686         -    }else{
  2687         -      zIn[n] = 0;
  2688         -      *pzTail = &zIn[1];
  2689         -      dequoteString(zIn);
  2690         -      return zIn;
  2691         -    }
  2692         -  }
  2693         -  /*NOTREACHED*/
  2694         -}
  2695         -
  2696         -/* Return true if...
  2697         -**
  2698         -**   *  s begins with the string t, ignoring case
  2699         -**   *  s is longer than t
  2700         -**   *  The first character of s beyond t is not alphanumeric
  2701         -** 
  2702         -** Ignore leading space in *s.
  2703         -**
  2704         -** To put it another way, return true if the first token of
  2705         -** s[] is t[].
  2706         -*/
  2707         -static int startsWith(const char *s, const char *t){
  2708         -  while( safe_isspace(*s) ){ s++; }
  2709         -  while( *t ){
  2710         -    if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0;
  2711         -  }
  2712         -  return *s!='_' && !safe_isalnum(*s);
  2713         -}
  2714         -
  2715         -/*
  2716         -** An instance of this structure defines the "spec" of a
  2717         -** full text index.  This structure is populated by parseSpec
  2718         -** and used by fulltextConnect and fulltextCreate.
  2719         -*/
  2720         -typedef struct TableSpec {
  2721         -  const char *zDb;         /* Logical database name */
  2722         -  const char *zName;       /* Name of the full-text index */
  2723         -  int nColumn;             /* Number of columns to be indexed */
  2724         -  char **azColumn;         /* Original names of columns to be indexed */
  2725         -  char **azContentColumn;  /* Column names for %_content */
  2726         -  char **azTokenizer;      /* Name of tokenizer and its arguments */
  2727         -} TableSpec;
  2728         -
  2729         -/*
  2730         -** Reclaim all of the memory used by a TableSpec
  2731         -*/
  2732         -static void clearTableSpec(TableSpec *p) {
  2733         -  sqlite3_free(p->azColumn);
  2734         -  sqlite3_free(p->azContentColumn);
  2735         -  sqlite3_free(p->azTokenizer);
  2736         -}
  2737         -
  2738         -/* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
  2739         - *
  2740         - * CREATE VIRTUAL TABLE email
  2741         - *        USING fts3(subject, body, tokenize mytokenizer(myarg))
  2742         - *
  2743         - * We return parsed information in a TableSpec structure.
  2744         - * 
  2745         - */
  2746         -static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
  2747         -                     char**pzErr){
  2748         -  int i, n;
  2749         -  char *z, *zDummy;
  2750         -  char **azArg;
  2751         -  const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */
  2752         -
  2753         -  assert( argc>=3 );
  2754         -  /* Current interface:
  2755         -  ** argv[0] - module name
  2756         -  ** argv[1] - database name
  2757         -  ** argv[2] - table name
  2758         -  ** argv[3..] - columns, optionally followed by tokenizer specification
  2759         -  **             and snippet delimiters specification.
          462  +static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){
          463  +  sqlite3_int64 iVal;
          464  +  *pp += sqlite3Fts3GetVarint(*pp, &iVal);
          465  +  *pVal += iVal;
          466  +}
          467  +
          468  +static void fts3GetDeltaVarint2(char **pp, char *pEnd, sqlite3_int64 *pVal){
          469  +  if( *pp>=pEnd ){
          470  +    *pp = 0;
          471  +  }else{
          472  +    fts3GetDeltaVarint(pp, pVal);
          473  +  }
          474  +}
          475  +
          476  +
          477  +/*
          478  +** The Fts3Cursor.eType member is always set to one of the following.
          479  +*/
          480  +#define FTS3_FULLSCAN_SEARCH 0    /* Linear scan of %_content table */
          481  +#define FTS3_DOCID_SEARCH    1    /* Lookup by rowid on %_content table */
          482  +#define FTS3_FULLTEXT_SEARCH 2    /* Full-text index search */
          483  +
          484  +static Fts3Table *cursor_vtab(Fts3Cursor *c){
          485  +  return (Fts3Table *) c->base.pVtab;
          486  +}
          487  +
          488  +/*
          489  +** The xDisconnect() virtual table method.
          490  +*/
          491  +static int fts3DisconnectMethod(sqlite3_vtab *pVtab){
          492  +  Fts3Table *p = (Fts3Table *)pVtab;
          493  +  int i;
          494  +
          495  +  assert( p->nPendingData==0 );
          496  +
          497  +  /* Free any prepared statements held */
          498  +  for(i=0; i<SizeofArray(p->aStmt); i++){
          499  +    sqlite3_finalize(p->aStmt[i]);
          500  +  }
          501  +  sqlite3_free(p->zSelectLeaves);
          502  +
          503  +  /* Invoke the tokenizer destructor to free the tokenizer. */
          504  +  p->pTokenizer->pModule->xDestroy(p->pTokenizer);
          505  +
          506  +  sqlite3_free(p);
          507  +  return SQLITE_OK;
          508  +}
          509  +
          510  +/*
          511  +** The xDestroy() virtual table method.
          512  +*/
          513  +static int fts3DestroyMethod(sqlite3_vtab *pVtab){
          514  +  int rc;                         /* Return code */
          515  +  Fts3Table *p = (Fts3Table *)pVtab;
          516  +
          517  +  /* Create a script to drop the three underlying storage tables. */
          518  +  char *zSql = sqlite3_mprintf(
          519  +      "DROP TABLE IF EXISTS %Q.'%q_content';"
          520  +      "DROP TABLE IF EXISTS %Q.'%q_segments';"
          521  +      "DROP TABLE IF EXISTS %Q.'%q_segdir';", 
          522  +      p->zDb, p->zName, p->zDb, p->zName, p->zDb, p->zName
          523  +  );
          524  +
          525  +  /* If malloc has failed, set rc to SQLITE_NOMEM. Otherwise, try to
          526  +  ** execute the SQL script created above.
          527  +  */
          528  +  if( zSql ){
          529  +    rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
          530  +    sqlite3_free(zSql);
          531  +  }else{
          532  +    rc = SQLITE_NOMEM;
          533  +  }
          534  +
          535  +  /* If everything has worked, invoke fts3DisconnectMethod() to free the
          536  +  ** memory associated with the Fts3Table structure and return SQLITE_OK.
          537  +  ** Otherwise, return an SQLite error code.
          538  +  */
          539  +  return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc);
          540  +}
          541  +
          542  +
          543  +/*
          544  +** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table
          545  +** passed as the first argument. This is done as part of the xConnect()
          546  +** and xCreate() methods.
          547  +*/
          548  +static int fts3DeclareVtab(Fts3Table *p){
          549  +  int i;                          /* Iterator variable */
          550  +  int rc;                         /* Return code */
          551  +  char *zSql;                     /* SQL statement passed to declare_vtab() */
          552  +  char *zCols;                    /* List of user defined columns */
          553  +
          554  +  /* Create a list of user columns for the virtual table */
          555  +  zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
          556  +  for(i=1; zCols && i<p->nColumn; i++){
          557  +    zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
          558  +  }
          559  +
          560  +  /* Create the whole "CREATE TABLE" statement to pass to SQLite */
          561  +  zSql = sqlite3_mprintf(
          562  +      "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName
          563  +  );
          564  +
          565  +  if( !zCols || !zSql ){
          566  +    rc = SQLITE_NOMEM;
          567  +  }else{
          568  +    rc = sqlite3_declare_vtab(p->db, zSql);
          569  +  }
          570  +
          571  +  sqlite3_free(zSql);
          572  +  sqlite3_free(zCols);
          573  +  return rc;
          574  +}
          575  +
          576  +/*
          577  +** Create the backing store tables (%_content, %_segments and %_segdir)
          578  +** required by the FTS3 table passed as the only argument. This is done
          579  +** as part of the vtab xCreate() method.
          580  +*/
          581  +static int fts3CreateTables(Fts3Table *p){
          582  +  int rc;                         /* Return code */
          583  +  int i;                          /* Iterator variable */
          584  +  char *zContentCols;             /* Columns of %_content table */
          585  +  char *zSql;                     /* SQL script to create required tables */
          586  +
          587  +  /* Create a list of user columns for the content table */
          588  +  zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
          589  +  for(i=0; zContentCols && i<p->nColumn; i++){
          590  +    char *z = p->azColumn[i];
          591  +    zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
          592  +  }
          593  +
          594  +  /* Create the whole SQL script */
          595  +  zSql = sqlite3_mprintf(
          596  +      "CREATE TABLE %Q.'%q_content'(%s);"
          597  +      "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);"
          598  +      "CREATE TABLE %Q.'%q_segdir'("
          599  +        "level INTEGER,"
          600  +        "idx INTEGER,"
          601  +        "start_block INTEGER,"
          602  +        "leaves_end_block INTEGER,"
          603  +        "end_block INTEGER,"
          604  +        "root BLOB,"
          605  +        "PRIMARY KEY(level, idx)"
          606  +      ");",
          607  +      p->zDb, p->zName, zContentCols, p->zDb, p->zName, p->zDb, p->zName
          608  +  );
          609  +
          610  +  /* Unless a malloc() failure has occurred, execute the SQL script to 
          611  +  ** create the tables used to store data for this FTS3 virtual table.
          612  +  */
          613  +  if( zContentCols==0 || zSql==0 ){
          614  +    rc = SQLITE_NOMEM;
          615  +  }else{
          616  +    rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
          617  +  }
          618  +
          619  +  sqlite3_free(zSql);
          620  +  sqlite3_free(zContentCols);
          621  +  return rc;
          622  +}
          623  +
          624  +/*
          625  +** This function is the implementation of both the xConnect and xCreate
          626  +** methods of the FTS3 virtual table.
          627  +**
          628  +** The argv[] array contains the following:
          629  +**
          630  +**   argv[0]   -> module name
          631  +**   argv[1]   -> database name
          632  +**   argv[2]   -> table name
          633  +**   argv[3..] -> column names, plus an optional tokenizer specification
          634  +*/
          635  +int fts3InitVtab(
          636  +  int isCreate,                   /* True for xCreate, false for xConnect */
          637  +  sqlite3 *db,                    /* The SQLite database connection */
          638  +  void *pAux,                     /* Hash table containing tokenizers */
          639  +  int argc,                       /* Number of elements in argv array */
          640  +  const char * const *argv,       /* xCreate/xConnect argument array */
          641  +  sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
          642  +  char **pzErr                    /* Write any error message here */
          643  +){
          644  +  Fts3Hash *pHash = (Fts3Hash *)pAux;  /* pAux viewed as the tokenizer hash */
          645  +  Fts3Table *p;                   /* Pointer to allocated vtab */
          646  +  int rc;                         /* Return code */
          647  +  int i;                          /* Index into argv[] */
          648  +  int nByte;                      /* Total size of the Fts3Table allocation */
          649  +  int iCol;                       /* Next azColumn[] slot to fill */
          650  +  int nString = 0;                /* Bytes needed for the column-name strings */
          651  +  int nCol = 0;                   /* Number of columns in the table */
          652  +  char *zCsr;                     /* Write position within the allocation */
          653  +  int nDb;                        /* Size of argv[1], including nul terminator */
          654  +  int nName;                      /* Size of argv[2], including nul terminator */
          655  +
          656  +  const char *zTokenizer = 0;     /* Set via sqlite3Fts3InitTokenizer(); compared to argv[i] */
          657  +  sqlite3_tokenizer *pTokenizer;  /* Tokenizer for this table */
          658  +
          659  +  nDb = strlen(argv[1]) + 1;
          660  +  nName = strlen(argv[2]) + 1;
          661  +  for(i=3; i<argc; i++){          /* Every argument after the table name */
          662  +    char const *z = argv[i];
          663  +    rc = sqlite3Fts3InitTokenizer(pHash, z, &pTokenizer, &zTokenizer, pzErr);
          664  +    if( rc!=SQLITE_OK ){
          665  +      return rc;
          666  +    }
          667  +    if( z!=zTokenizer ){          /* z will be stored as a column name */
          668  +      nString += strlen(z) + 1;
          669  +    }
          670  +  }
          671  +  nCol = argc - 3 - (zTokenizer!=0);  /* NOTE(review): can be 0; no default column is added */
          672  +  if( zTokenizer==0 ){            /* No argv[] entry was recorded in zTokenizer */
          673  +    rc = sqlite3Fts3InitTokenizer(pHash, 0, &pTokenizer, 0, pzErr);
          674  +    if( rc!=SQLITE_OK ){
          675  +      return rc;
          676  +    }
          677  +    assert( pTokenizer );
          678  +  }
          679  +
          680  +  /* Allocate and populate the Fts3Table; one block holds everything. */
          681  +  nByte = sizeof(Fts3Table) +              /* Fts3Table */
          682  +          nCol * sizeof(char *) +              /* azColumn */
          683  +          nName +                              /* zName */
          684  +          nDb +                                /* zDb */
          685  +          nString;                             /* Space for azColumn strings */
          686  +  p = (Fts3Table*)sqlite3_malloc(nByte);
          687  +  if( p==0 ){
          688  +    rc = SQLITE_NOMEM;
          689  +    goto fts3_init_out;
          690  +  }
          691  +  memset(p, 0, nByte);
          692  +
          693  +  p->db = db;
          694  +  p->nColumn = nCol;
          695  +  p->nPendingData = 0;
          696  +  p->azColumn = (char **)&p[1];   /* Pointer array starts right after the struct */
          697  +  p->pTokenizer = pTokenizer;
          698  +  zCsr = (char *)&p->azColumn[nCol];  /* String storage starts after the array */
          699  +
          700  +  fts3HashInit(&p->pendingTerms, FTS3_HASH_STRING, 1);
          701  +
          702  +  /* Fill in the zName and zDb fields of the vtab structure. */
          703  +  p->zName = zCsr;
          704  +  memcpy(zCsr, argv[2], nName);
          705  +  zCsr += nName;
          706  +  p->zDb = zCsr;
          707  +  memcpy(zCsr, argv[1], nDb);
          708  +  zCsr += nDb;
          709  +
          710  +  /* Fill in the azColumn array, skipping the entry equal to zTokenizer */
          711  +  iCol = 0;
          712  +  for(i=3; i<argc; i++){
          713  +    if( argv[i]!=zTokenizer ){
          714  +      char *z; 
          715  +      int n;
          716  +      z = (char *)sqlite3Fts3NextToken(argv[i], &n);  /* z,n: start and length copied below */
          717  +      memcpy(zCsr, z, n);
          718  +      zCsr[n] = '\0';
          719  +      sqlite3Fts3Dequote(zCsr);
          720  +      p->azColumn[iCol++] = zCsr;
          721  +      zCsr += n+1;
          722  +      assert( zCsr <= &((char *)p)[nByte] );
          723  +    }
          724  +  }
          725  +
          726  +  /* If this is an xCreate call, create the underlying tables in the 
          727  +  ** database. TODO: For xConnect(), it could verify that said tables exist.
          728  +  */
          729  +  if( isCreate ){
          730  +    rc = fts3CreateTables(p);
          731  +    if( rc!=SQLITE_OK ) goto fts3_init_out;
          732  +  }
          733  +
          734  +  rc = fts3DeclareVtab(p);
          735  +  if( rc!=SQLITE_OK ) goto fts3_init_out;
          736  +
          737  +  *ppVTab = &p->base;
          738  +
          739  +fts3_init_out:
          740  +  if( rc!=SQLITE_OK ){            /* Failure: release whatever was built */
          741  +    if( p ) fts3DisconnectMethod((sqlite3_vtab *)p);
          742  +    else if( pTokenizer ) pTokenizer->pModule->xDestroy(pTokenizer);
          743  +  }
          744  +  return rc;
          745  +}
          746  +
          747  +/*
          748  +** The xConnect() and xCreate() methods for the virtual table. All the
          749  +** work is done in function fts3InitVtab().
          750  +*/
          751  +static int fts3ConnectMethod(
          752  +  sqlite3 *db,                    /* Database connection */
          753  +  void *pAux,                     /* Pointer to tokenizer hash table */
          754  +  int argc,                       /* Number of elements in argv array */
          755  +  const char * const *argv,       /* xCreate/xConnect argument array */
          756  +  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
          757  +  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
          758  +){
          759  +  return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);  /* isCreate=0 */
          760  +}
          761  +static int fts3CreateMethod(
          762  +  sqlite3 *db,                    /* Database connection */
          763  +  void *pAux,                     /* Pointer to tokenizer hash table */
          764  +  int argc,                       /* Number of elements in argv array */
          765  +  const char * const *argv,       /* xCreate/xConnect argument array */
          766  +  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
          767  +  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
          768  +){
          769  +  return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);  /* isCreate=1 */
          770  +}
          771  +
          772  +/* 
          773  +** Implementation of the xBestIndex method for FTS3 tables. There
          774  +** are three possible strategies, in order of preference:
          775  +**
          776  +**   1. Direct lookup by rowid or docid. 
          777  +**   2. Full-text search using a MATCH operator on a non-docid column.
          778  +**   3. Linear scan of %_content table.
          779  +*/
          780  +static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
          781  +  Fts3Table *p = (Fts3Table *)pVTab;  /* pVTab viewed as the FTS3 table */
          782  +  int i;                          /* Iterator variable */
          783  +  int iCons = -1;                 /* Index of constraint to use */
          784  +
          785  +  /* By default use a full table scan. This is an expensive option,
          786  +  ** so search through the constraints to see if a more efficient 
          787  +  ** strategy is possible.
  2760    788     */
  2761         -
  2762         -  /* Make a copy of the complete argv[][] array in a single allocation.
  2763         -  ** The argv[][] array is read-only and transient.  We can write to the
  2764         -  ** copy in order to modify things and the copy is persistent.
  2765         -  */
  2766         -  CLEAR(pSpec);
  2767         -  for(i=n=0; i<argc; i++){
  2768         -    n += strlen(argv[i]) + 1;
  2769         -  }
  2770         -  azArg = sqlite3_malloc( sizeof(char*)*argc + n );
  2771         -  if( azArg==0 ){
  2772         -    return SQLITE_NOMEM;
  2773         -  }
  2774         -  z = (char*)&azArg[argc];
  2775         -  for(i=0; i<argc; i++){
  2776         -    azArg[i] = z;
  2777         -    strcpy(z, argv[i]);
  2778         -    z += strlen(z)+1;
  2779         -  }
  2780         -
  2781         -  /* Identify the column names and the tokenizer and delimiter arguments
  2782         -  ** in the argv[][] array.
  2783         -  */
  2784         -  pSpec->zDb = azArg[1];
  2785         -  pSpec->zName = azArg[2];
  2786         -  pSpec->nColumn = 0;
  2787         -  pSpec->azColumn = azArg;
  2788         -  zTokenizer = "tokenize simple";
  2789         -  for(i=3; i<argc; ++i){
  2790         -    if( startsWith(azArg[i],"tokenize") ){
  2791         -      zTokenizer = azArg[i];
  2792         -    }else{
  2793         -      z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
  2794         -      pSpec->nColumn++;
  2795         -    }
  2796         -  }
  2797         -  if( pSpec->nColumn==0 ){
  2798         -    azArg[0] = "content";
  2799         -    pSpec->nColumn = 1;
  2800         -  }
  2801         -
  2802         -  /*
  2803         -  ** Construct the list of content column names.
  2804         -  **
  2805         -  ** Each content column name will be of the form cNNAAAA
  2806         -  ** where NN is the column number and AAAA is the sanitized
  2807         -  ** column name.  "sanitized" means that special characters are
  2808         -  ** converted to "_".  The cNN prefix guarantees that all column
  2809         -  ** names are unique.
  2810         -  **
  2811         -  ** The AAAA suffix is not strictly necessary.  It is included
  2812         -  ** for the convenience of people who might examine the generated
  2813         -  ** %_content table and wonder what the columns are used for.
  2814         -  */
  2815         -  pSpec->azContentColumn = sqlite3_malloc( pSpec->nColumn * sizeof(char *) );
  2816         -  if( pSpec->azContentColumn==0 ){
  2817         -    clearTableSpec(pSpec);
  2818         -    return SQLITE_NOMEM;
  2819         -  }
  2820         -  for(i=0; i<pSpec->nColumn; i++){
  2821         -    char *p;
  2822         -    pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
  2823         -    for (p = pSpec->azContentColumn[i]; *p ; ++p) {
  2824         -      if( !safe_isalnum(*p) ) *p = '_';
  2825         -    }
  2826         -  }
  2827         -
  2828         -  /*
  2829         -  ** Parse the tokenizer specification string.
  2830         -  */
  2831         -  pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
  2832         -  tokenListToIdList(pSpec->azTokenizer);
  2833         -
          789  +  pInfo->idxNum = FTS3_FULLSCAN_SEARCH;  /* Fallback if no constraint is chosen */
          790  +  pInfo->estimatedCost = 500000;  /* Far above the 1.0 and 2.0 assigned below */
          791  +  for(i=0; i<pInfo->nConstraint; i++){
          792  +    struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
          793  +    if( pCons->usable==0 ) continue;  /* Ignore constraints marked unusable */
          794  +
          795  +    /* A direct lookup on the rowid or docid column. This is the best
          796  +    ** strategy in all cases. Assign a cost of 1.0 and return early.
          797  +    */
          798  +    if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ 
          799  +     && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 )
          800  +    ){
          801  +      pInfo->idxNum = FTS3_DOCID_SEARCH;
          802  +      pInfo->estimatedCost = 1.0;
          803  +      iCons = i;                  /* Replaces any MATCH constraint picked earlier */
          804  +      break;
          805  +    }
          806  +
          807  +    /* A MATCH constraint. Use a full-text search.
          808  +    **
          809  +    ** If there is more than one MATCH constraint available, use the first
          810  +    ** one encountered. If there is both a MATCH constraint and a direct
          811  +    ** rowid/docid lookup, prefer the rowid/docid strategy.
          812  +    */
          813  +    if( iCons<0 
          814  +     && pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH 
          815  +     && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn  /* Note <=: index nColumn qualifies */
          816  +    ){
          817  +      pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;  /* Column index folded into idxNum */
          818  +      pInfo->estimatedCost = 2.0;
          819  +      iCons = i;
          820  +    }
          821  +  }
          822  +
          823  +  if( iCons>=0 ){                 /* A constraint was selected above */
          824  +    pInfo->aConstraintUsage[iCons].argvIndex = 1;  /* Pass its value as the first xFilter argument */
          825  +    pInfo->aConstraintUsage[iCons].omit = 1;       /* Ask SQLite not to re-check it */
          826  +  } 
  2834    827     return SQLITE_OK;
  2835    828   }
  2836    829   
  2837    830   /*
  2838         -** Generate a CREATE TABLE statement that describes the schema of
  2839         -** the virtual table.  Return a pointer to this schema string.
  2840         -**
  2841         -** Space is obtained from sqlite3_mprintf() and should be freed
  2842         -** using sqlite3_free().
  2843         -*/
  2844         -static char *fulltextSchema(
  2845         -  int nColumn,                  /* Number of columns */
  2846         -  const char *const* azColumn,  /* List of columns */
  2847         -  const char *zTableName        /* Name of the table */
  2848         -){
  2849         -  int i;
  2850         -  char *zSchema, *zNext;
  2851         -  const char *zSep = "(";
  2852         -  zSchema = sqlite3_mprintf("CREATE TABLE x");
  2853         -  for(i=0; i<nColumn; i++){
  2854         -    zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]);
  2855         -    sqlite3_free(zSchema);
  2856         -    zSchema = zNext;
  2857         -    zSep = ",";
  2858         -  }
  2859         -  zNext = sqlite3_mprintf("%s,%Q HIDDEN", zSchema, zTableName);
  2860         -  sqlite3_free(zSchema);
  2861         -  zSchema = zNext;
  2862         -  zNext = sqlite3_mprintf("%s,docid HIDDEN)", zSchema);
  2863         -  sqlite3_free(zSchema);
  2864         -  return zNext;
  2865         -}
  2866         -
  2867         -/*
  2868         -** Build a new sqlite3_vtab structure that will describe the
  2869         -** fulltext index defined by spec.
  2870         -*/
  2871         -static int constructVtab(
  2872         -  sqlite3 *db,              /* The SQLite database connection */
  2873         -  fts3Hash *pHash,          /* Hash table containing tokenizers */
  2874         -  TableSpec *spec,          /* Parsed spec information from parseSpec() */
  2875         -  sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */
  2876         -  char **pzErr              /* Write any error message here */
  2877         -){
  2878         -  int rc;
  2879         -  int n;
  2880         -  fulltext_vtab *v = 0;
  2881         -  const sqlite3_tokenizer_module *m = NULL;
  2882         -  char *schema;
  2883         -
  2884         -  char const *zTok;         /* Name of tokenizer to use for this fts table */
  2885         -  int nTok;                 /* Length of zTok, including nul terminator */
  2886         -
  2887         -  v = (fulltext_vtab *) sqlite3_malloc(sizeof(fulltext_vtab));
  2888         -  if( v==0 ) return SQLITE_NOMEM;
  2889         -  CLEAR(v);
  2890         -  /* sqlite will initialize v->base */
  2891         -  v->db = db;
  2892         -  v->zDb = spec->zDb;       /* Freed when azColumn is freed */
  2893         -  v->zName = spec->zName;   /* Freed when azColumn is freed */
  2894         -  v->nColumn = spec->nColumn;
  2895         -  v->azContentColumn = spec->azContentColumn;
  2896         -  spec->azContentColumn = 0;
  2897         -  v->azColumn = spec->azColumn;
  2898         -  spec->azColumn = 0;
  2899         -
  2900         -  if( spec->azTokenizer==0 ){
          831  +** Implementation of xOpen method.
          832  +*/
          833  +static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
          834  +  sqlite3_vtab_cursor *pCsr;               /* Allocated cursor */
          835  +
          836  +  /* Allocate a buffer large enough for an Fts3Cursor structure and store
          837  +  ** its address in *ppCsr. If the allocation succeeds, zero the buffer and
          838  +  ** return SQLITE_OK. If it fails, *ppCsr is NULL and SQLITE_NOMEM is returned.
          839  +  */
          840  +  *ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor));
          841  +  if( !pCsr ){
  2901    842       return SQLITE_NOMEM;
  2902    843     }
  2903         -
  2904         -  zTok = spec->azTokenizer[0]; 
  2905         -  if( !zTok ){
  2906         -    zTok = "simple";
  2907         -  }
  2908         -  nTok = strlen(zTok)+1;
  2909         -
  2910         -  m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zTok, nTok);
  2911         -  if( !m ){
  2912         -    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
  2913         -    rc = SQLITE_ERROR;
  2914         -    goto err;
  2915         -  }
  2916         -
  2917         -  for(n=0; spec->azTokenizer[n]; n++){}
  2918         -  if( n ){
  2919         -    rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
  2920         -                    &v->pTokenizer);
  2921         -  }else{
  2922         -    rc = m->xCreate(0, 0, &v->pTokenizer);
  2923         -  }
  2924         -  if( rc!=SQLITE_OK ) goto err;
  2925         -  v->pTokenizer->pModule = m;
  2926         -
  2927         -  /* TODO: verify the existence of backing tables foo_content, foo_term */
  2928         -
  2929         -  schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
  2930         -                          spec->zName);
  2931         -  rc = sqlite3_declare_vtab(db, schema);
  2932         -  sqlite3_free(schema);
  2933         -  if( rc!=SQLITE_OK ) goto err;
  2934         -
  2935         -  memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
  2936         -
  2937         -  /* Indicate that the buffer is not live. */
  2938         -  v->nPendingData = -1;
  2939         -
  2940         -  *ppVTab = &v->base;
  2941         -  FTSTRACE(("FTS3 Connect %p\n", v));
  2942         -
  2943         -  return rc;
  2944         -
  2945         -err:
  2946         -  fulltext_vtab_destroy(v);
  2947         -  return rc;
  2948         -}
  2949         -
  2950         -static int fulltextConnect(
  2951         -  sqlite3 *db,
  2952         -  void *pAux,
  2953         -  int argc, const char *const*argv,
  2954         -  sqlite3_vtab **ppVTab,
  2955         -  char **pzErr
  2956         -){
  2957         -  TableSpec spec;
  2958         -  int rc = parseSpec(&spec, argc, argv, pzErr);
  2959         -  if( rc!=SQLITE_OK ) return rc;
  2960         -
  2961         -  rc = constructVtab(db, (fts3Hash *)pAux, &spec, ppVTab, pzErr);
  2962         -  clearTableSpec(&spec);
  2963         -  return rc;
  2964         -}
  2965         -
  2966         -/* The %_content table holds the text of each document, with
  2967         -** the docid column exposed as the SQLite rowid for the table.
  2968         -*/
  2969         -/* TODO(shess) This comment needs elaboration to match the updated
  2970         -** code.  Work it into the top-of-file comment at that time.
  2971         -*/
  2972         -static int fulltextCreate(sqlite3 *db, void *pAux,
  2973         -                          int argc, const char * const *argv,
  2974         -                          sqlite3_vtab **ppVTab, char **pzErr){
  2975         -  int rc;
  2976         -  TableSpec spec;
  2977         -  StringBuffer schema;
  2978         -  FTSTRACE(("FTS3 Create\n"));
  2979         -
  2980         -  rc = parseSpec(&spec, argc, argv, pzErr);
  2981         -  if( rc!=SQLITE_OK ) return rc;
  2982         -
  2983         -  initStringBuffer(&schema);
  2984         -  append(&schema, "CREATE TABLE %_content(");
  2985         -  append(&schema, "  docid INTEGER PRIMARY KEY,");
  2986         -  appendList(&schema, spec.nColumn, spec.azContentColumn);
  2987         -  append(&schema, ")");
  2988         -  rc = sql_exec(db, spec.zDb, spec.zName, stringBufferData(&schema));
  2989         -  stringBufferDestroy(&schema);
  2990         -  if( rc!=SQLITE_OK ) goto out;
  2991         -
  2992         -  rc = sql_exec(db, spec.zDb, spec.zName,
  2993         -                "create table %_segments("
  2994         -                "  blockid INTEGER PRIMARY KEY,"
  2995         -                "  block blob"
  2996         -                ");"
  2997         -                );
  2998         -  if( rc!=SQLITE_OK ) goto out;
  2999         -
  3000         -  rc = sql_exec(db, spec.zDb, spec.zName,
  3001         -                "create table %_segdir("
  3002         -                "  level integer,"
  3003         -                "  idx integer,"
  3004         -                "  start_block integer,"
  3005         -                "  leaves_end_block integer,"
  3006         -                "  end_block integer,"
  3007         -                "  root blob,"
  3008         -                "  primary key(level, idx)"
  3009         -                ");");
  3010         -  if( rc!=SQLITE_OK ) goto out;
  3011         -
  3012         -  rc = constructVtab(db, (fts3Hash *)pAux, &spec, ppVTab, pzErr);
  3013         -
  3014         -out:
  3015         -  clearTableSpec(&spec);
  3016         -  return rc;
  3017         -}
  3018         -
  3019         -/* Decide how to handle an SQL query. */
  3020         -static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  3021         -  fulltext_vtab *v = (fulltext_vtab *)pVTab;
  3022         -  int i;
  3023         -  FTSTRACE(("FTS3 BestIndex\n"));
  3024         -
  3025         -  for(i=0; i<pInfo->nConstraint; ++i){
  3026         -    const struct sqlite3_index_constraint *pConstraint;
  3027         -    pConstraint = &pInfo->aConstraint[i];
  3028         -    if( pConstraint->usable ) {
  3029         -      if( (pConstraint->iColumn==-1 || pConstraint->iColumn==v->nColumn+1) &&
  3030         -          pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
  3031         -        pInfo->idxNum = QUERY_DOCID;      /* lookup by docid */
  3032         -        FTSTRACE(("FTS3 QUERY_DOCID\n"));
  3033         -      } else if( pConstraint->iColumn>=0 && pConstraint->iColumn<=v->nColumn &&
  3034         -                 pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
  3035         -        /* full-text search */
  3036         -        pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
  3037         -        FTSTRACE(("FTS3 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
  3038         -      } else continue;
  3039         -
  3040         -      pInfo->aConstraintUsage[i].argvIndex = 1;
  3041         -      pInfo->aConstraintUsage[i].omit = 1;
  3042         -
  3043         -      /* An arbitrary value for now.
  3044         -       * TODO: Perhaps docid matches should be considered cheaper than
  3045         -       * full-text searches. */
  3046         -      pInfo->estimatedCost = 1.0;   
  3047         -
  3048         -      return SQLITE_OK;
  3049         -    }
  3050         -  }
  3051         -  pInfo->idxNum = QUERY_GENERIC;
          844  +  memset(pCsr, 0, sizeof(Fts3Cursor));  /* Every cursor field starts out zeroed */
  3052    845     return SQLITE_OK;
  3053    846   }
  3054    847   
  3055         -static int fulltextDisconnect(sqlite3_vtab *pVTab){
  3056         -  FTSTRACE(("FTS3 Disconnect %p\n", pVTab));
  3057         -  fulltext_vtab_destroy((fulltext_vtab *)pVTab);
  3058         -  return SQLITE_OK;
  3059         -}
  3060         -
  3061         -static int fulltextDestroy(sqlite3_vtab *pVTab){
  3062         -  fulltext_vtab *v = (fulltext_vtab *)pVTab;
  3063         -  int rc;
  3064         -
  3065         -  FTSTRACE(("FTS3 Destroy %p\n", pVTab));
  3066         -  rc = sql_exec(v->db, v->zDb, v->zName,
  3067         -                "drop table if exists %_content;"
  3068         -                "drop table if exists %_segments;"
  3069         -                "drop table if exists %_segdir;"
  3070         -                );
  3071         -  if( rc!=SQLITE_OK ) return rc;
  3072         -
  3073         -  fulltext_vtab_destroy((fulltext_vtab *)pVTab);
  3074         -  return SQLITE_OK;
  3075         -}
  3076         -
  3077         -static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
  3078         -  fulltext_cursor *c;
  3079         -
  3080         -  c = (fulltext_cursor *) sqlite3_malloc(sizeof(fulltext_cursor));
  3081         -  if( c ){
  3082         -    memset(c, 0, sizeof(fulltext_cursor));
  3083         -    /* sqlite will initialize c->base */
  3084         -    *ppCursor = &c->base;
  3085         -    FTSTRACE(("FTS3 Open %p: %p\n", pVTab, c));
  3086         -    return SQLITE_OK;
  3087         -  }else{
  3088         -    return SQLITE_NOMEM;
  3089         -  }
  3090         -}
  3091         -
  3092         -/* Free all of the dynamically allocated memory held by the
  3093         -** Snippet
  3094         -*/
  3095         -static void snippetClear(Snippet *p){
  3096         -  sqlite3_free(p->aMatch);
  3097         -  sqlite3_free(p->zOffset);
  3098         -  sqlite3_free(p->zSnippet);
  3099         -  CLEAR(p);
  3100         -}
  3101         -
  3102         -/*
  3103         -** Append a single entry to the p->aMatch[] log.
  3104         -*/
  3105         -static void snippetAppendMatch(
  3106         -  Snippet *p,               /* Append the entry to this snippet */
  3107         -  int iCol, int iTerm,      /* The column and query term */
  3108         -  int iToken,               /* Matching token in document */
  3109         -  int iStart, int nByte     /* Offset and size of the match */
  3110         -){
  3111         -  int i;
  3112         -  struct snippetMatch *pMatch;
  3113         -  if( p->nMatch+1>=p->nAlloc ){
  3114         -    p->nAlloc = p->nAlloc*2 + 10;
  3115         -    p->aMatch = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
  3116         -    if( p->aMatch==0 ){
  3117         -      p->nMatch = 0;
  3118         -      p->nAlloc = 0;
  3119         -      return;
  3120         -    }
  3121         -  }
  3122         -  i = p->nMatch++;
  3123         -  pMatch = &p->aMatch[i];
  3124         -  pMatch->iCol = iCol;
  3125         -  pMatch->iTerm = iTerm;
  3126         -  pMatch->iToken = iToken;
  3127         -  pMatch->iStart = iStart;
  3128         -  pMatch->nByte = nByte;
  3129         -}
  3130         -
  3131         -/*
  3132         -** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
  3133         -*/
  3134         -#define FTS3_ROTOR_SZ   (32)
  3135         -#define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1)
  3136         -
  3137         -/*
  3138         -** Function to iterate through the tokens of a compiled expression.
  3139         -**
  3140         -** Except, skip all tokens on the right-hand side of a NOT operator.
  3141         -** This function is used to find tokens as part of snippet and offset
  3142         -** generation and we do nt want snippets and offsets to report matches
  3143         -** for tokens on the RHS of a NOT.
  3144         -*/
  3145         -static int fts3NextExprToken(Fts3Expr **ppExpr, int *piToken){
  3146         -  Fts3Expr *p = *ppExpr;
  3147         -  int iToken = *piToken;
  3148         -  if( iToken<0 ){
  3149         -    /* In this case the expression p is the root of an expression tree.
  3150         -    ** Move to the first token in the expression tree.
  3151         -    */
  3152         -    while( p->pLeft ){
  3153         -      p = p->pLeft;
  3154         -    }
  3155         -    iToken = 0;
  3156         -  }else{
  3157         -    assert(p && p->eType==FTSQUERY_PHRASE );
  3158         -    if( iToken<(p->pPhrase->nToken-1) ){
  3159         -      iToken++;
  3160         -    }else{
  3161         -      iToken = 0;
  3162         -      while( p->pParent && p->pParent->pLeft!=p ){
  3163         -        assert( p->pParent->pRight==p );
  3164         -        p = p->pParent;
  3165         -      }
  3166         -      p = p->pParent;
  3167         -      if( p ){
  3168         -        assert( p->pRight!=0 );
  3169         -        p = p->pRight;
  3170         -        while( p->pLeft ){
  3171         -          p = p->pLeft;
  3172         -        }
  3173         -      }
  3174         -    }
  3175         -  }
  3176         -
  3177         -  *ppExpr = p;
  3178         -  *piToken = iToken;
  3179         -  return p?1:0;
  3180         -}
  3181         -
  3182         -/*
  3183         -** Return TRUE if the expression node pExpr is located beneath the
  3184         -** RHS of a NOT operator.
  3185         -*/
  3186         -static int fts3ExprBeneathNot(Fts3Expr *p){
  3187         -  Fts3Expr *pParent;
  3188         -  while( p ){
  3189         -    pParent = p->pParent;
  3190         -    if( pParent && pParent->eType==FTSQUERY_NOT && pParent->pRight==p ){
  3191         -      return 1;
  3192         -    }
  3193         -    p = pParent;
  3194         -  }
  3195         -  return 0;
  3196         -}
  3197         -
  3198         -/*
  3199         -** Add entries to pSnippet->aMatch[] for every match that occurs against
  3200         -** document zDoc[0..nDoc-1] which is stored in column iColumn.
  3201         -*/
  3202         -static void snippetOffsetsOfColumn(
  3203         -  fulltext_cursor *pCur,         /* The fulltest search cursor */
  3204         -  Snippet *pSnippet,             /* The Snippet object to be filled in */
  3205         -  int iColumn,                   /* Index of fulltext table column */
  3206         -  const char *zDoc,              /* Text of the fulltext table column */
  3207         -  int nDoc                       /* Length of zDoc in bytes */
  3208         -){
  3209         -  const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
  3210         -  sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
  3211         -  sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
  3212         -  fulltext_vtab *pVtab;                /* The full text index */
  3213         -  int nColumn;                         /* Number of columns in the index */
  3214         -  int i, j;                            /* Loop counters */
  3215         -  int rc;                              /* Return code */
  3216         -  unsigned int match, prevMatch;       /* Phrase search bitmasks */
  3217         -  const char *zToken;                  /* Next token from the tokenizer */
  3218         -  int nToken;                          /* Size of zToken */
  3219         -  int iBegin, iEnd, iPos;              /* Offsets of beginning and end */
  3220         -
  3221         -  /* The following variables keep a circular buffer of the last
  3222         -  ** few tokens */
  3223         -  unsigned int iRotor = 0;             /* Index of current token */
  3224         -  int iRotorBegin[FTS3_ROTOR_SZ];      /* Beginning offset of token */
  3225         -  int iRotorLen[FTS3_ROTOR_SZ];        /* Length of token */
  3226         -
  3227         -  pVtab = cursor_vtab(pCur);
  3228         -  nColumn = pVtab->nColumn;
  3229         -  pTokenizer = pVtab->pTokenizer;
  3230         -  pTModule = pTokenizer->pModule;
  3231         -  rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
  3232         -  if( rc ) return;
  3233         -  pTCursor->pTokenizer = pTokenizer;
  3234         -
  3235         -  prevMatch = 0;
  3236         -  while( !pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos) ){
  3237         -    Fts3Expr *pIter = pCur->pExpr;
  3238         -    int iIter = -1;
  3239         -    iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
  3240         -    iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
  3241         -    match = 0;
  3242         -    for(i=0; i<(FTS3_ROTOR_SZ-1) && fts3NextExprToken(&pIter, &iIter); i++){
  3243         -      int nPhrase;                    /* Number of tokens in current phrase */
  3244         -      struct PhraseToken *pToken;     /* Current token */
  3245         -      int iCol;                       /* Column index */
  3246         -
  3247         -      if( fts3ExprBeneathNot(pIter) ) continue;
  3248         -      nPhrase = pIter->pPhrase->nToken;
  3249         -      pToken = &pIter->pPhrase->aToken[iIter];
  3250         -      iCol = pIter->pPhrase->iColumn;
  3251         -      if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
  3252         -      if( pToken->n>nToken ) continue;
  3253         -      if( !pToken->isPrefix && pToken->n<nToken ) continue;
  3254         -      assert( pToken->n<=nToken );
  3255         -      if( memcmp(pToken->z, zToken, pToken->n) ) continue;
  3256         -      if( iIter>0 && (prevMatch & (1<<i))==0 ) continue;
  3257         -      match |= 1<<i;
  3258         -      if( i==(FTS3_ROTOR_SZ-2) || nPhrase==iIter+1 ){
  3259         -        for(j=nPhrase-1; j>=0; j--){
  3260         -          int k = (iRotor-j) & FTS3_ROTOR_MASK;
  3261         -          snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j,
  3262         -                iRotorBegin[k], iRotorLen[k]);
  3263         -        }
  3264         -      }
  3265         -    }
  3266         -    prevMatch = match<<1;
  3267         -    iRotor++;
  3268         -  }
  3269         -  pTModule->xClose(pTCursor);  
  3270         -}
  3271         -
  3272         -/*
  3273         -** Remove entries from the pSnippet structure to account for the NEAR
  3274         -** operator. When this is called, pSnippet contains the list of token 
  3275         -** offsets produced by treating all NEAR operators as AND operators.
  3276         -** This function removes any entries that should not be present after
  3277         -** accounting for the NEAR restriction. For example, if the queried
  3278         -** document is:
  3279         -**
  3280         -**     "A B C D E A"
  3281         -**
  3282         -** and the query is:
  3283         -** 
  3284         -**     A NEAR/0 E
  3285         -**
  3286         -** then when this function is called the Snippet contains token offsets
  3287         -** 0, 4 and 5. This function removes the "0" entry (because the first A
  3288         -** is not near enough to an E).
  3289         -**
  3290         -** When this function is called, the value pointed to by parameter piLeft is
  3291         -** the integer id of the left-most token in the expression tree headed by
  3292         -** pExpr. This function increments *piLeft by the total number of tokens
  3293         -** in the expression tree headed by pExpr.
  3294         -**
  3295         -** Return 1 if any trimming occurs.  Return 0 if no trimming is required.
  3296         -*/
  3297         -static int trimSnippetOffsets(
  3298         -  Fts3Expr *pExpr,      /* The search expression */
  3299         -  Snippet *pSnippet,    /* The set of snippet offsets to be trimmed */
  3300         -  int *piLeft           /* Index of left-most token in pExpr */
  3301         -){
  3302         -  if( pExpr ){
  3303         -    if( trimSnippetOffsets(pExpr->pLeft, pSnippet, piLeft) ){
  3304         -      return 1;
  3305         -    }
  3306         -
  3307         -    switch( pExpr->eType ){
  3308         -      case FTSQUERY_PHRASE:
  3309         -        *piLeft += pExpr->pPhrase->nToken;
  3310         -        break;
  3311         -      case FTSQUERY_NEAR: {
  3312         -        /* The right-hand-side of a NEAR operator is always a phrase. The
  3313         -        ** left-hand-side is either a phrase or an expression tree that is
  3314         -        ** itself headed by a NEAR operator. The following initializations
  3315         -        ** set local variable iLeft to the token number of the left-most
  3316         -        ** token in the right-hand phrase, and nToken to the combined
  3317         -        ** number of tokens in the two phrases. For example, if we had:
  3318         -        **
  3319         -        **     <col> MATCH '"abc def" NEAR/2 "ghi jkl"'
  3320         -        **
  3321         -        ** then iLeft will be set to 2 (token number of ghi) and nToken will
  3322         -        ** be set to 4.
  3323         -        */
  3324         -        Fts3Expr *pLeft = pExpr->pLeft;
  3325         -        Fts3Expr *pRight = pExpr->pRight;
  3326         -        int iLeft = *piLeft;
  3327         -        int nNear = pExpr->nNear;
  3328         -        int nToken = pRight->pPhrase->nToken;
  3329         -        int jj, ii;
  3330         -        if( pLeft->eType==FTSQUERY_NEAR ){
  3331         -          pLeft = pLeft->pRight;
  3332         -        }
  3333         -        assert( pRight->eType==FTSQUERY_PHRASE );
  3334         -        assert( pLeft->eType==FTSQUERY_PHRASE );
  3335         -        nToken += pLeft->pPhrase->nToken;
  3336         -
  3337         -        for(ii=0; ii<pSnippet->nMatch; ii++){
  3338         -          struct snippetMatch *p = &pSnippet->aMatch[ii];
  3339         -          if( p->iTerm==iLeft ){
  3340         -            int isOk = 0;
  3341         -            /* Snippet ii is an occurrence of query term iLeft in the document.
  3342         -            ** It occurs at position (p->iToken) of the document. We now
  3343         -            ** search for an instance of token (iLeft-1) somewhere in the
  3344         -            ** range (p->iToken - nNear)...(p->iToken + nNear + nToken) within
  3345         -            ** the set of snippetMatch structures. If one is found, proceed.
  3346         -            ** If one cannot be found, then mark snippets ii..(ii+N-1) as
  3347         -            ** discarded (iTerm = -2), where N is the number of tokens
  3348         -            ** in phrase pRight->pPhrase, and return 1.
  3349         -            */
  3350         -            for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
  3351         -              struct snippetMatch *p2 = &pSnippet->aMatch[jj];
  3352         -              if( p2->iTerm==(iLeft-1) ){
  3353         -                if( p2->iToken>=(p->iToken-nNear-1) 
  3354         -                 && p2->iToken<(p->iToken+nNear+nToken) 
  3355         -                ){
  3356         -                  isOk = 1;
  3357         -                }
  3358         -              }
  3359         -            }
  3360         -            if( !isOk ){
  3361         -              int kk;
  3362         -              for(kk=0; kk<pRight->pPhrase->nToken; kk++){
  3363         -                pSnippet->aMatch[kk+ii].iTerm = -2;  /* NOTE(review): kk+ii is not checked against nMatch */
  3364         -              }
  3365         -              return 1;
  3366         -            }
  3367         -          }
  3368         -          if( p->iTerm==(iLeft-1) ){  /* Mirror check: term iLeft-1 needs a term iLeft close enough */
  3369         -            int isOk = 0;
  3370         -            for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
  3371         -              struct snippetMatch *p2 = &pSnippet->aMatch[jj];
  3372         -              if( p2->iTerm==iLeft ){
  3373         -                if( p2->iToken<=(p->iToken+nNear+1) 
  3374         -                 && p2->iToken>(p->iToken-nNear-nToken) 
  3375         -                ){
  3376         -                  isOk = 1;
  3377         -                }
  3378         -              }
  3379         -            }
  3380         -            if( !isOk ){
  3381         -              int kk;
  3382         -              for(kk=0; kk<pLeft->pPhrase->nToken; kk++){
  3383         -                pSnippet->aMatch[ii-kk].iTerm = -2;  /* NOTE(review): ii-kk is not checked against 0 */
  3384         -              }
  3385         -              return 1;
  3386         -            }
  3387         -          }
  3388         -        }
  3389         -        break;
  3390         -      }
  3391         -    }
  3392         -
  3393         -    if( trimSnippetOffsets(pExpr->pRight, pSnippet, piLeft) ){
  3394         -      return 1;
  3395         -    }
  3396         -  }
  3397         -  return 0;
  3398         -}
  3399         -
  3400         -/*
  3401         -** Compute all offsets for the current row of the query.  
  3402         -** If the offsets have already been computed, this routine is a no-op.
  3403         -*/
  3404         -static void snippetAllOffsets(fulltext_cursor *p){
  3405         -  int nColumn;
  3406         -  int iColumn, i;
  3407         -  int iFirst, iLast;
  3408         -  int iTerm = 0;
  3409         -  fulltext_vtab *pFts = cursor_vtab(p);
  3410         -
  3411         -  if( p->snippet.nMatch || p->pExpr==0 ){  /* Matches already recorded, or no query expression */
  3412         -    return;
  3413         -  }
  3414         -  nColumn = pFts->nColumn;
  3415         -  iColumn = (p->iCursorType - QUERY_FULLTEXT);
  3416         -  if( iColumn<0 || iColumn>=nColumn ){
  3417         -    /* Look for matches over all columns of the full-text index */
  3418         -    iFirst = 0;
  3419         -    iLast = nColumn-1;
  3420         -  }else{
  3421         -    /* Look for matches in the iColumn-th column of the index only */
  3422         -    iFirst = iColumn;
  3423         -    iLast = iColumn;
  3424         -  }
  3425         -  for(i=iFirst; i<=iLast; i++){
  3426         -    const char *zDoc;
  3427         -    int nDoc;
  3428         -    zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);  /* Index column i is statement column i+1 */
  3429         -    nDoc = sqlite3_column_bytes(p->pStmt, i+1);
  3430         -    snippetOffsetsOfColumn(p, &p->snippet, i, zDoc, nDoc);
  3431         -  }
  3432         -
  3433         -  while( trimSnippetOffsets(p->pExpr, &p->snippet, &iTerm) ){  /* Repeat until a pass trims nothing */
  3434         -    iTerm = 0;  /* Each pass restarts token numbering from zero */
  3435         -  }
  3436         -}
  3437         -
  3438         -/*
  3439         -** Convert the information in the aMatch[] array of the snippet
  3440         -** into the string zOffset[0..nOffset-1]. This string is used as
  3441         -** the return of the SQL offsets() function.
  3442         -*/
  3443         -static void snippetOffsetText(Snippet *p){
  3444         -  int i;
  3445         -  int cnt = 0;
  3446         -  StringBuffer sb;
  3447         -  char zBuf[200];
  3448         -  if( p->zOffset ) return;  /* Offset text already built */
  3449         -  initStringBuffer(&sb);
  3450         -  for(i=0; i<p->nMatch; i++){
  3451         -    struct snippetMatch *pMatch = &p->aMatch[i];
  3452         -    if( pMatch->iTerm>=0 ){
  3453         -      /* If snippetMatch.iTerm is less than 0, then the match was
  3454         -      ** discarded as part of processing the NEAR operator (see the
  3455         -      ** trimSnippetOffsets() function for details). Ignore
  3456         -      ** it in this case. For every entry after the first, formatting
  3457         -      ** starts at zBuf[1] so the space in zBuf[0] acts as a separator. */
  3458         -      zBuf[0] = ' ';
  3459         -      sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
  3460         -          pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
  3461         -      append(&sb, zBuf);
  3462         -      cnt++;
  3463         -    }
  3464         -  }
  3465         -  p->zOffset = stringBufferData(&sb);
  3466         -  p->nOffset = stringBufferLength(&sb);
  3467         -}
  3468         -
  3469         -/*
  3470         -** zDoc[0..nDoc-1] is a phrase of text.  aMatch[0..nMatch-1] is a set
  3471         -** of matching words, some of which might be in zDoc.  zDoc is column
  3472         -** number iCol.
  3473         -**
  3474         -** iBreak is a suggested spot in zDoc where we could begin or end an
  3475         -** excerpt.  Return a value similar to iBreak but possibly adjusted
  3476         -** to be a little left or right so that the break point is better.
  3477         -*/
  3478         -static int wordBoundary(
  3479         -  int iBreak,                   /* The suggested break point */
  3480         -  const char *zDoc,             /* Document text */
  3481         -  int nDoc,                     /* Number of bytes in zDoc[] */
  3482         -  struct snippetMatch *aMatch,  /* Matching words */
  3483         -  int nMatch,                   /* Number of entries in aMatch[] */
  3484         -  int iCol                      /* The column number for zDoc[] */
  3485         -){
  3486         -  int i;
  3487         -  if( iBreak<=10 ){  /* Within 10 bytes of the start: break at offset 0 */
  3488         -    return 0;
  3489         -  }
  3490         -  if( iBreak>=nDoc-10 ){  /* Within 10 bytes of the end: break at nDoc */
  3491         -    return nDoc;
  3492         -  }
  3493         -  for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}  /* Skip matches in earlier columns */
  3494         -  while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }  /* Skip matches ending before iBreak */
  3495         -  if( i<nMatch ){
  3496         -    if( aMatch[i].iStart<iBreak+10 ){  /* Prefer the start of a match that begins before iBreak+10 */
  3497         -      return aMatch[i].iStart;
  3498         -    }
  3499         -    if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
  3500         -      return aMatch[i-1].iStart;
  3501         -    }
  3502         -  }
  3503         -  for(i=1; i<=10; i++){  /* Otherwise look up to 10 bytes either side for whitespace */
  3504         -    if( safe_isspace(zDoc[iBreak-i]) ){
  3505         -      return iBreak - i + 1;
  3506         -    }
  3507         -    if( safe_isspace(zDoc[iBreak+i]) ){
  3508         -      return iBreak + i + 1;
  3509         -    }
  3510         -  }
  3511         -  return iBreak;  /* No better spot found */
  3512         -}
  3513         -
  3514         -
  3515         -
  3516         -/*
  3517         -** Allowed values for Snippet.aMatch[].snStatus
  3518         -*/
  3519         -#define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
  3520         -#define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */
  3521         -
  3522         -/*
  3523         -** Generate the text of a snippet.
  3524         -*/
  3525         -static void snippetText(
  3526         -  fulltext_cursor *pCursor,   /* The cursor we need the snippet for */
  3527         -  const char *zStartMark,     /* Markup to appear before each match */
  3528         -  const char *zEndMark,       /* Markup to appear after each match */
  3529         -  const char *zEllipsis       /* Ellipsis mark */
  3530         -){
  3531         -  int i, j;
  3532         -  struct snippetMatch *aMatch;
  3533         -  int nMatch;
  3534         -  int nDesired;
  3535         -  StringBuffer sb;
  3536         -  int tailCol;
  3537         -  int tailOffset;
  3538         -  int iCol;
  3539         -  int nDoc;
  3540         -  const char *zDoc;
  3541         -  int iStart, iEnd;
  3542         -  int tailEllipsis = 0;
  3543         -  int iMatch;
  3544         -  
  3545         -
  3546         -  sqlite3_free(pCursor->snippet.zSnippet);  /* Discard any previously generated snippet text */
  3547         -  pCursor->snippet.zSnippet = 0;
  3548         -  aMatch = pCursor->snippet.aMatch;
  3549         -  nMatch = pCursor->snippet.nMatch;
  3550         -  initStringBuffer(&sb);
  3551         -
  3552         -  for(i=0; i<nMatch; i++){
  3553         -    aMatch[i].snStatus = SNIPPET_IGNORE;
  3554         -  }
  3555         -  nDesired = 0;
  3556         -  for(i=0; i<FTS3_ROTOR_SZ; i++){  /* For each term index i, mark its first match as desired */
  3557         -    for(j=0; j<nMatch; j++){
  3558         -      if( aMatch[j].iTerm==i ){
  3559         -        aMatch[j].snStatus = SNIPPET_DESIRED;
  3560         -        nDesired++;
  3561         -        break;
  3562         -      }
  3563         -    }
  3564         -  }
  3565         -
  3566         -  iMatch = 0;
  3567         -  tailCol = -1;
  3568         -  tailOffset = 0;
  3569         -  for(i=0; i<nMatch && nDesired>0; i++){  /* Emit one excerpt per still-desired match */
  3570         -    if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
  3571         -    nDesired--;
  3572         -    iCol = aMatch[i].iCol;
  3573         -    zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
  3574         -    nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
  3575         -    iStart = aMatch[i].iStart - 40;  /* Aim to start 40 bytes before the match */
  3576         -    iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
  3577         -    if( iStart<=10 ){
  3578         -      iStart = 0;
  3579         -    }
  3580         -    if( iCol==tailCol && iStart<=tailOffset+20 ){  /* Starts at most 20 bytes past the previous excerpt: continue from its end */
  3581         -      iStart = tailOffset;
  3582         -    }
  3583         -    if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){  /* Not contiguous with the previous excerpt: insert an ellipsis */
  3584         -      trimWhiteSpace(&sb);
  3585         -      appendWhiteSpace(&sb);
  3586         -      append(&sb, zEllipsis);
  3587         -      appendWhiteSpace(&sb);
  3588         -    }
  3589         -    iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;  /* Aim to end 40 bytes after the match */
  3590         -    iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
  3591         -    if( iEnd>=nDoc-10 ){
  3592         -      iEnd = nDoc;
  3593         -      tailEllipsis = 0;
  3594         -    }else{
  3595         -      tailEllipsis = 1;
  3596         -    }
  3597         -    while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
  3598         -    while( iStart<iEnd ){  /* Copy zDoc[iStart..iEnd), wrapping each match in the start/end marks */
  3599         -      while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
  3600         -             && aMatch[iMatch].iCol<=iCol ){
  3601         -        iMatch++;
  3602         -      }
  3603         -      if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
  3604         -             && aMatch[iMatch].iCol==iCol ){
  3605         -        nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
  3606         -        iStart = aMatch[iMatch].iStart;
  3607         -        append(&sb, zStartMark);
  3608         -        nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
  3609         -        append(&sb, zEndMark);
  3610         -        iStart += aMatch[iMatch].nByte;
  3611         -        for(j=iMatch+1; j<nMatch; j++){  /* This term has now been shown: later desired matches of it are dropped */
  3612         -          if( aMatch[j].iTerm==aMatch[iMatch].iTerm
  3613         -              && aMatch[j].snStatus==SNIPPET_DESIRED ){
  3614         -            nDesired--;
  3615         -            aMatch[j].snStatus = SNIPPET_IGNORE;
  3616         -          }
  3617         -        }
  3618         -      }else{
  3619         -        nappend(&sb, &zDoc[iStart], iEnd - iStart);  /* No further match in range: copy the rest verbatim */
  3620         -        iStart = iEnd;
  3621         -      }
  3622         -    }
  3623         -    tailCol = iCol;
  3624         -    tailOffset = iEnd;
  3625         -  }
  3626         -  trimWhiteSpace(&sb);
  3627         -  if( tailEllipsis ){  /* Last excerpt stopped short of the end of its column */
  3628         -    appendWhiteSpace(&sb);
  3629         -    append(&sb, zEllipsis);
  3630         -  }
  3631         -  pCursor->snippet.zSnippet = stringBufferData(&sb);
  3632         -  pCursor->snippet.nSnippet = stringBufferLength(&sb);
  3633         -}
          848  +/****************************************************************/
          849  +/****************************************************************/
          850  +/****************************************************************/
          851  +/****************************************************************/
  3634    852   
  3635    853   
  3636    854   /*
  3637    855   ** Close the cursor.  For additional information see the documentation
  3638    856   ** on the xClose method of the virtual table interface.
  3639    857   */
  3640    858   static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  3641         -  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  3642         -  FTSTRACE(("FTS3 Close %p\n", c));
  3643         -  sqlite3_finalize(c->pStmt);
  3644         -  sqlite3Fts3ExprFree(c->pExpr);
  3645         -  snippetClear(&c->snippet);
  3646         -  if( c->result.nData!=0 ){
  3647         -    dlrDestroy(&c->reader);
  3648         -  }
  3649         -  dataBufferDestroy(&c->result);
  3650         -  sqlite3_free(c);
          859  +  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
          860  +  sqlite3_finalize(pCsr->pStmt);        /* Release the cursor's prepared statement */
          861  +  sqlite3Fts3ExprFree(pCsr->pExpr);     /* Release the query expression */
          862  +  sqlite3_free(pCsr->aDoclist);         /* Release the doclist buffer */
          863  +  sqlite3_free(pCsr);                   /* Finally release the cursor itself */
          864  +  return SQLITE_OK;                     /* This path always reports success */
          865  +}
          866  +
          867  +static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){  /* Advance the cursor one row; sets pCsr->isEof when no row is available */
          868  +  int rc;                         /* Return code */
          869  +  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
          870  +
          871  +  if( pCsr->aDoclist==0 ){        /* No doclist: rows come straight from stepping pStmt */
          872  +    if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){
          873  +      rc = SQLITE_OK;
          874  +    }else{
          875  +      pCsr->isEof = 1;
          876  +      rc = sqlite3_reset(pCsr->pStmt);  /* Return whatever result the reset reports */
          877  +    }
          878  +  }else if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){  /* Read pointer is at or past the end of the doclist */
          879  +    pCsr->isEof = 1;
          880  +    rc = SQLITE_OK;
          881  +  }else{
          882  +    sqlite3_reset(pCsr->pStmt);
          883  +    fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId);  /* Read the next delta-encoded id into iPrevId */
          884  +    sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId);   /* Bind that id as parameter 1 */
          885  +    if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){
          886  +      rc = SQLITE_OK;
          887  +    }else{
          888  +      pCsr->isEof = 1;
          889  +      if( SQLITE_OK==(rc = sqlite3_reset(pCsr->pStmt)) ){
          890  +        rc = SQLITE_ERROR;  /* The id came from the doclist but no row was returned: report an error */
          891  +      }
          892  +    }
          893  +  }
          894  +  return rc;
          895  +}
          896  +
          897  +
          898  +/*
          899  +** The buffer pointed to by argument zNode (size nNode bytes) contains the
          900  +** root node of a b-tree segment. The segment is guaranteed to be at least
          901  +** one level high (i.e. the root node is not also a leaf). If successful,
          902  +** this function locates the leaf node of the segment that may contain the 
          903  +** term specified by arguments zTerm and nTerm and writes its block number 
          904  +** to *piLeaf.
          905  +**
          906  +** It is possible that the returned leaf node does not contain the specified
          907  +** term. However, if the segment does contain said term, it is stored on
          908  +** the identified leaf node. Because this function only inspects interior
          909  +** segment nodes (and never loads leaf nodes into memory), it is not possible
          910  +** to be sure.
          911  +**
          912  +** If an error occurs, an error code other than SQLITE_OK is returned.
          913  +*/ 
          914  +static int fts3SelectLeaf(
          915  +  Fts3Table *p,                   /* Virtual table handle */
          916  +  const char *zTerm,              /* Term to select leaves for */
          917  +  int nTerm,                      /* Size of term zTerm in bytes */
          918  +  const char *zNode,              /* Buffer containing segment interior node */
          919  +  int nNode,                      /* Size of buffer at zNode */
          920  +  sqlite3_int64 *piLeaf           /* Selected leaf node */
          921  +){
          922  +  int rc = SQLITE_OK;             /* Return code */
          923  +  const char *zCsr = zNode;       /* Cursor to iterate through node */
          924  +  const char *zEnd = &zCsr[nNode];/* End of interior node buffer */
          925  +  char *zBuffer = 0;              /* Buffer to load terms into */
          926  +  int nAlloc = 0;                 /* Size of allocated buffer */
          927  +
          928  +  while( 1 ){
          929  +    int iHeight;                  /* Height of this node in tree */
          930  +    sqlite3_int64 iChild;         /* Block id of child node to descend to */
          931  +    int nBlock;                   /* Size of child node in bytes */
          932  +
          933  +    zCsr += sqlite3Fts3GetVarint32(zCsr, &iHeight);
          934  +    zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
          935  +  
          936  +    while( zCsr<zEnd ){
          937  +      int cmp;                    /* memcmp() result */
          938  +      int nSuffix;                /* Size of term suffix */
          939  +      int nPrefix = 0;            /* Size of term prefix */
          940  +      int nBuffer;                /* Total term size */
          941  +  
          942  +      /* Load the next term on the node into zBuffer */
          943  +      if( zBuffer ){  /* NOTE(review): zBuffer stays non-NULL after descending, so the first term of a child node is also read with a prefix length - confirm against the node format */
          944  +        zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix);
          945  +      }
          946  +      zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix);
          947  +      if( nPrefix+nSuffix>nAlloc ){
          948  +        char *zNew;
          949  +        nAlloc = (nPrefix+nSuffix) * 2;
          950  +        zNew = (char *)sqlite3_realloc(zBuffer, nAlloc);
          951  +        if( !zNew ){
          952  +          sqlite3_free(zBuffer);
          953  +          return SQLITE_NOMEM;
          954  +        }
          955  +        zBuffer = zNew;
          956  +      }
          957  +      memcpy(&zBuffer[nPrefix], zCsr, nSuffix);  /* NOTE(review): nSuffix is not checked against zEnd before copying */
          958  +      nBuffer = nPrefix + nSuffix;
          959  +      zCsr += nSuffix;
          960  +  
          961  +      /* Compare the term we are searching for with the term just loaded from
          962  +      ** the interior node. If the specified term is greater than or equal
          963  +      ** to the term from the interior node, then all terms on the sub-tree 
          964  +      ** headed by node iChild are smaller than zTerm. No need to search 
          965  +      ** iChild.
          966  +      **
          967  +      ** If the interior node term is larger than the specified term, then
          968  +      ** the tree headed by iChild may contain the specified term.
          969  +      */
          970  +      cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer));
          971  +      if( cmp<0 || (cmp==0 && nBuffer>nTerm) ) break;
          972  +      iChild++;  /* zTerm is not smaller than this term: move on to the next child block id */
          973  +    };
          974  +
          975  +    /* If (iHeight==1), the children of this interior node are leaves. The
          976  +    ** specified term may be present on leaf node iChild.
          977  +    */
          978  +    if( iHeight==1 ){
          979  +      *piLeaf = iChild;
          980  +      break;
          981  +    }
          982  +
          983  +    /* Descend to interior node iChild. */
          984  +    rc = sqlite3Fts3ReadBlock(p, iChild, &zCsr, &nBlock);
          985  +    if( rc!=SQLITE_OK ) break;
          986  +    zEnd = &zCsr[nBlock];
          987  +  }
          988  +  sqlite3_free(zBuffer);
          989  +  return rc;
          990  +}
          991  +
          992  +static void fts3PutDeltaVarint(
          993  +  char **pp,                      /* IN/OUT: Write position; advanced past the varint written */
          994  +  sqlite3_int64 *piPrev,          /* IN/OUT: Previous value; set to iVal on return */
          995  +  sqlite3_int64 iVal              /* Value to store; the assert requires iVal > *piPrev */
          996  +){
          997  +  assert( iVal-*piPrev > 0 );
          998  +  *pp += sqlite3Fts3PutVarint(*pp, iVal-*piPrev);  /* Only the difference from *piPrev is written */
          999  +  *piPrev = iVal;
         1000  +}
         1001  +
         1002  +static void fts3PoslistCopy(char **pp, char **ppPoslist){  /* Skip one position list and, if pp!=0, copy it to *pp */
         1003  +  char *pEnd = *ppPoslist;
         1004  +  char c = 0;                     /* 0x80 bit of the previous byte, or 0 */
         1005  +  while( *pEnd | c ) c = *pEnd++ & 0x80;  /* Stop at a 0x00 byte whose predecessor had the 0x80 bit clear */
         1006  +  pEnd++;                         /* Step over that 0x00 byte so it is part of the copied span */
         1007  +  if( pp ){
         1008  +    int n = pEnd - *ppPoslist;
         1009  +    char *p = *pp;
         1010  +    memcpy(p, *ppPoslist, n);
         1011  +    p += n;
         1012  +    *pp = p;                      /* Output pointer ends just past the copied bytes */
         1013  +  }
         1014  +  *ppPoslist = pEnd;              /* Input pointer is advanced whether or not a copy was made */
         1015  +}
         1016  +
         1017  +static void fts3ColumnlistCopy(char **pp, char **ppPoslist){  /* Skip list bytes up to the next 0x00/0x01 byte and, if pp!=0, copy them to *pp */
         1018  +  char *pEnd = *ppPoslist;
         1019  +  char c = 0;                     /* 0x80 bit of the previous byte, or 0 */
         1020  +  while( 0xFE & (*pEnd | c) ) c = *pEnd++ & 0x80;  /* Stop at a 0x00 or 0x01 byte whose predecessor had the 0x80 bit clear */
         1021  +  if( pp ){                       /* Unlike fts3PoslistCopy(), the stopping byte is not included */
         1022  +    int n = pEnd - *ppPoslist;
         1023  +    char *p = *pp;
         1024  +    memcpy(p, *ppPoslist, n);
         1025  +    p += n;
         1026  +    *pp = p;
         1027  +  }
         1028  +  *ppPoslist = pEnd;              /* Left pointing at the 0x00 or 0x01 byte */
         1029  +}
         1030  +
         1031  +/* Merge the position lists at *pp1 and *pp2, column by column, into *pp.
         1032  +** A 0x00 byte is appended to the output. On return *pp points just past
         1033  +** it, and *pp1 and *pp2 point one byte past where each input stopped. */
         1034  +static void fts3PoslistMerge(
         1035  +  char **pp,                      /* Output buffer */
         1036  +  char **pp1,                     /* Left input list */
         1037  +  char **pp2                      /* Right input list */
         1038  +){
         1039  +  char *p = *pp;
         1040  +  char *p1 = *pp1;
         1041  +  char *p2 = *pp2;
         1042  +
         1043  +  while( *p1 && *p2 ){  /* NOTE(review): stops as soon as either list reaches 0x00; anything left in the other list is not copied - confirm intended */
         1044  +    int iCol1 = 0;
         1045  +    int iCol2 = 0;
         1046  +    if( *p1==0x01 ) sqlite3Fts3GetVarint32(&p1[1], &iCol1);  /* A 0x01 byte is followed by a column number; otherwise column 0 */
         1047  +    if( *p2==0x01 ) sqlite3Fts3GetVarint32(&p2[1], &iCol2);
         1048  +
         1049  +    if( iCol1==iCol2 ){
         1050  +      sqlite3_int64 i1 = 0;
         1051  +      sqlite3_int64 i2 = 0;
         1052  +      sqlite3_int64 iPrev = 0;
         1053  +      if( iCol1!=0 ){
         1054  +        int n;
         1055  +        *p++ = 0x01;
         1056  +        n = sqlite3Fts3PutVarint(p, iCol1);
         1057  +        p += n;
         1058  +        p1 += 1 + n;              /* Both inputs carry the same column header, so skip the same length */
         1059  +        p2 += 1 + n;
         1060  +      }
         1061  +      while( (*p1&0xFE) || (*p2&0xFE) ){  /* Until both inputs are at a 0x00 or 0x01 byte */
         1062  +        if( i1==i2 ){
         1063  +          fts3GetDeltaVarint(&p1, &i1); i1 -= 2;  /* 2 is subtracted after every read and added back before every write */
         1064  +          fts3GetDeltaVarint(&p2, &i2); i2 -= 2;
         1065  +        }else if( i1<i2 ){
         1066  +          fts3GetDeltaVarint(&p1, &i1); i1 -= 2;
         1067  +        }else{
         1068  +          fts3GetDeltaVarint(&p2, &i2); i2 -= 2;
         1069  +        }
         1070  +        fts3PutDeltaVarint(&p, &iPrev, (i1<i2 ? i1 : i2) + 2); iPrev -= 2;  /* Emit the smaller of the two current values */
         1071  +        if( 0==(*p1&0xFE) ) i1 = 0x7FFFFFFF;  /* Input exhausted for this column: use a large sentinel */
         1072  +        if( 0==(*p2&0xFE) ) i2 = 0x7FFFFFFF;
         1073  +      }
         1074  +    }else if( iCol1<iCol2 ){
         1075  +      fts3ColumnlistCopy(&p, &p1);  /* NOTE(review): if iCol1!=0 then p1 is at a 0x01 byte and fts3ColumnlistCopy() appears to stop immediately without advancing - confirm */
         1076  +    }else{
         1077  +      fts3ColumnlistCopy(&p, &p2);  /* NOTE(review): same concern when iCol2!=0 */
         1078  +    }
         1079  +  }
         1080  +
         1081  +  *p++ = '\0';
         1082  +  *pp = p;
         1083  +  *pp1 = p1 + 1;
         1084  +  *pp2 = p2 + 1;
         1085  +}
         1086  +
         1087  +/*
         1088  +** nToken==1 searches for adjacent positions.
         1089  +*/
         1090  +static int fts3PoslistPhraseMerge(
         1091  +  char **pp,                      /* Output buffer */
         1092  +  int nToken,                     /* Maximum difference in token positions */
         1093  +  int isSaveLeft,                 /* Save the left position */
         1094  +  char **pp1,                     /* Left input list */
         1095  +  char **pp2                      /* Right input list */
         1096  +){
         1097  +  char *p = (pp ? *pp : 0);
         1098  +  char *p1 = *pp1;
         1099  +  char *p2 = *pp2;
         1100  +
         1101  +  int iCol1 = 0;
         1102  +  int iCol2 = 0;
         1103  +  assert( *p1!=0 && *p2!=0 );
         1104  +  if( *p1==0x01 ){ 
         1105  +    p1++;
         1106  +    p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
         1107  +  }
         1108  +  if( *p2==0x01 ){ 
         1109  +    p2++;
         1110  +    p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
         1111  +  }
         1112  +
         1113  +  while( 1 ){
         1114  +    if( iCol1==iCol2 ){
         1115  +      char *pSave = p;
         1116  +      sqlite3_int64 iPrev = 0;
         1117  +      sqlite3_int64 iPos1 = 0;
         1118  +      sqlite3_int64 iPos2 = 0;
         1119  +
         1120  +      if( pp && iCol1 ){
         1121  +        *p++ = 0x01;
         1122  +        p += sqlite3Fts3PutVarint(p, iCol1);
         1123  +      }
         1124  +
         1125  +      assert( *p1!=0x00 && *p2!=0x00 && *p1!=0x01 && *p2!=0x01 );
         1126  +      fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
         1127  +      fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
         1128  +
         1129  +      while( 1 ){
         1130  +        if( iPos2>iPos1 && iPos2<=iPos1+nToken ){
         1131  +          sqlite3_int64 iSave;
         1132  +          if( !pp ){
         1133  +            fts3PoslistCopy(0, &p2);
         1134  +            fts3PoslistCopy(0, &p1);
         1135  +            *pp1 = p1;
         1136  +            *pp2 = p2;
         1137  +            return 1;
         1138  +          }
         1139  +          iSave = isSaveLeft ? iPos1 : iPos2;
         1140  +          fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2;
         1141  +          pSave = 0;
         1142  +        }
         1143  +        if( iPos2<=iPos1 ){
         1144  +          if( (*p2&0xFE)==0 ) break;
         1145  +          fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
         1146  +        }else{
         1147  +          if( (*p1&0xFE)==0 ) break;
         1148  +          fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
         1149  +        }
         1150  +      }
         1151  +      if( pSave && pp ){
         1152  +        p = pSave;
         1153  +      }
         1154  +
         1155  +      fts3ColumnlistCopy(0, &p1);
         1156  +      fts3ColumnlistCopy(0, &p2);
         1157  +      assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 );
         1158  +      if( 0==*p1 || 0==*p2 ) break;
         1159  +
         1160  +      p1++;
         1161  +      p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
         1162  +      p2++;
         1163  +      p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
         1164  +    }
         1165  +
         1166  +    /* Advance pointer p1 or p2 (whichever corresponds to the smaller of
         1167  +    ** iCol1 and iCol2) so that it points to either the 0x00 that marks the
         1168  +    ** end of the position list, or the 0x01 that precedes the next 
         1169  +    ** column-number in the position list. 
         1170  +    */
         1171  +    else if( iCol1<iCol2 ){
         1172  +      fts3ColumnlistCopy(0, &p1);
         1173  +      if( 0==*p1 ) break;
         1174  +      p1++;
         1175  +      p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
         1176  +    }else{
         1177  +      fts3ColumnlistCopy(0, &p2);
         1178  +      if( 0==*p2 ) break;
         1179  +      p2++;
         1180  +      p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
         1181  +    }
         1182  +  }
         1183  +
         1184  +  fts3PoslistCopy(0, &p2);
         1185  +  fts3PoslistCopy(0, &p1);
         1186  +  *pp1 = p1;
         1187  +  *pp2 = p2;
         1188  +  if( !pp || *pp==p ){
         1189  +    return 0;
         1190  +  }
         1191  +  *p++ = 0x00;
         1192  +  *pp = p;
         1193  +  return 1;
         1194  +}
         1195  +
         1196  +static int fts3PoslistNearMerge(
         1197  +  char **pp,                      /* Output buffer */
         1198  +  char *aTmp,                     /* Temporary buffer space */
         1199  +  int nRight,                     /* Maximum difference in token positions */
         1200  +  int nLeft,                      /* Maximum difference in token positions */
         1201  +  char **pp1,                     /* Left input list */
         1202  +  char **pp2                      /* Right input list */
         1203  +){
         1204  +  char *p1 = *pp1;
         1205  +  char *p2 = *pp2;
         1206  +
         1207  +  if( !pp ){
         1208  +    if( fts3PoslistPhraseMerge(0, nRight, 0, pp1, pp2) ) return 1;
         1209  +    *pp1 = p1;
         1210  +    *pp2 = p2;
         1211  +    return fts3PoslistPhraseMerge(0, nLeft, 0, pp2, pp1);
         1212  +  }else{
         1213  +    char *pTmp1 = aTmp;
         1214  +    char *pTmp2;
         1215  +    char *aTmp2;
         1216  +    int res = 1;
         1217  +
         1218  +    fts3PoslistPhraseMerge(&pTmp1, nRight, 0, pp1, pp2);
         1219  +    aTmp2 = pTmp2 = pTmp1;
         1220  +    *pp1 = p1;
         1221  +    *pp2 = p2;
         1222  +    fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, pp2, pp1);
         1223  +    if( pTmp1!=aTmp && pTmp2!=aTmp2 ){
         1224  +      fts3PoslistMerge(pp, &aTmp, &aTmp2);
         1225  +    }else if( pTmp1!=aTmp ){
         1226  +      fts3PoslistCopy(pp, &aTmp);
         1227  +    }else if( pTmp2!=aTmp2 ){
         1228  +      fts3PoslistCopy(pp, &aTmp2);
         1229  +    }else{
         1230  +      res = 0;
         1231  +    }
         1232  +
         1233  +    return res;
         1234  +  }
         1235  +}
         1236  +
         1237  +/*
         1238  +** Values that may be used as the first parameter to fts3DoclistMerge().
         1239  +*/
         1240  +#define MERGE_NOT        2        /* D + D -> D */
         1241  +#define MERGE_AND        3        /* D + D -> D */
         1242  +#define MERGE_OR         4        /* D + D -> D */
         1243  +#define MERGE_POS_OR     5        /* P + P -> P */
         1244  +#define MERGE_PHRASE     6        /* P + P -> D */
         1245  +#define MERGE_POS_PHRASE 7        /* P + P -> P */
         1246  +#define MERGE_NEAR       8        /* P + P -> D */
         1247  +#define MERGE_POS_NEAR   9        /* P + P -> P */
         1248  +
         1249  +static int fts3DoclistMerge(
         1250  +  int mergetype,                  /* One of the MERGE_XXX constants */
         1251  +  int nParam1,
         1252  +  int nParam2,
         1253  +  char *aBuffer,                  /* Pre-allocated output buffer */
         1254  +  int *pnBuffer,                  /* OUT: Bytes written to aBuffer */
         1255  +  char *a1,                       /* Buffer containing first doclist */
         1256  +  int n1,                         /* Size of buffer a1 */
         1257  +  char *a2,                       /* Buffer containing second doclist */
         1258  +  int n2                          /* Size of buffer a2 */
         1259  +){
         1260  +  sqlite3_int64 i1 = 0;
         1261  +  sqlite3_int64 i2 = 0;
         1262  +  sqlite3_int64 iPrev = 0;
         1263  +
         1264  +  char *p = aBuffer;
         1265  +  char *p1 = a1;
         1266  +  char *p2 = a2;
         1267  +  char *pEnd1 = &a1[n1];
         1268  +  char *pEnd2 = &a2[n2];
         1269  +
         1270  +  assert( mergetype==MERGE_OR     || mergetype==MERGE_POS_OR 
         1271  +       || mergetype==MERGE_AND    || mergetype==MERGE_NOT
         1272  +       || mergetype==MERGE_PHRASE || mergetype==MERGE_POS_PHRASE
         1273  +       || mergetype==MERGE_NEAR   || mergetype==MERGE_POS_NEAR
         1274  +  );
         1275  +
         1276  +  if( !aBuffer ){
         1277  +    return SQLITE_NOMEM;
         1278  +  }
         1279  +
         1280  +  /* Read the first docid from each doclist */
         1281  +  fts3GetDeltaVarint2(&p1, pEnd1, &i1);
         1282  +  fts3GetDeltaVarint2(&p2, pEnd2, &i2);
         1283  +
         1284  +  switch( mergetype ){
         1285  +    case MERGE_OR:
         1286  +    case MERGE_POS_OR:
         1287  +      while( p1 || p2 ){
         1288  +        if( p2 && p1 && i1==i2 ){
         1289  +          fts3PutDeltaVarint(&p, &iPrev, i1);
         1290  +          if( mergetype==MERGE_POS_OR ) fts3PoslistMerge(&p, &p1, &p2);
         1291  +          fts3GetDeltaVarint2(&p1, pEnd1, &i1);
         1292  +          fts3GetDeltaVarint2(&p2, pEnd2, &i2);
         1293  +        }else if( !p2 || (p1 && i1<i2) ){
         1294  +          fts3PutDeltaVarint(&p, &iPrev, i1);
         1295  +          if( mergetype==MERGE_POS_OR ) fts3PoslistCopy(&p, &p1);
         1296  +          fts3GetDeltaVarint2(&p1, pEnd1, &i1);
         1297  +        }else{
         1298  +          fts3PutDeltaVarint(&p, &iPrev, i2);
         1299  +          if( mergetype==MERGE_POS_OR ) fts3PoslistCopy(&p, &p2);
         1300  +          fts3GetDeltaVarint2(&p2, pEnd2, &i2);
         1301  +        }
         1302  +      }
         1303  +      break;
         1304  +
         1305  +    case MERGE_AND:
         1306  +      while( p1 && p2 ){
         1307  +        if( i1==i2 ){
         1308  +          fts3PutDeltaVarint(&p, &iPrev, i1);
         1309  +          fts3GetDeltaVarint2(&p1, pEnd1, &i1);
         1310  +          fts3GetDeltaVarint2(&p2, pEnd2, &i2);
         1311  +        }else if( i1<i2 ){
         1312  +          fts3GetDeltaVarint2(&p1, pEnd1, &i1);
         1313  +        }else{
         1314  +          fts3GetDeltaVarint2(&p2, pEnd2, &i2);
         1315  +        }
         1316  +      }
         1317  +      break;
         1318  +
         1319  +    case MERGE_NOT:
         1320  +      while( p1 ){
         1321  +        if( p2 && i1==i2 ){
         1322  +          fts3GetDeltaVarint2(&p1, pEnd1, &i1);
         1323  +          fts3GetDeltaVarint2(&p2, pEnd2, &i2);
         1324  +        }else if( !p2 || i1<i2 ){
         1325  +          fts3PutDeltaVarint(&p, &iPrev, i1);
         1326  +          fts3GetDeltaVarint2(&p1, pEnd1, &i1);
         1327  +        }else{
         1328  +          fts3GetDeltaVarint2(&p2, pEnd2, &i2);
         1329  +        }
         1330  +      }
         1331  +      break;
         1332  +
         1333  +    case MERGE_POS_PHRASE:
         1334  +    case MERGE_PHRASE: {
         1335  +      char **ppPos = (mergetype==MERGE_PHRASE ? 0 : &p);
         1336  +      while( p1 && p2 ){
         1337  +        if( i1==i2 ){
         1338  +          char *pSave = p;
         1339  +          sqlite3_int64 iPrevSave = iPrev;
         1340  +          fts3PutDeltaVarint(&p, &iPrev, i1);
         1341  +          if( 0==fts3PoslistPhraseMerge(ppPos, 1, 0, &p1, &p2) ){
         1342  +            p = pSave;
         1343  +            iPrev = iPrevSave;
         1344  +          }
         1345  +          fts3GetDeltaVarint2(&p1, pEnd1, &i1);
         1346  +          fts3GetDeltaVarint2(&p2, pEnd2, &i2);
         1347  +        }else if( i1<i2 ){
         1348  +          fts3PoslistCopy(0, &p1);
         1349  +          fts3GetDeltaVarint2(&p1, pEnd1, &i1);
         1350  +        }else{
         1351  +          fts3PoslistCopy(0, &p2);
         1352  +          fts3GetDeltaVarint2(&p2, pEnd2, &i2);
         1353  +        }
         1354  +      }
         1355  +      break;
         1356  +    }
         1357  +
         1358  +    case MERGE_POS_NEAR:
         1359  +    case MERGE_NEAR: {
         1360  +      char *aTmp = 0;
         1361  +      char **ppPos = 0;
         1362  +      if( mergetype==MERGE_POS_NEAR ){
         1363  +        ppPos = &p;
         1364  +        aTmp = sqlite3_malloc(2*(n1+n2));
         1365  +        if( !aTmp ){
         1366  +          return SQLITE_NOMEM;
         1367  +        }
         1368  +      }
         1369  +
         1370  +      while( p1 && p2 ){
         1371  +        if( i1==i2 ){
         1372  +          char *pSave = p;
         1373  +          sqlite3_int64 iPrevSave = iPrev;
         1374  +          fts3PutDeltaVarint(&p, &iPrev, i1);
         1375  +
         1376  +          if( !fts3PoslistNearMerge(ppPos, aTmp, nParam1, nParam2, &p1, &p2) ){
         1377  +            iPrev = iPrevSave;
         1378  +            p = pSave;
         1379  +          }
         1380  +
         1381  +          fts3GetDeltaVarint2(&p1, pEnd1, &i1);
         1382  +          fts3GetDeltaVarint2(&p2, pEnd2, &i2);
         1383  +        }else if( i1<i2 ){
         1384  +          fts3PoslistCopy(0, &p1);
         1385  +          fts3GetDeltaVarint2(&p1, pEnd1, &i1);
         1386  +        }else{
         1387  +          fts3PoslistCopy(0, &p2);
         1388  +          fts3GetDeltaVarint2(&p2, pEnd2, &i2);
         1389  +        }
         1390  +      }
         1391  +      sqlite3_free(aTmp);
         1392  +      break;
         1393  +    }
         1394  +
         1395  +    default:
         1396  +      assert(!"Invalid mergetype value passed to fts3DoclistMerge()");
         1397  +  }
         1398  +
         1399  +  *pnBuffer = (p-aBuffer);
         1400  +  return SQLITE_OK;
         1401  +}
         1402  +
         1403  +/* 
         1404  +** A pointer to an instance of this structure is used as the context 
         1405  +** argument to sqlite3Fts3SegReaderIterate()
         1406  +*/
         1407  +typedef struct TermSelect TermSelect;
         1408  +struct TermSelect {
         1409  +  int isReqPos;
         1410  +  char *aOutput;                  /* Malloc'd output buffer */
         1411  +  int nOutput;                    /* Size of output in bytes */
         1412  +};
         1413  +
         1414  +static int fts3TermSelectCb(
         1415  +  Fts3Table *p,                   /* Virtual table object */
         1416  +  void *pContext,                 /* Pointer to TermSelect structure */
         1417  +  char *zTerm,
         1418  +  int nTerm,
         1419  +  char *aDoclist,
         1420  +  int nDoclist
         1421  +){
         1422  +  TermSelect *pTS = (TermSelect *)pContext;
         1423  +  int nNew = pTS->nOutput + nDoclist;
         1424  +  char *aNew = sqlite3_malloc(nNew);
         1425  +
         1426  +  if( !aNew ){
         1427  +    return SQLITE_NOMEM;
         1428  +  }
         1429  +
         1430  +  if( pTS->nOutput==0 ){
         1431  +    /* If this is the first term selected, copy the doclist to the output
         1432  +    ** buffer using memcpy(). TODO: Add a way to transfer control of the
         1433  +    ** aDoclist buffer from the caller so as to avoid the memcpy().
         1434  +    */
         1435  +    memcpy(aNew, aDoclist, nDoclist);
         1436  +  }else{
         1437  +    /* The output buffer is not empty. Merge doclist aDoclist with the
         1438  +    ** existing output. This can only happen with prefix-searches (as
         1439  +    ** searches for exact terms return exactly one doclist).
         1440  +    */
         1441  +    int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);
         1442  +    fts3DoclistMerge(mergetype, 0, 0,
         1443  +        aNew, &nNew, pTS->aOutput, pTS->nOutput, aDoclist, nDoclist
         1444  +    );
         1445  +  }
         1446  +
         1447  +  sqlite3_free(pTS->aOutput);
         1448  +  pTS->aOutput = aNew;
         1449  +  pTS->nOutput = nNew;
         1450  +
  3651   1451     return SQLITE_OK;
  3652   1452   }
  3653   1453   
  3654         -static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  3655         -  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  3656         -  int rc;
  3657         -
  3658         -  FTSTRACE(("FTS3 Next %p\n", pCursor));
  3659         -  snippetClear(&c->snippet);
  3660         -  if( c->iCursorType < QUERY_FULLTEXT ){
  3661         -    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
  3662         -    rc = sqlite3_step(c->pStmt);
  3663         -    switch( rc ){
  3664         -      case SQLITE_ROW:
  3665         -        c->eof = 0;
  3666         -        return SQLITE_OK;
  3667         -      case SQLITE_DONE:
  3668         -        c->eof = 1;
  3669         -        return SQLITE_OK;
  3670         -      default:
  3671         -        c->eof = 1;
  3672         -        return rc;
         1454  +/*
         1455  +** This function retrieves the doclist for the specified term (or term
         1456  +** prefix) from the database. 
         1457  +**
         1458  +** The returned doclist may be in one of two formats, depending on the 
         1459  +** value of parameter isReqPos. If isReqPos is zero, then the doclist is
         1460  +** a sorted list of delta-compressed docids. If isReqPos is non-zero, 
         1461  +** then the returned list is in the same format as is stored in the
         1462  +** database, but without the length specifier found at the start of
         1463  +** on-disk doclists.
         1464  +*/
         1465  +static int fts3TermSelect(
         1466  +  Fts3Table *p,                   /* Virtual table handle */
         1467  +  int iColumn,                    /* Column to query (or -ve for all columns) */
         1468  +  const char *zTerm,              /* Term to query for */
         1469  +  int nTerm,                      /* Size of zTerm in bytes */
         1470  +  int isPrefix,                   /* True for a prefix search */
         1471  +  int isReqPos,                   /* True to include position lists in output */
         1472  +  int *pnOut,                     /* OUT: Size of buffer at *ppOut */
         1473  +  char **ppOut                    /* OUT: Malloced result buffer */
         1474  +){
         1475  +  int i;
         1476  +  TermSelect tsc;
         1477  +  Fts3SegFilter filter;           /* Segment term filter configuration */
         1478  +  Fts3SegReader **apSegment = 0;  /* Array of segments to read data from */
         1479  +  int nSegment = 0;               /* Size of apSegment array */
         1480  +  int nAlloc = 0;                 /* Allocated size of segment array */
         1481  +  int rc;                         /* Return code */
         1482  +  sqlite3_stmt *pStmt;            /* SQL statement to scan %_segdir table */
         1483  +  int iAge = 0;                   /* Used to assign ages to segments */
         1484  +
         1485  +  /* Loop through the entire %_segdir table. For each segment, create a
         1486  +  ** Fts3SegReader to iterate through the subset of the segment leaves
         1487  +  ** that may contain a term that matches zTerm/nTerm. For non-prefix
         1488  +  ** searches, this is always a single leaf. For prefix searches, this
         1489  +  ** may be a contiguous block of leaves.
         1490  +  **
         1491  +  ** The code in this loop does not actually load any leaves into memory
         1492  +  ** (unless the root node happens to be a leaf). It simply examines the
         1493  +  ** b-tree structure to determine which leaves need to be inspected.
         1494  +  */
         1495  +  rc = sqlite3Fts3AllSegdirs(p, &pStmt);
         1496  +  while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
         1497  +    Fts3SegReader *pNew = 0;
         1498  +    int nRoot = sqlite3_column_bytes(pStmt, 4);
         1499  +    char const *zRoot = sqlite3_column_blob(pStmt, 4);
         1500  +    if( sqlite3_column_int64(pStmt, 1)==0 ){
         1501  +      /* The entire segment is stored on the root node (which must be a
         1502  +      ** leaf). Do not bother inspecting any data in this case, just
         1503  +      ** create a Fts3SegReader to scan the single leaf. 
         1504  +      */
         1505  +      rc = sqlite3Fts3SegReaderNew(p, iAge, 0, 0, 0, zRoot, nRoot, &pNew);
         1506  +    }else{
         1507  +      sqlite3_int64 i1;
         1508  +      rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &i1);
         1509  +      if( rc==SQLITE_OK ){
         1510  +        sqlite3_int64 i2 = sqlite3_column_int64(pStmt, 2);
         1511  +        rc = sqlite3Fts3SegReaderNew(p, iAge, i1, i2, 0, 0, 0, &pNew);
         1512  +      }
  3673   1513       }
  3674         -  } else {  /* full-text query */
  3675         -    rc = sqlite3_reset(c->pStmt);
  3676         -    if( rc!=SQLITE_OK ) return rc;
         1514  +    iAge++;
  3677   1515   
  3678         -    if( c->result.nData==0 || dlrAtEnd(&c->reader) ){
  3679         -      c->eof = 1;
  3680         -      return SQLITE_OK;
         1516  +    /* If a new Fts3SegReader was allocated, add it to the apSegment array. */
         1517  +    assert( (rc==SQLITE_OK)==(pNew!=0) );
         1518  +    if( pNew ){
         1519  +      if( nSegment==nAlloc ){
         1520  +        nAlloc += 16;
         1521  +        Fts3SegReader **pArray = (Fts3SegReader **)sqlite3_realloc(
         1522  +            apSegment, nAlloc*sizeof(Fts3SegReader *)
         1523  +        );
         1524  +        if( !pArray ){
         1525  +          sqlite3Fts3SegReaderFree(pNew);
         1526  +          rc = SQLITE_NOMEM;
         1527  +          goto finished;
         1528  +        }
         1529  +        apSegment = pArray;
         1530  +      }
         1531  +      apSegment[nSegment++] = pNew;
  3681   1532       }
  3682         -    rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader));
  3683         -    dlrStep(&c->reader);
  3684         -    if( rc!=SQLITE_OK ) return rc;
  3685         -    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
  3686         -    rc = sqlite3_step(c->pStmt);
  3687         -    if( rc==SQLITE_ROW ){   /* the case we expect */
  3688         -      c->eof = 0;
  3689         -      return SQLITE_OK;
  3690         -    }
  3691         -    /* an error occurred; abort */
  3692         -    return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
  3693         -  }
  3694         -}
  3695         -
  3696         -
  3697         -/* TODO(shess) If we pushed LeafReader to the top of the file, or to
  3698         -** another file, term_select() could be pushed above
  3699         -** docListOfTerm().
  3700         -*/
  3701         -static int termSelect(fulltext_vtab *v, int iColumn,
  3702         -                      const char *pTerm, int nTerm, int isPrefix,
  3703         -                      DocListType iType, DataBuffer *out);
         1533  +  }
         1534  +  if( rc!=SQLITE_DONE ){
         1535  +    assert( rc!=SQLITE_OK );
         1536  +    goto finished;
         1537  +  }
         1538  +
         1539  +  memset(&tsc, 0, sizeof(TermSelect));
         1540  +  tsc.isReqPos = isReqPos;
         1541  +
         1542  +  filter.flags = FTS3_SEGMENT_IGNORE_EMPTY 
         1543  +        | (isPrefix ? FTS3_SEGMENT_PREFIX : 0)
         1544  +        | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0)
         1545  +        | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
         1546  +  filter.iCol = iColumn;
         1547  +  filter.zTerm = zTerm;
         1548  +  filter.nTerm = nTerm;
         1549  +
         1550  +  rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter,
         1551  +      fts3TermSelectCb, (void *)&tsc
         1552  +  );
         1553  +
         1554  +  if( rc==SQLITE_OK ){
         1555  +    *ppOut = tsc.aOutput;
         1556  +    *pnOut = tsc.nOutput;
         1557  +  }else{
         1558  +    sqlite3_free(tsc.aOutput);
         1559  +  }
         1560  +
         1561  +finished:
         1562  +  sqlite3_reset(pStmt);
         1563  +  for(i=0; i<nSegment; i++){
         1564  +    sqlite3Fts3SegReaderFree(apSegment[i]);
         1565  +  }
         1566  +  sqlite3_free(apSegment);
         1567  +  return rc;
         1568  +}
         1569  +
  3704   1570   
  3705   1571   /* 
  3706   1572   ** Return a DocList corresponding to the phrase *pPhrase.
  3707   1573   **
  3708   1574   ** The resulting DL_DOCIDS doclist is stored in pResult, which is
  3709   1575   ** overwritten.
  3710   1576   */
  3711         -static int docListOfPhrase(
  3712         -  fulltext_vtab *pTab,   /* The full text index */
  3713         -  Fts3Phrase *pPhrase,   /* Phrase to return a doclist corresponding to */
  3714         -  DocListType eListType, /* Either DL_DOCIDS or DL_POSITIONS */
  3715         -  DataBuffer *pResult    /* Write the result here */
         1577  +static int fts3PhraseSelect(
         1578  +  Fts3Table *p,                   /* Virtual table handle */
         1579  +  Fts3Phrase *pPhrase,            /* Phrase to return a doclist for */
         1580  +  int isReqPos,                   /* True if output should contain positions */
         1581  +  char **paOut,                   /* OUT: Pointer to malloc'd result buffer */
         1582  +  int *pnOut                      /* OUT: Size of buffer at *paOut */
  3716   1583   ){
  3717         -  int ii;
         1584  +  char *pOut = 0;
         1585  +  int nOut = 0;
  3718   1586     int rc = SQLITE_OK;
         1587  +  int ii;
  3719   1588     int iCol = pPhrase->iColumn;
  3720         -  DocListType eType = eListType;
  3721         -  assert( eType==DL_POSITIONS || eType==DL_DOCIDS );
  3722         -  if( pPhrase->nToken>1 ){
  3723         -    eType = DL_POSITIONS;
  3724         -  }
  3725         -
  3726         -  /* This code should never be called with buffered updates. */
  3727         -  assert( pTab->nPendingData<0 );
  3728         -
  3729         -  for(ii=0; rc==SQLITE_OK && ii<pPhrase->nToken; ii++){
  3730         -    DataBuffer tmp;
  3731         -    struct PhraseToken *p = &pPhrase->aToken[ii];
  3732         -    rc = termSelect(pTab, iCol, p->z, p->n, p->isPrefix, eType, &tmp);
  3733         -    if( rc==SQLITE_OK ){
  3734         -      if( ii==0 ){
  3735         -        *pResult = tmp;
  3736         -      }else{
  3737         -        DataBuffer res = *pResult;
  3738         -        dataBufferInit(pResult, 0);
  3739         -        if( ii==(pPhrase->nToken-1) ){
  3740         -          eType = eListType;
  3741         -        }
  3742         -        docListPhraseMerge(
  3743         -          res.pData, res.nData, tmp.pData, tmp.nData, 0, 0, eType, pResult
  3744         -        );
  3745         -        dataBufferDestroy(&res);
  3746         -        dataBufferDestroy(&tmp);
         1589  +  int isTermPos = (pPhrase->nToken>1 || isReqPos);
         1590  +
         1591  +  assert( p->nPendingData==0 );
         1592  +
         1593  +  for(ii=0; ii<pPhrase->nToken; ii++){
         1594  +    struct PhraseToken *pTok = &pPhrase->aToken[ii];
         1595  +    char *z = pTok->z;            /* Next token of the phrase */
         1596  +    int n = pTok->n;              /* Size of z in bytes */
         1597  +    int isPrefix = pTok->isPrefix;/* True if token is a prefix */
         1598  +    char *pList;                  /* Pointer to token doclist */
         1599  +    int nList;                    /* Size of buffer at pList */
         1600  +
         1601  +    rc = fts3TermSelect(p, iCol, z, n, isPrefix, isTermPos, &nList, &pList);
         1602  +    if( rc!=SQLITE_OK ) break;
         1603  +
         1604  +    if( ii==0 ){
         1605  +      pOut = pList;
         1606  +      nOut = nList;
         1607  +    }else{
         1608  +      /* Merge the new term list and the current output. If this is the
         1609  +      ** last term in the phrase, and positions are not required in the
         1610  +      ** output of this function, the positions can be dropped as part
         1611  +      ** of this merge. Either way, the result of this merge will be
         1612  +      ** smaller than nList bytes. The code in fts3DoclistMerge() is written
         1613  +      ** so that it is safe to use pList as the output as well as an input
         1614  +      ** in this case.
         1615  +      */
         1616  +      int mergetype = MERGE_POS_PHRASE;
         1617  +      if( ii==pPhrase->nToken-1 && !isReqPos ){
         1618  +        mergetype = MERGE_PHRASE;
  3747   1619         }
         1620  +      fts3DoclistMerge(mergetype, 0, 0, pList, &nOut, pOut, nOut, pList, nList);
         1621  +      sqlite3_free(pOut);
         1622  +      pOut = pList;
  3748   1623       }
  3749   1624     }
  3750   1625   
         1626  +  if( rc==SQLITE_OK ){
         1627  +    *paOut = pOut;
         1628  +    *pnOut = nOut;
         1629  +  }else{
         1630  +    sqlite3_free(pOut);
         1631  +  }
  3751   1632     return rc;
  3752   1633   }
  3753   1634   
  3754   1635   /*
  3755   1636   ** Evaluate the full-text expression pExpr against fts3 table pTab. Write
  3756   1637   ** the results into pRes.
  3757   1638   */
  3758   1639   static int evalFts3Expr(
  3759         -  fulltext_vtab *pTab,           /* Fts3 Virtual table object */
  3760         -  Fts3Expr *pExpr,               /* Parsed fts3 expression */
  3761         -  DataBuffer *pRes               /* OUT: Write results of the expression here */
         1640  +  Fts3Table *p,                   /* Virtual table handle */
         1641  +  Fts3Expr *pExpr,                /* Parsed fts3 expression */
         1642  +  char **paOut,                   /* OUT: Pointer to malloc'd result buffer */
         1643  +  int *pnOut                      /* OUT: Size of buffer at *paOut */
  3762   1644   ){
  3763         -  int rc = SQLITE_OK;
         1645  +  int rc = SQLITE_OK;             /* Return code */
  3764   1646   
  3765         -  /* Initialize the output buffer. If this is an empty query (pExpr==0), 
  3766         -  ** this is all that needs to be done. Empty queries produce empty 
  3767         -  ** result sets.
  3768         -  */
  3769         -  dataBufferInit(pRes, 0);
         1647  +  /* Zero the output parameters. */
         1648  +  *paOut = 0;
         1649  +  *pnOut = 0;
  3770   1650   
  3771   1651     if( pExpr ){
  3772   1652       if( pExpr->eType==FTSQUERY_PHRASE ){
  3773         -      DocListType eType = DL_DOCIDS;
  3774         -      if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){
  3775         -        eType = DL_POSITIONS;
  3776         -      }
  3777         -      rc = docListOfPhrase(pTab, pExpr->pPhrase, eType, pRes);
         1653  +      int isReqPos = (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR);
         1654  +      rc = fts3PhraseSelect(p, pExpr->pPhrase, isReqPos, paOut, pnOut);
  3778   1655       }else{
  3779         -      DataBuffer lhs;
  3780         -      DataBuffer rhs;
         1656  +      char *aLeft;
         1657  +      char *aRight;
         1658  +      int nLeft;
         1659  +      int nRight;
  3781   1660   
  3782         -      dataBufferInit(&rhs, 0);
  3783         -      if( SQLITE_OK==(rc = evalFts3Expr(pTab, pExpr->pLeft, &lhs)) 
  3784         -       && SQLITE_OK==(rc = evalFts3Expr(pTab, pExpr->pRight, &rhs)) 
         1661  +      if( SQLITE_OK==(rc = evalFts3Expr(p, pExpr->pRight, &aRight, &nRight))
         1662  +       && SQLITE_OK==(rc = evalFts3Expr(p, pExpr->pLeft, &aLeft, &nLeft))
  3785   1663         ){
  3786   1664           switch( pExpr->eType ){
  3787   1665             case FTSQUERY_NEAR: {
  3788         -            int nToken;
  3789   1666               Fts3Expr *pLeft;
  3790         -            DocListType eType = DL_DOCIDS;
         1667  +            Fts3Expr *pRight;
         1668  +            int mergetype = MERGE_NEAR;
         1669  +            int nParam1;
         1670  +            int nParam2;
         1671  +            char *aBuffer;
         1672  +           
  3791   1673               if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){
  3792         -              eType = DL_POSITIONS;
         1674  +              mergetype = MERGE_POS_NEAR;
  3793   1675               }
  3794   1676               pLeft = pExpr->pLeft;
  3795   1677               while( pLeft->eType==FTSQUERY_NEAR ){ 
  3796   1678                 pLeft=pLeft->pRight;
  3797   1679               }
  3798         -            assert( pExpr->pRight->eType==FTSQUERY_PHRASE );
         1680  +            pRight = pExpr->pRight;
         1681  +            assert( pRight->eType==FTSQUERY_PHRASE );
  3799   1682               assert( pLeft->eType==FTSQUERY_PHRASE );
  3800         -            nToken = pLeft->pPhrase->nToken + pExpr->pRight->pPhrase->nToken;
  3801         -            docListPhraseMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, 
  3802         -                pExpr->nNear+1, nToken, eType, pRes
         1683  +
         1684  +            nParam1 = pExpr->nNear+1;
         1685  +            nParam2 = nParam1+pLeft->pPhrase->nToken+pRight->pPhrase->nToken-2;
         1686  +            aBuffer = sqlite3_malloc(nLeft+nRight+1);
         1687  +            rc = fts3DoclistMerge(mergetype, nParam1, nParam2, aBuffer,
         1688  +                pnOut, aLeft, nLeft, aRight, nRight
  3803   1689               );
         1690  +            if( rc!=SQLITE_OK ){
         1691  +              sqlite3_free(aBuffer);
         1692  +            }else{
         1693  +              *paOut = aBuffer;
         1694  +            }
         1695  +            sqlite3_free(aLeft);
  3804   1696               break;
  3805   1697             }
  3806         -          case FTSQUERY_NOT: {
  3807         -            docListExceptMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData,pRes);
         1698  +
         1699  +          case FTSQUERY_OR: {
         1700  +            /* Allocate a buffer for the output. The maximum size is the
         1701  +            ** sum of the sizes of the two input buffers. The +1 term is
         1702  +            ** so that a buffer of zero bytes is never allocated - this can
         1703  +            ** cause fts3DoclistMerge() to incorrectly return SQLITE_NOMEM.
         1704  +            */
         1705  +            char *aBuffer = sqlite3_malloc(nRight+nLeft+1);
         1706  +            rc = fts3DoclistMerge(MERGE_OR, 0, 0, aBuffer, pnOut,
         1707  +                aLeft, nLeft, aRight, nRight
         1708  +            );
         1709  +            *paOut = aBuffer;
         1710  +            sqlite3_free(aLeft);
  3808   1711               break;
  3809   1712             }
  3810         -          case FTSQUERY_AND: {
  3811         -            docListAndMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, pRes);
  3812         -            break;
  3813         -          }
  3814         -          case FTSQUERY_OR: {
  3815         -            docListOrMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, pRes);
         1713  +
         1714  +          case FTSQUERY_AND:
         1715  +          case FTSQUERY_NOT: {
         1716  +            assert( FTSQUERY_NOT==MERGE_NOT && FTSQUERY_AND==MERGE_AND );
         1717  +            fts3DoclistMerge(pExpr->eType, 0, 0, aLeft, pnOut,
         1718  +                aLeft, nLeft, aRight, nRight
         1719  +            );
         1720  +            *paOut = aLeft;
  3816   1721               break;
  3817   1722             }
  3818   1723           }
  3819   1724         }
  3820         -      dataBufferDestroy(&lhs);
  3821         -      dataBufferDestroy(&rhs);
         1725  +      sqlite3_free(aRight);
  3822   1726       }
  3823   1727     }
  3824   1728   
  3825   1729     return rc;
  3826   1730   }
  3827   1731   
  3828         -/* TODO(shess) Refactor the code to remove this forward decl. */
  3829         -static int flushPendingTerms(fulltext_vtab *v);
  3830         -
  3831         -/* Perform a full-text query using the search expression in
  3832         -** zInput[0..nInput-1].  Return a list of matching documents
  3833         -** in pResult.
  3834         -**
  3835         -** Queries must match column iColumn.  Or if iColumn>=nColumn
  3836         -** they are allowed to match against any column.
  3837         -*/
  3838         -static int fulltextQuery(
  3839         -  fulltext_vtab *v,      /* The full text index */
  3840         -  int iColumn,           /* Match against this column by default */
  3841         -  const char *zInput,    /* The query string */
  3842         -  int nInput,            /* Number of bytes in zInput[] */
  3843         -  DataBuffer *pResult,   /* Write the result doclist here */
  3844         -  Fts3Expr **ppExpr        /* Put parsed query string here */
  3845         -){
  3846         -  int rc;
  3847         -
  3848         -  /* TODO(shess) Instead of flushing pendingTerms, we could query for
  3849         -  ** the relevant term and merge the doclist into what we receive from
  3850         -  ** the database.  Wait and see if this is a common issue, first.
  3851         -  **
  3852         -  ** A good reason not to flush is to not generate update-related
  3853         -  ** error codes from here.
  3854         -  */
  3855         -
  3856         -  /* Flush any buffered updates before executing the query. */
  3857         -  rc = flushPendingTerms(v);
  3858         -  if( rc!=SQLITE_OK ){
  3859         -    return rc;
  3860         -  }
  3861         -
  3862         -  /* Parse the query passed to the MATCH operator. */
  3863         -  rc = sqlite3Fts3ExprParse(v->pTokenizer, 
  3864         -      v->azColumn, v->nColumn, iColumn, zInput, nInput, ppExpr
  3865         -  );
  3866         -  if( rc!=SQLITE_OK ){
  3867         -    assert( 0==(*ppExpr) );
  3868         -    return rc;
  3869         -  }
  3870         -
  3871         -  return evalFts3Expr(v, *ppExpr, pResult);
  3872         -}
  3873         -
  3874   1732   /*
  3875   1733   ** This is the xFilter interface for the virtual table.  See
  3876   1734   ** the virtual table xFilter method documentation for additional
  3877   1735   ** information.
  3878   1736   **
  3879         -** If idxNum==QUERY_GENERIC then do a full table scan against
         1737  +** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against
  3880   1738   ** the %_content table.
  3881   1739   **
  3882         -** If idxNum==QUERY_DOCID then do a docid lookup for a single entry
         1740  +** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry
  3883   1741   ** in the %_content table.
  3884   1742   **
  3885         -** If idxNum>=QUERY_FULLTEXT then use the full text index.  The
         1743  +** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index.  The
  3886   1744   ** column on the left-hand side of the MATCH operator is column
  3887         -** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand
         1745  +** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed.  argv[0] is the right-hand
  3888   1746   ** side of the MATCH operator.
  3889   1747   */
  3890   1748   /* TODO(shess) Upgrade the cursor initialization and destruction to
  3891         -** account for fulltextFilter() being called multiple times on the
  3892         -** same cursor.  The current solution is very fragile.  Apply fix to
         1749  +** account for fts3FilterMethod() being called multiple times on the
         1750  +** same cursor. The current solution is very fragile. Apply fix to
  3893   1751   ** fts3 as appropriate.
  3894   1752   */
  3895         -static int fulltextFilter(
  3896         -  sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
  3897         -  int idxNum, const char *idxStr,   /* Which indexing scheme to use */
  3898         -  int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
         1753  +static int fts3FilterMethod(
         1754  +  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
         1755  +  int idxNum,                     /* Strategy index */
         1756  +  const char *idxStr,             /* Unused */
         1757  +  int nVal,                       /* Number of elements in apVal */
         1758  +  sqlite3_value **apVal           /* Arguments for the indexing scheme */
  3899   1759   ){
  3900         -  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  3901         -  fulltext_vtab *v = cursor_vtab(c);
  3902         -  int rc;
  3903         -
  3904         -  FTSTRACE(("FTS3 Filter %p\n",pCursor));
  3905         -
  3906         -  /* If the cursor has a statement that was not prepared according to
  3907         -  ** idxNum, clear it.  I believe all calls to fulltextFilter with a
  3908         -  ** given cursor will have the same idxNum , but in this case it's
  3909         -  ** easy to be safe.
  3910         -  */
  3911         -  if( c->pStmt && c->iCursorType!=idxNum ){
  3912         -    sqlite3_finalize(c->pStmt);
  3913         -    c->pStmt = NULL;
  3914         -  }
  3915         -
  3916         -  /* Get a fresh statement appropriate to idxNum. */
  3917         -  /* TODO(shess): Add a prepared-statement cache in the vt structure.
  3918         -  ** The cache must handle multiple open cursors.  Easier to cache the
  3919         -  ** statement variants at the vt to reduce malloc/realloc/free here.
  3920         -  ** Or we could have a StringBuffer variant which allowed stack
  3921         -  ** construction for small values.
         1760  +  const char *azSql[] = {
         1761  +    "SELECT * FROM %Q.'%q_content' WHERE docid = ?", /* non-full-table-scan */
         1762  +    "SELECT * FROM %Q.'%q_content'",                 /* full-table-scan */
         1763  +  };
         1764  +  int rc;                         /* Return code */
         1765  +  char *zSql;                     /* SQL statement used to access %_content */
         1766  +  Fts3Table *p = (Fts3Table *)pCursor->pVtab;
         1767  +  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
         1768  +
         1769  +  assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
         1770  +  assert( nVal==0 || nVal==1 );
         1771  +  assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
         1772  +
         1773  +  /* In case the cursor has been used before, clear it now. */
         1774  +  sqlite3_finalize(pCsr->pStmt);
         1775  +  sqlite3_free(pCsr->aDoclist);
         1776  +  memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor));
         1777  +
         1778  +  /* Compile a SELECT statement for this cursor. For a full-table-scan, the
         1779  +  ** statement loops through all rows of the %_content table. For a
         1780  +  ** full-text query or docid lookup, the statement retrieves a single
         1781  +  ** row by docid.
  3922   1782     */
  3923         -  if( !c->pStmt ){
  3924         -    StringBuffer sb;
  3925         -    initStringBuffer(&sb);
  3926         -    append(&sb, "SELECT docid, ");
  3927         -    appendList(&sb, v->nColumn, v->azContentColumn);
  3928         -    append(&sb, " FROM %_content");
  3929         -    if( idxNum!=QUERY_GENERIC ) append(&sb, " WHERE docid = ?");
  3930         -    rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt,
  3931         -                     stringBufferData(&sb));
  3932         -    stringBufferDestroy(&sb);
  3933         -    if( rc!=SQLITE_OK ) return rc;
  3934         -    c->iCursorType = idxNum;
         1783  +  zSql = sqlite3_mprintf(azSql[idxNum==FTS3_FULLSCAN_SEARCH], p->zDb, p->zName);
         1784  +  if( !zSql ){
         1785  +    rc = SQLITE_NOMEM;
  3935   1786     }else{
  3936         -    sqlite3_reset(c->pStmt);
  3937         -    assert( c->iCursorType==idxNum );
         1787  +    rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
         1788  +    sqlite3_free(zSql);
         1789  +  }
         1790  +  if( rc!=SQLITE_OK ) return rc;
         1791  +  pCsr->eType = idxNum;
         1792  +
         1793  +  if( idxNum==FTS3_DOCID_SEARCH ){
         1794  +    rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
         1795  +  }else if( idxNum!=FTS3_FULLSCAN_SEARCH ){
         1796  +    int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
         1797  +    const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);
         1798  +
         1799  +    rc = sqlite3Fts3PendingTermsFlush(p);
         1800  +    if( rc!=SQLITE_OK ) return rc;
         1801  +
         1802  +    rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->nColumn, 
         1803  +        iCol, zQuery, -1, &pCsr->pExpr
         1804  +    );
         1805  +    if( rc!=SQLITE_OK ) return rc;
         1806  +
         1807  +    rc = evalFts3Expr(p, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist);
         1808  +    pCsr->pNextId = pCsr->aDoclist;
         1809  +    pCsr->iPrevId = 0;
  3938   1810     }
  3939   1811   
  3940         -  switch( idxNum ){
  3941         -    case QUERY_GENERIC:
  3942         -      break;
  3943         -
  3944         -    case QUERY_DOCID:
  3945         -      rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
  3946         -      if( rc!=SQLITE_OK ) return rc;
  3947         -      break;
  3948         -
  3949         -    default:   /* full-text search */
  3950         -    {
  3951         -      int iCol = idxNum-QUERY_FULLTEXT;
  3952         -      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
  3953         -      assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
  3954         -      assert( argc==1 );
  3955         -      if( c->result.nData!=0 ){
  3956         -        /* This case happens if the same cursor is used repeatedly. */
  3957         -        dlrDestroy(&c->reader);
  3958         -        dataBufferReset(&c->result);
  3959         -      }else{
  3960         -        dataBufferInit(&c->result, 0);
  3961         -      }
  3962         -      rc = fulltextQuery(v, iCol, zQuery, -1, &c->result, &c->pExpr);
  3963         -      if( rc!=SQLITE_OK ) return rc;
  3964         -      if( c->result.nData!=0 ){
  3965         -        dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData);
  3966         -      }
  3967         -      break;
  3968         -    }
  3969         -  }
  3970         -
  3971         -  return fulltextNext(pCursor);
  3972         -}
  3973         -
  3974         -/* This is the xEof method of the virtual table.  The SQLite core
  3975         -** calls this routine to find out if it has reached the end of
  3976         -** a query's results set.
  3977         -*/
  3978         -static int fulltextEof(sqlite3_vtab_cursor *pCursor){
  3979         -  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  3980         -  return c->eof;
         1812  +  if( rc!=SQLITE_OK ) return rc;
         1813  +  return fts3NextMethod(pCursor);
         1814  +}
         1815  +
         1816  +/* 
         1817  +** This is the xEof method of the virtual table. SQLite calls this 
         1818  +** routine to find out if it has reached the end of a result set.
         1819  +*/
         1820  +static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){
         1821  +  return ((Fts3Cursor *)pCursor)->isEof;
  3981   1822   }
  3982   1823   
  3983   1824   /* This is the xColumn method of the virtual table.  The SQLite
  3984   1825   ** core calls this method during a query when it needs the value
  3985   1826   ** of a column from the virtual table.  This method needs to use
  3986   1827   ** one of the sqlite3_result_*() routines to store the requested
  3987   1828   ** value back in the pContext.
  3988   1829   */
  3989   1830   static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
  3990   1831                             sqlite3_context *pContext, int idxCol){
  3991         -  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  3992         -  fulltext_vtab *v = cursor_vtab(c);
         1832  +  Fts3Cursor *c = (Fts3Cursor *) pCursor;
         1833  +  Fts3Table *v = cursor_vtab(c);
  3993   1834   
  3994   1835     if( idxCol<v->nColumn ){
  3995   1836       sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
  3996   1837       sqlite3_result_value(pContext, pVal);
  3997   1838     }else if( idxCol==v->nColumn ){
  3998   1839       /* The extra column whose name is the same as the table.
  3999   1840       ** Return a blob which is a pointer to the cursor
................................................................................
  4003   1844       /* The docid column, which is an alias for rowid. */
  4004   1845       sqlite3_value *pVal = sqlite3_column_value(c->pStmt, 0);
  4005   1846       sqlite3_result_value(pContext, pVal);
  4006   1847     }
  4007   1848     return SQLITE_OK;
  4008   1849   }
  4009   1850   
  4010         -/* This is the xRowid method.  The SQLite core calls this routine to
  4011         -** retrieve the rowid for the current row of the result set.  fts3
  4012         -** exposes %_content.docid as the rowid for the virtual table.  The
         1851  +/* 
         1852  +** This is the xRowid method. The SQLite core calls this routine to
         1853  +** retrieve the rowid for the current row of the result set. fts3
         1854  +** exposes %_content.docid as the rowid for the virtual table. The
  4013   1855   ** rowid should be written to *pRowid.
  4014   1856   */
  4015         -static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  4016         -  fulltext_cursor *c = (fulltext_cursor *) pCursor;
         1857  +static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
         1858  +  Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
         1859  +  *pRowid = sqlite3_column_int64(pCsr->pStmt, 0);
         1860  +  return SQLITE_OK;
         1861  +}
  4017   1862   
  4018         -  *pRowid = sqlite3_column_int64(c->pStmt, 0);
         1863  +/* 
         1864  +** This function is the implementation of the xUpdate callback used by 
         1865  +** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
         1866  +** inserted, updated or deleted.
         1867  +*/
         1868  +static int fts3UpdateMethod(
         1869  +  sqlite3_vtab *pVtab,            /* Virtual table handle */
         1870  +  int nArg,                       /* Size of argument array */
         1871  +  sqlite3_value **apVal,          /* Array of arguments */
         1872  +  sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
         1873  +){
         1874  +  return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid);
         1875  +}
         1876  +
         1877  +/*
         1878  +** Implementation of xSync() method. Flush the contents of the pending-terms
         1879  +** hash-table to the database.
         1880  +*/
         1881  +static int fts3SyncMethod(sqlite3_vtab *pVtab){
         1882  +  return sqlite3Fts3PendingTermsFlush((Fts3Table *)pVtab);
         1883  +}
         1884  +
         1885  +/*
         1886  +** Implementation of xBegin() method. This is a no-op.
         1887  +*/
         1888  +static int fts3BeginMethod(sqlite3_vtab *pVtab){
         1889  +  assert( ((Fts3Table *)pVtab)->nPendingData==0 );
         1890  +  return SQLITE_OK;
         1891  +}
         1892  +
         1893  +/*
         1894  +** Implementation of xCommit() method. This is a no-op. The contents of
         1895  +** the pending-terms hash-table have already been flushed into the database
         1896  +** by fts3SyncMethod().
         1897  +*/
         1898  +static int fts3CommitMethod(sqlite3_vtab *pVtab){
         1899  +  assert( ((Fts3Table *)pVtab)->nPendingData==0 );
  4019   1900     return SQLITE_OK;
  4020   1901   }
  4021   1902   
  4022         -/* Add all terms in [zText] to pendingTerms table.  If [iColumn] > 0,
  4023         -** we also store positions and offsets in the hash table using that
  4024         -** column number.
         1903  +/*
         1904  +** Implementation of xRollback(). Discard the contents of the pending-terms
         1905  +** hash-table. Any changes made to the database are reverted by SQLite.
  4025   1906   */
  4026         -static int buildTerms(fulltext_vtab *v, sqlite_int64 iDocid,
  4027         -                      const char *zText, int iColumn){
  4028         -  sqlite3_tokenizer *pTokenizer = v->pTokenizer;
  4029         -  sqlite3_tokenizer_cursor *pCursor;
  4030         -  const char *pToken;
  4031         -  int nTokenBytes;
  4032         -  int iStartOffset, iEndOffset, iPosition;
  4033         -  int rc;
  4034         -
  4035         -  rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
  4036         -  if( rc!=SQLITE_OK ) return rc;
  4037         -
  4038         -  pCursor->pTokenizer = pTokenizer;
  4039         -  while( SQLITE_OK==(rc=pTokenizer->pModule->xNext(pCursor,
  4040         -                                                   &pToken, &nTokenBytes,
  4041         -                                                   &iStartOffset, &iEndOffset,
  4042         -                                                   &iPosition)) ){
  4043         -    DLCollector *p;
  4044         -    int nData;                   /* Size of doclist before our update. */
  4045         -
  4046         -    /* Positions can't be negative; we use -1 as a terminator
  4047         -     * internally.  Token can't be NULL or empty. */
  4048         -    if( iPosition<0 || pToken == NULL || nTokenBytes == 0 ){
  4049         -      rc = SQLITE_ERROR;
  4050         -      break;
  4051         -    }
  4052         -
  4053         -    p = fts3HashFind(&v->pendingTerms, pToken, nTokenBytes);
  4054         -    if( p==NULL ){
  4055         -      nData = 0;
  4056         -      p = dlcNew(iDocid, DL_DEFAULT);
  4057         -      fts3HashInsert(&v->pendingTerms, pToken, nTokenBytes, p);
  4058         -
  4059         -      /* Overhead for our hash table entry, the key, and the value. */
  4060         -      v->nPendingData += sizeof(struct fts3HashElem)+sizeof(*p)+nTokenBytes;
  4061         -    }else{
  4062         -      nData = p->b.nData;
  4063         -      if( p->dlw.iPrevDocid!=iDocid ) dlcNext(p, iDocid);
  4064         -    }
  4065         -    if( iColumn>=0 ){
  4066         -      dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset);
  4067         -    }
  4068         -
  4069         -    /* Accumulate data added by dlcNew or dlcNext, and dlcAddPos. */
  4070         -    v->nPendingData += p->b.nData-nData;
  4071         -  }
  4072         -
  4073         -  /* TODO(shess) Check return?  Should this be able to cause errors at
  4074         -  ** this point?  Actually, same question about sqlite3_finalize(),
  4075         -  ** though one could argue that failure there means that the data is
  4076         -  ** not durable.  *ponder*
  4077         -  */
  4078         -  pTokenizer->pModule->xClose(pCursor);
  4079         -  if( SQLITE_DONE == rc ) return SQLITE_OK;
  4080         -  return rc;
  4081         -}
  4082         -
  4083         -/* Add doclists for all terms in [pValues] to pendingTerms table. */
  4084         -static int insertTerms(fulltext_vtab *v, sqlite_int64 iDocid,
  4085         -                       sqlite3_value **pValues){
  4086         -  int i;
  4087         -  for(i = 0; i < v->nColumn ; ++i){
  4088         -    char *zText = (char*)sqlite3_value_text(pValues[i]);
  4089         -    int rc = buildTerms(v, iDocid, zText, i);
  4090         -    if( rc!=SQLITE_OK ) return rc;
  4091         -  }
         1907  +static int fts3RollbackMethod(sqlite3_vtab *pVtab){
         1908  +  sqlite3Fts3PendingTermsClear((Fts3Table *)pVtab);
  4092   1909     return SQLITE_OK;
  4093   1910   }
  4094   1911   
  4095         -/* Add empty doclists for all terms in the given row's content to
  4096         -** pendingTerms.
  4097         -*/
  4098         -static int deleteTerms(fulltext_vtab *v, sqlite_int64 iDocid){
  4099         -  const char **pValues;
  4100         -  int i, rc;
  4101         -
  4102         -  /* TODO(shess) Should we allow such tables at all? */
  4103         -  if( DL_DEFAULT==DL_DOCIDS ) return SQLITE_ERROR;
  4104         -
  4105         -  rc = content_select(v, iDocid, &pValues);
  4106         -  if( rc!=SQLITE_OK ) return rc;
  4107         -
  4108         -  for(i = 0 ; i < v->nColumn; ++i) {
  4109         -    rc = buildTerms(v, iDocid, pValues[i], -1);
  4110         -    if( rc!=SQLITE_OK ) break;
  4111         -  }
  4112         -
  4113         -  freeStringArray(v->nColumn, pValues);
  4114         -  return SQLITE_OK;
  4115         -}
  4116         -
  4117         -/* TODO(shess) Refactor the code to remove this forward decl. */
  4118         -static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid);
  4119         -
  4120         -/* Insert a row into the %_content table; set *piDocid to be the ID of the
  4121         -** new row.  Add doclists for terms to pendingTerms.
         1912  +/*
         1913  +** Helper function used by the implementation of the overloaded snippet(),
         1914  +** offsets() and optimize() SQL functions.
         1915  +**
         1916  +** If the value passed as the third argument is a blob of size
         1917  +** sizeof(Fts3Cursor*), then the blob contents are copied to the 
         1918  +** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error
         1919  +** message is written to context pContext and SQLITE_ERROR returned. The
         1920  +** string passed via zFunc is used as part of the error message.
  4122   1921   */
  4123         -static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestDocid,
  4124         -                        sqlite3_value **pValues, sqlite_int64 *piDocid){
  4125         -  int rc;
  4126         -
  4127         -  rc = content_insert(v, pRequestDocid, pValues);  /* execute an SQL INSERT */
  4128         -  if( rc!=SQLITE_OK ) return rc;
  4129         -
  4130         -  /* docid column is an alias for rowid. */
  4131         -  *piDocid = sqlite3_last_insert_rowid(v->db);
  4132         -  rc = initPendingTerms(v, *piDocid);
  4133         -  if( rc!=SQLITE_OK ) return rc;
  4134         -
  4135         -  return insertTerms(v, *piDocid, pValues);
  4136         -}
  4137         -
  4138         -/* Delete a row from the %_content table; add empty doclists for terms
  4139         -** to pendingTerms.
  4140         -*/
  4141         -static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){
  4142         -  int rc = initPendingTerms(v, iRow);
  4143         -  if( rc!=SQLITE_OK ) return rc;
  4144         -
  4145         -  rc = deleteTerms(v, iRow);
  4146         -  if( rc!=SQLITE_OK ) return rc;
  4147         -
  4148         -  return content_delete(v, iRow);  /* execute an SQL DELETE */
  4149         -}
  4150         -
  4151         -/* Update a row in the %_content table; add delete doclists to
  4152         -** pendingTerms for old terms not in the new data, add insert doclists
  4153         -** to pendingTerms for terms in the new data.
  4154         -*/
  4155         -static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
  4156         -                        sqlite3_value **pValues){
  4157         -  int rc = initPendingTerms(v, iRow);
  4158         -  if( rc!=SQLITE_OK ) return rc;
  4159         -
  4160         -  /* Generate an empty doclist for each term that previously appeared in this
  4161         -   * row. */
  4162         -  rc = deleteTerms(v, iRow);
  4163         -  if( rc!=SQLITE_OK ) return rc;
  4164         -
  4165         -  rc = content_update(v, pValues, iRow);  /* execute an SQL UPDATE */
  4166         -  if( rc!=SQLITE_OK ) return rc;
  4167         -
  4168         -  /* Now add positions for terms which appear in the updated row. */
  4169         -  return insertTerms(v, iRow, pValues);
  4170         -}
  4171         -
  4172         -/*******************************************************************/
  4173         -/* InteriorWriter is used to collect terms and block references into
  4174         -** interior nodes in %_segments.  See commentary at top of file for
  4175         -** format.
  4176         -*/
  4177         -
  4178         -/* How large interior nodes can grow. */
  4179         -#define INTERIOR_MAX 2048
  4180         -
  4181         -/* Minimum number of terms per interior node (except the root). This
  4182         -** prevents large terms from making the tree too skinny - must be >0
  4183         -** so that the tree always makes progress.  Note that the min tree
  4184         -** fanout will be INTERIOR_MIN_TERMS+1.
  4185         -*/
  4186         -#define INTERIOR_MIN_TERMS 7
  4187         -#if INTERIOR_MIN_TERMS<1
  4188         -# error INTERIOR_MIN_TERMS must be greater than 0.
  4189         -#endif
  4190         -
  4191         -/* ROOT_MAX controls how much data is stored inline in the segment
  4192         -** directory.
  4193         -*/
  4194         -/* TODO(shess) Push ROOT_MAX down to whoever is writing things.  It's
  4195         -** only here so that interiorWriterRootInfo() and leafWriterRootInfo()
  4196         -** can both see it, but if the caller passed it in, we wouldn't even
  4197         -** need a define.
  4198         -*/
  4199         -#define ROOT_MAX 1024
  4200         -#if ROOT_MAX<VARINT_MAX*2
  4201         -# error ROOT_MAX must have enough space for a header.
  4202         -#endif
  4203         -
  4204         -/* InteriorBlock stores a linked-list of interior blocks while a lower
  4205         -** layer is being constructed.
  4206         -*/
  4207         -typedef struct InteriorBlock {
  4208         -  DataBuffer term;           /* Leftmost term in block's subtree. */
  4209         -  DataBuffer data;           /* Accumulated data for the block. */
  4210         -  struct InteriorBlock *next;
  4211         -} InteriorBlock;
  4212         -
  4213         -static InteriorBlock *interiorBlockNew(int iHeight, sqlite_int64 iChildBlock,
  4214         -                                       const char *pTerm, int nTerm){
  4215         -  InteriorBlock *block = sqlite3_malloc(sizeof(InteriorBlock));
  4216         -  char c[VARINT_MAX+VARINT_MAX];
  4217         -  int n;
  4218         -
  4219         -  if( block ){
  4220         -    memset(block, 0, sizeof(*block));
  4221         -    dataBufferInit(&block->term, 0);
  4222         -    dataBufferReplace(&block->term, pTerm, nTerm);
  4223         -
  4224         -    n = fts3PutVarint(c, iHeight);
  4225         -    n += fts3PutVarint(c+n, iChildBlock);
  4226         -    dataBufferInit(&block->data, INTERIOR_MAX);
  4227         -    dataBufferReplace(&block->data, c, n);
         1922  +static int fts3FunctionArg(
         1923  +  sqlite3_context *pContext,      /* SQL function call context */
         1924  +  const char *zFunc,              /* Function name */
         1925  +  sqlite3_value *pVal,            /* argv[0] passed to function */
         1926  +  Fts3Cursor **ppCsr         /* OUT: Store cursor handle here */
         1927  +){
         1928  +  Fts3Cursor *pRet;
         1929  +  if( sqlite3_value_type(pVal)!=SQLITE_BLOB 
         1930  +   && sqlite3_value_bytes(pVal)!=sizeof(Fts3Cursor *)
         1931  +  ){
         1932  +    char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc);
         1933  +    sqlite3_result_error(pContext, zErr, -1);
         1934  +    sqlite3_free(zErr);
         1935  +    return SQLITE_ERROR;
  4228   1936     }
  4229         -  return block;
  4230         -}
  4231         -
  4232         -#ifndef NDEBUG
  4233         -/* Verify that the data is readable as an interior node. */
  4234         -static void interiorBlockValidate(InteriorBlock *pBlock){
  4235         -  const char *pData = pBlock->data.pData;
  4236         -  int nData = pBlock->data.nData;
  4237         -  int n, iDummy;
  4238         -  sqlite_int64 iBlockid;
  4239         -
  4240         -  assert( nData>0 );
  4241         -  assert( pData!=0 );
  4242         -  assert( pData+nData>pData );
  4243         -
  4244         -  /* Must lead with height of node as a varint(n), n>0 */
  4245         -  n = fts3GetVarint32(pData, &iDummy);
  4246         -  assert( n>0 );
  4247         -  assert( iDummy>0 );
  4248         -  assert( n<nData );
  4249         -  pData += n;
  4250         -  nData -= n;
  4251         -
  4252         -  /* Must contain iBlockid. */
  4253         -  n = fts3GetVarint(pData, &iBlockid);
  4254         -  assert( n>0 );
  4255         -  assert( n<=nData );
  4256         -  pData += n;
  4257         -  nData -= n;
  4258         -
  4259         -  /* Zero or more terms of positive length */
  4260         -  if( nData!=0 ){
  4261         -    /* First term is not delta-encoded. */
  4262         -    n = fts3GetVarint32(pData, &iDummy);
  4263         -    assert( n>0 );
  4264         -    assert( iDummy>0 );
  4265         -    assert( n+iDummy>0);
  4266         -    assert( n+iDummy<=nData );
  4267         -    pData += n+iDummy;
  4268         -    nData -= n+iDummy;
  4269         -
  4270         -    /* Following terms delta-encoded. */
  4271         -    while( nData!=0 ){
  4272         -      /* Length of shared prefix. */
  4273         -      n = fts3GetVarint32(pData, &iDummy);
  4274         -      assert( n>0 );
  4275         -      assert( iDummy>=0 );
  4276         -      assert( n<nData );
  4277         -      pData += n;
  4278         -      nData -= n;
  4279         -
  4280         -      /* Length and data of distinct suffix. */
  4281         -      n = fts3GetVarint32(pData, &iDummy);
  4282         -      assert( n>0 );
  4283         -      assert( iDummy>0 );
  4284         -      assert( n+iDummy>0);
  4285         -      assert( n+iDummy<=nData );
  4286         -      pData += n+iDummy;
  4287         -      nData -= n+iDummy;
  4288         -    }
  4289         -  }
  4290         -}
  4291         -#define ASSERT_VALID_INTERIOR_BLOCK(x) interiorBlockValidate(x)
  4292         -#else
  4293         -#define ASSERT_VALID_INTERIOR_BLOCK(x) assert( 1 )
  4294         -#endif
  4295         -
  4296         -typedef struct InteriorWriter {
  4297         -  int iHeight;                   /* from 0 at leaves. */
  4298         -  InteriorBlock *first, *last;
  4299         -  struct InteriorWriter *parentWriter;
  4300         -
  4301         -  DataBuffer term;               /* Last term written to block "last". */
  4302         -  sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */
  4303         -#ifndef NDEBUG
  4304         -  sqlite_int64 iLastChildBlock;  /* for consistency checks. */
  4305         -#endif
  4306         -} InteriorWriter;
  4307         -
  4308         -/* Initialize an interior node where pTerm[nTerm] marks the leftmost
  4309         -** term in the tree.  iChildBlock is the leftmost child block at the
  4310         -** next level down the tree.
  4311         -*/
  4312         -static void interiorWriterInit(int iHeight, const char *pTerm, int nTerm,
  4313         -                               sqlite_int64 iChildBlock,
  4314         -                               InteriorWriter *pWriter){
  4315         -  InteriorBlock *block;
  4316         -  assert( iHeight>0 );
  4317         -  CLEAR(pWriter);
  4318         -
  4319         -  pWriter->iHeight = iHeight;
  4320         -  pWriter->iOpeningChildBlock = iChildBlock;
  4321         -#ifndef NDEBUG
  4322         -  pWriter->iLastChildBlock = iChildBlock;
  4323         -#endif
  4324         -  block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm);
  4325         -  pWriter->last = pWriter->first = block;
  4326         -  ASSERT_VALID_INTERIOR_BLOCK(pWriter->last);
  4327         -  dataBufferInit(&pWriter->term, 0);
  4328         -}
  4329         -
  4330         -/* Append the child node rooted at iChildBlock to the interior node,
  4331         -** with pTerm[nTerm] as the leftmost term in iChildBlock's subtree.
  4332         -*/
  4333         -static void interiorWriterAppend(InteriorWriter *pWriter,
  4334         -                                 const char *pTerm, int nTerm,
  4335         -                                 sqlite_int64 iChildBlock){
  4336         -  char c[VARINT_MAX+VARINT_MAX];
  4337         -  int n, nPrefix = 0;
  4338         -
  4339         -  ASSERT_VALID_INTERIOR_BLOCK(pWriter->last);
  4340         -
  4341         -  /* The first term written into an interior node is actually
  4342         -  ** associated with the second child added (the first child was added
  4343         -  ** in interiorWriterInit, or in the if clause at the bottom of this
  4344         -  ** function).  That term gets encoded straight up, with nPrefix left
  4345         -  ** at 0.
  4346         -  */
  4347         -  if( pWriter->term.nData==0 ){
  4348         -    n = fts3PutVarint(c, nTerm);
  4349         -  }else{
  4350         -    while( nPrefix<pWriter->term.nData &&
  4351         -           pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
  4352         -      nPrefix++;
  4353         -    }
  4354         -
  4355         -    n = fts3PutVarint(c, nPrefix);
  4356         -    n += fts3PutVarint(c+n, nTerm-nPrefix);
  4357         -  }
  4358         -
  4359         -#ifndef NDEBUG
  4360         -  pWriter->iLastChildBlock++;
  4361         -#endif
  4362         -  assert( pWriter->iLastChildBlock==iChildBlock );
  4363         -
  4364         -  /* Overflow to a new block if the new term makes the current block
  4365         -  ** too big, and the current block already has enough terms.
  4366         -  */
  4367         -  if( pWriter->last->data.nData+n+nTerm-nPrefix>INTERIOR_MAX &&
  4368         -      iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){
  4369         -    pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock,
  4370         -                                           pTerm, nTerm);
  4371         -    pWriter->last = pWriter->last->next;
  4372         -    pWriter->iOpeningChildBlock = iChildBlock;
  4373         -    dataBufferReset(&pWriter->term);
  4374         -  }else{
  4375         -    dataBufferAppend2(&pWriter->last->data, c, n,
  4376         -                      pTerm+nPrefix, nTerm-nPrefix);
  4377         -    dataBufferReplace(&pWriter->term, pTerm, nTerm);
  4378         -  }
  4379         -  ASSERT_VALID_INTERIOR_BLOCK(pWriter->last);
  4380         -}
  4381         -
  4382         -/* Free the space used by pWriter, including the linked-list of
  4383         -** InteriorBlocks, and parentWriter, if present.
  4384         -*/
  4385         -static int interiorWriterDestroy(InteriorWriter *pWriter){
  4386         -  InteriorBlock *block = pWriter->first;
  4387         -
  4388         -  while( block!=NULL ){
  4389         -    InteriorBlock *b = block;
  4390         -    block = block->next;
  4391         -    dataBufferDestroy(&b->term);
  4392         -    dataBufferDestroy(&b->data);
  4393         -    sqlite3_free(b);
  4394         -  }
  4395         -  if( pWriter->parentWriter!=NULL ){
  4396         -    interiorWriterDestroy(pWriter->parentWriter);
  4397         -    sqlite3_free(pWriter->parentWriter);
  4398         -  }
  4399         -  dataBufferDestroy(&pWriter->term);
  4400         -  SCRAMBLE(pWriter);
  4401         -  return SQLITE_OK;
  4402         -}
  4403         -
  4404         -/* If pWriter can fit entirely in ROOT_MAX, return it as the root info
  4405         -** directly, leaving *piEndBlockid unchanged.  Otherwise, flush
  4406         -** pWriter to %_segments, building a new layer of interior nodes, and
  4407         -** recursively ask for their root into.
  4408         -*/
  4409         -static int interiorWriterRootInfo(fulltext_vtab *v, InteriorWriter *pWriter,
  4410         -                                  char **ppRootInfo, int *pnRootInfo,
  4411         -                                  sqlite_int64 *piEndBlockid){
  4412         -  InteriorBlock *block = pWriter->first;
  4413         -  sqlite_int64 iBlockid = 0;
  4414         -  int rc;
  4415         -
  4416         -  /* If we can fit the segment inline */
  4417         -  if( block==pWriter->last && block->data.nData<ROOT_MAX ){
  4418         -    *ppRootInfo = block->data.pData;
  4419         -    *pnRootInfo = block->data.nData;
  4420         -    return SQLITE_OK;
  4421         -  }
  4422         -
  4423         -  /* Flush the first block to %_segments, and create a new level of
  4424         -  ** interior node.
  4425         -  */
  4426         -  ASSERT_VALID_INTERIOR_BLOCK(block);
  4427         -  rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid);
  4428         -  if( rc!=SQLITE_OK ) return rc;
  4429         -  *piEndBlockid = iBlockid;
  4430         -
  4431         -  pWriter->parentWriter = sqlite3_malloc(sizeof(*pWriter->parentWriter));
  4432         -  interiorWriterInit(pWriter->iHeight+1,
  4433         -                     block->term.pData, block->term.nData,
  4434         -                     iBlockid, pWriter->parentWriter);
  4435         -
  4436         -  /* Flush additional blocks and append to the higher interior
  4437         -  ** node.
  4438         -  */
  4439         -  for(block=block->next; block!=NULL; block=block->next){
  4440         -    ASSERT_VALID_INTERIOR_BLOCK(block);
  4441         -    rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid);
  4442         -    if( rc!=SQLITE_OK ) return rc;
  4443         -    *piEndBlockid = iBlockid;
  4444         -
  4445         -    interiorWriterAppend(pWriter->parentWriter,
  4446         -                         block->term.pData, block->term.nData, iBlockid);
  4447         -  }
  4448         -
  4449         -  /* Parent node gets the chance to be the root. */
  4450         -  return interiorWriterRootInfo(v, pWriter->parentWriter,
  4451         -                                ppRootInfo, pnRootInfo, piEndBlockid);
  4452         -}
  4453         -
  4454         -/****************************************************************/
  4455         -/* InteriorReader is used to read off the data from an interior node
  4456         -** (see comment at top of file for the format).
  4457         -*/
  4458         -typedef struct InteriorReader {
  4459         -  const char *pData;
  4460         -  int nData;
  4461         -
  4462         -  DataBuffer term;          /* previous term, for decoding term delta. */
  4463         -
  4464         -  sqlite_int64 iBlockid;
  4465         -} InteriorReader;
  4466         -
  4467         -static void interiorReaderDestroy(InteriorReader *pReader){
  4468         -  dataBufferDestroy(&pReader->term);
  4469         -  SCRAMBLE(pReader);
  4470         -}
  4471         -
  4472         -/* TODO(shess) The assertions are great, but what if we're in NDEBUG
  4473         -** and the blob is empty or otherwise contains suspect data?
  4474         -*/
  4475         -static void interiorReaderInit(const char *pData, int nData,
  4476         -                               InteriorReader *pReader){
  4477         -  int n, nTerm;
  4478         -
  4479         -  /* Require at least the leading flag byte */
  4480         -  assert( nData>0 );
  4481         -  assert( pData[0]!='\0' );
  4482         -
  4483         -  CLEAR(pReader);
  4484         -
  4485         -  /* Decode the base blockid, and set the cursor to the first term. */
  4486         -  n = fts3GetVarint(pData+1, &pReader->iBlockid);
  4487         -  assert( 1+n<=nData );
  4488         -  pReader->pData = pData+1+n;
  4489         -  pReader->nData = nData-(1+n);
  4490         -
  4491         -  /* A single-child interior node (such as when a leaf node was too
  4492         -  ** large for the segment directory) won't have any terms.
  4493         -  ** Otherwise, decode the first term.
  4494         -  */
  4495         -  if( pReader->nData==0 ){
  4496         -    dataBufferInit(&pReader->term, 0);
  4497         -  }else{
  4498         -    n = fts3GetVarint32(pReader->pData, &nTerm);
  4499         -    dataBufferInit(&pReader->term, nTerm);
  4500         -    dataBufferReplace(&pReader->term, pReader->pData+n, nTerm);
  4501         -    assert( n+nTerm<=pReader->nData );
  4502         -    pReader->pData += n+nTerm;
  4503         -    pReader->nData -= n+nTerm;
  4504         -  }
  4505         -}
  4506         -
  4507         -static int interiorReaderAtEnd(InteriorReader *pReader){
  4508         -  return pReader->term.nData==0;
  4509         -}
  4510         -
  4511         -static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){
  4512         -  return pReader->iBlockid;
  4513         -}
  4514         -
  4515         -static int interiorReaderTermBytes(InteriorReader *pReader){
  4516         -  assert( !interiorReaderAtEnd(pReader) );
  4517         -  return pReader->term.nData;
  4518         -}
  4519         -static const char *interiorReaderTerm(InteriorReader *pReader){
  4520         -  assert( !interiorReaderAtEnd(pReader) );
  4521         -  return pReader->term.pData;
  4522         -}
  4523         -
  4524         -/* Step forward to the next term in the node. */
  4525         -static void interiorReaderStep(InteriorReader *pReader){
  4526         -  assert( !interiorReaderAtEnd(pReader) );
  4527         -
  4528         -  /* If the last term has been read, signal eof, else construct the
  4529         -  ** next term.
  4530         -  */
  4531         -  if( pReader->nData==0 ){
  4532         -    dataBufferReset(&pReader->term);
  4533         -  }else{
  4534         -    int n, nPrefix, nSuffix;
  4535         -
  4536         -    n = fts3GetVarint32(pReader->pData, &nPrefix);
  4537         -    n += fts3GetVarint32(pReader->pData+n, &nSuffix);
  4538         -
  4539         -    /* Truncate the current term and append suffix data. */
  4540         -    pReader->term.nData = nPrefix;
  4541         -    dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix);
  4542         -
  4543         -    assert( n+nSuffix<=pReader->nData );
  4544         -    pReader->pData += n+nSuffix;
  4545         -    pReader->nData -= n+nSuffix;
  4546         -  }
  4547         -  pReader->iBlockid++;
  4548         -}
  4549         -
  4550         -/* Compare the current term to pTerm[nTerm], returning strcmp-style
  4551         -** results.  If isPrefix, equality means equal through nTerm bytes.
  4552         -*/
  4553         -static int interiorReaderTermCmp(InteriorReader *pReader,
  4554         -                                 const char *pTerm, int nTerm, int isPrefix){
  4555         -  const char *pReaderTerm = interiorReaderTerm(pReader);
  4556         -  int nReaderTerm = interiorReaderTermBytes(pReader);
  4557         -  int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm;
  4558         -
  4559         -  if( n==0 ){
  4560         -    if( nReaderTerm>0 ) return -1;
  4561         -    if( nTerm>0 ) return 1;
  4562         -    return 0;
  4563         -  }
  4564         -
  4565         -  c = memcmp(pReaderTerm, pTerm, n);
  4566         -  if( c!=0 ) return c;
  4567         -  if( isPrefix && n==nTerm ) return 0;
  4568         -  return nReaderTerm - nTerm;
  4569         -}
  4570         -
  4571         -/****************************************************************/
  4572         -/* LeafWriter is used to collect terms and associated doclist data
  4573         -** into leaf blocks in %_segments (see top of file for format info).
  4574         -** Expected usage is:
  4575         -**
  4576         -** LeafWriter writer;
  4577         -** leafWriterInit(0, 0, &writer);
  4578         -** while( sorted_terms_left_to_process ){
  4579         -**   // data is doclist data for that term.
  4580         -**   rc = leafWriterStep(v, &writer, pTerm, nTerm, pData, nData);
  4581         -**   if( rc!=SQLITE_OK ) goto err;
  4582         -** }
  4583         -** rc = leafWriterFinalize(v, &writer);
  4584         -**err:
  4585         -** leafWriterDestroy(&writer);
  4586         -** return rc;
  4587         -**
  4588         -** leafWriterStep() may write a collected leaf out to %_segments.
  4589         -** leafWriterFinalize() finishes writing any buffered data and stores
  4590         -** a root node in %_segdir.  leafWriterDestroy() frees all buffers and
  4591         -** InteriorWriters allocated as part of writing this segment.
  4592         -**
  4593         -** TODO(shess) Document leafWriterStepMerge().
  4594         -*/
  4595         -
  4596         -/* Put terms with data this big in their own block. */
  4597         -#define STANDALONE_MIN 1024
  4598         -
  4599         -/* Keep leaf blocks below this size. */
  4600         -#define LEAF_MAX 2048
  4601         -
  4602         -typedef struct LeafWriter {
  4603         -  int iLevel;
  4604         -  int idx;
  4605         -  sqlite_int64 iStartBlockid;     /* needed to create the root info */
  4606         -  sqlite_int64 iEndBlockid;       /* when we're done writing. */
  4607         -
  4608         -  DataBuffer term;                /* previous encoded term */
  4609         -  DataBuffer data;                /* encoding buffer */
  4610         -
  4611         -  /* bytes of first term in the current node which distinguishes that
  4612         -  ** term from the last term of the previous node.
  4613         -  */
  4614         -  int nTermDistinct;
  4615         -
  4616         -  InteriorWriter parentWriter;    /* if we overflow */
  4617         -  int has_parent;
  4618         -} LeafWriter;
  4619         -
  4620         -static void leafWriterInit(int iLevel, int idx, LeafWriter *pWriter){
  4621         -  CLEAR(pWriter);
  4622         -  pWriter->iLevel = iLevel;
  4623         -  pWriter->idx = idx;
  4624         -
  4625         -  dataBufferInit(&pWriter->term, 32);
  4626         -
  4627         -  /* Start out with a reasonably sized block, though it can grow. */
  4628         -  dataBufferInit(&pWriter->data, LEAF_MAX);
  4629         -}
  4630         -
  4631         -#ifndef NDEBUG
  4632         -/* Verify that the data is readable as a leaf node. */
  4633         -static void leafNodeValidate(const char *pData, int nData){
  4634         -  int n, iDummy;
  4635         -
  4636         -  if( nData==0 ) return;
  4637         -  assert( nData>0 );
  4638         -  assert( pData!=0 );
  4639         -  assert( pData+nData>pData );
  4640         -
  4641         -  /* Must lead with a varint(0) */
  4642         -  n = fts3GetVarint32(pData, &iDummy);
  4643         -  assert( iDummy==0 );
  4644         -  assert( n>0 );
  4645         -  assert( n<nData );
  4646         -  pData += n;
  4647         -  nData -= n;
  4648         -
  4649         -  /* Leading term length and data must fit in buffer. */
  4650         -  n = fts3GetVarint32(pData, &iDummy);
  4651         -  assert( n>0 );
  4652         -  assert( iDummy>0 );
  4653         -  assert( n+iDummy>0 );
  4654         -  assert( n+iDummy<nData );
  4655         -  pData += n+iDummy;
  4656         -  nData -= n+iDummy;
  4657         -
  4658         -  /* Leading term's doclist length and data must fit. */
  4659         -  n = fts3GetVarint32(pData, &iDummy);
  4660         -  assert( n>0 );
  4661         -  assert( iDummy>0 );
  4662         -  assert( n+iDummy>0 );
  4663         -  assert( n+iDummy<=nData );
  4664         -  ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL);
  4665         -  pData += n+iDummy;
  4666         -  nData -= n+iDummy;
  4667         -
  4668         -  /* Verify that trailing terms and doclists also are readable. */
  4669         -  while( nData!=0 ){
  4670         -    n = fts3GetVarint32(pData, &iDummy);
  4671         -    assert( n>0 );
  4672         -    assert( iDummy>=0 );
  4673         -    assert( n<nData );
  4674         -    pData += n;
  4675         -    nData -= n;
  4676         -    n = fts3GetVarint32(pData, &iDummy);
  4677         -    assert( n>0 );
  4678         -    assert( iDummy>0 );
  4679         -    assert( n+iDummy>0 );
  4680         -    assert( n+iDummy<nData );
  4681         -    pData += n+iDummy;
  4682         -    nData -= n+iDummy;
  4683         -
  4684         -    n = fts3GetVarint32(pData, &iDummy);
  4685         -    assert( n>0 );
  4686         -    assert( iDummy>0 );
  4687         -    assert( n+iDummy>0 );
  4688         -    assert( n+iDummy<=nData );
  4689         -    ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL);
  4690         -    pData += n+iDummy;
  4691         -    nData -= n+iDummy;
  4692         -  }
  4693         -}
  4694         -#define ASSERT_VALID_LEAF_NODE(p, n) leafNodeValidate(p, n)
  4695         -#else
  4696         -#define ASSERT_VALID_LEAF_NODE(p, n) assert( 1 )
  4697         -#endif
  4698         -
  4699         -/* Flush the current leaf node to %_segments, and adding the resulting
  4700         -** blockid and the starting term to the interior node which will
  4701         -** contain it.
  4702         -*/
  4703         -static int leafWriterInternalFlush(fulltext_vtab *v, LeafWriter *pWriter,
  4704         -                                   int iData, int nData){
  4705         -  sqlite_int64 iBlockid = 0;
  4706         -  const char *pStartingTerm;
  4707         -  int nStartingTerm, rc, n;
  4708         -
  4709         -  /* Must have the leading varint(0) flag, plus at least some
  4710         -  ** valid-looking data.
  4711         -  */
  4712         -  assert( nData>2 );
  4713         -  assert( iData>=0 );
  4714         -  assert( iData+nData<=pWriter->data.nData );
  4715         -  ASSERT_VALID_LEAF_NODE(pWriter->data.pData+iData, nData);
  4716         -
  4717         -  rc = block_insert(v, pWriter->data.pData+iData, nData, &iBlockid);
  4718         -  if( rc!=SQLITE_OK ) return rc;
  4719         -  assert( iBlockid!=0 );
  4720         -
  4721         -  /* Reconstruct the first term in the leaf for purposes of building
  4722         -  ** the interior node.
  4723         -  */
  4724         -  n = fts3GetVarint32(pWriter->data.pData+iData+1, &nStartingTerm);
  4725         -  pStartingTerm = pWriter->data.pData+iData+1+n;
  4726         -  assert( pWriter->data.nData>iData+1+n+nStartingTerm );
  4727         -  assert( pWriter->nTermDistinct>0 );
  4728         -  assert( pWriter->nTermDistinct<=nStartingTerm );
  4729         -  nStartingTerm = pWriter->nTermDistinct;
  4730         -
  4731         -  if( pWriter->has_parent ){
  4732         -    interiorWriterAppend(&pWriter->parentWriter,
  4733         -                         pStartingTerm, nStartingTerm, iBlockid);
  4734         -  }else{
  4735         -    interiorWriterInit(1, pStartingTerm, nStartingTerm, iBlockid,
  4736         -                       &pWriter->parentWriter);
  4737         -    pWriter->has_parent = 1;
  4738         -  }
  4739         -
  4740         -  /* Track the span of this segment's leaf nodes. */
  4741         -  if( pWriter->iEndBlockid==0 ){
  4742         -    pWriter->iEndBlockid = pWriter->iStartBlockid = iBlockid;
  4743         -  }else{
  4744         -    pWriter->iEndBlockid++;
  4745         -    assert( iBlockid==pWriter->iEndBlockid );
  4746         -  }
  4747         -
  4748         -  return SQLITE_OK;
  4749         -}
  4750         -static int leafWriterFlush(fulltext_vtab *v, LeafWriter *pWriter){
  4751         -  int rc = leafWriterInternalFlush(v, pWriter, 0, pWriter->data.nData);
  4752         -  if( rc!=SQLITE_OK ) return rc;
  4753         -
  4754         -  /* Re-initialize the output buffer. */
  4755         -  dataBufferReset(&pWriter->data);
  4756         -
  4757         -  return SQLITE_OK;
  4758         -}
  4759         -
  4760         -/* Fetch the root info for the segment.  If the entire leaf fits
  4761         -** within ROOT_MAX, then it will be returned directly, otherwise it
  4762         -** will be flushed and the root info will be returned from the
  4763         -** interior node.  *piEndBlockid is set to the blockid of the last
  4764         -** interior or leaf node written to disk (0 if none are written at
  4765         -** all).
  4766         -*/
  4767         -static int leafWriterRootInfo(fulltext_vtab *v, LeafWriter *pWriter,
  4768         -                              char **ppRootInfo, int *pnRootInfo,
  4769         -                              sqlite_int64 *piEndBlockid){
  4770         -  /* we can fit the segment entirely inline */
  4771         -  if( !pWriter->has_parent && pWriter->data.nData<ROOT_MAX ){
  4772         -    *ppRootInfo = pWriter->data.pData;
  4773         -    *pnRootInfo = pWriter->data.nData;
  4774         -    *piEndBlockid = 0;
  4775         -    return SQLITE_OK;
  4776         -  }
  4777         -
  4778         -  /* Flush remaining leaf data. */
  4779         -  if( pWriter->data.nData>0 ){
  4780         -    int rc = leafWriterFlush(v, pWriter);
  4781         -    if( rc!=SQLITE_OK ) return rc;
  4782         -  }
  4783         -
  4784         -  /* We must have flushed a leaf at some point. */
  4785         -  assert( pWriter->has_parent );
  4786         -
  4787         -  /* Tenatively set the end leaf blockid as the end blockid.  If the
  4788         -  ** interior node can be returned inline, this will be the final
  4789         -  ** blockid, otherwise it will be overwritten by
  4790         -  ** interiorWriterRootInfo().
  4791         -  */
  4792         -  *piEndBlockid = pWriter->iEndBlockid;
  4793         -
  4794         -  return interiorWriterRootInfo(v, &pWriter->parentWriter,
  4795         -                                ppRootInfo, pnRootInfo, piEndBlockid);
  4796         -}
  4797         -
  4798         -/* Collect the rootInfo data and store it into the segment directory.
  4799         -** This has the effect of flushing the segment's leaf data to
  4800         -** %_segments, and also flushing any interior nodes to %_segments.
  4801         -*/
  4802         -static int leafWriterFinalize(fulltext_vtab *v, LeafWriter *pWriter){
  4803         -  sqlite_int64 iEndBlockid;
  4804         -  char *pRootInfo;
  4805         -  int rc, nRootInfo;
  4806         -
  4807         -  rc = leafWriterRootInfo(v, pWriter, &pRootInfo, &nRootInfo, &iEndBlockid);
  4808         -  if( rc!=SQLITE_OK ) return rc;
  4809         -
  4810         -  /* Don't bother storing an entirely empty segment. */
  4811         -  if( iEndBlockid==0 && nRootInfo==0 ) return SQLITE_OK;
  4812         -
  4813         -  return segdir_set(v, pWriter->iLevel, pWriter->idx,
  4814         -                    pWriter->iStartBlockid, pWriter->iEndBlockid,
  4815         -                    iEndBlockid, pRootInfo, nRootInfo);
  4816         -}
  4817         -
  4818         -static void leafWriterDestroy(LeafWriter *pWriter){
  4819         -  if( pWriter->has_parent ) interiorWriterDestroy(&pWriter->parentWriter);
  4820         -  dataBufferDestroy(&pWriter->term);
  4821         -  dataBufferDestroy(&pWriter->data);
  4822         -}
  4823         -
  4824         -/* Encode a term into the leafWriter, delta-encoding as appropriate.
  4825         -** Returns the length of the new term which distinguishes it from the
  4826         -** previous term, which can be used to set nTermDistinct when a node
  4827         -** boundary is crossed.
  4828         -*/
  4829         -static int leafWriterEncodeTerm(LeafWriter *pWriter,
  4830         -                                const char *pTerm, int nTerm){
  4831         -  char c[VARINT_MAX+VARINT_MAX];
  4832         -  int n, nPrefix = 0;
  4833         -
  4834         -  assert( nTerm>0 );
  4835         -  while( nPrefix<pWriter->term.nData &&
  4836         -         pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
  4837         -    nPrefix++;
  4838         -    /* Failing this implies that the terms weren't in order. */
  4839         -    assert( nPrefix<nTerm );
  4840         -  }
  4841         -
  4842         -  if( pWriter->data.nData==0 ){
  4843         -    /* Encode the node header and leading term as:
  4844         -    **  varint(0)
  4845         -    **  varint(nTerm)
  4846         -    **  char pTerm[nTerm]
  4847         -    */
  4848         -    n = fts3PutVarint(c, '\0');
  4849         -    n += fts3PutVarint(c+n, nTerm);
  4850         -    dataBufferAppend2(&pWriter->data, c, n, pTerm, nTerm);
  4851         -  }else{
  4852         -    /* Delta-encode the term as:
  4853         -    **  varint(nPrefix)
  4854         -    **  varint(nSuffix)
  4855         -    **  char pTermSuffix[nSuffix]
  4856         -    */
  4857         -    n = fts3PutVarint(c, nPrefix);
  4858         -    n += fts3PutVarint(c+n, nTerm-nPrefix);
  4859         -    dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix);
  4860         -  }
  4861         -  dataBufferReplace(&pWriter->term, pTerm, nTerm);
  4862         -
  4863         -  return nPrefix+1;
  4864         -}
  4865         -
  4866         -/* Used to avoid a memmove when a large amount of doclist data is in
  4867         -** the buffer.  This constructs a node and term header before
  4868         -** iDoclistData and flushes the resulting complete node using
  4869         -** leafWriterInternalFlush().
  4870         -*/
  4871         -static int leafWriterInlineFlush(fulltext_vtab *v, LeafWriter *pWriter,
  4872         -                                 const char *pTerm, int nTerm,
  4873         -                                 int iDoclistData){
  4874         -  char c[VARINT_MAX+VARINT_MAX];
  4875         -  int iData, n = fts3PutVarint(c, 0);
  4876         -  n += fts3PutVarint(c+n, nTerm);
  4877         -
  4878         -  /* There should always be room for the header.  Even if pTerm shared
  4879         -  ** a substantial prefix with the previous term, the entire prefix
  4880         -  ** could be constructed from earlier data in the doclist, so there
  4881         -  ** should be room.
  4882         -  */
  4883         -  assert( iDoclistData>=n+nTerm );
  4884         -
  4885         -  iData = iDoclistData-(n+nTerm);
  4886         -  memcpy(pWriter->data.pData+iData, c, n);
  4887         -  memcpy(pWriter->data.pData+iData+n, pTerm, nTerm);
  4888         -
  4889         -  return leafWriterInternalFlush(v, pWriter, iData, pWriter->data.nData-iData);
  4890         -}
  4891         -
  4892         -/* Push pTerm[nTerm] along with the doclist data to the leaf layer of
  4893         -** %_segments.
  4894         -*/
  4895         -static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter,
  4896         -                               const char *pTerm, int nTerm,
  4897         -                               DLReader *pReaders, int nReaders){
  4898         -  char c[VARINT_MAX+VARINT_MAX];
  4899         -  int iTermData = pWriter->data.nData, iDoclistData;
  4900         -  int i, nData, n, nActualData, nActual, rc, nTermDistinct;
  4901         -
  4902         -  ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData);
  4903         -  nTermDistinct = leafWriterEncodeTerm(pWriter, pTerm, nTerm);
  4904         -
  4905         -  /* Remember nTermDistinct if opening a new node. */
  4906         -  if( iTermData==0 ) pWriter->nTermDistinct = nTermDistinct;
  4907         -
  4908         -  iDoclistData = pWriter->data.nData;
  4909         -
  4910         -  /* Estimate the length of the merged doclist so we can leave space
  4911         -  ** to encode it.
  4912         -  */
  4913         -  for(i=0, nData=0; i<nReaders; i++){
  4914         -    nData += dlrAllDataBytes(&pReaders[i]);
  4915         -  }
  4916         -  n = fts3PutVarint(c, nData);
  4917         -  dataBufferAppend(&pWriter->data, c, n);
  4918         -
  4919         -  docListMerge(&pWriter->data, pReaders, nReaders);
  4920         -  ASSERT_VALID_DOCLIST(DL_DEFAULT,
  4921         -                       pWriter->data.pData+iDoclistData+n,
  4922         -                       pWriter->data.nData-iDoclistData-n, NULL);
  4923         -
  4924         -  /* The actual amount of doclist data at this point could be smaller
  4925         -  ** than the length we encoded.  Additionally, the space required to
  4926         -  ** encode this length could be smaller.  For small doclists, this is
  4927         -  ** not a big deal, we can just use memmove() to adjust things.
  4928         -  */
  4929         -  nActualData = pWriter->data.nData-(iDoclistData+n);
  4930         -  nActual = fts3PutVarint(c, nActualData);
  4931         -  assert( nActualData<=nData );
  4932         -  assert( nActual<=n );
  4933         -
  4934         -  /* If the new doclist is big enough for force a standalone leaf
  4935         -  ** node, we can immediately flush it inline without doing the
  4936         -  ** memmove().
  4937         -  */
  4938         -  /* TODO(shess) This test matches leafWriterStep(), which does this
  4939         -  ** test before it knows the cost to varint-encode the term and
  4940         -  ** doclist lengths.  At some point, change to
  4941         -  ** pWriter->data.nData-iTermData>STANDALONE_MIN.
  4942         -  */
  4943         -  if( nTerm+nActualData>STANDALONE_MIN ){
  4944         -    /* Push leaf node from before this term. */
  4945         -    if( iTermData>0 ){
  4946         -      rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
  4947         -      if( rc!=SQLITE_OK ) return rc;
  4948         -
  4949         -      pWriter->nTermDistinct = nTermDistinct;
  4950         -    }
  4951         -
  4952         -    /* Fix the encoded doclist length. */
  4953         -    iDoclistData += n - nActual;
  4954         -    memcpy(pWriter->data.pData+iDoclistData, c, nActual);
  4955         -
  4956         -    /* Push the standalone leaf node. */
  4957         -    rc = leafWriterInlineFlush(v, pWriter, pTerm, nTerm, iDoclistData);
  4958         -    if( rc!=SQLITE_OK ) return rc;
  4959         -
  4960         -    /* Leave the node empty. */
  4961         -    dataBufferReset(&pWriter->data);
  4962         -
  4963         -    return rc;
  4964         -  }
  4965         -
  4966         -  /* At this point, we know that the doclist was small, so do the
  4967         -  ** memmove if indicated.
  4968         -  */
  4969         -  if( nActual<n ){
  4970         -    memmove(pWriter->data.pData+iDoclistData+nActual,
  4971         -            pWriter->data.pData+iDoclistData+n,
  4972         -            pWriter->data.nData-(iDoclistData+n));
  4973         -    pWriter->data.nData -= n-nActual;
  4974         -  }
  4975         -
  4976         -  /* Replace written length with actual length. */
  4977         -  memcpy(pWriter->data.pData+iDoclistData, c, nActual);
  4978         -
  4979         -  /* If the node is too large, break things up. */
  4980         -  /* TODO(shess) This test matches leafWriterStep(), which does this
  4981         -  ** test before it knows the cost to varint-encode the term and
  4982         -  ** doclist lengths.  At some point, change to
  4983         -  ** pWriter->data.nData>LEAF_MAX.
  4984         -  */
  4985         -  if( iTermData+nTerm+nActualData>LEAF_MAX ){
  4986         -    /* Flush out the leading data as a node */
  4987         -    rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
  4988         -    if( rc!=SQLITE_OK ) return rc;
  4989         -
  4990         -    pWriter->nTermDistinct = nTermDistinct;
  4991         -
  4992         -    /* Rebuild header using the current term */
  4993         -    n = fts3PutVarint(pWriter->data.pData, 0);
  4994         -    n += fts3PutVarint(pWriter->data.pData+n, nTerm);
  4995         -    memcpy(pWriter->data.pData+n, pTerm, nTerm);
  4996         -    n += nTerm;
  4997         -
  4998         -    /* There should always be room, because the previous encoding
  4999         -    ** included all data necessary to construct the term.
  5000         -    */
  5001         -    assert( n<iDoclistData );
  5002         -    /* So long as STANDALONE_MIN is half or less of LEAF_MAX, the
  5003         -    ** following memcpy() is safe (as opposed to needing a memmove).
  5004         -    */
  5005         -    assert( 2*STANDALONE_MIN<=LEAF_MAX );
  5006         -    assert( n+pWriter->data.nData-iDoclistData<iDoclistData );
  5007         -    memcpy(pWriter->data.pData+n,
  5008         -           pWriter->data.pData+iDoclistData,
  5009         -           pWriter->data.nData-iDoclistData);
  5010         -    pWriter->data.nData -= iDoclistData-n;
  5011         -  }
  5012         -  ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData);
  5013         -
  5014         -  return SQLITE_OK;
  5015         -}
  5016         -
  5017         -/* Push pTerm[nTerm] along with the doclist data to the leaf layer of
  5018         -** %_segments.
  5019         -*/
  5020         -/* TODO(shess) Revise writeZeroSegment() so that doclists are
  5021         -** constructed directly in pWriter->data.
  5022         -*/
  5023         -static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter,
  5024         -                          const char *pTerm, int nTerm,
  5025         -                          const char *pData, int nData){
  5026         -  int rc;
  5027         -  DLReader reader;
  5028         -
  5029         -  dlrInit(&reader, DL_DEFAULT, pData, nData);
  5030         -  rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1);
  5031         -  dlrDestroy(&reader);
  5032         -
  5033         -  return rc;
  5034         -}
  5035         -
  5036         -
  5037         -/****************************************************************/
  5038         -/* LeafReader is used to iterate over an individual leaf node. */
  5039         -typedef struct LeafReader {
  5040         -  DataBuffer term;          /* copy of current term. */
  5041         -
  5042         -  const char *pData;        /* data for current term. */
  5043         -  int nData;
  5044         -} LeafReader;
  5045         -
  5046         -static void leafReaderDestroy(LeafReader *pReader){
  5047         -  dataBufferDestroy(&pReader->term);
  5048         -  SCRAMBLE(pReader);
  5049         -}
  5050         -
  5051         -static int leafReaderAtEnd(LeafReader *pReader){
  5052         -  return pReader->nData<=0;
  5053         -}
  5054         -
  5055         -/* Access the current term. */
  5056         -static int leafReaderTermBytes(LeafReader *pReader){
  5057         -  return pReader->term.nData;
  5058         -}
  5059         -static const char *leafReaderTerm(LeafReader *pReader){
  5060         -  assert( pReader->term.nData>0 );
  5061         -  return pReader->term.pData;
  5062         -}
  5063         -
  5064         -/* Access the doclist data for the current term. */
  5065         -static int leafReaderDataBytes(LeafReader *pReader){
  5066         -  int nData;
  5067         -  assert( pReader->term.nData>0 );
  5068         -  fts3GetVarint32(pReader->pData, &nData);
  5069         -  return nData;
  5070         -}
  5071         -static const char *leafReaderData(LeafReader *pReader){
  5072         -  int n, nData;
  5073         -  assert( pReader->term.nData>0 );
  5074         -  n = fts3GetVarint32(pReader->pData, &nData);
  5075         -  return pReader->pData+n;
  5076         -}
  5077         -
  5078         -static void leafReaderInit(const char *pData, int nData,
  5079         -                           LeafReader *pReader){
  5080         -  int nTerm, n;
  5081         -
  5082         -  assert( nData>0 );
  5083         -  assert( pData[0]=='\0' );
  5084         -
  5085         -  CLEAR(pReader);
  5086         -
  5087         -  /* Read the first term, skipping the header byte. */
  5088         -  n = fts3GetVarint32(pData+1, &nTerm);
  5089         -  dataBufferInit(&pReader->term, nTerm);
  5090         -  dataBufferReplace(&pReader->term, pData+1+n, nTerm);
  5091         -
  5092         -  /* Position after the first term. */
  5093         -  assert( 1+n+nTerm<nData );
  5094         -  pReader->pData = pData+1+n+nTerm;
  5095         -  pReader->nData = nData-1-n-nTerm;
  5096         -}
  5097         -
  5098         -/* Step the reader forward to the next term. */
  5099         -static void leafReaderStep(LeafReader *pReader){
  5100         -  int n, nData, nPrefix, nSuffix;
  5101         -  assert( !leafReaderAtEnd(pReader) );
  5102         -
  5103         -  /* Skip previous entry's data block. */
  5104         -  n = fts3GetVarint32(pReader->pData, &nData);
  5105         -  assert( n+nData<=pReader->nData );
  5106         -  pReader->pData += n+nData;
  5107         -  pReader->nData -= n+nData;
  5108         -
  5109         -  if( !leafReaderAtEnd(pReader) ){
  5110         -    /* Construct the new term using a prefix from the old term plus a
  5111         -    ** suffix from the leaf data.
  5112         -    */
  5113         -    n = fts3GetVarint32(pReader->pData, &nPrefix);
  5114         -    n += fts3GetVarint32(pReader->pData+n, &nSuffix);
  5115         -    assert( n+nSuffix<pReader->nData );
  5116         -    pReader->term.nData = nPrefix;
  5117         -    dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix);
  5118         -
  5119         -    pReader->pData += n+nSuffix;
  5120         -    pReader->nData -= n+nSuffix;
  5121         -  }
  5122         -}
  5123         -
  5124         -/* strcmp-style comparison of pReader's current term against pTerm.
  5125         -** If isPrefix, equality means equal through nTerm bytes.
  5126         -*/
  5127         -static int leafReaderTermCmp(LeafReader *pReader,
  5128         -                             const char *pTerm, int nTerm, int isPrefix){
  5129         -  int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm;
  5130         -  if( n==0 ){
  5131         -    if( pReader->term.nData>0 ) return -1;
  5132         -    if(nTerm>0 ) return 1;
  5133         -    return 0;
  5134         -  }
  5135         -
  5136         -  c = memcmp(pReader->term.pData, pTerm, n);
  5137         -  if( c!=0 ) return c;
  5138         -  if( isPrefix && n==nTerm ) return 0;
  5139         -  return pReader->term.nData - nTerm;
  5140         -}
  5141         -
  5142         -
  5143         -/****************************************************************/
  5144         -/* LeavesReader wraps LeafReader to allow iterating over the entire
  5145         -** leaf layer of the tree.
  5146         -*/
  5147         -typedef struct LeavesReader {
  5148         -  int idx;                  /* Index within the segment. */
  5149         -
  5150         -  sqlite3_stmt *pStmt;      /* Statement we're streaming leaves from. */
  5151         -  int eof;                  /* we've seen SQLITE_DONE from pStmt. */
  5152         -
  5153         -  LeafReader leafReader;    /* reader for the current leaf. */
  5154         -  DataBuffer rootData;      /* root data for inline. */
  5155         -} LeavesReader;
  5156         -
  5157         -/* Access the current term. */
  5158         -static int leavesReaderTermBytes(LeavesReader *pReader){
  5159         -  assert( !pReader->eof );
  5160         -  return leafReaderTermBytes(&pReader->leafReader);
  5161         -}
  5162         -static const char *leavesReaderTerm(LeavesReader *pReader){
  5163         -  assert( !pReader->eof );
  5164         -  return leafReaderTerm(&pReader->leafReader);
  5165         -}
  5166         -
  5167         -/* Access the doclist data for the current term. */
  5168         -static int leavesReaderDataBytes(LeavesReader *pReader){
  5169         -  assert( !pReader->eof );
  5170         -  return leafReaderDataBytes(&pReader->leafReader);
  5171         -}
  5172         -static const char *leavesReaderData(LeavesReader *pReader){
  5173         -  assert( !pReader->eof );
  5174         -  return leafReaderData(&pReader->leafReader);
  5175         -}
  5176         -
  5177         -static int leavesReaderAtEnd(LeavesReader *pReader){
  5178         -  return pReader->eof;
  5179         -}
  5180         -
  5181         -/* loadSegmentLeaves() may not read all the way to SQLITE_DONE, thus
  5182         -** leaving the statement handle open, which locks the table.
  5183         -*/
  5184         -/* TODO(shess) This "solution" is not satisfactory.  Really, there
  5185         -** should be check-in function for all statement handles which
  5186         -** arranges to call sqlite3_reset().  This most likely will require
  5187         -** modification to control flow all over the place, though, so for now
  5188         -** just punt.
  5189         -**
  5190         -** Note the the current system assumes that segment merges will run to
  5191         -** completion, which is why this particular probably hasn't arisen in
  5192         -** this case.  Probably a brittle assumption.
  5193         -*/
  5194         -static int leavesReaderReset(LeavesReader *pReader){
  5195         -  return sqlite3_reset(pReader->pStmt);
  5196         -}
  5197         -
  5198         -static void leavesReaderDestroy(LeavesReader *pReader){
  5199         -  /* If idx is -1, that means we're using a non-cached statement
  5200         -  ** handle in the optimize() case, so we need to release it.
  5201         -  */
  5202         -  if( pReader->pStmt!=NULL && pReader->idx==-1 ){
  5203         -    sqlite3_finalize(pReader->pStmt);
  5204         -  }
  5205         -  leafReaderDestroy(&pReader->leafReader);
  5206         -  dataBufferDestroy(&pReader->rootData);
  5207         -  SCRAMBLE(pReader);
  5208         -}
  5209         -
  5210         -/* Initialize pReader with the given root data (if iStartBlockid==0
  5211         -** the leaf data was entirely contained in the root), or from the
  5212         -** stream of blocks between iStartBlockid and iEndBlockid, inclusive.
  5213         -*/
  5214         -static int leavesReaderInit(fulltext_vtab *v,
  5215         -                            int idx,
  5216         -                            sqlite_int64 iStartBlockid,
  5217         -                            sqlite_int64 iEndBlockid,
  5218         -                            const char *pRootData, int nRootData,
  5219         -                            LeavesReader *pReader){
  5220         -  CLEAR(pReader);
  5221         -  pReader->idx = idx;
  5222         -
  5223         -  dataBufferInit(&pReader->rootData, 0);
  5224         -  if( iStartBlockid==0 ){
  5225         -    /* Entire leaf level fit in root data. */
  5226         -    dataBufferReplace(&pReader->rootData, pRootData, nRootData);
  5227         -    leafReaderInit(pReader->rootData.pData, pReader->rootData.nData,
  5228         -                   &pReader->leafReader);
  5229         -  }else{
  5230         -    sqlite3_stmt *s;
  5231         -    int rc = sql_get_leaf_statement(v, idx, &s);
  5232         -    if( rc!=SQLITE_OK ) return rc;
  5233         -
  5234         -    rc = sqlite3_bind_int64(s, 1, iStartBlockid);
  5235         -    if( rc!=SQLITE_OK ) return rc;
  5236         -
  5237         -    rc = sqlite3_bind_int64(s, 2, iEndBlockid);
  5238         -    if( rc!=SQLITE_OK ) return rc;
  5239         -
  5240         -    rc = sqlite3_step(s);
  5241         -    if( rc==SQLITE_DONE ){
  5242         -      pReader->eof = 1;
  5243         -      return SQLITE_OK;
  5244         -    }
  5245         -    if( rc!=SQLITE_ROW ) return rc;
  5246         -
  5247         -    pReader->pStmt = s;
  5248         -    leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0),
  5249         -                   sqlite3_column_bytes(pReader->pStmt, 0),
  5250         -                   &pReader->leafReader);
  5251         -  }
  5252         -  return SQLITE_OK;
  5253         -}
  5254         -
  5255         -/* Step the current leaf forward to the next term.  If we reach the
  5256         -** end of the current leaf, step forward to the next leaf block.
  5257         -*/
  5258         -static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){
  5259         -  assert( !leavesReaderAtEnd(pReader) );
  5260         -  leafReaderStep(&pReader->leafReader);
  5261         -
  5262         -  if( leafReaderAtEnd(&pReader->leafReader) ){
  5263         -    int rc;
  5264         -    if( pReader->rootData.pData ){
  5265         -      pReader->eof = 1;
  5266         -      return SQLITE_OK;
  5267         -    }
  5268         -    rc = sqlite3_step(pReader->pStmt);
  5269         -    if( rc!=SQLITE_ROW ){
  5270         -      pReader->eof = 1;
  5271         -      return rc==SQLITE_DONE ? SQLITE_OK : rc;
  5272         -    }
  5273         -    leafReaderDestroy(&pReader->leafReader);
  5274         -    leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0),
  5275         -                   sqlite3_column_bytes(pReader->pStmt, 0),
  5276         -                   &pReader->leafReader);
  5277         -  }
  5278         -  return SQLITE_OK;
  5279         -}
  5280         -
  5281         -/* Order LeavesReaders by their term, ignoring idx.  Readers at eof
  5282         -** always sort to the end.
  5283         -*/
  5284         -static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){
  5285         -  if( leavesReaderAtEnd(lr1) ){
  5286         -    if( leavesReaderAtEnd(lr2) ) return 0;
  5287         -    return 1;
  5288         -  }
  5289         -  if( leavesReaderAtEnd(lr2) ) return -1;
  5290         -
  5291         -  return leafReaderTermCmp(&lr1->leafReader,
  5292         -                           leavesReaderTerm(lr2), leavesReaderTermBytes(lr2),
  5293         -                           0);
  5294         -}
  5295         -
  5296         -/* Similar to leavesReaderTermCmp(), with additional ordering by idx
  5297         -** so that older segments sort before newer segments.
  5298         -*/
  5299         -static int leavesReaderCmp(LeavesReader *lr1, LeavesReader *lr2){
  5300         -  int c = leavesReaderTermCmp(lr1, lr2);
  5301         -  if( c!=0 ) return c;
  5302         -  return lr1->idx-lr2->idx;
  5303         -}
  5304         -
  5305         -/* Assume that pLr[1]..pLr[nLr] are sorted.  Bubble pLr[0] into its
  5306         -** sorted position.
  5307         -*/
  5308         -static void leavesReaderReorder(LeavesReader *pLr, int nLr){
  5309         -  while( nLr>1 && leavesReaderCmp(pLr, pLr+1)>0 ){
  5310         -    LeavesReader tmp = pLr[0];
  5311         -    pLr[0] = pLr[1];
  5312         -    pLr[1] = tmp;
  5313         -    nLr--;
  5314         -    pLr++;
  5315         -  }
  5316         -}
  5317         -
  5318         -/* Initializes pReaders with the segments from level iLevel, returning
  5319         -** the number of segments in *piReaders.  Leaves pReaders in sorted
  5320         -** order.
  5321         -*/
  5322         -static int leavesReadersInit(fulltext_vtab *v, int iLevel,
  5323         -                             LeavesReader *pReaders, int *piReaders){
  5324         -  sqlite3_stmt *s;
  5325         -  int i, rc = sql_get_statement(v, SEGDIR_SELECT_LEVEL_STMT, &s);
  5326         -  if( rc!=SQLITE_OK ) return rc;
  5327         -
  5328         -  rc = sqlite3_bind_int(s, 1, iLevel);
  5329         -  if( rc!=SQLITE_OK ) return rc;
  5330         -
  5331         -  i = 0;
  5332         -  while( (rc = sqlite3_step(s))==SQLITE_ROW ){
  5333         -    sqlite_int64 iStart = sqlite3_column_int64(s, 0);
  5334         -    sqlite_int64 iEnd = sqlite3_column_int64(s, 1);
  5335         -    const char *pRootData = sqlite3_column_blob(s, 2);
  5336         -    int nRootData = sqlite3_column_bytes(s, 2);
  5337         -
  5338         -    assert( i<MERGE_COUNT );
  5339         -    rc = leavesReaderInit(v, i, iStart, iEnd, pRootData, nRootData,
  5340         -                          &pReaders[i]);
  5341         -    if( rc!=SQLITE_OK ) break;
  5342         -
  5343         -    i++;
  5344         -  }
  5345         -  if( rc!=SQLITE_DONE ){
  5346         -    while( i-->0 ){
  5347         -      leavesReaderDestroy(&pReaders[i]);
  5348         -    }
  5349         -    return rc;
  5350         -  }
  5351         -
  5352         -  *piReaders = i;
  5353         -
  5354         -  /* Leave our results sorted by term, then age. */
  5355         -  while( i-- ){
  5356         -    leavesReaderReorder(pReaders+i, *piReaders-i);
  5357         -  }
  5358         -  return SQLITE_OK;
  5359         -}
  5360         -
  5361         -/* Merge doclists from pReaders[nReaders] into a single doclist, which
  5362         -** is written to pWriter.  Assumes pReaders is ordered oldest to
  5363         -** newest.
  5364         -*/
  5365         -/* TODO(shess) Consider putting this inline in segmentMerge(). */
  5366         -static int leavesReadersMerge(fulltext_vtab *v,
  5367         -                              LeavesReader *pReaders, int nReaders,
  5368         -                              LeafWriter *pWriter){
  5369         -  DLReader dlReaders[MERGE_COUNT];
  5370         -  const char *pTerm = leavesReaderTerm(pReaders);
  5371         -  int i, nTerm = leavesReaderTermBytes(pReaders);
  5372         -
  5373         -  assert( nReaders<=MERGE_COUNT );
  5374         -
  5375         -  for(i=0; i<nReaders; i++){
  5376         -    dlrInit(&dlReaders[i], DL_DEFAULT,
  5377         -            leavesReaderData(pReaders+i),
  5378         -            leavesReaderDataBytes(pReaders+i));
  5379         -  }
  5380         -
  5381         -  return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders);
  5382         -}
  5383         -
  5384         -/* Forward ref due to mutual recursion with segdirNextIndex(). */
  5385         -static int segmentMerge(fulltext_vtab *v, int iLevel);
  5386         -
  5387         -/* Put the next available index at iLevel into *pidx.  If iLevel
  5388         -** already has MERGE_COUNT segments, they are merged to a higher
  5389         -** level to make room.
  5390         -*/
  5391         -static int segdirNextIndex(fulltext_vtab *v, int iLevel, int *pidx){
  5392         -  int rc = segdir_max_index(v, iLevel, pidx);
  5393         -  if( rc==SQLITE_DONE ){              /* No segments at iLevel. */
  5394         -    *pidx = 0;
  5395         -  }else if( rc==SQLITE_ROW ){
  5396         -    if( *pidx==(MERGE_COUNT-1) ){
  5397         -      rc = segmentMerge(v, iLevel);
  5398         -      if( rc!=SQLITE_OK ) return rc;
  5399         -      *pidx = 0;
  5400         -    }else{
  5401         -      (*pidx)++;
  5402         -    }
  5403         -  }else{
  5404         -    return rc;
  5405         -  }
  5406         -  return SQLITE_OK;
  5407         -}
  5408         -
  5409         -/* Merge MERGE_COUNT segments at iLevel into a new segment at
  5410         -** iLevel+1.  If iLevel+1 is already full of segments, those will be
  5411         -** merged to make room.
  5412         -*/
  5413         -static int segmentMerge(fulltext_vtab *v, int iLevel){
  5414         -  LeafWriter writer;
  5415         -  LeavesReader lrs[MERGE_COUNT];
  5416         -  int i, rc, idx = 0;
  5417         -
  5418         -  /* Determine the next available segment index at the next level,
  5419         -  ** merging as necessary.
  5420         -  */
  5421         -  rc = segdirNextIndex(v, iLevel+1, &idx);
  5422         -  if( rc!=SQLITE_OK ) return rc;
  5423         -
  5424         -  /* TODO(shess) This assumes that we'll always see exactly
  5425         -  ** MERGE_COUNT segments to merge at a given level.  That will be
  5426         -  ** broken if we allow the developer to request preemptive or
  5427         -  ** deferred merging.
  5428         -  */
  5429         -  memset(&lrs, '\0', sizeof(lrs));
  5430         -  rc = leavesReadersInit(v, iLevel, lrs, &i);
  5431         -  if( rc!=SQLITE_OK ) return rc;
  5432         -  assert( i==MERGE_COUNT );
  5433         -
  5434         -  leafWriterInit(iLevel+1, idx, &writer);
  5435         -
  5436         -  /* Since leavesReaderReorder() pushes readers at eof to the end,
  5437         -  ** when the first reader is empty, all will be empty.
  5438         -  */
  5439         -  while( !leavesReaderAtEnd(lrs) ){
  5440         -    /* Figure out how many readers share their next term. */
  5441         -    for(i=1; i<MERGE_COUNT && !leavesReaderAtEnd(lrs+i); i++){
  5442         -      if( 0!=leavesReaderTermCmp(lrs, lrs+i) ) break;
  5443         -    }
  5444         -
  5445         -    rc = leavesReadersMerge(v, lrs, i, &writer);
  5446         -    if( rc!=SQLITE_OK ) goto err;
  5447         -
  5448         -    /* Step forward those that were merged. */
  5449         -    while( i-->0 ){
  5450         -      rc = leavesReaderStep(v, lrs+i);
  5451         -      if( rc!=SQLITE_OK ) goto err;
  5452         -
  5453         -      /* Reorder by term, then by age. */
  5454         -      leavesReaderReorder(lrs+i, MERGE_COUNT-i);
  5455         -    }
  5456         -  }
  5457         -
  5458         -  for(i=0; i<MERGE_COUNT; i++){
  5459         -    leavesReaderDestroy(&lrs[i]);
  5460         -  }
  5461         -
  5462         -  rc = leafWriterFinalize(v, &writer);
  5463         -  leafWriterDestroy(&writer);
  5464         -  if( rc!=SQLITE_OK ) return rc;
  5465         -
  5466         -  /* Delete the merged segment data. */
  5467         -  return segdir_delete(v, iLevel);
  5468         -
  5469         - err:
  5470         -  for(i=0; i<MERGE_COUNT; i++){
  5471         -    leavesReaderDestroy(&lrs[i]);
  5472         -  }
  5473         -  leafWriterDestroy(&writer);
  5474         -  return rc;
  5475         -}
  5476         -
  5477         -/* Accumulate the union of *acc and *pData into *acc. */
  5478         -static void docListAccumulateUnion(DataBuffer *acc,
  5479         -                                   const char *pData, int nData) {
  5480         -  DataBuffer tmp = *acc;
  5481         -  dataBufferInit(acc, tmp.nData+nData);
  5482         -  docListUnion(tmp.pData, tmp.nData, pData, nData, acc);
  5483         -  dataBufferDestroy(&tmp);
  5484         -}
  5485         -
  5486         -/* TODO(shess) It might be interesting to explore different merge
  5487         -** strategies, here.  For instance, since this is a sorted merge, we
  5488         -** could easily merge many doclists in parallel.  With some
  5489         -** comprehension of the storage format, we could merge all of the
  5490         -** doclists within a leaf node directly from the leaf node's storage.
  5491         -** It may be worthwhile to merge smaller doclists before larger
  5492         -** doclists, since they can be traversed more quickly - but the
  5493         -** results may have less overlap, making them more expensive in a
  5494         -** different way.
  5495         -*/
  5496         -
  5497         -/* Scan pReader for pTerm/nTerm, and merge the term's doclist over
  5498         -** *out (any doclists with duplicate docids overwrite those in *out).
  5499         -** Internal function for loadSegmentLeaf().
  5500         -*/
  5501         -static int loadSegmentLeavesInt(fulltext_vtab *v, LeavesReader *pReader,
  5502         -                                const char *pTerm, int nTerm, int isPrefix,
  5503         -                                DataBuffer *out){
  5504         -  /* doclist data is accumulated into pBuffers similar to how one does
  5505         -  ** increment in binary arithmetic.  If index 0 is empty, the data is
  5506         -  ** stored there.  If there is data there, it is merged and the
  5507         -  ** results carried into position 1, with further merge-and-carry
  5508         -  ** until an empty position is found.
  5509         -  */
  5510         -  DataBuffer *pBuffers = NULL;
  5511         -  int nBuffers = 0, nMaxBuffers = 0, rc;
  5512         -
  5513         -  assert( nTerm>0 );
  5514         -
  5515         -  for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader);
  5516         -      rc=leavesReaderStep(v, pReader)){
  5517         -    /* TODO(shess) Really want leavesReaderTermCmp(), but that name is
  5518         -    ** already taken to compare the terms of two LeavesReaders.  Think
  5519         -    ** on a better name.  [Meanwhile, break encapsulation rather than
  5520         -    ** use a confusing name.]
  5521         -    */
  5522         -    int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix);
  5523         -    if( c>0 ) break;      /* Past any possible matches. */
  5524         -    if( c==0 ){
  5525         -      const char *pData = leavesReaderData(pReader);
  5526         -      int iBuffer, nData = leavesReaderDataBytes(pReader);
  5527         -
  5528         -      /* Find the first empty buffer. */
  5529         -      for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){
  5530         -        if( 0==pBuffers[iBuffer].nData ) break;
  5531         -      }
  5532         -
  5533         -      /* Out of buffers, add an empty one. */
  5534         -      if( iBuffer==nBuffers ){
  5535         -        if( nBuffers==nMaxBuffers ){
  5536         -          DataBuffer *p;
  5537         -          nMaxBuffers += 20;
  5538         -
  5539         -          /* Manual realloc so we can handle NULL appropriately. */
  5540         -          p = sqlite3_malloc(nMaxBuffers*sizeof(*pBuffers));
  5541         -          if( p==NULL ){
  5542         -            rc = SQLITE_NOMEM;
  5543         -            break;
  5544         -          }
  5545         -
  5546         -          if( nBuffers>0 ){
  5547         -            assert(pBuffers!=NULL);
  5548         -            memcpy(p, pBuffers, nBuffers*sizeof(*pBuffers));
  5549         -            sqlite3_free(pBuffers);
  5550         -          }
  5551         -          pBuffers = p;
  5552         -        }
  5553         -        dataBufferInit(&(pBuffers[nBuffers]), 0);
  5554         -        nBuffers++;
  5555         -      }
  5556         -
  5557         -      /* At this point, must have an empty at iBuffer. */
  5558         -      assert(iBuffer<nBuffers && pBuffers[iBuffer].nData==0);
  5559         -
  5560         -      /* If empty was first buffer, no need for merge logic. */
  5561         -      if( iBuffer==0 ){
  5562         -        dataBufferReplace(&(pBuffers[0]), pData, nData);
  5563         -      }else{
  5564         -        /* pAcc is the empty buffer the merged data will end up in. */
  5565         -        DataBuffer *pAcc = &(pBuffers[iBuffer]);
  5566         -        DataBuffer *p = &(pBuffers[0]);
  5567         -
  5568         -        /* Handle position 0 specially to avoid need to prime pAcc
  5569         -        ** with pData/nData.
  5570         -        */
  5571         -        dataBufferSwap(p, pAcc);
  5572         -        docListAccumulateUnion(pAcc, pData, nData);
  5573         -
  5574         -        /* Accumulate remaining doclists into pAcc. */
  5575         -        for(++p; p<pAcc; ++p){
  5576         -          docListAccumulateUnion(pAcc, p->pData, p->nData);
  5577         -
  5578         -          /* dataBufferReset() could allow a large doclist to blow up
  5579         -          ** our memory requirements.
  5580         -          */
  5581         -          if( p->nCapacity<1024 ){
  5582         -            dataBufferReset(p);
  5583         -          }else{
  5584         -            dataBufferDestroy(p);
  5585         -            dataBufferInit(p, 0);
  5586         -          }
  5587         -        }
  5588         -      }
  5589         -    }
  5590         -  }
  5591         -
  5592         -  /* Union all the doclists together into *out. */
  5593         -  /* TODO(shess) What if *out is big?  Sigh. */
  5594         -  if( rc==SQLITE_OK && nBuffers>0 ){
  5595         -    int iBuffer;
  5596         -    for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){
  5597         -      if( pBuffers[iBuffer].nData>0 ){
  5598         -        if( out->nData==0 ){
  5599         -          dataBufferSwap(out, &(pBuffers[iBuffer]));
  5600         -        }else{
  5601         -          docListAccumulateUnion(out, pBuffers[iBuffer].pData,
  5602         -                                 pBuffers[iBuffer].nData);
  5603         -        }
  5604         -      }
  5605         -    }
  5606         -  }
  5607         -
  5608         -  while( nBuffers-- ){
  5609         -    dataBufferDestroy(&(pBuffers[nBuffers]));
  5610         -  }
  5611         -  if( pBuffers!=NULL ) sqlite3_free(pBuffers);
  5612         -
  5613         -  return rc;
  5614         -}
  5615         -
  5616         -/* Call loadSegmentLeavesInt() with pData/nData as input. */
  5617         -static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData,
  5618         -                           const char *pTerm, int nTerm, int isPrefix,
  5619         -                           DataBuffer *out){
  5620         -  LeavesReader reader;
  5621         -  int rc;
  5622         -
  5623         -  assert( nData>1 );
  5624         -  assert( *pData=='\0' );
  5625         -  rc = leavesReaderInit(v, 0, 0, 0, pData, nData, &reader);
  5626         -  if( rc!=SQLITE_OK ) return rc;
  5627         -
  5628         -  rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out);
  5629         -  leavesReaderReset(&reader);
  5630         -  leavesReaderDestroy(&reader);
  5631         -  return rc;
  5632         -}
  5633         -
  5634         -/* Call loadSegmentLeavesInt() with the leaf nodes from iStartLeaf to
  5635         -** iEndLeaf (inclusive) as input, and merge the resulting doclist into
  5636         -** out.
  5637         -*/
  5638         -static int loadSegmentLeaves(fulltext_vtab *v,
  5639         -                             sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf,
  5640         -                             const char *pTerm, int nTerm, int isPrefix,
  5641         -                             DataBuffer *out){
  5642         -  int rc;
  5643         -  LeavesReader reader;
  5644         -
  5645         -  assert( iStartLeaf<=iEndLeaf );
  5646         -  rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader);
  5647         -  if( rc!=SQLITE_OK ) return rc;
  5648         -
  5649         -  rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out);
  5650         -  leavesReaderReset(&reader);
  5651         -  leavesReaderDestroy(&reader);
  5652         -  return rc;
  5653         -}
  5654         -
  5655         -/* Taking pData/nData as an interior node, find the sequence of child
  5656         -** nodes which could include pTerm/nTerm/isPrefix.  Note that the
  5657         -** interior node terms logically come between the blocks, so there is
  5658         -** one more blockid than there are terms (that block contains terms >=
  5659         -** the last interior-node term).
  5660         -*/
  5661         -/* TODO(shess) The calling code may already know that the end child is
  5662         -** not worth calculating, because the end may be in a later sibling
  5663         -** node.  Consider whether breaking symmetry is worthwhile.  I suspect
  5664         -** it is not worthwhile.
  5665         -*/
  5666         -static void getChildrenContaining(const char *pData, int nData,
  5667         -                                  const char *pTerm, int nTerm, int isPrefix,
  5668         -                                  sqlite_int64 *piStartChild,
  5669         -                                  sqlite_int64 *piEndChild){
  5670         -  InteriorReader reader;
  5671         -
  5672         -  assert( nData>1 );
  5673         -  assert( *pData!='\0' );
  5674         -  interiorReaderInit(pData, nData, &reader);
  5675         -
  5676         -  /* Scan for the first child which could contain pTerm/nTerm. */
  5677         -  while( !interiorReaderAtEnd(&reader) ){
  5678         -    if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break;
  5679         -    interiorReaderStep(&reader);
  5680         -  }
  5681         -  *piStartChild = interiorReaderCurrentBlockid(&reader);
  5682         -
  5683         -  /* Keep scanning to find a term greater than our term, using prefix
  5684         -  ** comparison if indicated.  If isPrefix is false, this will be the
  5685         -  ** same blockid as the starting block.
  5686         -  */
  5687         -  while( !interiorReaderAtEnd(&reader) ){
  5688         -    if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break;
  5689         -    interiorReaderStep(&reader);
  5690         -  }
  5691         -  *piEndChild = interiorReaderCurrentBlockid(&reader);
  5692         -
  5693         -  interiorReaderDestroy(&reader);
  5694         -
  5695         -  /* Children must ascend, and if !prefix, both must be the same. */
  5696         -  assert( *piEndChild>=*piStartChild );
  5697         -  assert( isPrefix || *piStartChild==*piEndChild );
  5698         -}
  5699         -
  5700         -/* Read block at iBlockid and pass it with other params to
  5701         -** getChildrenContaining().
  5702         -*/
  5703         -static int loadAndGetChildrenContaining(
  5704         -  fulltext_vtab *v,
  5705         -  sqlite_int64 iBlockid,
  5706         -  const char *pTerm, int nTerm, int isPrefix,
  5707         -  sqlite_int64 *piStartChild, sqlite_int64 *piEndChild
  5708         -){
  5709         -  sqlite3_stmt *s = NULL;
  5710         -  int rc;
  5711         -
  5712         -  assert( iBlockid!=0 );
  5713         -  assert( pTerm!=NULL );
  5714         -  assert( nTerm!=0 );        /* TODO(shess) Why not allow this? */
  5715         -  assert( piStartChild!=NULL );
  5716         -  assert( piEndChild!=NULL );
  5717         -
  5718         -  rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s);
  5719         -  if( rc!=SQLITE_OK ) return rc;
  5720         -
  5721         -  rc = sqlite3_bind_int64(s, 1, iBlockid);
  5722         -  if( rc!=SQLITE_OK ) return rc;
  5723         -
  5724         -  rc = sqlite3_step(s);
  5725         -  if( rc==SQLITE_DONE ) return SQLITE_ERROR;
  5726         -  if( rc!=SQLITE_ROW ) return rc;
  5727         -
  5728         -  getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),
  5729         -                        pTerm, nTerm, isPrefix, piStartChild, piEndChild);
  5730         -
  5731         -  /* We expect only one row.  We must execute another sqlite3_step()
  5732         -   * to complete the iteration; otherwise the table will remain
  5733         -   * locked. */
  5734         -  rc = sqlite3_step(s);
  5735         -  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  5736         -  if( rc!=SQLITE_DONE ) return rc;
  5737         -
  5738         -  return SQLITE_OK;
  5739         -}
  5740         -
  5741         -/* Traverse the tree represented by pData[nData] looking for
  5742         -** pTerm[nTerm], placing its doclist into *out.  This is internal to
  5743         -** loadSegment() to make error-handling cleaner.
  5744         -*/
  5745         -static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData,
  5746         -                          sqlite_int64 iLeavesEnd,
  5747         -                          const char *pTerm, int nTerm, int isPrefix,
  5748         -                          DataBuffer *out){
  5749         -  /* Special case where root is a leaf. */
  5750         -  if( *pData=='\0' ){
  5751         -    return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out);
  5752         -  }else{
  5753         -    int rc;
  5754         -    sqlite_int64 iStartChild, iEndChild;
  5755         -
  5756         -    /* Process pData as an interior node, then loop down the tree
  5757         -    ** until we find the set of leaf nodes to scan for the term.
  5758         -    */
  5759         -    getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix,
  5760         -                          &iStartChild, &iEndChild);
  5761         -    while( iStartChild>iLeavesEnd ){
  5762         -      sqlite_int64 iNextStart, iNextEnd;
  5763         -      rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix,
  5764         -                                        &iNextStart, &iNextEnd);
  5765         -      if( rc!=SQLITE_OK ) return rc;
  5766         -
  5767         -      /* If we've branched, follow the end branch, too. */
  5768         -      if( iStartChild!=iEndChild ){
  5769         -        sqlite_int64 iDummy;
  5770         -        rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix,
  5771         -                                          &iDummy, &iNextEnd);
  5772         -        if( rc!=SQLITE_OK ) return rc;
  5773         -      }
  5774         -
  5775         -      assert( iNextStart<=iNextEnd );
  5776         -      iStartChild = iNextStart;
  5777         -      iEndChild = iNextEnd;
  5778         -    }
  5779         -    assert( iStartChild<=iLeavesEnd );
  5780         -    assert( iEndChild<=iLeavesEnd );
  5781         -
  5782         -    /* Scan through the leaf segments for doclists. */
  5783         -    return loadSegmentLeaves(v, iStartChild, iEndChild,
  5784         -                             pTerm, nTerm, isPrefix, out);
  5785         -  }
  5786         -}
  5787         -
  5788         -/* Call loadSegmentInt() to collect the doclist for pTerm/nTerm, then
  5789         -** merge its doclist over *out (any duplicate doclists read from the
  5790         -** segment rooted at pData will overwrite those in *out).
  5791         -*/
  5792         -/* TODO(shess) Consider changing this to determine the depth of the
  5793         -** leaves using either the first characters of interior nodes (when
  5794         -** ==1, we're one level above the leaves), or the first character of
  5795         -** the root (which will describe the height of the tree directly).
  5796         -** Either feels somewhat tricky to me.
  5797         -*/
  5798         -/* TODO(shess) The current merge is likely to be slow for large
  5799         -** doclists (though it should process from newest/smallest to
  5800         -** oldest/largest, so it may not be that bad).  It might be useful to
  5801         -** modify things to allow for N-way merging.  This could either be
  5802         -** within a segment, with pairwise merges across segments, or across
  5803         -** all segments at once.
  5804         -*/
  5805         -static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
  5806         -                       sqlite_int64 iLeavesEnd,
  5807         -                       const char *pTerm, int nTerm, int isPrefix,
  5808         -                       DataBuffer *out){
  5809         -  DataBuffer result;
  5810         -  int rc;
  5811         -
  5812         -  assert( nData>1 );
  5813         -
  5814         -  /* This code should never be called with buffered updates. */
  5815         -  assert( v->nPendingData<0 );
  5816         -
  5817         -  dataBufferInit(&result, 0);
  5818         -  rc = loadSegmentInt(v, pData, nData, iLeavesEnd,
  5819         -                      pTerm, nTerm, isPrefix, &result);
  5820         -  if( rc==SQLITE_OK && result.nData>0 ){
  5821         -    if( out->nData==0 ){
  5822         -      DataBuffer tmp = *out;
  5823         -      *out = result;
  5824         -      result = tmp;
  5825         -    }else{
  5826         -      DataBuffer merged;
  5827         -      DLReader readers[2];
  5828         -
  5829         -      dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData);
  5830         -      dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData);
  5831         -      dataBufferInit(&merged, out->nData+result.nData);
  5832         -      docListMerge(&merged, readers, 2);
  5833         -      dataBufferDestroy(out);
  5834         -      *out = merged;
  5835         -      dlrDestroy(&readers[0]);
  5836         -      dlrDestroy(&readers[1]);
  5837         -    }
  5838         -  }
  5839         -  dataBufferDestroy(&result);
  5840         -  return rc;
  5841         -}
  5842         -
  5843         -/* Scan the database and merge together the posting lists for the term
  5844         -** into *out.
  5845         -*/
  5846         -static int termSelect(
  5847         -  fulltext_vtab *v, 
  5848         -  int iColumn,
  5849         -  const char *pTerm, int nTerm,             /* Term to query for */
  5850         -  int isPrefix,                             /* True for a prefix search */
  5851         -  DocListType iType, 
  5852         -  DataBuffer *out                           /* Write results here */
  5853         -){
  5854         -  DataBuffer doclist;
  5855         -  sqlite3_stmt *s;
  5856         -  int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
  5857         -  if( rc!=SQLITE_OK ) return rc;
  5858         -
  5859         -  /* This code should never be called with buffered updates. */
  5860         -  assert( v->nPendingData<0 );
  5861         -
  5862         -  dataBufferInit(&doclist, 0);
  5863         -  dataBufferInit(out, 0);
  5864         -
  5865         -  /* Traverse the segments from oldest to newest so that newer doclist
  5866         -  ** elements for given docids overwrite older elements.
  5867         -  */
  5868         -  while( (rc = sqlite3_step(s))==SQLITE_ROW ){
  5869         -    const char *pData = sqlite3_column_blob(s, 2);
  5870         -    const int nData = sqlite3_column_bytes(s, 2);
  5871         -    const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1);
  5872         -    rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix,
  5873         -                     &doclist);
  5874         -    if( rc!=SQLITE_OK ) goto err;
  5875         -  }
  5876         -  if( rc==SQLITE_DONE ){
  5877         -    if( doclist.nData!=0 ){
  5878         -      /* TODO(shess) The old term_select_all() code applied the column
  5879         -      ** restrict as we merged segments, leading to smaller buffers.
  5880         -      ** This is probably worthwhile to bring back, once the new storage
  5881         -      ** system is checked in.
  5882         -      */
  5883         -      if( iColumn==v->nColumn) iColumn = -1;
  5884         -      docListTrim(DL_DEFAULT, doclist.pData, doclist.nData,
  5885         -                  iColumn, iType, out);
  5886         -    }
  5887         -    rc = SQLITE_OK;
  5888         -  }
  5889         -
  5890         - err:
  5891         -  dataBufferDestroy(&doclist);
  5892         -  return rc;
  5893         -}
  5894         -
  5895         -/****************************************************************/
  5896         -/* Used to hold hashtable data for sorting. */
  5897         -typedef struct TermData {
  5898         -  const char *pTerm;
  5899         -  int nTerm;
  5900         -  DLCollector *pCollector;
  5901         -} TermData;
  5902         -
  5903         -/* Orders TermData elements in strcmp fashion ( <0 for less-than, 0
  5904         -** for equal, >0 for greater-than).
  5905         -*/
  5906         -static int termDataCmp(const void *av, const void *bv){
  5907         -  const TermData *a = (const TermData *)av;
  5908         -  const TermData *b = (const TermData *)bv;
  5909         -  int n = a->nTerm<b->nTerm ? a->nTerm : b->nTerm;
  5910         -  int c = memcmp(a->pTerm, b->pTerm, n);
  5911         -  if( c!=0 ) return c;
  5912         -  return a->nTerm-b->nTerm;
  5913         -}
  5914         -
  5915         -/* Order pTerms data by term, then write a new level 0 segment using
  5916         -** LeafWriter.
  5917         -*/
  5918         -static int writeZeroSegment(fulltext_vtab *v, fts3Hash *pTerms){
  5919         -  fts3HashElem *e;
  5920         -  int idx, rc, i, n;
  5921         -  TermData *pData;
  5922         -  LeafWriter writer;
  5923         -  DataBuffer dl;
  5924         -
  5925         -  /* Determine the next index at level 0, merging as necessary. */
  5926         -  rc = segdirNextIndex(v, 0, &idx);
  5927         -  if( rc!=SQLITE_OK ) return rc;
  5928         -
  5929         -  n = fts3HashCount(pTerms);
  5930         -  pData = sqlite3_malloc(n*sizeof(TermData));
  5931         -
  5932         -  for(i = 0, e = fts3HashFirst(pTerms); e; i++, e = fts3HashNext(e)){
  5933         -    assert( i<n );
  5934         -    pData[i].pTerm = fts3HashKey(e);
  5935         -    pData[i].nTerm = fts3HashKeysize(e);
  5936         -    pData[i].pCollector = fts3HashData(e);
  5937         -  }
  5938         -  assert( i==n );
  5939         -
  5940         -  /* TODO(shess) Should we allow user-defined collation sequences,
  5941         -  ** here?  I think we only need that once we support prefix searches.
  5942         -  */
  5943         -  if( n>1 ) qsort(pData, n, sizeof(*pData), termDataCmp);
  5944         -
  5945         -  /* TODO(shess) Refactor so that we can write directly to the segment
  5946         -  ** DataBuffer, as happens for segment merges.
  5947         -  */
  5948         -  leafWriterInit(0, idx, &writer);
  5949         -  dataBufferInit(&dl, 0);
  5950         -  for(i=0; i<n; i++){
  5951         -    dataBufferReset(&dl);
  5952         -    dlcAddDoclist(pData[i].pCollector, &dl);
  5953         -    rc = leafWriterStep(v, &writer,
  5954         -                        pData[i].pTerm, pData[i].nTerm, dl.pData, dl.nData);
  5955         -    if( rc!=SQLITE_OK ) goto err;
  5956         -  }
  5957         -  rc = leafWriterFinalize(v, &writer);
  5958         -
  5959         - err:
  5960         -  dataBufferDestroy(&dl);
  5961         -  sqlite3_free(pData);
  5962         -  leafWriterDestroy(&writer);
  5963         -  return rc;
  5964         -}
  5965         -
  5966         -/* If pendingTerms has data, free it. */
  5967         -static int clearPendingTerms(fulltext_vtab *v){
  5968         -  if( v->nPendingData>=0 ){
  5969         -    fts3HashElem *e;
  5970         -    for(e=fts3HashFirst(&v->pendingTerms); e; e=fts3HashNext(e)){
  5971         -      dlcDelete(fts3HashData(e));
  5972         -    }
  5973         -    fts3HashClear(&v->pendingTerms);
  5974         -    v->nPendingData = -1;
  5975         -  }
  5976         -  return SQLITE_OK;
  5977         -}
  5978         -
  5979         -/* If pendingTerms has data, flush it to a level-zero segment, and
  5980         -** free it.
  5981         -*/
  5982         -static int flushPendingTerms(fulltext_vtab *v){
  5983         -  if( v->nPendingData>=0 ){
  5984         -    int rc = writeZeroSegment(v, &v->pendingTerms);
  5985         -    if( rc==SQLITE_OK ) clearPendingTerms(v);
  5986         -    return rc;
  5987         -  }
  5988         -  return SQLITE_OK;
  5989         -}
  5990         -
  5991         -/* If pendingTerms is "too big", or docid is out of order, flush it.
  5992         -** Regardless, be certain that pendingTerms is initialized for use.
  5993         -*/
  5994         -static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid){
  5995         -  /* TODO(shess) Explore whether partially flushing the buffer on
  5996         -  ** forced-flush would provide better performance.  I suspect that if
  5997         -  ** we ordered the doclists by size and flushed the largest until the
  5998         -  ** buffer was half empty, that would let the less frequent terms
  5999         -  ** generate longer doclists.
  6000         -  */
  6001         -  if( iDocid<=v->iPrevDocid || v->nPendingData>kPendingThreshold ){
  6002         -    int rc = flushPendingTerms(v);
  6003         -    if( rc!=SQLITE_OK ) return rc;
  6004         -  }
  6005         -  if( v->nPendingData<0 ){
  6006         -    fts3HashInit(&v->pendingTerms, FTS3_HASH_STRING, 1);
  6007         -    v->nPendingData = 0;
  6008         -  }
  6009         -  v->iPrevDocid = iDocid;
  6010         -  return SQLITE_OK;
  6011         -}
  6012         -
  6013         -/* This function implements the xUpdate callback; it is the top-level entry
  6014         - * point for inserting, deleting or updating a row in a full-text table. */
  6015         -static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
  6016         -                          sqlite_int64 *pRowid){
  6017         -  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  6018         -  int rc;
  6019         -
  6020         -  FTSTRACE(("FTS3 Update %p\n", pVtab));
  6021         -
  6022         -  if( nArg<2 ){
  6023         -    rc = index_delete(v, sqlite3_value_int64(ppArg[0]));
  6024         -    if( rc==SQLITE_OK ){
  6025         -      /* If we just deleted the last row in the table, clear out the
  6026         -      ** index data.
  6027         -      */
  6028         -      rc = content_exists(v);
  6029         -      if( rc==SQLITE_ROW ){
  6030         -        rc = SQLITE_OK;
  6031         -      }else if( rc==SQLITE_DONE ){
  6032         -        /* Clear the pending terms so we don't flush a useless level-0
  6033         -        ** segment when the transaction closes.
  6034         -        */
  6035         -        rc = clearPendingTerms(v);
  6036         -        if( rc==SQLITE_OK ){
  6037         -          rc = segdir_delete_all(v);
  6038         -        }
  6039         -      }
  6040         -    }
  6041         -  } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
  6042         -    /* An update:
  6043         -     * ppArg[0] = old rowid
  6044         -     * ppArg[1] = new rowid
  6045         -     * ppArg[2..2+v->nColumn-1] = values
  6046         -     * ppArg[2+v->nColumn] = value for magic column (we ignore this)
  6047         -     * ppArg[2+v->nColumn+1] = value for docid
  6048         -     */
  6049         -    sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
  6050         -    if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
  6051         -        sqlite3_value_int64(ppArg[1]) != rowid ){
  6052         -      rc = SQLITE_ERROR;  /* we don't allow changing the rowid */
  6053         -    }else if( sqlite3_value_type(ppArg[2+v->nColumn+1]) != SQLITE_INTEGER ||
  6054         -              sqlite3_value_int64(ppArg[2+v->nColumn+1]) != rowid ){
  6055         -      rc = SQLITE_ERROR;  /* we don't allow changing the docid */
  6056         -    }else{
  6057         -      assert( nArg==2+v->nColumn+2);
  6058         -      rc = index_update(v, rowid, &ppArg[2]);
  6059         -    }
  6060         -  } else {
  6061         -    /* An insert:
  6062         -     * ppArg[1] = requested rowid
  6063         -     * ppArg[2..2+v->nColumn-1] = values
  6064         -     * ppArg[2+v->nColumn] = value for magic column (we ignore this)
  6065         -     * ppArg[2+v->nColumn+1] = value for docid
  6066         -     */
  6067         -    sqlite3_value *pRequestDocid = ppArg[2+v->nColumn+1];
  6068         -    assert( nArg==2+v->nColumn+2);
  6069         -    if( SQLITE_NULL != sqlite3_value_type(pRequestDocid) &&
  6070         -        SQLITE_NULL != sqlite3_value_type(ppArg[1]) ){
  6071         -      /* TODO(shess) Consider allowing this to work if the values are
  6072         -      ** identical.  I'm inclined to discourage that usage, though,
  6073         -      ** given that both rowid and docid are special columns.  Better
  6074         -      ** would be to define one or the other as the default winner,
  6075         -      ** but should it be fts3-centric (docid) or SQLite-centric
  6076         -      ** (rowid)?
  6077         -      */
  6078         -      rc = SQLITE_ERROR;
  6079         -    }else{
  6080         -      if( SQLITE_NULL == sqlite3_value_type(pRequestDocid) ){
  6081         -        pRequestDocid = ppArg[1];
  6082         -      }
  6083         -      rc = index_insert(v, pRequestDocid, &ppArg[2], pRowid);
  6084         -    }
  6085         -  }
  6086         -
  6087         -  return rc;
  6088         -}
  6089         -
  6090         -static int fulltextSync(sqlite3_vtab *pVtab){
  6091         -  FTSTRACE(("FTS3 xSync()\n"));
  6092         -  return flushPendingTerms((fulltext_vtab *)pVtab);
  6093         -}
  6094         -
  6095         -static int fulltextBegin(sqlite3_vtab *pVtab){
  6096         -  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  6097         -  FTSTRACE(("FTS3 xBegin()\n"));
  6098         -
  6099         -  /* Any buffered updates should have been cleared by the previous
  6100         -  ** transaction.
  6101         -  */
  6102         -  assert( v->nPendingData<0 );
  6103         -  return clearPendingTerms(v);
  6104         -}
  6105         -
  6106         -static int fulltextCommit(sqlite3_vtab *pVtab){
  6107         -  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  6108         -  FTSTRACE(("FTS3 xCommit()\n"));
  6109         -
  6110         -  /* Buffered updates should have been cleared by fulltextSync(). */
  6111         -  assert( v->nPendingData<0 );
  6112         -  return clearPendingTerms(v);
  6113         -}
  6114         -
  6115         -static int fulltextRollback(sqlite3_vtab *pVtab){
  6116         -  FTSTRACE(("FTS3 xRollback()\n"));
  6117         -  return clearPendingTerms((fulltext_vtab *)pVtab);
         1937  +  memcpy(&pRet, sqlite3_value_blob(pVal), sizeof(Fts3Cursor *));
         1938  +  *ppCsr = pRet;
         1939  +  return SQLITE_OK;
  6118   1940   }
  6119   1941   
  6120   1942   /*
  6121   1943   ** Implementation of the snippet() function for FTS3
  6122   1944   */
  6123         -static void snippetFunc(
         1945  +static void fts3SnippetFunc(
  6124   1946     sqlite3_context *pContext,
  6125   1947     int argc,
  6126   1948     sqlite3_value **argv
  6127   1949   ){
  6128         -  fulltext_cursor *pCursor;
  6129         -  if( argc<1 ) return;
  6130         -  if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
  6131         -      sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
  6132         -    sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
  6133         -  }else{
  6134         -    const char *zStart = "<b>";
  6135         -    const char *zEnd = "</b>";
  6136         -    const char *zEllipsis = "<b>...</b>";
  6137         -    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
  6138         -    if( argc>=2 ){
  6139         -      zStart = (const char*)sqlite3_value_text(argv[1]);
  6140         -      if( argc>=3 ){
  6141         -        zEnd = (const char*)sqlite3_value_text(argv[2]);
  6142         -        if( argc>=4 ){
  6143         -          zEllipsis = (const char*)sqlite3_value_text(argv[3]);
  6144         -        }
  6145         -      }
  6146         -    }
  6147         -    snippetAllOffsets(pCursor);
  6148         -    snippetText(pCursor, zStart, zEnd, zEllipsis);
  6149         -    sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
  6150         -                        pCursor->snippet.nSnippet, SQLITE_STATIC);
         1950  +  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
         1951  +  const char *zStart = "<b>";
         1952  +  const char *zEnd = "</b>";
         1953  +  const char *zEllipsis = "<b>...</b>";
         1954  +
         1955  +  if( argc<1 || argc>4 ) return;
         1956  +  if( fts3FunctionArg(pContext, "snippet", argv[0], &pCsr) ) return;
         1957  +
         1958  +  switch( argc ){
         1959  +    case 4: zEllipsis = (const char*)sqlite3_value_text(argv[3]);
         1960  +    case 3: zEnd = (const char*)sqlite3_value_text(argv[2]);
         1961  +    case 2: zStart = (const char*)sqlite3_value_text(argv[1]);
  6151   1962     }
         1963  +
         1964  +  sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis);
  6152   1965   }
  6153   1966   
  6154   1967   /*
  6155   1968   ** Implementation of the offsets() function for FTS3
  6156   1969   */
  6157         -static void snippetOffsetsFunc(
  6158         -  sqlite3_context *pContext,
  6159         -  int argc,
  6160         -  sqlite3_value **argv
         1970  +static void fts3OffsetsFunc(
         1971  +  sqlite3_context *pContext,      /* SQLite function call context */
         1972  +  int nVal,                       /* Size of argument array */
         1973  +  sqlite3_value **apVal           /* Array of arguments */
  6161   1974   ){
  6162         -  fulltext_cursor *pCursor;
  6163         -  if( argc<1 ) return;
  6164         -  if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
  6165         -      sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
  6166         -    sqlite3_result_error(pContext, "illegal first argument to offsets",-1);
  6167         -  }else{
  6168         -    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
  6169         -    snippetAllOffsets(pCursor);
  6170         -    snippetOffsetText(&pCursor->snippet);
  6171         -    sqlite3_result_text(pContext,
  6172         -                        pCursor->snippet.zOffset, pCursor->snippet.nOffset,
  6173         -                        SQLITE_STATIC);
  6174         -  }
         1975  +  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
         1976  +
         1977  +  assert( nVal==1 );
         1978  +  if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return;
         1979  +  assert( pCsr );
         1980  +  sqlite3Fts3Offsets(pContext, pCsr);
  6175   1981   }
  6176   1982   
  6177         -/* OptLeavesReader is nearly identical to LeavesReader, except that
  6178         -** where LeavesReader is geared towards the merging of complete
  6179         -** segment levels (with exactly MERGE_COUNT segments), OptLeavesReader
  6180         -** is geared towards implementation of the optimize() function, and
  6181         -** can merge all segments simultaneously.  This version may be
  6182         -** somewhat less efficient than LeavesReader because it merges into an
  6183         -** accumulator rather than doing an N-way merge, but since segment
  6184         -** size grows exponentially (so segment count logrithmically) this is
  6185         -** probably not an immediate problem.
         1983  +/* 
         1984  +** Implementation of the special optimize() function for FTS3. This 
         1985  +** function merges all segments in the database to a single segment.
         1986  +** Example usage is:
         1987  +**
         1988  +**   SELECT optimize(t) FROM t LIMIT 1;
         1989  +**
         1990  +** where 't' is the name of an FTS3 table.
  6186   1991   */
  6187         -/* TODO(shess): Prove that assertion, or extend the merge code to
  6188         -** merge tree fashion (like the prefix-searching code does).
  6189         -*/
  6190         -/* TODO(shess): OptLeavesReader and LeavesReader could probably be
  6191         -** merged with little or no loss of performance for LeavesReader.  The
  6192         -** merged code would need to handle >MERGE_COUNT segments, and would
  6193         -** also need to be able to optionally optimize away deletes.
  6194         -*/
  6195         -typedef struct OptLeavesReader {
  6196         -  /* Segment number, to order readers by age. */
  6197         -  int segment;
  6198         -  LeavesReader reader;
  6199         -} OptLeavesReader;
  6200         -
  6201         -static int optLeavesReaderAtEnd(OptLeavesReader *pReader){
  6202         -  return leavesReaderAtEnd(&pReader->reader);
  6203         -}
  6204         -static int optLeavesReaderTermBytes(OptLeavesReader *pReader){
  6205         -  return leavesReaderTermBytes(&pReader->reader);
  6206         -}
  6207         -static const char *optLeavesReaderData(OptLeavesReader *pReader){
  6208         -  return leavesReaderData(&pReader->reader);
  6209         -}
  6210         -static int optLeavesReaderDataBytes(OptLeavesReader *pReader){
  6211         -  return leavesReaderDataBytes(&pReader->reader);
  6212         -}
  6213         -static const char *optLeavesReaderTerm(OptLeavesReader *pReader){
  6214         -  return leavesReaderTerm(&pReader->reader);
  6215         -}
  6216         -static int optLeavesReaderStep(fulltext_vtab *v, OptLeavesReader *pReader){
  6217         -  return leavesReaderStep(v, &pReader->reader);
  6218         -}
  6219         -static int optLeavesReaderTermCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
  6220         -  return leavesReaderTermCmp(&lr1->reader, &lr2->reader);
  6221         -}
  6222         -/* Order by term ascending, segment ascending (oldest to newest), with
  6223         -** exhausted readers to the end.
  6224         -*/
  6225         -static int optLeavesReaderCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
  6226         -  int c = optLeavesReaderTermCmp(lr1, lr2);
  6227         -  if( c!=0 ) return c;
  6228         -  return lr1->segment-lr2->segment;
  6229         -}
  6230         -/* Bubble pLr[0] to appropriate place in pLr[1..nLr-1].  Assumes that
  6231         -** pLr[1..nLr-1] is already sorted.
  6232         -*/
  6233         -static void optLeavesReaderReorder(OptLeavesReader *pLr, int nLr){
  6234         -  while( nLr>1 && optLeavesReaderCmp(pLr, pLr+1)>0 ){
  6235         -    OptLeavesReader tmp = pLr[0];
  6236         -    pLr[0] = pLr[1];
  6237         -    pLr[1] = tmp;
  6238         -    nLr--;
  6239         -    pLr++;
         1992  +static void fts3OptimizeFunc(
         1993  +  sqlite3_context *pContext,      /* SQLite function call context */
         1994  +  int nVal,                       /* Size of argument array */
         1995  +  sqlite3_value **apVal           /* Array of arguments */
         1996  +){
         1997  +  int rc;                         /* Return code */
         1998  +  Fts3Table *p;                   /* Virtual table handle */
         1999  +  Fts3Cursor *pCursor;            /* Cursor handle passed through apVal[0] */
         2000  +
         2001  +  assert( nVal==1 );
         2002  +  if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return;
         2003  +  p = (Fts3Table *)pCursor->base.pVtab;
         2004  +  assert( p );
         2005  +
         2006  +  rc = sqlite3Fts3Optimize(p);
         2007  +
         2008  +  switch( rc ){
         2009  +    case SQLITE_OK:
         2010  +      sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
         2011  +      break;
         2012  +    case SQLITE_DONE:
         2013  +      sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC);
         2014  +      break;
         2015  +    default:
         2016  +      sqlite3_result_error_code(pContext, rc);
         2017  +      break;
  6240   2018     }
  6241   2019   }
  6242   2020   
  6243         -/* optimize() helper function.  Put the readers in order and iterate
  6244         -** through them, merging doclists for matching terms into pWriter.
  6245         -** Returns SQLITE_OK on success, or the SQLite error code which
  6246         -** prevented success.
  6247         -*/
  6248         -static int optimizeInternal(fulltext_vtab *v,
  6249         -                            OptLeavesReader *readers, int nReaders,
  6250         -                            LeafWriter *pWriter){
  6251         -  int i, rc = SQLITE_OK;
  6252         -  DataBuffer doclist, merged, tmp;
  6253         -
  6254         -  /* Order the readers. */
  6255         -  i = nReaders;
  6256         -  while( i-- > 0 ){
  6257         -    optLeavesReaderReorder(&readers[i], nReaders-i);
  6258         -  }
  6259         -
  6260         -  dataBufferInit(&doclist, LEAF_MAX);
  6261         -  dataBufferInit(&merged, LEAF_MAX);
  6262         -
  6263         -  /* Exhausted readers bubble to the end, so when the first reader is
  6264         -  ** at eof, all are at eof.
  6265         -  */
  6266         -  while( !optLeavesReaderAtEnd(&readers[0]) ){
  6267         -
  6268         -    /* Figure out how many readers share the next term. */
  6269         -    for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){
  6270         -      if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break;
  6271         -    }
  6272         -
  6273         -    /* Special-case for no merge. */
  6274         -    if( i==1 ){
  6275         -      /* Trim deletions from the doclist. */
  6276         -      dataBufferReset(&merged);
  6277         -      docListTrim(DL_DEFAULT,
  6278         -                  optLeavesReaderData(&readers[0]),
  6279         -                  optLeavesReaderDataBytes(&readers[0]),
  6280         -                  -1, DL_DEFAULT, &merged);
  6281         -    }else{
  6282         -      DLReader dlReaders[MERGE_COUNT];
  6283         -      int iReader, nReaders;
  6284         -
  6285         -      /* Prime the pipeline with the first reader's doclist.  After
  6286         -      ** one pass index 0 will reference the accumulated doclist.
  6287         -      */
  6288         -      dlrInit(&dlReaders[0], DL_DEFAULT,
  6289         -              optLeavesReaderData(&readers[0]),
  6290         -              optLeavesReaderDataBytes(&readers[0]));
  6291         -      iReader = 1;
  6292         -
  6293         -      assert( iReader<i );  /* Must execute the loop at least once. */
  6294         -      while( iReader<i ){
  6295         -        /* Merge 16 inputs per pass. */
  6296         -        for( nReaders=1; iReader<i && nReaders<MERGE_COUNT;
  6297         -             iReader++, nReaders++ ){
  6298         -          dlrInit(&dlReaders[nReaders], DL_DEFAULT,
  6299         -                  optLeavesReaderData(&readers[iReader]),
  6300         -                  optLeavesReaderDataBytes(&readers[iReader]));
  6301         -        }
  6302         -
  6303         -        /* Merge doclists and swap result into accumulator. */
  6304         -        dataBufferReset(&merged);
  6305         -        docListMerge(&merged, dlReaders, nReaders);
  6306         -        tmp = merged;
  6307         -        merged = doclist;
  6308         -        doclist = tmp;
  6309         -
  6310         -        while( nReaders-- > 0 ){
  6311         -          dlrDestroy(&dlReaders[nReaders]);
  6312         -        }
  6313         -
  6314         -        /* Accumulated doclist to reader 0 for next pass. */
  6315         -        dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData);
  6316         -      }
  6317         -
  6318         -      /* Destroy reader that was left in the pipeline. */
  6319         -      dlrDestroy(&dlReaders[0]);
  6320         -
  6321         -      /* Trim deletions from the doclist. */
  6322         -      dataBufferReset(&merged);
  6323         -      docListTrim(DL_DEFAULT, doclist.pData, doclist.nData,
  6324         -                  -1, DL_DEFAULT, &merged);
  6325         -    }
  6326         -
  6327         -    /* Only pass doclists with hits (skip if all hits deleted). */
  6328         -    if( merged.nData>0 ){
  6329         -      rc = leafWriterStep(v, pWriter,
  6330         -                          optLeavesReaderTerm(&readers[0]),
  6331         -                          optLeavesReaderTermBytes(&readers[0]),
  6332         -                          merged.pData, merged.nData);
  6333         -      if( rc!=SQLITE_OK ) goto err;
  6334         -    }
  6335         -
  6336         -    /* Step merged readers to next term and reorder. */
  6337         -    while( i-- > 0 ){
  6338         -      rc = optLeavesReaderStep(v, &readers[i]);
  6339         -      if( rc!=SQLITE_OK ) goto err;
  6340         -
  6341         -      optLeavesReaderReorder(&readers[i], nReaders-i);
  6342         -    }
  6343         -  }
  6344         -
  6345         - err:
  6346         -  dataBufferDestroy(&doclist);
  6347         -  dataBufferDestroy(&merged);
  6348         -  return rc;
  6349         -}
  6350         -
  6351         -/* Implement optimize() function for FTS3.  optimize(t) merges all
  6352         -** segments in the fts index into a single segment.  't' is the magic
  6353         -** table-named column.
  6354         -*/
  6355         -static void optimizeFunc(sqlite3_context *pContext,
  6356         -                         int argc, sqlite3_value **argv){
  6357         -  fulltext_cursor *pCursor;
  6358         -  if( argc>1 ){
  6359         -    sqlite3_result_error(pContext, "excess arguments to optimize()",-1);
  6360         -  }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
  6361         -            sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
  6362         -    sqlite3_result_error(pContext, "illegal first argument to optimize",-1);
  6363         -  }else{
  6364         -    fulltext_vtab *v;
  6365         -    int i, rc, iMaxLevel;
  6366         -    OptLeavesReader *readers;
  6367         -    int nReaders;
  6368         -    LeafWriter writer;
  6369         -    sqlite3_stmt *s;
  6370         -
  6371         -    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
  6372         -    v = cursor_vtab(pCursor);
  6373         -
  6374         -    /* Flush any buffered updates before optimizing. */
  6375         -    rc = flushPendingTerms(v);
  6376         -    if( rc!=SQLITE_OK ) goto err;
  6377         -
  6378         -    rc = segdir_count(v, &nReaders, &iMaxLevel);
  6379         -    if( rc!=SQLITE_OK ) goto err;
  6380         -    if( nReaders==0 || nReaders==1 ){
  6381         -      sqlite3_result_text(pContext, "Index already optimal", -1,
  6382         -                          SQLITE_STATIC);
  6383         -      return;
  6384         -    }
  6385         -
  6386         -    rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
  6387         -    if( rc!=SQLITE_OK ) goto err;
  6388         -
  6389         -    readers = sqlite3_malloc(nReaders*sizeof(readers[0]));
  6390         -    if( readers==NULL ) goto err;
  6391         -
  6392         -    /* Note that there will already be a segment at this position
  6393         -    ** until we call segdir_delete() on iMaxLevel.
  6394         -    */
  6395         -    leafWriterInit(iMaxLevel, 0, &writer);
  6396         -
  6397         -    i = 0;
  6398         -    while( (rc = sqlite3_step(s))==SQLITE_ROW ){
  6399         -      sqlite_int64 iStart = sqlite3_column_int64(s, 0);
  6400         -      sqlite_int64 iEnd = sqlite3_column_int64(s, 1);
  6401         -      const char *pRootData = sqlite3_column_blob(s, 2);
  6402         -      int nRootData = sqlite3_column_bytes(s, 2);
  6403         -
  6404         -      assert( i<nReaders );
  6405         -      rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData,
  6406         -                            &readers[i].reader);
  6407         -      if( rc!=SQLITE_OK ) break;
  6408         -
  6409         -      readers[i].segment = i;
  6410         -      i++;
  6411         -    }
  6412         -
  6413         -    /* If we managed to successfully read them all, optimize them. */
  6414         -    if( rc==SQLITE_DONE ){
  6415         -      assert( i==nReaders );
  6416         -      rc = optimizeInternal(v, readers, nReaders, &writer);
  6417         -    }
  6418         -
  6419         -    while( i-- > 0 ){
  6420         -      leavesReaderDestroy(&readers[i].reader);
  6421         -    }
  6422         -    sqlite3_free(readers);
  6423         -
  6424         -    /* If we've successfully gotten to here, delete the old segments
  6425         -    ** and flush the interior structure of the new segment.
  6426         -    */
  6427         -    if( rc==SQLITE_OK ){
  6428         -      for( i=0; i<=iMaxLevel; i++ ){
  6429         -        rc = segdir_delete(v, i);
  6430         -        if( rc!=SQLITE_OK ) break;
  6431         -      }
  6432         -
  6433         -      if( rc==SQLITE_OK ) rc = leafWriterFinalize(v, &writer);
  6434         -    }
  6435         -
  6436         -    leafWriterDestroy(&writer);
  6437         -
  6438         -    if( rc!=SQLITE_OK ) goto err;
  6439         -
  6440         -    sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
  6441         -    return;
  6442         -
  6443         -    /* TODO(shess): Error-handling needs to be improved along the
  6444         -    ** lines of the dump_ functions.
  6445         -    */
  6446         - err:
  6447         -    {
  6448         -      char buf[512];
  6449         -      sqlite3_snprintf(sizeof(buf), buf, "Error in optimize: %s",
  6450         -                       sqlite3_errmsg(sqlite3_context_db_handle(pContext)));
  6451         -      sqlite3_result_error(pContext, buf, -1);
  6452         -    }
  6453         -  }
  6454         -}
  6455         -
  6456         -#ifdef SQLITE_TEST
  6457         -/* Generate an error of the form "<prefix>: <msg>".  If msg is NULL,
  6458         -** pull the error from the context's db handle.
  6459         -*/
  6460         -static void generateError(sqlite3_context *pContext,
  6461         -                          const char *prefix, const char *msg){
  6462         -  char buf[512];
  6463         -  if( msg==NULL ) msg = sqlite3_errmsg(sqlite3_context_db_handle(pContext));
  6464         -  sqlite3_snprintf(sizeof(buf), buf, "%s: %s", prefix, msg);
  6465         -  sqlite3_result_error(pContext, buf, -1);
  6466         -}
  6467         -
  6468         -/* Helper function to collect the set of terms in the segment into
  6469         -** pTerms.  The segment is defined by the leaf nodes between
  6470         -** iStartBlockid and iEndBlockid, inclusive, or by the contents of
  6471         -** pRootData if iStartBlockid is 0 (in which case the entire segment
  6472         -** fit in a leaf).
  6473         -*/
  6474         -static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s,
  6475         -                               fts3Hash *pTerms){
  6476         -  const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0);
  6477         -  const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1);
  6478         -  const char *pRootData = sqlite3_column_blob(s, 2);
  6479         -  const int nRootData = sqlite3_column_bytes(s, 2);
  6480         -  LeavesReader reader;
  6481         -  int rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid,
  6482         -                            pRootData, nRootData, &reader);
  6483         -  if( rc!=SQLITE_OK ) return rc;
  6484         -
  6485         -  while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){
  6486         -    const char *pTerm = leavesReaderTerm(&reader);
  6487         -    const int nTerm = leavesReaderTermBytes(&reader);
  6488         -    void *oldValue = sqlite3Fts3HashFind(pTerms, pTerm, nTerm);
  6489         -    void *newValue = (void *)((char *)oldValue+1);
  6490         -
  6491         -    /* From the comment before sqlite3Fts3HashInsert in fts3_hash.c,
  6492         -    ** the data value passed is returned in case of malloc failure.
  6493         -    */
  6494         -    if( newValue==sqlite3Fts3HashInsert(pTerms, pTerm, nTerm, newValue) ){
  6495         -      rc = SQLITE_NOMEM;
  6496         -    }else{
  6497         -      rc = leavesReaderStep(v, &reader);
  6498         -    }
  6499         -  }
  6500         -
  6501         -  leavesReaderDestroy(&reader);
  6502         -  return rc;
  6503         -}
  6504         -
  6505         -/* Helper function to build the result string for dump_terms(). */
  6506         -static int generateTermsResult(sqlite3_context *pContext, fts3Hash *pTerms){
  6507         -  int iTerm, nTerms, nResultBytes, iByte;
  6508         -  char *result;
  6509         -  TermData *pData;
  6510         -  fts3HashElem *e;
  6511         -
  6512         -  /* Iterate pTerms to generate an array of terms in pData for
  6513         -  ** sorting.
  6514         -  */
  6515         -  nTerms = fts3HashCount(pTerms);
  6516         -  assert( nTerms>0 );
  6517         -  pData = sqlite3_malloc(nTerms*sizeof(TermData));
  6518         -  if( pData==NULL ) return SQLITE_NOMEM;
  6519         -
  6520         -  nResultBytes = 0;
  6521         -  for(iTerm = 0, e = fts3HashFirst(pTerms); e; iTerm++, e = fts3HashNext(e)){
  6522         -    nResultBytes += fts3HashKeysize(e)+1;   /* Term plus trailing space */
  6523         -    assert( iTerm<nTerms );
  6524         -    pData[iTerm].pTerm = fts3HashKey(e);
  6525         -    pData[iTerm].nTerm = fts3HashKeysize(e);
  6526         -    pData[iTerm].pCollector = fts3HashData(e);  /* unused */
  6527         -  }
  6528         -  assert( iTerm==nTerms );
  6529         -
  6530         -  assert( nResultBytes>0 );   /* nTerms>0, nResultsBytes must be, too. */
  6531         -  result = sqlite3_malloc(nResultBytes);
  6532         -  if( result==NULL ){
  6533         -    sqlite3_free(pData);
  6534         -    return SQLITE_NOMEM;
  6535         -  }
  6536         -
  6537         -  if( nTerms>1 ) qsort(pData, nTerms, sizeof(*pData), termDataCmp);
  6538         -
  6539         -  /* Read the terms in order to build the result. */
  6540         -  iByte = 0;
  6541         -  for(iTerm=0; iTerm<nTerms; ++iTerm){
  6542         -    memcpy(result+iByte, pData[iTerm].pTerm, pData[iTerm].nTerm);
  6543         -    iByte += pData[iTerm].nTerm;
  6544         -    result[iByte++] = ' ';
  6545         -  }
  6546         -  assert( iByte==nResultBytes );
  6547         -  assert( result[nResultBytes-1]==' ' );
  6548         -  result[nResultBytes-1] = '\0';
  6549         -
  6550         -  /* Passes away ownership of result. */
  6551         -  sqlite3_result_text(pContext, result, nResultBytes-1, sqlite3_free);
  6552         -  sqlite3_free(pData);
  6553         -  return SQLITE_OK;
  6554         -}
  6555         -
  6556         -/* Implements dump_terms() for use in inspecting the fts3 index from
  6557         -** tests.  TEXT result containing the ordered list of terms joined by
  6558         -** spaces.  dump_terms(t, level, idx) dumps the terms for the segment
  6559         -** specified by level, idx (in %_segdir), while dump_terms(t) dumps
  6560         -** all terms in the index.  In both cases t is the fts table's magic
  6561         -** table-named column.
  6562         -*/
  6563         -static void dumpTermsFunc(
  6564         -  sqlite3_context *pContext,
  6565         -  int argc, sqlite3_value **argv
  6566         -){
  6567         -  fulltext_cursor *pCursor;
  6568         -  if( argc!=3 && argc!=1 ){
  6569         -    generateError(pContext, "dump_terms", "incorrect arguments");
  6570         -  }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
  6571         -            sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
  6572         -    generateError(pContext, "dump_terms", "illegal first argument");
  6573         -  }else{
  6574         -    fulltext_vtab *v;
  6575         -    fts3Hash terms;
  6576         -    sqlite3_stmt *s = NULL;
  6577         -    int rc;
  6578         -
  6579         -    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
  6580         -    v = cursor_vtab(pCursor);
  6581         -
  6582         -    /* If passed only the cursor column, get all segments.  Otherwise
  6583         -    ** get the segment described by the following two arguments.
  6584         -    */
  6585         -    if( argc==1 ){
  6586         -      rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
  6587         -    }else{
  6588         -      rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s);
  6589         -      if( rc==SQLITE_OK ){
  6590         -        rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[1]));
  6591         -        if( rc==SQLITE_OK ){
  6592         -          rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[2]));
  6593         -        }
  6594         -      }
  6595         -    }
  6596         -
  6597         -    if( rc!=SQLITE_OK ){
  6598         -      generateError(pContext, "dump_terms", NULL);
  6599         -      return;
  6600         -    }
  6601         -
  6602         -    /* Collect the terms for each segment. */
  6603         -    sqlite3Fts3HashInit(&terms, FTS3_HASH_STRING, 1);
  6604         -    while( (rc = sqlite3_step(s))==SQLITE_ROW ){
  6605         -      rc = collectSegmentTerms(v, s, &terms);
  6606         -      if( rc!=SQLITE_OK ) break;
  6607         -    }
  6608         -
  6609         -    if( rc!=SQLITE_DONE ){
  6610         -      sqlite3_reset(s);
  6611         -      generateError(pContext, "dump_terms", NULL);
  6612         -    }else{
  6613         -      const int nTerms = fts3HashCount(&terms);
  6614         -      if( nTerms>0 ){
  6615         -        rc = generateTermsResult(pContext, &terms);
  6616         -        if( rc==SQLITE_NOMEM ){
  6617         -          generateError(pContext, "dump_terms", "out of memory");
  6618         -        }else{
  6619         -          assert( rc==SQLITE_OK );
  6620         -        }
  6621         -      }else if( argc==3 ){
  6622         -        /* The specific segment asked for could not be found. */
  6623         -        generateError(pContext, "dump_terms", "segment not found");
  6624         -      }else{
  6625         -        /* No segments found. */
  6626         -        /* TODO(shess): It should be impossible to reach this.  This
  6627         -        ** case can only happen for an empty table, in which case
  6628         -        ** SQLite has no rows to call this function on.
  6629         -        */
  6630         -        sqlite3_result_null(pContext);
  6631         -      }
  6632         -    }
  6633         -    sqlite3Fts3HashClear(&terms);
  6634         -  }
  6635         -}
  6636         -
  6637         -/* Expand the DL_DEFAULT doclist in pData into a text result in
  6638         -** pContext.
  6639         -*/
  6640         -static void createDoclistResult(sqlite3_context *pContext,
  6641         -                                const char *pData, int nData){
  6642         -  DataBuffer dump;
  6643         -  DLReader dlReader;
  6644         -
  6645         -  assert( pData!=NULL && nData>0 );
  6646         -
  6647         -  dataBufferInit(&dump, 0);
  6648         -  dlrInit(&dlReader, DL_DEFAULT, pData, nData);
  6649         -  for( ; !dlrAtEnd(&dlReader); dlrStep(&dlReader) ){
  6650         -    char buf[256];
  6651         -    PLReader plReader;
  6652         -
  6653         -    plrInit(&plReader, &dlReader);
  6654         -    if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){
  6655         -      sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader));
  6656         -      dataBufferAppend(&dump, buf, strlen(buf));
  6657         -    }else{
  6658         -      int iColumn = plrColumn(&plReader);
  6659         -
  6660         -      sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[",
  6661         -                       dlrDocid(&dlReader), iColumn);
  6662         -      dataBufferAppend(&dump, buf, strlen(buf));
  6663         -
  6664         -      for( ; !plrAtEnd(&plReader); plrStep(&plReader) ){
  6665         -        if( plrColumn(&plReader)!=iColumn ){
  6666         -          iColumn = plrColumn(&plReader);
  6667         -          sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn);
  6668         -          assert( dump.nData>0 );
  6669         -          dump.nData--;                     /* Overwrite trailing space. */
  6670         -          assert( dump.pData[dump.nData]==' ');
  6671         -          dataBufferAppend(&dump, buf, strlen(buf));
  6672         -        }
  6673         -        if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){
  6674         -          sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ",
  6675         -                           plrPosition(&plReader),
  6676         -                           plrStartOffset(&plReader), plrEndOffset(&plReader));
  6677         -        }else if( DL_DEFAULT==DL_POSITIONS ){
  6678         -          sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader));
  6679         -        }else{
  6680         -          assert( NULL=="Unhandled DL_DEFAULT value");
  6681         -        }
  6682         -        dataBufferAppend(&dump, buf, strlen(buf));
  6683         -      }
  6684         -      plrDestroy(&plReader);
  6685         -
  6686         -      assert( dump.nData>0 );
  6687         -      dump.nData--;                     /* Overwrite trailing space. */
  6688         -      assert( dump.pData[dump.nData]==' ');
  6689         -      dataBufferAppend(&dump, "]] ", 3);
  6690         -    }
  6691         -  }
  6692         -  dlrDestroy(&dlReader);
  6693         -
  6694         -  assert( dump.nData>0 );
  6695         -  dump.nData--;                     /* Overwrite trailing space. */
  6696         -  assert( dump.pData[dump.nData]==' ');
  6697         -  dump.pData[dump.nData] = '\0';
  6698         -  assert( dump.nData>0 );
  6699         -
  6700         -  /* Passes ownership of dump's buffer to pContext. */
  6701         -  sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free);
  6702         -  dump.pData = NULL;
  6703         -  dump.nData = dump.nCapacity = 0;
  6704         -}
  6705         -
  6706         -/* Implements dump_doclist() for use in inspecting the fts3 index from
  6707         -** tests.  TEXT result containing a string representation of the
  6708         -** doclist for the indicated term.  dump_doclist(t, term, level, idx)
  6709         -** dumps the doclist for term from the segment specified by level, idx
  6710         -** (in %_segdir), while dump_doclist(t, term) dumps the logical
  6711         -** doclist for the term across all segments.  The per-segment doclist
  6712         -** can contain deletions, while the full-index doclist will not
  6713         -** (deletions are omitted).
  6714         -**
  6715         -** Result formats differ with the setting of DL_DEFAULTS.  Examples:
  6716         -**
  6717         -** DL_DOCIDS: [1] [3] [7]
  6718         -** DL_POSITIONS: [1 0[0 4] 1[17]] [3 1[5]]
  6719         -** DL_POSITIONS_OFFSETS: [1 0[0,0,3 4,23,26] 1[17,102,105]] [3 1[5,20,23]]
  6720         -**
  6721         -** In each case the number after the outer '[' is the docid.  In the
  6722         -** latter two cases, the number before the inner '[' is the column
  6723         -** associated with the values within.  For DL_POSITIONS the numbers
  6724         -** within are the positions, for DL_POSITIONS_OFFSETS they are the
  6725         -** position, the start offset, and the end offset.
  6726         -*/
  6727         -static void dumpDoclistFunc(
  6728         -  sqlite3_context *pContext,
  6729         -  int argc, sqlite3_value **argv
  6730         -){
  6731         -  fulltext_cursor *pCursor;
  6732         -  if( argc!=2 && argc!=4 ){
  6733         -    generateError(pContext, "dump_doclist", "incorrect arguments");
  6734         -  }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
  6735         -            sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
  6736         -    generateError(pContext, "dump_doclist", "illegal first argument");
  6737         -  }else if( sqlite3_value_text(argv[1])==NULL ||
  6738         -            sqlite3_value_text(argv[1])[0]=='\0' ){
  6739         -    generateError(pContext, "dump_doclist", "empty second argument");
  6740         -  }else{
  6741         -    const char *pTerm = (const char *)sqlite3_value_text(argv[1]);
  6742         -    const int nTerm = strlen(pTerm);
  6743         -    fulltext_vtab *v;
  6744         -    int rc;
  6745         -    DataBuffer doclist;
  6746         -
  6747         -    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
  6748         -    v = cursor_vtab(pCursor);
  6749         -
  6750         -    dataBufferInit(&doclist, 0);
  6751         -
  6752         -    /* termSelect() yields the same logical doclist that queries are
  6753         -    ** run against.
  6754         -    */
  6755         -    if( argc==2 ){
  6756         -      rc = termSelect(v, v->nColumn, pTerm, nTerm, 0, DL_DEFAULT, &doclist);
  6757         -    }else{
  6758         -      sqlite3_stmt *s = NULL;
  6759         -
  6760         -      /* Get our specific segment's information. */
  6761         -      rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s);
  6762         -      if( rc==SQLITE_OK ){
  6763         -        rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[2]));
  6764         -        if( rc==SQLITE_OK ){
  6765         -          rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[3]));
  6766         -        }
  6767         -      }
  6768         -
  6769         -      if( rc==SQLITE_OK ){
  6770         -        rc = sqlite3_step(s);
  6771         -
  6772         -        if( rc==SQLITE_DONE ){
  6773         -          dataBufferDestroy(&doclist);
  6774         -          generateError(pContext, "dump_doclist", "segment not found");
  6775         -          return;
  6776         -        }
  6777         -
  6778         -        /* Found a segment, load it into doclist. */
  6779         -        if( rc==SQLITE_ROW ){
  6780         -          const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1);
  6781         -          const char *pData = sqlite3_column_blob(s, 2);
  6782         -          const int nData = sqlite3_column_bytes(s, 2);
  6783         -
  6784         -          /* loadSegment() is used by termSelect() to load each
  6785         -          ** segment's data.
  6786         -          */
  6787         -          rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, 0,
  6788         -                           &doclist);
  6789         -          if( rc==SQLITE_OK ){
  6790         -            rc = sqlite3_step(s);
  6791         -
  6792         -            /* Should not have more than one matching segment. */
  6793         -            if( rc!=SQLITE_DONE ){
  6794         -              sqlite3_reset(s);
  6795         -              dataBufferDestroy(&doclist);
  6796         -              generateError(pContext, "dump_doclist", "invalid segdir");
  6797         -              return;
  6798         -            }
  6799         -            rc = SQLITE_OK;
  6800         -          }
  6801         -        }
  6802         -      }
  6803         -
  6804         -      sqlite3_reset(s);
  6805         -    }
  6806         -
  6807         -    if( rc==SQLITE_OK ){
  6808         -      if( doclist.nData>0 ){
  6809         -        createDoclistResult(pContext, doclist.pData, doclist.nData);
  6810         -      }else{
  6811         -        /* TODO(shess): This can happen if the term is not present, or
  6812         -        ** if all instances of the term have been deleted and this is
  6813         -        ** an all-index dump.  It may be interesting to distinguish
  6814         -        ** these cases.
  6815         -        */
  6816         -        sqlite3_result_text(pContext, "", 0, SQLITE_STATIC);
  6817         -      }
  6818         -    }else if( rc==SQLITE_NOMEM ){
  6819         -      /* Handle out-of-memory cases specially because if they are
  6820         -      ** generated in fts3 code they may not be reflected in the db
  6821         -      ** handle.
  6822         -      */
  6823         -      /* TODO(shess): Handle this more comprehensively.
  6824         -      ** sqlite3ErrStr() has what I need, but is internal.
  6825         -      */
  6826         -      generateError(pContext, "dump_doclist", "out of memory");
  6827         -    }else{
  6828         -      generateError(pContext, "dump_doclist", NULL);
  6829         -    }
  6830         -
  6831         -    dataBufferDestroy(&doclist);
  6832         -  }
  6833         -}
  6834         -#endif
  6835         -
  6836   2021   /*
  6837   2022   ** This routine implements the xFindFunction method for the FTS3
  6838   2023   ** virtual table.
  6839   2024   */
  6840         -static int fulltextFindFunction(
  6841         -  sqlite3_vtab *pVtab,
  6842         -  int nArg,
  6843         -  const char *zName,
  6844         -  void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
  6845         -  void **ppArg
         2025  +static int fts3FindFunctionMethod(
         2026  +  sqlite3_vtab *pVtab,            /* Virtual table handle */
         2027  +  int nArg,                       /* Number of SQL function arguments */
         2028  +  const char *zName,              /* Name of SQL function */
         2029  +  void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
         2030  +  void **ppArg                    /* Unused */
  6846   2031   ){
  6847         -  if( strcmp(zName,"snippet")==0 ){
  6848         -    *pxFunc = snippetFunc;
  6849         -    return 1;
  6850         -  }else if( strcmp(zName,"offsets")==0 ){
  6851         -    *pxFunc = snippetOffsetsFunc;
  6852         -    return 1;
  6853         -  }else if( strcmp(zName,"optimize")==0 ){
  6854         -    *pxFunc = optimizeFunc;
  6855         -    return 1;
  6856         -#ifdef SQLITE_TEST
  6857         -    /* NOTE(shess): These functions are present only for testing
  6858         -    ** purposes.  No particular effort is made to optimize their
  6859         -    ** execution or how they build their results.
  6860         -    */
  6861         -  }else if( strcmp(zName,"dump_terms")==0 ){
  6862         -    /* fprintf(stderr, "Found dump_terms\n"); */
  6863         -    *pxFunc = dumpTermsFunc;
  6864         -    return 1;
  6865         -  }else if( strcmp(zName,"dump_doclist")==0 ){
  6866         -    /* fprintf(stderr, "Found dump_doclist\n"); */
  6867         -    *pxFunc = dumpDoclistFunc;
  6868         -    return 1;
  6869         -#endif
         2032  +  struct Overloaded {
         2033  +    const char *zName;
         2034  +    void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
         2035  +  } aOverload[] = {
         2036  +    { "snippet", fts3SnippetFunc },
         2037  +    { "offsets", fts3OffsetsFunc },
         2038  +    { "optimize", fts3OptimizeFunc },
         2039  +  };
         2040  +  int i;                          /* Iterator variable */
         2041  +  for(i=0; i<SizeofArray(aOverload); i++){
         2042  +    if( strcmp(zName, aOverload[i].zName)==0 ){
         2043  +      *pxFunc = aOverload[i].xFunc;
         2044  +      return 1;
         2045  +    }
  6870   2046     }
         2047  +
         2048  +  /* No function of the specified name was found. Return 0. */
  6871   2049     return 0;
  6872   2050   }
  6873   2051   
  6874   2052   /*
  6875         -** Rename an fts3 table.
         2053  +** Implementation of FTS3 xRename method. Rename an fts3 table.
  6876   2054   */
  6877         -static int fulltextRename(
  6878         -  sqlite3_vtab *pVtab,
  6879         -  const char *zName
         2055  +static int fts3RenameMethod(
         2056  +  sqlite3_vtab *pVtab,            /* Virtual table handle */
         2057  +  const char *zName               /* New name of table */
  6880   2058   ){
  6881         -  fulltext_vtab *p = (fulltext_vtab *)pVtab;
  6882         -  int rc = SQLITE_NOMEM;
  6883         -  char *zSql = sqlite3_mprintf(
         2059  +  Fts3Table *p = (Fts3Table *)pVtab;     
         2060  +  int rc = SQLITE_NOMEM;          /* Return Code */
         2061  +  char *zSql;                     /* SQL script to run to rename tables */
         2062  + 
         2063  +  zSql = sqlite3_mprintf(
  6884   2064       "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';"
  6885   2065       "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';"
  6886   2066       "ALTER TABLE %Q.'%q_segdir'   RENAME TO '%q_segdir';"
  6887   2067       , p->zDb, p->zName, zName 
  6888   2068       , p->zDb, p->zName, zName 
  6889   2069       , p->zDb, p->zName, zName
  6890   2070     );
................................................................................
  6893   2073       sqlite3_free(zSql);
  6894   2074     }
  6895   2075     return rc;
  6896   2076   }
  6897   2077   
  6898   2078   static const sqlite3_module fts3Module = {
  6899   2079     /* iVersion      */ 0,
  6900         -  /* xCreate       */ fulltextCreate,
  6901         -  /* xConnect      */ fulltextConnect,
  6902         -  /* xBestIndex    */ fulltextBestIndex,
  6903         -  /* xDisconnect   */ fulltextDisconnect,
  6904         -  /* xDestroy      */ fulltextDestroy,
  6905         -  /* xOpen         */ fulltextOpen,
         2080  +  /* xCreate       */ fts3CreateMethod,
         2081  +  /* xConnect      */ fts3ConnectMethod,
         2082  +  /* xBestIndex    */ fts3BestIndexMethod,
         2083  +  /* xDisconnect   */ fts3DisconnectMethod,
         2084  +  /* xDestroy      */ fts3DestroyMethod,
         2085  +  /* xOpen         */ fts3OpenMethod,
  6906   2086     /* xClose        */ fulltextClose,
  6907         -  /* xFilter       */ fulltextFilter,
  6908         -  /* xNext         */ fulltextNext,
  6909         -  /* xEof          */ fulltextEof,
         2087  +  /* xFilter       */ fts3FilterMethod,
         2088  +  /* xNext         */ fts3NextMethod,
         2089  +  /* xEof          */ fts3EofMethod,
  6910   2090     /* xColumn       */ fulltextColumn,
  6911         -  /* xRowid        */ fulltextRowid,
  6912         -  /* xUpdate       */ fulltextUpdate,
  6913         -  /* xBegin        */ fulltextBegin,
  6914         -  /* xSync         */ fulltextSync,
  6915         -  /* xCommit       */ fulltextCommit,
  6916         -  /* xRollback     */ fulltextRollback,
  6917         -  /* xFindFunction */ fulltextFindFunction,
  6918         -  /* xRename */       fulltextRename,
         2091  +  /* xRowid        */ fts3RowidMethod,
         2092  +  /* xUpdate       */ fts3UpdateMethod,
         2093  +  /* xBegin        */ fts3BeginMethod,
         2094  +  /* xSync         */ fts3SyncMethod,
         2095  +  /* xCommit       */ fts3CommitMethod,
         2096  +  /* xRollback     */ fts3RollbackMethod,
         2097  +  /* xFindFunction */ fts3FindFunctionMethod,
         2098  +  /* xRename */       fts3RenameMethod,
  6919   2099   };
  6920   2100   
         2101  +/*
         2102  +** This function is registered as the module destructor (called when an
         2103  +** FTS3 enabled database connection is closed). It frees the memory
         2104  +** allocated for the tokenizer hash table.
         2105  +*/
  6921   2106   static void hashDestroy(void *p){
  6922         -  fts3Hash *pHash = (fts3Hash *)p;
         2107  +  Fts3Hash *pHash = (Fts3Hash *)p;
  6923   2108     sqlite3Fts3HashClear(pHash);
  6924   2109     sqlite3_free(pHash);
  6925   2110   }
  6926   2111   
  6927   2112   /*
  6928   2113   ** The fts3 built-in tokenizers - "simple" and "porter" - are implemented
  6929   2114   ** in files fts3_tokenizer1.c and fts3_porter.c respectively. The following
................................................................................
  6935   2120   ** Function ...PorterTokenizerModule() sets *pModule to point to the
  6936   2121   ** porter tokenizer/stemmer implementation.
  6937   2122   */
  6938   2123   void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
  6939   2124   void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
  6940   2125   void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
  6941   2126   
  6942         -int sqlite3Fts3InitHashTable(sqlite3 *, fts3Hash *, const char *);
  6943         -
  6944   2127   /*
  6945   2128   ** Initialise the fts3 extension. If this extension is built as part
  6946   2129   ** of the sqlite library, then this function is called directly by
  6947   2130   ** SQLite. If fts3 is built as a dynamically loadable extension, this
  6948   2131   ** function is called by the sqlite3_extension_init() entry point.
  6949   2132   */
  6950   2133   int sqlite3Fts3Init(sqlite3 *db){
  6951   2134     int rc = SQLITE_OK;
  6952         -  fts3Hash *pHash = 0;
         2135  +  Fts3Hash *pHash = 0;
  6953   2136     const sqlite3_tokenizer_module *pSimple = 0;
  6954   2137     const sqlite3_tokenizer_module *pPorter = 0;
  6955   2138     const sqlite3_tokenizer_module *pIcu = 0;
  6956   2139   
  6957   2140     sqlite3Fts3SimpleTokenizerModule(&pSimple);
  6958   2141     sqlite3Fts3PorterTokenizerModule(&pPorter);
  6959   2142   #ifdef SQLITE_ENABLE_ICU
  6960   2143     sqlite3Fts3IcuTokenizerModule(&pIcu);
  6961   2144   #endif
  6962   2145   
  6963   2146     /* Allocate and initialise the hash-table used to store tokenizers. */
  6964         -  pHash = sqlite3_malloc(sizeof(fts3Hash));
         2147  +  pHash = sqlite3_malloc(sizeof(Fts3Hash));
  6965   2148     if( !pHash ){
  6966   2149       rc = SQLITE_NOMEM;
  6967   2150     }else{
  6968   2151       sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
  6969   2152     }
  6970   2153   
  6971   2154     /* Load the built-in tokenizers into the hash table */
................................................................................
  6985   2168     /* Create the virtual table wrapper around the hash-table and overload 
  6986   2169     ** the three scalar functions. If this is successful, register the
  6987   2170     ** module with sqlite.
  6988   2171     */
  6989   2172     if( SQLITE_OK==rc 
  6990   2173      && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
  6991   2174      && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
  6992         -   && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
  6993         -   && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1))
  6994         -#ifdef SQLITE_TEST
  6995         -   && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1))
  6996         -   && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1))
  6997         -#endif
         2175  +   && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1))
         2176  +   && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1))
  6998   2177     ){
  6999   2178       return sqlite3_create_module_v2(
  7000   2179           db, "fts3", &fts3Module, (void *)pHash, hashDestroy
  7001   2180       );
  7002   2181     }
  7003   2182   
  7004   2183     /* An error has occurred. Delete the hash table and return the error code. */
................................................................................
  7017   2196     const sqlite3_api_routines *pApi
  7018   2197   ){
  7019   2198     SQLITE_EXTENSION_INIT2(pApi)
  7020   2199     return sqlite3Fts3Init(db);
  7021   2200   }
  7022   2201   #endif
  7023   2202   
  7024         -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
         2203  +#endif

Added ext/fts3/fts3Int.h.

            1  +/*
            2  +** 2009 Nov 12
            3  +**
            4  +** The author disclaims copyright to this source code.  In place of
            5  +** a legal notice, here is a blessing:
            6  +**
            7  +**    May you do good and not evil.
            8  +**    May you find forgiveness for yourself and forgive others.
            9  +**    May you share freely, never taking more than you give.
           10  +**
           11  +******************************************************************************
           12  +**
           13  +*/
           14  +
           15  +#ifndef _FTSINT_H
           16  +#define _FTSINT_H
           17  +
           18  +#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 
           19  +# define NDEBUG 1
           20  +#endif
           21  +
           22  +#include <sqlite3.h>
           23  +#include "fts3_tokenizer.h"
           24  +#include "fts3_hash.h"
           25  +
           26  +/*
           27  +** This constant controls how often segments are merged. Once there are
           28  +** FTS3_MERGE_COUNT segments of level N, they are merged into a single
           29  +** segment of level N+1.
           30  +*/
           31  +#define FTS3_MERGE_COUNT 16
           32  +
           33  +/*
           34  +** This is the maximum amount of data (in bytes) to store in the 
           35  +** Fts3Table.pendingTerms hash table. Normally, the hash table is
           36  +** populated as documents are inserted/updated/deleted in a transaction
           37  +** and used to create a new segment when the transaction is committed.
           38  +** However if this limit is reached midway through a transaction, a new 
           39  +** segment is created and the hash table cleared immediately.
           40  +*/
           41  +#define FTS3_MAX_PENDING_DATA (1*1024*1024)
           42  +
           43  +/*
           44  +** Macro to return the number of elements in an array. SQLite has a
           45  +** similar macro called ArraySize(). Use a different name to avoid
           46  +** a collision when building an amalgamation with built-in FTS3.
           47  +*/
           48  +#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0])))
           49  +
           50  +/*
           51  +** Maximum length of a varint encoded integer. The varint format is different
           52  +** from that used by SQLite, so the maximum length is 10, not 9.
           53  +*/
           54  +#define FTS3_VARINT_MAX 10
           55  +
           56  +typedef struct Fts3Table Fts3Table;
           57  +typedef struct Fts3Cursor Fts3Cursor;
           58  +typedef struct Fts3Expr Fts3Expr;
           59  +typedef struct Fts3Phrase Fts3Phrase;
           60  +typedef struct Fts3SegReader Fts3SegReader;
           61  +typedef struct Fts3SegFilter Fts3SegFilter;
           62  +
           63  +/*
           64  +** A connection to a fulltext index is an instance of the following
           65  +** structure. The xCreate and xConnect methods create an instance
           66  +** of this structure and xDestroy and xDisconnect free that instance.
           67  +** All other methods receive a pointer to the structure as one of their
           68  +** arguments.
           69  +*/
           70  +struct Fts3Table {
           71  +  sqlite3_vtab base;               /* Base class used by SQLite core */
           72  +  sqlite3 *db;                     /* The database connection */
           73  +  const char *zDb;                 /* logical database name */
           74  +  const char *zName;               /* virtual table name */
           75  +  int nColumn;                     /* number of columns in virtual table */
           76  +  char **azColumn;                 /* column names.  malloced */
           77  +  sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */
           78  +
           79  +  /* Precompiled statements used by the implementation. Each of these 
           80  +  ** statements is run and reset within a single virtual table API call. 
           81  +  */
           82  +  sqlite3_stmt *aStmt[18];
           83  +
           84  +  /* Pointer to string containing the SQL:
           85  +  **
           86  +  ** "SELECT block FROM %_segments WHERE blockid BETWEEN ? AND ? 
           87  +  **    ORDER BY blockid"
           88  +  */
           89  +  char *zSelectLeaves;
           90  +
           91  +  /* The following hash table is used to buffer pending index updates during
           92  +  ** transactions. Variable nPendingData estimates the memory size of the 
           93  +  ** pending data, including hash table overhead, but not malloc overhead. 
           94  +  ** When nPendingData exceeds FTS3_MAX_PENDING_DATA, the buffer is flushed 
           95  +  ** automatically. Variable iPrevDocid is the docid of the most recently
           96  +  ** inserted record.
           97  +  */
           98  +  int nPendingData;
           99  +  sqlite_int64 iPrevDocid;
          100  +  Fts3Hash pendingTerms;
          101  +};
          102  +
          103  +/*
          104  +** When the core wants to read from the virtual table, it creates a
          105  +** virtual table cursor (an instance of the following structure) using
          106  +** the xOpen method. Cursors are destroyed using the xClose method.
          107  +*/
          108  +struct Fts3Cursor {
          109  +  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
          110  +  int eType;                      /* Search strategy (see below) */
          111  +  sqlite3_stmt *pStmt;            /* Prepared statement in use by the cursor */
          112  +  int isEof;                      /* True if at End Of Results */
          113  +  Fts3Expr *pExpr;                /* Parsed MATCH query string */
          114  +  sqlite3_int64 iPrevId;          /* Previous id read from aDoclist */
          115  +  char *pNextId;                  /* Pointer into the body of aDoclist */
          116  +  char *aDoclist;                 /* List of docids for full-text queries */
          117  +  int nDoclist;                   /* Size of buffer at aDoclist */
          118  +};
          119  +
          120  +/*
          121  +** A "phrase" is a sequence of one or more tokens that must match in
          122  +** sequence.  A single token is the base case and the most common case.
          123  +** For a sequence of tokens contained in "...", nToken will be the number
          124  +** of tokens in the string.
          125  +*/
          126  +struct Fts3Phrase {
          127  +  int nToken;                /* Number of tokens in the phrase */
          128  +  int iColumn;               /* Index of column this phrase must match */
          129  +  int isNot;                 /* Phrase prefixed by unary not (-) operator */
          130  +  struct PhraseToken {
          131  +    char *z;                 /* Text of the token */
          132  +    int n;                   /* Number of bytes in buffer pointed to by z */
           133  +    int isPrefix;            /* True if token ends with a "*" character */
          134  +  } aToken[1];               /* One entry for each token in the phrase */
          135  +};
          136  +
          137  +/*
          138  +** A tree of these objects forms the RHS of a MATCH operator.
          139  +*/
          140  +struct Fts3Expr {
          141  +  int eType;                 /* One of the FTSQUERY_XXX values defined below */
          142  +  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
          143  +  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
          144  +  Fts3Expr *pLeft;           /* Left operand */
          145  +  Fts3Expr *pRight;          /* Right operand */
          146  +  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */
          147  +};
          148  +
          149  +/*
           150  +** Candidate values for Fts3Expr.eType. Note that the order of the first
          151  +** four values is in order of precedence when parsing expressions. For 
          152  +** example, the following:
          153  +**
          154  +**   "a OR b AND c NOT d NEAR e"
          155  +**
          156  +** is equivalent to:
          157  +**
          158  +**   "a OR (b AND (c NOT (d NEAR e)))"
          159  +*/
          160  +#define FTSQUERY_NEAR   1
          161  +#define FTSQUERY_NOT    2
          162  +#define FTSQUERY_AND    3
          163  +#define FTSQUERY_OR     4
          164  +#define FTSQUERY_PHRASE 5
          165  +
          166  +
          167  +/* fts3_init.c */
          168  +int sqlite3Fts3DeleteVtab(int, sqlite3_vtab *);
          169  +int sqlite3Fts3InitVtab(int, sqlite3*, void*, int, const char*const*, 
          170  +                        sqlite3_vtab **, char **);
          171  +
          172  +/* fts3_write.c */
          173  +int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
          174  +int sqlite3Fts3PendingTermsFlush(Fts3Table *);
          175  +void sqlite3Fts3PendingTermsClear(Fts3Table *);
          176  +int sqlite3Fts3Optimize(Fts3Table *);
          177  +int sqlite3Fts3SegReaderNew(Fts3Table *,int, sqlite3_int64,
          178  +  sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
          179  +void sqlite3Fts3SegReaderFree(Fts3SegReader *);
          180  +int sqlite3Fts3SegReaderIterate(
          181  +  Fts3Table *, Fts3SegReader **, int, Fts3SegFilter *,
          182  +  int (*)(Fts3Table *, void *, char *, int, char *, int),  void *
          183  +);
          184  +int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char const**, int*);
          185  +int sqlite3Fts3AllSegdirs(Fts3Table*, sqlite3_stmt **);
          186  +
           187  +/* Flags allowed as part of the 4th argument to sqlite3Fts3SegReaderIterate() */
          188  +#define FTS3_SEGMENT_REQUIRE_POS   0x00000001
          189  +#define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
          190  +#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
          191  +#define FTS3_SEGMENT_PREFIX        0x00000008
          192  +
           193  +/* Type passed as 4th argument to sqlite3Fts3SegReaderIterate() */
          194  +struct Fts3SegFilter {
          195  +  const char *zTerm;
          196  +  int nTerm;
          197  +  int iCol;
          198  +  int flags;
          199  +};
          200  +
          201  +/* fts3.c */
          202  +int sqlite3Fts3PutVarint(char *, sqlite3_int64);
          203  +int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
          204  +int sqlite3Fts3GetVarint32(const char *, int *);
          205  +int sqlite3Fts3VarintLen(sqlite3_uint64);
          206  +void sqlite3Fts3Dequote(char *);
          207  +
          208  +/* fts3_tokenizer.c */
          209  +const char *sqlite3Fts3NextToken(const char *, int *);
          210  +int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
          211  +int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, 
          212  +  const char *, sqlite3_tokenizer **, const char **, char **
          213  +);
          214  +
          215  +/* fts3_snippet.c */
          216  +void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
          217  +void sqlite3Fts3Snippet(sqlite3_context*, Fts3Cursor*, 
          218  +  const char *, const char *, const char *
          219  +);
          220  +
          221  +/* fts3_expr.c */
          222  +int sqlite3Fts3ExprParse(sqlite3_tokenizer *, 
          223  +  char **, int, int, const char *, int, Fts3Expr **
          224  +);
          225  +void sqlite3Fts3ExprFree(Fts3Expr *);
          226  +#ifdef SQLITE_TEST
          227  +void sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
          228  +#endif
          229  +
          230  +#endif /* _FTSINT_H */

Changes to ext/fts3/fts3_expr.c.

     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   ******************************************************************************
    12     12   **
    13     13   ** This module contains code that implements a parser for fts3 query strings
    14     14   ** (the right-hand argument to the MATCH operator). Because the supported 
    15     15   ** syntax is relatively simple, the whole tokenizer/parser system is
    16         -** hand-coded. The public interface to this module is declared in source
    17         -** code file "fts3_expr.h".
           16  +** hand-coded. 
    18     17   */
    19     18   #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
    20     19   
    21     20   /*
    22     21   ** By default, this module parses the legacy syntax that has been 
    23     22   ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
    24     23   ** is defined, then it uses the new syntax. The differences between
................................................................................
    36     35   **     AND operators have a higher precedence than OR.
    37     36   **
    38     37   ** If compiled with SQLITE_TEST defined, then this module exports the
    39     38   ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
    40     39   ** to zero causes the module to use the old syntax. If it is set to 
    41     40   ** non-zero the new syntax is activated. This is so both syntaxes can
    42     41   ** be tested using a single build of testfixture.
           42  +**
           43  +** The following describes the syntax supported by the fts3 MATCH
           44  +** operator in a similar format to that used by the lemon parser
            45  +** generator. This module does not actually use lemon; it uses a
           46  +** custom parser.
           47  +**
           48  +**   query ::= andexpr (OR andexpr)*.
           49  +**
           50  +**   andexpr ::= notexpr (AND? notexpr)*.
           51  +**
           52  +**   notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
           53  +**   notexpr ::= LP query RP.
           54  +**
           55  +**   nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
           56  +**
           57  +**   distance_opt ::= .
           58  +**   distance_opt ::= / INTEGER.
           59  +**
           60  +**   phrase ::= TOKEN.
           61  +**   phrase ::= COLUMN:TOKEN.
           62  +**   phrase ::= "TOKEN TOKEN TOKEN...".
    43     63   */
           64  +
    44     65   #ifdef SQLITE_TEST
    45     66   int sqlite3_fts3_enable_parentheses = 0;
    46     67   #else
    47     68   # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS 
    48     69   #  define sqlite3_fts3_enable_parentheses 1
    49     70   # else
    50     71   #  define sqlite3_fts3_enable_parentheses 0
................................................................................
    52     73   #endif
    53     74   
    54     75   /*
    55     76   ** Default span for NEAR operators.
    56     77   */
    57     78   #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
    58     79   
    59         -#include "fts3_expr.h"
    60         -#include "sqlite3.h"
           80  +#include "fts3Int.h"
    61     81   #include <ctype.h>
    62     82   #include <string.h>
    63     83   #include <assert.h>
    64     84   
    65     85   typedef struct ParseContext ParseContext;
    66     86   struct ParseContext {
    67     87     sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
................................................................................
   350    370         ** parenthesis, a quote character, or EOF. 
   351    371         */
   352    372         cNext = zInput[nKey];
   353    373         if( fts3isspace(cNext) 
   354    374          || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
   355    375         ){
   356    376           pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr));
          377  +        if( !pRet ){
          378  +          return SQLITE_NOMEM;
          379  +        }
   357    380           memset(pRet, 0, sizeof(Fts3Expr));
   358    381           pRet->eType = pKey->eType;
   359    382           pRet->nNear = nNear;
   360    383           *ppExpr = pRet;
   361    384           *pnConsumed = (zInput - z) + nKey;
   362    385           return SQLITE_OK;
   363    386         }

Deleted ext/fts3/fts3_expr.h.

     1         -/*
     2         -** 2008 Nov 28
     3         -**
     4         -** The author disclaims copyright to this source code.  In place of
     5         -** a legal notice, here is a blessing:
     6         -**
     7         -**    May you do good and not evil.
     8         -**    May you find forgiveness for yourself and forgive others.
     9         -**    May you share freely, never taking more than you give.
    10         -**
    11         -******************************************************************************
    12         -**
    13         -*/
    14         -
    15         -#include "fts3_tokenizer.h"
    16         -#include "sqlite3.h"
    17         -
    18         -/*
    19         -** The following describes the syntax supported by the fts3 MATCH
    20         -** operator in a similar format to that used by the lemon parser
    21         -** generator. This module does not use actually lemon, it uses a
    22         -** custom parser.
    23         -**
    24         -**   query ::= andexpr (OR andexpr)*.
    25         -**
    26         -**   andexpr ::= notexpr (AND? notexpr)*.
    27         -**
    28         -**   notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
    29         -**   notexpr ::= LP query RP.
    30         -**
    31         -**   nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
    32         -**
    33         -**   distance_opt ::= .
    34         -**   distance_opt ::= / INTEGER.
    35         -**
    36         -**   phrase ::= TOKEN.
    37         -**   phrase ::= COLUMN:TOKEN.
    38         -**   phrase ::= "TOKEN TOKEN TOKEN...".
    39         -*/
    40         -
    41         -typedef struct Fts3Expr Fts3Expr;
    42         -typedef struct Fts3Phrase Fts3Phrase;
    43         -
    44         -/*
    45         -** A "phrase" is a sequence of one or more tokens that must match in
    46         -** sequence.  A single token is the base case and the most common case.
    47         -** For a sequence of tokens contained in "...", nToken will be the number
    48         -** of tokens in the string.
    49         -*/
    50         -struct Fts3Phrase {
    51         -  int nToken;          /* Number of tokens in the phrase */
    52         -  int iColumn;         /* Index of column this phrase must match */
    53         -  int isNot;           /* Phrase prefixed by unary not (-) operator */
    54         -  struct PhraseToken {
    55         -    char *z;              /* Text of the token */
    56         -    int n;                /* Number of bytes in buffer pointed to by z */
    57         -    int isPrefix;         /* True if token ends with a "*" character */
    58         -  } aToken[1];         /* One entry for each token in the phrase */
    59         -};
    60         -
    61         -/*
    62         -** A tree of these objects forms the RHS of a MATCH operator.
    63         -*/
    64         -struct Fts3Expr {
    65         -  int eType;                 /* One of the FTSQUERY_XXX values defined below */
    66         -  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
    67         -  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
    68         -  Fts3Expr *pLeft;           /* Left operand */
    69         -  Fts3Expr *pRight;          /* Right operand */
    70         -  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */
    71         -};
    72         -
    73         -int sqlite3Fts3ExprParse(sqlite3_tokenizer *, char **, int, int, 
    74         -                         const char *, int, Fts3Expr **);
    75         -void sqlite3Fts3ExprFree(Fts3Expr *);
    76         -
    77         -/*
    78         -** Candidate values for Fts3Expr.eType. Note that the order of the first
    79         -** four values is in order of precedence when parsing expressions. For 
    80         -** example, the following:
    81         -**
    82         -**   "a OR b AND c NOT d NEAR e"
    83         -**
    84         -** is equivalent to:
    85         -**
    86         -**   "a OR (b AND (c NOT (d NEAR e)))"
    87         -*/
    88         -#define FTSQUERY_NEAR   1
    89         -#define FTSQUERY_NOT    2
    90         -#define FTSQUERY_AND    3
    91         -#define FTSQUERY_OR     4
    92         -#define FTSQUERY_PHRASE 5
    93         -
    94         -#ifdef SQLITE_TEST
    95         -void sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
    96         -#endif

Changes to ext/fts3/fts3_hash.c.

    52     52   ** "pNew" is a pointer to the hash table that is to be initialized.
    53     53   ** keyClass is one of the constants 
    54     54   ** FTS3_HASH_BINARY or FTS3_HASH_STRING.  The value of keyClass 
    55     55   ** determines what kind of key the hash table will use.  "copyKey" is
    56     56   ** true if the hash table should make its own private copy of keys and
    57     57   ** false if it should just use the supplied pointer.
    58     58   */
    59         -void sqlite3Fts3HashInit(fts3Hash *pNew, int keyClass, int copyKey){
           59  +void sqlite3Fts3HashInit(Fts3Hash *pNew, int keyClass, int copyKey){
    60     60     assert( pNew!=0 );
    61     61     assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY );
    62     62     pNew->keyClass = keyClass;
    63     63     pNew->copyKey = copyKey;
    64     64     pNew->first = 0;
    65     65     pNew->count = 0;
    66     66     pNew->htsize = 0;
................................................................................
    67     67     pNew->ht = 0;
    68     68   }
    69     69   
    70     70   /* Remove all entries from a hash table.  Reclaim all memory.
    71     71   ** Call this routine to delete a hash table or to reset a hash table
    72     72   ** to the empty state.
    73     73   */
    74         -void sqlite3Fts3HashClear(fts3Hash *pH){
    75         -  fts3HashElem *elem;         /* For looping over all elements of the table */
           74  +void sqlite3Fts3HashClear(Fts3Hash *pH){
           75  +  Fts3HashElem *elem;         /* For looping over all elements of the table */
    76     76   
    77     77     assert( pH!=0 );
    78     78     elem = pH->first;
    79     79     pH->first = 0;
    80     80     fts3HashFree(pH->ht);
    81     81     pH->ht = 0;
    82     82     pH->htsize = 0;
    83     83     while( elem ){
    84         -    fts3HashElem *next_elem = elem->next;
           84  +    Fts3HashElem *next_elem = elem->next;
    85     85       if( pH->copyKey && elem->pKey ){
    86     86         fts3HashFree(elem->pKey);
    87     87       }
    88     88       fts3HashFree(elem);
    89     89       elem = next_elem;
    90     90     }
    91     91     pH->count = 0;
................................................................................
   160    160       return &fts3BinCompare;
   161    161     }
   162    162   }
   163    163   
   164    164   /* Link an element into the hash table
   165    165   */
   166    166   static void fts3HashInsertElement(
   167         -  fts3Hash *pH,            /* The complete hash table */
          167  +  Fts3Hash *pH,            /* The complete hash table */
   168    168     struct _fts3ht *pEntry,  /* The entry into which pNew is inserted */
   169         -  fts3HashElem *pNew       /* The element to be inserted */
          169  +  Fts3HashElem *pNew       /* The element to be inserted */
   170    170   ){
   171         -  fts3HashElem *pHead;     /* First element already in pEntry */
          171  +  Fts3HashElem *pHead;     /* First element already in pEntry */
   172    172     pHead = pEntry->chain;
   173    173     if( pHead ){
   174    174       pNew->next = pHead;
   175    175       pNew->prev = pHead->prev;
   176    176       if( pHead->prev ){ pHead->prev->next = pNew; }
   177    177       else             { pH->first = pNew; }
   178    178       pHead->prev = pNew;
................................................................................
   187    187   }
   188    188   
   189    189   
   190    190   /* Resize the hash table so that it contains "new_size" buckets.
   191    191   ** "new_size" must be a power of 2.  The hash table might fail 
   192    192   ** to resize if sqliteMalloc() fails.
   193    193   */
   194         -static void fts3Rehash(fts3Hash *pH, int new_size){
          194  +static void fts3Rehash(Fts3Hash *pH, int new_size){
   195    195     struct _fts3ht *new_ht;          /* The new hash table */
   196         -  fts3HashElem *elem, *next_elem;  /* For looping over existing elements */
          196  +  Fts3HashElem *elem, *next_elem;  /* For looping over existing elements */
   197    197     int (*xHash)(const void*,int);   /* The hash function */
   198    198   
   199    199     assert( (new_size & (new_size-1))==0 );
   200    200     new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) );
   201    201     if( new_ht==0 ) return;
   202    202     fts3HashFree(pH->ht);
   203    203     pH->ht = new_ht;
................................................................................
   210    210     }
   211    211   }
   212    212   
   213    213   /* This function (for internal use only) locates an element in an
   214    214   ** hash table that matches the given key.  The hash for this key has
   215    215   ** already been computed and is passed as the 4th parameter.
   216    216   */
   217         -static fts3HashElem *fts3FindElementByHash(
   218         -  const fts3Hash *pH, /* The pH to be searched */
          217  +static Fts3HashElem *fts3FindElementByHash(
          218  +  const Fts3Hash *pH, /* The pH to be searched */
   219    219     const void *pKey,   /* The key we are searching for */
   220    220     int nKey,
   221    221     int h               /* The hash for this key. */
   222    222   ){
   223         -  fts3HashElem *elem;            /* Used to loop thru the element list */
          223  +  Fts3HashElem *elem;            /* Used to loop thru the element list */
   224    224     int count;                     /* Number of elements left to test */
   225    225     int (*xCompare)(const void*,int,const void*,int);  /* comparison function */
   226    226   
   227    227     if( pH->ht ){
   228    228       struct _fts3ht *pEntry = &pH->ht[h];
   229    229       elem = pEntry->chain;
   230    230       count = pEntry->count;
................................................................................
   239    239     return 0;
   240    240   }
   241    241   
   242    242   /* Remove a single entry from the hash table given a pointer to that
   243    243   ** element and a hash on the element's key.
   244    244   */
   245    245   static void fts3RemoveElementByHash(
   246         -  fts3Hash *pH,         /* The pH containing "elem" */
   247         -  fts3HashElem* elem,   /* The element to be removed from the pH */
          246  +  Fts3Hash *pH,         /* The pH containing "elem" */
          247  +  Fts3HashElem* elem,   /* The element to be removed from the pH */
   248    248     int h                 /* Hash value for the element */
   249    249   ){
   250    250     struct _fts3ht *pEntry;
   251    251     if( elem->prev ){
   252    252       elem->prev->next = elem->next; 
   253    253     }else{
   254    254       pH->first = elem->next;
................................................................................
   276    276     }
   277    277   }
   278    278   
   279    279   /* Attempt to locate an element of the hash table pH with a key
   280    280   ** that matches pKey,nKey.  Return the data for this element if it is
   281    281   ** found, or NULL if there is no match.
   282    282   */
   283         -void *sqlite3Fts3HashFind(const fts3Hash *pH, const void *pKey, int nKey){
          283  +void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){
   284    284     int h;                 /* A hash on key */
   285         -  fts3HashElem *elem;    /* The element that matches key */
          285  +  Fts3HashElem *elem;    /* The element that matches key */
   286    286     int (*xHash)(const void*,int);  /* The hash function */
   287    287   
   288    288     if( pH==0 || pH->ht==0 ) return 0;
   289    289     xHash = ftsHashFunction(pH->keyClass);
   290    290     assert( xHash!=0 );
   291    291     h = (*xHash)(pKey,nKey);
   292    292     assert( (pH->htsize & (pH->htsize-1))==0 );
................................................................................
   306    306   ** The key is not copied in this instance.  If a malloc fails, then
   307    307   ** the new data is returned and the hash table is unchanged.
   308    308   **
   309    309   ** If the "data" parameter to this function is NULL, then the
   310    310   ** element corresponding to "key" is removed from the hash table.
   311    311   */
   312    312   void *sqlite3Fts3HashInsert(
   313         -  fts3Hash *pH,        /* The hash table to insert into */
          313  +  Fts3Hash *pH,        /* The hash table to insert into */
   314    314     const void *pKey,    /* The key */
   315    315     int nKey,            /* Number of bytes in the key */
   316    316     void *data           /* The data */
   317    317   ){
   318    318     int hraw;                 /* Raw hash value of the key */
   319    319     int h;                    /* the hash of the key modulo hash table size */
   320         -  fts3HashElem *elem;       /* Used to loop thru the element list */
   321         -  fts3HashElem *new_elem;   /* New element added to the pH */
          320  +  Fts3HashElem *elem;       /* Used to loop thru the element list */
          321  +  Fts3HashElem *new_elem;   /* New element added to the pH */
   322    322     int (*xHash)(const void*,int);  /* The hash function */
   323    323   
   324    324     assert( pH!=0 );
   325    325     xHash = ftsHashFunction(pH->keyClass);
   326    326     assert( xHash!=0 );
   327    327     hraw = (*xHash)(pKey, nKey);
   328    328     assert( (pH->htsize & (pH->htsize-1))==0 );
................................................................................
   341    341     if( pH->htsize==0 ){
   342    342       fts3Rehash(pH,8);
   343    343       if( pH->htsize==0 ){
   344    344         pH->count = 0;
   345    345         return data;
   346    346       }
   347    347     }
   348         -  new_elem = (fts3HashElem*)fts3HashMalloc( sizeof(fts3HashElem) );
          348  +  new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) );
   349    349     if( new_elem==0 ) return data;
   350    350     if( pH->copyKey && pKey!=0 ){
   351    351       new_elem->pKey = fts3HashMalloc( nKey );
   352    352       if( new_elem->pKey==0 ){
   353    353         fts3HashFree(new_elem);
   354    354         return data;
   355    355       }

Changes to ext/fts3/fts3_hash.h.

    14     14   ** hash table implementation for the full-text indexing module.
    15     15   **
    16     16   */
    17     17   #ifndef _FTS3_HASH_H_
    18     18   #define _FTS3_HASH_H_
    19     19   
    20     20   /* Forward declarations of structures. */
    21         -typedef struct fts3Hash fts3Hash;
    22         -typedef struct fts3HashElem fts3HashElem;
           21  +typedef struct Fts3Hash Fts3Hash;
           22  +typedef struct Fts3HashElem Fts3HashElem;
    23     23   
    24     24   /* A complete hash table is an instance of the following structure.
    25     25   ** The internals of this structure are intended to be opaque -- client
    26     26   ** code should not attempt to access or modify the fields of this structure
    27     27   ** directly.  Change this structure only by using the routines below.
    28     28   ** However, many of the "procedures" and "functions" for modifying and
    29     29   ** accessing this structure are really macros, so we can't really make
    30     30   ** this structure opaque.
    31     31   */
    32         -struct fts3Hash {
           32  +struct Fts3Hash {
    33     33     char keyClass;          /* FTS3_HASH_STRING or FTS3_HASH_BINARY */
    34     34     char copyKey;           /* True if copy of key made on insert */
    35     35     int count;              /* Number of entries in this table */
    36         -  fts3HashElem *first;    /* The first element of the list */
           36  +  Fts3HashElem *first;    /* The first element of the list */
    37     37     int htsize;             /* Number of buckets in the hash table */
    38     38     struct _fts3ht {        /* the hash table */
    39     39       int count;               /* Number of entries with this hash */
    40         -    fts3HashElem *chain;     /* Pointer to first entry with this hash */
           40  +    Fts3HashElem *chain;     /* Pointer to first entry with this hash */
    41     41     } *ht;
    42     42   };
    43     43   
    44     44   /* Each element in the hash table is an instance of the following 
    45     45   ** structure.  All elements are stored on a single doubly-linked list.
    46     46   **
    47     47   ** Again, this structure is intended to be opaque, but it can't really
    48     48   ** be opaque because it is used by macros.
    49     49   */
    50         -struct fts3HashElem {
    51         -  fts3HashElem *next, *prev; /* Next and previous elements in the table */
           50  +struct Fts3HashElem {
           51  +  Fts3HashElem *next, *prev; /* Next and previous elements in the table */
    52     52     void *data;                /* Data associated with this element */
    53     53     void *pKey; int nKey;      /* Key associated with this element */
    54     54   };
    55     55   
    56     56   /*
    57     57   ** There are 2 different modes of operation for a hash table:
    58     58   **
................................................................................
    67     67   */
    68     68   #define FTS3_HASH_STRING    1
    69     69   #define FTS3_HASH_BINARY    2
    70     70   
    71     71   /*
    72     72   ** Access routines.  To delete, insert a NULL pointer.
    73     73   */
    74         -void sqlite3Fts3HashInit(fts3Hash*, int keytype, int copyKey);
    75         -void *sqlite3Fts3HashInsert(fts3Hash*, const void *pKey, int nKey, void *pData);
    76         -void *sqlite3Fts3HashFind(const fts3Hash*, const void *pKey, int nKey);
    77         -void sqlite3Fts3HashClear(fts3Hash*);
           74  +void sqlite3Fts3HashInit(Fts3Hash*, int keytype, int copyKey);
           75  +void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData);
           76  +void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey);
           77  +void sqlite3Fts3HashClear(Fts3Hash*);
    78     78   
    79     79   /*
    80     80   ** Shorthand for the functions above
    81     81   */
    82     82   #define fts3HashInit   sqlite3Fts3HashInit
    83     83   #define fts3HashInsert sqlite3Fts3HashInsert
    84     84   #define fts3HashFind   sqlite3Fts3HashFind
    85     85   #define fts3HashClear  sqlite3Fts3HashClear
    86     86   
    87     87   /*
    88     88   ** Macros for looping over all elements of a hash table.  The idiom is
    89     89   ** like this:
    90     90   **
    91         -**   fts3Hash h;
    92         -**   fts3HashElem *p;
           91  +**   Fts3Hash h;
           92  +**   Fts3HashElem *p;
    93     93   **   ...
    94     94   **   for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){
    95     95   **     SomeStructure *pData = fts3HashData(p);
    96     96   **     // do something with pData
    97     97   **   }
    98     98   */
    99     99   #define fts3HashFirst(H)  ((H)->first)

Added ext/fts3/fts3_snippet.c.

            1  +/*
            2  +** 2009 Oct 23
            3  +**
            4  +** The author disclaims copyright to this source code.  In place of
            5  +** a legal notice, here is a blessing:
            6  +**
            7  +**    May you do good and not evil.
            8  +**    May you find forgiveness for yourself and forgive others.
            9  +**    May you share freely, never taking more than you give.
           10  +**
           11  +******************************************************************************
           12  +*/
           13  +
           14  +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
           15  +
           16  +#include "fts3Int.h"
           17  +#include <string.h>
           18  +#include <assert.h>
           19  +#include <ctype.h>
           20  +
           21  +typedef struct Snippet Snippet;
           22  +
           23  +/*
           24  +** An instance of the following structure keeps track of generated
           25  +** matching-word offset information and snippets.
           26  +*/
           27  +struct Snippet {
           28  +  int nMatch;                     /* Total number of matches */
           29  +  int nAlloc;                     /* Space allocated for aMatch[] */
           30  +  struct snippetMatch {  /* One entry for each matching term */
           31  +    char snStatus;       /* Status flag for use while constructing snippets */
           32  +    short int iCol;      /* The column that contains the match */
           33  +    short int iTerm;     /* The index in Query.pTerms[] of the matching term */
           34  +    int iToken;          /* The index of the matching document token */
           35  +    short int nByte;     /* Number of bytes in the term */
           36  +    int iStart;          /* The offset to the first character of the term */
           37  +  } *aMatch;                      /* Points to space obtained from malloc */
           38  +  char *zOffset;                  /* Text rendering of aMatch[] */
           39  +  int nOffset;                    /* strlen(zOffset) */
           40  +  char *zSnippet;                 /* Snippet text */
           41  +  int nSnippet;                   /* strlen(zSnippet) */
           42  +};
           43  +
           44  +
           45  +/* It is not safe to call isspace(), tolower(), or isalnum() on
           46  +** hi-bit-set characters.  This is the same solution used in the
           47  +** tokenizer.
           48  +*/
           49  +/* TODO(shess) The snippet-generation code should be using the
           50  +** tokenizer-generated tokens rather than doing its own local
           51  +** tokenization.
           52  +*/
           53  +/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
           54  +static int safe_isspace(char c){
           55  +  return (c&0x80)==0 ? isspace(c) : 0;
           56  +}
           57  +static int safe_isalnum(char c){
           58  +  return (c&0x80)==0 ? isalnum(c) : 0;
           59  +}
           60  +
           61  +/*******************************************************************/
           62  +/* DataBuffer is used to collect data into a buffer in piecemeal
           63  +** fashion.  It implements the usual distinction between amount of
           64  +** data currently stored (nData) and buffer capacity (nCapacity).
           65  +**
           66  +** dataBufferInit - create a buffer with given initial capacity.
           67  +** dataBufferReset - forget buffer's data, retaining capacity.
           68  +** dataBufferSwap - swap contents of two buffers.
           69  +** dataBufferExpand - expand capacity without adding data.
           70  +** dataBufferAppend - append data.
           71  +** dataBufferAppend2 - append two pieces of data at once.
           72  +** dataBufferReplace - replace buffer's data.
           73  +*/
           74  +typedef struct DataBuffer {
           75  +  char *pData;          /* Pointer to malloc'ed buffer. */
           76  +  int nCapacity;        /* Size of pData buffer. */
           77  +  int nData;            /* End of data loaded into pData. */
           78  +} DataBuffer;
           79  +
           80  +static void dataBufferInit(DataBuffer *pBuffer, int nCapacity){
           81  +  assert( nCapacity>=0 );
           82  +  pBuffer->nData = 0;
           83  +  pBuffer->nCapacity = nCapacity;
           84  +  pBuffer->pData = nCapacity==0 ? NULL : sqlite3_malloc(nCapacity);
           85  +}
           86  +static void dataBufferReset(DataBuffer *pBuffer){
           87  +  pBuffer->nData = 0;
           88  +}
           89  +static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){
           90  +  assert( nAddCapacity>0 );
           91  +  /* TODO(shess) Consider expanding more aggressively.  Note that the
           92  +  ** underlying malloc implementation may take care of such things for
           93  +  ** us already.
           94  +  */
           95  +  if( pBuffer->nData+nAddCapacity>pBuffer->nCapacity ){
           96  +    pBuffer->nCapacity = pBuffer->nData+nAddCapacity;
           97  +    pBuffer->pData = sqlite3_realloc(pBuffer->pData, pBuffer->nCapacity);
           98  +  }
           99  +}
          100  +static void dataBufferAppend(DataBuffer *pBuffer,
          101  +                             const char *pSource, int nSource){
          102  +  assert( nSource>0 && pSource!=NULL );
          103  +  dataBufferExpand(pBuffer, nSource);
          104  +  memcpy(pBuffer->pData+pBuffer->nData, pSource, nSource);
          105  +  pBuffer->nData += nSource;
          106  +}
          107  +static void dataBufferAppend2(DataBuffer *pBuffer,
          108  +                              const char *pSource1, int nSource1,
          109  +                              const char *pSource2, int nSource2){
          110  +  assert( nSource1>0 && pSource1!=NULL );
          111  +  assert( nSource2>0 && pSource2!=NULL );
          112  +  dataBufferExpand(pBuffer, nSource1+nSource2);
          113  +  memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1);
          114  +  memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2);
          115  +  pBuffer->nData += nSource1+nSource2;
          116  +}
          117  +static void dataBufferReplace(DataBuffer *pBuffer,
          118  +                              const char *pSource, int nSource){
          119  +  dataBufferReset(pBuffer);
          120  +  dataBufferAppend(pBuffer, pSource, nSource);
          121  +}
          122  +
          123  +
          124  +/* StringBuffer is a null-terminated version of DataBuffer. */
          125  +typedef struct StringBuffer {
          126  +  DataBuffer b;            /* Includes null terminator. */
          127  +} StringBuffer;
          128  +
          129  +static void initStringBuffer(StringBuffer *sb){
          130  +  dataBufferInit(&sb->b, 100);
          131  +  dataBufferReplace(&sb->b, "", 1);
          132  +}
          133  +static int stringBufferLength(StringBuffer *sb){
          134  +  return sb->b.nData-1;
          135  +}
          136  +static char *stringBufferData(StringBuffer *sb){
          137  +  return sb->b.pData;
          138  +}
          139  +
          140  +static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
          141  +  assert( sb->b.nData>0 );
          142  +  if( nFrom>0 ){
          143  +    sb->b.nData--;
          144  +    dataBufferAppend2(&sb->b, zFrom, nFrom, "", 1);
          145  +  }
          146  +}
          147  +static void append(StringBuffer *sb, const char *zFrom){
          148  +  nappend(sb, zFrom, strlen(zFrom));
          149  +}
          150  +
          151  +static int endsInWhiteSpace(StringBuffer *p){
          152  +  return stringBufferLength(p)>0 &&
          153  +    safe_isspace(stringBufferData(p)[stringBufferLength(p)-1]);
          154  +}
          155  +
          156  +/* If the StringBuffer ends in something other than white space, add a
          157  +** single space character to the end.
          158  +*/
          159  +static void appendWhiteSpace(StringBuffer *p){
          160  +  if( stringBufferLength(p)==0 ) return;
          161  +  if( !endsInWhiteSpace(p) ) append(p, " ");
          162  +}
          163  +
          164  +/* Remove white space from the end of the StringBuffer */
          165  +static void trimWhiteSpace(StringBuffer *p){
          166  +  while( endsInWhiteSpace(p) ){
          167  +    p->b.pData[--p->b.nData-1] = '\0';
          168  +  }
          169  +}
          170  +
          171  +
          172  +/* 
          173  +** Release all memory associated with the Snippet structure passed as
          174  +** an argument.
          175  +*/
          176  +static void fts3SnippetFree(Snippet *p){
          177  +  sqlite3_free(p->aMatch);
          178  +  sqlite3_free(p->zOffset);
          179  +  sqlite3_free(p->zSnippet);
          180  +  sqlite3_free(p);
          181  +}
          182  +
          183  +/*
          184  +** Append a single entry to the p->aMatch[] log.
          185  +*/
          186  +static void snippetAppendMatch(
          187  +  Snippet *p,               /* Append the entry to this snippet */
          188  +  int iCol, int iTerm,      /* The column and query term */
          189  +  int iToken,               /* Matching token in document */
          190  +  int iStart, int nByte     /* Offset and size of the match */
          191  +){
          192  +  int i;
          193  +  struct snippetMatch *pMatch;
          194  +  if( p->nMatch+1>=p->nAlloc ){
          195  +    p->nAlloc = p->nAlloc*2 + 10;
          196  +    p->aMatch = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
          197  +    if( p->aMatch==0 ){
          198  +      p->nMatch = 0;
          199  +      p->nAlloc = 0;
          200  +      return;
          201  +    }
          202  +  }
          203  +  i = p->nMatch++;
          204  +  pMatch = &p->aMatch[i];
          205  +  pMatch->iCol = iCol;
          206  +  pMatch->iTerm = iTerm;
          207  +  pMatch->iToken = iToken;
          208  +  pMatch->iStart = iStart;
          209  +  pMatch->nByte = nByte;
          210  +}
          211  +
          212  +/*
          213  +** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
          214  +*/
          215  +#define FTS3_ROTOR_SZ   (32)
          216  +#define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1)
          217  +
          218  +/*
          219  +** Function to iterate through the tokens of a compiled expression.
          220  +**
          221  +** Except, skip all tokens on the right-hand side of a NOT operator.
          222  +** This function is used to find tokens as part of snippet and offset
          223  +** generation and we do not want snippets and offsets to report matches
          224  +** for tokens on the RHS of a NOT.
          225  +*/
          226  +static int fts3NextExprToken(Fts3Expr **ppExpr, int *piToken){
          227  +  Fts3Expr *p = *ppExpr;
          228  +  int iToken = *piToken;
          229  +  if( iToken<0 ){
          230  +    /* In this case the expression p is the root of an expression tree.
          231  +    ** Move to the first token in the expression tree.
          232  +    */
          233  +    while( p->pLeft ){
          234  +      p = p->pLeft;
          235  +    }
          236  +    iToken = 0;
          237  +  }else{
          238  +    assert(p && p->eType==FTSQUERY_PHRASE );
          239  +    if( iToken<(p->pPhrase->nToken-1) ){
          240  +      iToken++;
          241  +    }else{
          242  +      iToken = 0;
          243  +      while( p->pParent && p->pParent->pLeft!=p ){
          244  +        assert( p->pParent->pRight==p );
          245  +        p = p->pParent;
          246  +      }
          247  +      p = p->pParent;
          248  +      if( p ){
          249  +        assert( p->pRight!=0 );
          250  +        p = p->pRight;
          251  +        while( p->pLeft ){
          252  +          p = p->pLeft;
          253  +        }
          254  +      }
          255  +    }
          256  +  }
          257  +
          258  +  *ppExpr = p;
          259  +  *piToken = iToken;
          260  +  return p?1:0;
          261  +}
          262  +
          263  +/*
          264  +** Return TRUE if the expression node p is located beneath the
          265  +** RHS of a NOT operator.
          266  +*/
          267  +static int fts3ExprBeneathNot(Fts3Expr *p){
          268  +  Fts3Expr *pParent;
          269  +  while( p ){
          270  +    pParent = p->pParent;
          271  +    if( pParent && pParent->eType==FTSQUERY_NOT && pParent->pRight==p ){
          272  +      return 1;
          273  +    }
          274  +    p = pParent;
          275  +  }
          276  +  return 0;
          277  +}
          278  +
          279  +/*
          280  +** Add entries to pSnippet->aMatch[] for every match that occurs against
          281  +** document zDoc[0..nDoc-1] which is stored in column iColumn.
          282  +*/
          283  +static void snippetOffsetsOfColumn(
          284  +  Fts3Cursor *pCur,         /* The fulltext search cursor */
          285  +  Snippet *pSnippet,             /* The Snippet object to be filled in */
          286  +  int iColumn,                   /* Index of fulltext table column */
          287  +  const char *zDoc,              /* Text of the fulltext table column */
          288  +  int nDoc                       /* Length of zDoc in bytes */
          289  +){
          290  +  const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
          291  +  sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
          292  +  sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
          293  +  Fts3Table *pVtab;                /* The full text index */
          294  +  int nColumn;                         /* Number of columns in the index */
          295  +  int i, j;                            /* Loop counters */
          296  +  int rc;                              /* Return code */
          297  +  unsigned int match, prevMatch;       /* Phrase search bitmasks */
          298  +  const char *zToken;                  /* Next token from the tokenizer */
          299  +  int nToken;                          /* Size of zToken */
          300  +  int iBegin, iEnd, iPos;              /* Offsets of beginning and end */
          301  +
          302  +  /* The following variables keep a circular buffer of the last
          303  +  ** few tokens */
          304  +  unsigned int iRotor = 0;             /* Index of current token */
          305  +  int iRotorBegin[FTS3_ROTOR_SZ];      /* Beginning offset of token */
          306  +  int iRotorLen[FTS3_ROTOR_SZ];        /* Length of token */
          307  +
          308  +  pVtab =  (Fts3Table *)pCur->base.pVtab;
          309  +  nColumn = pVtab->nColumn;
          310  +  pTokenizer = pVtab->pTokenizer;
          311  +  pTModule = pTokenizer->pModule;
          312  +  rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
          313  +  if( rc ) return;
          314  +  pTCursor->pTokenizer = pTokenizer;
          315  +
          316  +  prevMatch = 0;
          317  +  while( !pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos) ){
          318  +    Fts3Expr *pIter = pCur->pExpr;
          319  +    int iIter = -1;
          320  +    iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
          321  +    iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
          322  +    match = 0;
          323  +    for(i=0; i<(FTS3_ROTOR_SZ-1) && fts3NextExprToken(&pIter, &iIter); i++){
          324  +      int nPhrase;                    /* Number of tokens in current phrase */
          325  +      struct PhraseToken *pToken;     /* Current token */
          326  +      int iCol;                       /* Column index */
          327  +
          328  +      if( fts3ExprBeneathNot(pIter) ) continue;
          329  +      nPhrase = pIter->pPhrase->nToken;
          330  +      pToken = &pIter->pPhrase->aToken[iIter];
          331  +      iCol = pIter->pPhrase->iColumn;
          332  +      if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
          333  +      if( pToken->n>nToken ) continue;
          334  +      if( !pToken->isPrefix && pToken->n<nToken ) continue;
          335  +      assert( pToken->n<=nToken );
          336  +      if( memcmp(pToken->z, zToken, pToken->n) ) continue;
          337  +      if( iIter>0 && (prevMatch & (1<<i))==0 ) continue;
          338  +      match |= 1<<i;
          339  +      if( i==(FTS3_ROTOR_SZ-2) || nPhrase==iIter+1 ){
          340  +        for(j=nPhrase-1; j>=0; j--){
          341  +          int k = (iRotor-j) & FTS3_ROTOR_MASK;
          342  +          snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j,
          343  +                iRotorBegin[k], iRotorLen[k]);
          344  +        }
          345  +      }
          346  +    }
          347  +    prevMatch = match<<1;
          348  +    iRotor++;
          349  +  }
          350  +  pTModule->xClose(pTCursor);  
          351  +}
          352  +
/*
** Remove entries from the pSnippet structure to account for the NEAR
** operator. When this is called, pSnippet contains the list of token 
** offsets produced by treating all NEAR operators as AND operators.
** This function removes any entries that should not be present after
** accounting for the NEAR restriction. For example, if the queried
** document is:
**
**     "A B C D E A"
**
** and the query is:
** 
**     A NEAR/0 E
**
** then when this function is called the Snippet contains token offsets
** 0, 4 and 5. This function removes the "0" entry (because the first A
** is not near enough to an E).
**
** Entries are not physically deleted. A removed entry has its iTerm
** field set to -2.
**
** When this function is called, the value pointed to by parameter piLeft is
** the integer id of the left-most token in the expression tree headed by
** pExpr. This function increments *piLeft by the total number of tokens
** in the expression tree headed by pExpr.
**
** Return 1 if any trimming occurs.  Return 0 if no trimming is required.
** The function returns as soon as one group of entries has been trimmed,
** so the caller is expected to call it repeatedly (resetting *piLeft each
** time) until it returns 0.
*/
static int trimSnippetOffsets(
  Fts3Expr *pExpr,      /* The search expression */
  Snippet *pSnippet,    /* The set of snippet offsets to be trimmed */
  int *piLeft           /* Index of left-most token in pExpr */
){
  if( pExpr ){
    /* In-order traversal: left subtree, this node, then right subtree */
    if( trimSnippetOffsets(pExpr->pLeft, pSnippet, piLeft) ){
      return 1;
    }

    /* Node types other than PHRASE and NEAR contribute nothing here */
    switch( pExpr->eType ){
      case FTSQUERY_PHRASE:
        *piLeft += pExpr->pPhrase->nToken;
        break;
      case FTSQUERY_NEAR: {
        /* The right-hand-side of a NEAR operator is always a phrase. The
        ** left-hand-side is either a phrase or an expression tree that is 
        ** itself headed by a NEAR operator. The following initializations
        ** set local variable iLeft to the token number of the left-most
        ** token in the right-hand phrase, and nToken to the combined number
        ** of tokens in the two phrases on either side of the NEAR operator.
        ** For example, if we had:
        **
        **     <col> MATCH '"abc def" NEAR/2 "ghi jkl"'
        **
        ** then iLeft will be set to 2 (token number of ghi) and nToken will
        ** be set to 4.
        */
        Fts3Expr *pLeft = pExpr->pLeft;
        Fts3Expr *pRight = pExpr->pRight;
        int iLeft = *piLeft;
        int nNear = pExpr->nNear;
        int nToken = pRight->pPhrase->nToken;
        int jj, ii;
        if( pLeft->eType==FTSQUERY_NEAR ){
          /* For chained NEARs, compare against the nearest phrase on the left */
          pLeft = pLeft->pRight;
        }
        assert( pRight->eType==FTSQUERY_PHRASE );
        assert( pLeft->eType==FTSQUERY_PHRASE );
        nToken += pLeft->pPhrase->nToken;

        for(ii=0; ii<pSnippet->nMatch; ii++){
          struct snippetMatch *p = &pSnippet->aMatch[ii];
          if( p->iTerm==iLeft ){
            int isOk = 0;
            /* Snippet ii is an occurrence of query term iLeft in the document.
            ** It occurs at position (p->iToken) of the document. We now
            ** search for an instance of token (iLeft-1) at a position that
            ** is at least (p->iToken - nNear - 1) and less than
            ** (p->iToken + nNear + nToken) within 
            ** the set of snippetMatch structures. If one is found, proceed. 
            ** If one cannot be found, then remove snippets ii..(ii+N-1) 
            ** from the matching snippets, where N is the number of tokens 
            ** in phrase pRight->pPhrase.
            */
            for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
              struct snippetMatch *p2 = &pSnippet->aMatch[jj];
              if( p2->iTerm==(iLeft-1) ){
                if( p2->iToken>=(p->iToken-nNear-1) 
                 && p2->iToken<(p->iToken+nNear+nToken) 
                ){
                  isOk = 1;
                }
              }
            }
            if( !isOk ){
              int kk;
              /* NOTE(review): assumes the entries for the remaining tokens
              ** of the right-hand phrase immediately follow entry ii in
              ** aMatch[]; there is no bounds check here -- confirm. */
              for(kk=0; kk<pRight->pPhrase->nToken; kk++){
                pSnippet->aMatch[kk+ii].iTerm = -2;
              }
              return 1;
            }
          }
          if( p->iTerm==(iLeft-1) ){
            /* Mirror image of the test above: entry ii is the last token
            ** of the left-hand phrase, so look for a nearby occurrence of
            ** the first token of the right-hand phrase. */
            int isOk = 0;
            for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
              struct snippetMatch *p2 = &pSnippet->aMatch[jj];
              if( p2->iTerm==iLeft ){
                if( p2->iToken<=(p->iToken+nNear+1) 
                 && p2->iToken>(p->iToken-nNear-nToken) 
                ){
                  isOk = 1;
                }
              }
            }
            if( !isOk ){
              int kk;
              /* NOTE(review): assumes the entries for the earlier tokens
              ** of the left-hand phrase immediately precede entry ii in
              ** aMatch[]; there is no bounds check here -- confirm. */
              for(kk=0; kk<pLeft->pPhrase->nToken; kk++){
                pSnippet->aMatch[ii-kk].iTerm = -2;
              }
              return 1;
            }
          }
        }
        break;
      }
    }

    if( trimSnippetOffsets(pExpr->pRight, pSnippet, piLeft) ){
      return 1;
    }
  }
  return 0;
}
          480  +
          481  +/*
          482  +** Compute all offsets for the current row of the query.  
          483  +** If the offsets have already been computed, this routine is a no-op.
          484  +*/
          485  +static int snippetAllOffsets(Fts3Cursor *pCsr, Snippet **ppSnippet){
          486  +  Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
          487  +  int nColumn;
          488  +  int iColumn, i;
          489  +  int iFirst, iLast;
          490  +  int iTerm = 0;
          491  +  Snippet *pSnippet;
          492  +
          493  +  if( pCsr->pExpr==0 ){
          494  +    return SQLITE_OK;
          495  +  }
          496  +
          497  +  pSnippet = (Snippet *)sqlite3_malloc(sizeof(Snippet));
          498  +  *ppSnippet = pSnippet;
          499  +  if( !pSnippet ){
          500  +    return SQLITE_NOMEM;
          501  +  }
          502  +  memset(pSnippet, 0, sizeof(Snippet));
          503  +
          504  +  nColumn = p->nColumn;
          505  +  iColumn = (pCsr->eType - 2);
          506  +  if( iColumn<0 || iColumn>=nColumn ){
          507  +    /* Look for matches over all columns of the full-text index */
          508  +    iFirst = 0;
          509  +    iLast = nColumn-1;
          510  +  }else{
          511  +    /* Look for matches in the iColumn-th column of the index only */
          512  +    iFirst = iColumn;
          513  +    iLast = iColumn;
          514  +  }
          515  +  for(i=iFirst; i<=iLast; i++){
          516  +    const char *zDoc;
          517  +    int nDoc;
          518  +    zDoc = (const char*)sqlite3_column_text(pCsr->pStmt, i+1);
          519  +    nDoc = sqlite3_column_bytes(pCsr->pStmt, i+1);
          520  +    snippetOffsetsOfColumn(pCsr, pSnippet, i, zDoc, nDoc);
          521  +  }
          522  +
          523  +  while( trimSnippetOffsets(pCsr->pExpr, pSnippet, &iTerm) ){
          524  +    iTerm = 0;
          525  +  }
          526  +
          527  +  return SQLITE_OK;
          528  +}
          529  +
          530  +/*
          531  +** Convert the information in the aMatch[] array of the snippet
          532  +** into the string zOffset[0..nOffset-1]. This string is used as
          533  +** the return of the SQL offsets() function.
          534  +*/
          535  +static void snippetOffsetText(Snippet *p){
          536  +  int i;
          537  +  int cnt = 0;
          538  +  StringBuffer sb;
          539  +  char zBuf[200];
          540  +  if( p->zOffset ) return;
          541  +  initStringBuffer(&sb);
          542  +  for(i=0; i<p->nMatch; i++){
          543  +    struct snippetMatch *pMatch = &p->aMatch[i];
          544  +    if( pMatch->iTerm>=0 ){
          545  +      /* If snippetMatch.iTerm is less than 0, then the match was 
          546  +      ** discarded as part of processing the NEAR operator (see the 
          547  +      ** trimSnippetOffsetsForNear() function for details). Ignore 
          548  +      ** it in this case
          549  +      */
          550  +      zBuf[0] = ' ';
          551  +      sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
          552  +          pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
          553  +      append(&sb, zBuf);
          554  +      cnt++;
          555  +    }
          556  +  }
          557  +  p->zOffset = stringBufferData(&sb);
          558  +  p->nOffset = stringBufferLength(&sb);
          559  +}
          560  +
          561  +/*
          562  +** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set
          563  +** of matching words some of which might be in zDoc.  zDoc is column
          564  +** number iCol.
          565  +**
          566  +** iBreak is suggested spot in zDoc where we could begin or end an
          567  +** excerpt.  Return a value similar to iBreak but possibly adjusted
          568  +** to be a little left or right so that the break point is better.
          569  +*/
          570  +static int wordBoundary(
          571  +  int iBreak,                   /* The suggested break point */
          572  +  const char *zDoc,             /* Document text */
          573  +  int nDoc,                     /* Number of bytes in zDoc[] */
          574  +  struct snippetMatch *aMatch,  /* Matching words */
          575  +  int nMatch,                   /* Number of entries in aMatch[] */
          576  +  int iCol                      /* The column number for zDoc[] */
          577  +){
          578  +  int i;
          579  +  if( iBreak<=10 ){
          580  +    return 0;
          581  +  }
          582  +  if( iBreak>=nDoc-10 ){
          583  +    return nDoc;
          584  +  }
          585  +  for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
          586  +  while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
          587  +  if( i<nMatch ){
          588  +    if( aMatch[i].iStart<iBreak+10 ){
          589  +      return aMatch[i].iStart;
          590  +    }
          591  +    if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
          592  +      return aMatch[i-1].iStart;
          593  +    }
          594  +  }
          595  +  for(i=1; i<=10; i++){
          596  +    if( safe_isspace(zDoc[iBreak-i]) ){
          597  +      return iBreak - i + 1;
          598  +    }
          599  +    if( safe_isspace(zDoc[iBreak+i]) ){
          600  +      return iBreak + i + 1;
          601  +    }
          602  +  }
          603  +  return iBreak;
          604  +}
          605  +
          606  +
          607  +
/*
** Allowed values for Snippet.aMatch[].snStatus
**
** snippetText() resets every entry to SNIPPET_IGNORE and then marks the
** first entry found for each term number as SNIPPET_DESIRED.
*/
#define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
#define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */
          613  +
/*
** Generate the text of a snippet.
**
** Any previous pSnippet->zSnippet is freed. The new text is assembled
** from excerpts of the column text surrounding the matches recorded in
** pSnippet->aMatch[], and is stored in pSnippet->zSnippet with its length
** in pSnippet->nSnippet.
**
** Within each excerpt every match is wrapped in zStartMark/zEndMark.
** zEllipsis is inserted where an excerpt does not continue directly from
** the previous one, and after the final excerpt when that excerpt stops
** short of the end of its column.
*/
static void snippetText(
  Fts3Cursor *pCursor,   /* The cursor we need the snippet for */
  Snippet *pSnippet,          /* Matches to render; receives the result */
  const char *zStartMark,     /* Markup to appear before each match */
  const char *zEndMark,       /* Markup to appear after each match */
  const char *zEllipsis       /* Ellipsis mark */
){
  int i, j;                   /* Loop counters */
  struct snippetMatch *aMatch;  /* Shorthand for pSnippet->aMatch */
  int nMatch;                 /* Shorthand for pSnippet->nMatch */
  int nDesired;               /* Number of SNIPPET_DESIRED matches left */
  StringBuffer sb;            /* Accumulates the snippet text */
  int tailCol;                /* Column of the previous excerpt, or -1 */
  int tailOffset;             /* Offset at which the previous excerpt ended */
  int iCol;                   /* Column of the current match */
  int nDoc;                   /* Number of bytes in zDoc */
  const char *zDoc;           /* Text of column iCol */
  int iStart, iEnd;           /* Byte range of the current excerpt */
  int tailEllipsis = 0;       /* True if the last excerpt was cut short */
  int iMatch;                 /* Next aMatch[] entry to consider marking up */
  

  sqlite3_free(pSnippet->zSnippet);
  pSnippet->zSnippet = 0;
  aMatch = pSnippet->aMatch;
  nMatch = pSnippet->nMatch;
  initStringBuffer(&sb);

  /* Mark the first match found for each term number 0..FTS3_ROTOR_SZ-1 as
  ** one that the snippet should try to show. */
  for(i=0; i<nMatch; i++){
    aMatch[i].snStatus = SNIPPET_IGNORE;
  }
  nDesired = 0;
  for(i=0; i<FTS3_ROTOR_SZ; i++){
    for(j=0; j<nMatch; j++){
      if( aMatch[j].iTerm==i ){
        aMatch[j].snStatus = SNIPPET_DESIRED;
        nDesired++;
        break;
      }
    }
  }

  iMatch = 0;
  tailCol = -1;
  tailOffset = 0;
  for(i=0; i<nMatch && nDesired>0; i++){
    if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
    nDesired--;
    iCol = aMatch[i].iCol;
    zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
    nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);

    /* The excerpt nominally starts 40 bytes before the match, adjusted to
    ** a nearby word boundary. */
    iStart = aMatch[i].iStart - 40;
    iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
    if( iStart<=10 ){
      iStart = 0;
    }

    /* If this excerpt would begin at or before a point 20 bytes past the
    ** end of the previous excerpt in the same column, continue from where
    ** that excerpt ended. */
    if( iCol==tailCol && iStart<=tailOffset+20 ){
      iStart = tailOffset;
    }

    /* Separate this excerpt from what has been written so far with an
    ** ellipsis, unless it starts exactly at tailOffset and no earlier
    ** excerpt came from a different column. */
    if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
      trimWhiteSpace(&sb);
      appendWhiteSpace(&sb);
      append(&sb, zEllipsis);
      appendWhiteSpace(&sb);
    }

    /* The excerpt nominally ends 40 bytes after the match */
    iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
    iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
    if( iEnd>=nDoc-10 ){
      iEnd = nDoc;
      tailEllipsis = 0;
    }else{
      tailEllipsis = 1;
    }

    /* Copy zDoc[iStart..iEnd-1] to the output, wrapping each match that
    ** starts inside that range in the start and end marks. */
    while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
    while( iStart<iEnd ){
      while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
             && aMatch[iMatch].iCol<=iCol ){
        iMatch++;
      }
      if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
             && aMatch[iMatch].iCol==iCol ){
        nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
        iStart = aMatch[iMatch].iStart;
        append(&sb, zStartMark);
        nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
        append(&sb, zEndMark);
        iStart += aMatch[iMatch].nByte;

        /* This term has now been shown, so later matches of the same term
        ** no longer need an excerpt of their own. */
        for(j=iMatch+1; j<nMatch; j++){
          if( aMatch[j].iTerm==aMatch[iMatch].iTerm
              && aMatch[j].snStatus==SNIPPET_DESIRED ){
            nDesired--;
            aMatch[j].snStatus = SNIPPET_IGNORE;
          }
        }
      }else{
        /* No further match in range: copy the rest of the excerpt */
        nappend(&sb, &zDoc[iStart], iEnd - iStart);
        iStart = iEnd;
      }
    }
    tailCol = iCol;
    tailOffset = iEnd;
  }
  trimWhiteSpace(&sb);
  if( tailEllipsis ){
    appendWhiteSpace(&sb);
    append(&sb, zEllipsis);
  }
  pSnippet->zSnippet = stringBufferData(&sb);
  pSnippet->nSnippet = stringBufferLength(&sb);
}
          727  +
          728  +void sqlite3Fts3Offsets(
          729  +  sqlite3_context *pCtx,          /* SQLite function call context */
          730  +  Fts3Cursor *pCsr                /* Cursor object */
          731  +){
          732  +  Snippet *p;                     /* Snippet structure */
          733  +  int rc = snippetAllOffsets(pCsr, &p);
          734  +  snippetOffsetText(p);
          735  +  sqlite3_result_text(pCtx, p->zOffset, p->nOffset, SQLITE_TRANSIENT);
          736  +  fts3SnippetFree(p);
          737  +}
          738  +
          739  +void sqlite3Fts3Snippet(
          740  +  sqlite3_context *pCtx,          /* SQLite function call context */
          741  +  Fts3Cursor *pCsr,               /* Cursor object */
          742  +  const char *zStart,             /* Snippet start text - "<b>" */
          743  +  const char *zEnd,               /* Snippet end text - "</b>" */
          744  +  const char *zEllipsis           /* Snippet ellipsis text - "<b>...</b>" */
          745  +){
          746  +  Snippet *p;                     /* Snippet structure */
          747  +  int rc = snippetAllOffsets(pCsr, &p);
          748  +  snippetText(pCsr, p, zStart, zEnd, zEllipsis);
          749  +  sqlite3_result_text(pCtx, p->zSnippet, p->nSnippet, SQLITE_TRANSIENT);
          750  +  fts3SnippetFree(p);
          751  +}
          752  +
          753  +#endif

Changes to ext/fts3/fts3_tokenizer.c.

    26     26   #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
    27     27   
    28     28   #include "sqlite3ext.h"
    29     29   #ifndef SQLITE_CORE
    30     30     SQLITE_EXTENSION_INIT1
    31     31   #endif
    32     32   
    33         -#include "fts3_hash.h"
    34         -#include "fts3_tokenizer.h"
           33  +#include "fts3Int.h"
    35     34   #include <assert.h>
           35  +#include <ctype.h>
           36  +#include <string.h>
    36     37   
    37     38   /*
    38     39   ** Implementation of the SQL scalar function for accessing the underlying 
    39     40   ** hash table. This function may be called as follows:
    40     41   **
    41     42   **   SELECT <function-name>(<key-name>);
    42     43   **   SELECT <function-name>(<key-name>, <pointer>);
................................................................................
    55     56   ** to string <key-name> (after the hash-table is updated, if applicable).
    56     57   */
    57     58   static void scalarFunc(
    58     59     sqlite3_context *context,
    59     60     int argc,
    60     61     sqlite3_value **argv
    61     62   ){
    62         -  fts3Hash *pHash;
           63  +  Fts3Hash *pHash;
    63     64     void *pPtr = 0;
    64     65     const unsigned char *zName;
    65     66     int nName;
    66     67   
    67     68     assert( argc==1 || argc==2 );
    68     69   
    69         -  pHash = (fts3Hash *)sqlite3_user_data(context);
           70  +  pHash = (Fts3Hash *)sqlite3_user_data(context);
    70     71   
    71     72     zName = sqlite3_value_text(argv[0]);
    72     73     nName = sqlite3_value_bytes(argv[0])+1;
    73     74   
    74     75     if( argc==2 ){
    75     76       void *pOld;
    76     77       int n = sqlite3_value_bytes(argv[1]);
................................................................................
    92     93         sqlite3_free(zErr);
    93     94         return;
    94     95       }
    95     96     }
    96     97   
    97     98     sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
    98     99   }
          100  +
          101  +static int fts3IsIdChar(char c){
          102  +  static const char isFtsIdChar[] = {
          103  +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x */
          104  +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 1x */
          105  +      0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
          106  +      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
          107  +      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
          108  +      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
          109  +      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
          110  +      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
          111  +  };
          112  +  return (c&0x80 || isFtsIdChar[(int)(c)]);
          113  +}
          114  +
          115  +const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
          116  +  const char *z1;
          117  +  const char *z2 = 0;
          118  +
          119  +  /* Find the start of the next token. */
          120  +  z1 = zStr;
          121  +  while( z2==0 ){
          122  +    switch( *z1 ){
          123  +      case '\0': return 0;        /* No more tokens here */
          124  +      case '\'':
          125  +      case '"':
          126  +      case '`': {
          127  +        z2 = &z1[1];
          128  +        while( *z2 && (z2[0]!=*z1 || z2[1]==*z1) ) z2++;
          129  +        if( *z2 ) z2++;
          130  +        break;
          131  +      }
          132  +      case '[':
          133  +        z2 = &z1[1];
          134  +        while( *z2 && z2[0]!=']' ) z2++;
          135  +        if( *z2 ) z2++;
          136  +        break;
          137  +
          138  +      default:
          139  +        if( fts3IsIdChar(*z1) ){
          140  +          z2 = &z1[1];
          141  +          while( fts3IsIdChar(*z2) ) z2++;
          142  +        }else{
          143  +          z1++;
          144  +        }
          145  +    }
          146  +  }
          147  +
          148  +  *pn = (z2-z1);
          149  +  return z1;
          150  +}
          151  +
          152  +int sqlite3Fts3InitTokenizer(
          153  +  Fts3Hash *pHash,                /* Tokenizer hash table */
          154  +  const char *zArg,               /* Possible tokenizer specification */
          155  +  sqlite3_tokenizer **ppTok,      /* OUT: Tokenizer (if applicable) */
          156  +  const char **pzTokenizer,       /* OUT: Set to zArg if is tokenizer */
          157  +  char **pzErr                    /* OUT: Set to malloced error message */
          158  +){
          159  +  int rc;
          160  +  char *z = (char *)zArg;
          161  +  int n;
          162  +  char *zCopy;
          163  +  char *zEnd;                     /* Pointer to nul-term of zCopy */
          164  +  sqlite3_tokenizer_module *m;
          165  +
          166  +  if( !z ){
          167  +    zCopy = sqlite3_mprintf("simple");
          168  +  }else{
          169  +    while( (*z&0x80) && isspace(*z) ) z++;
          170  +    if( sqlite3_strnicmp(z, "tokenize", 8) || fts3IsIdChar(z[8])){
          171  +      return SQLITE_OK;
          172  +    }
          173  +    zCopy = sqlite3_mprintf("%s", &z[8]);
          174  +    *pzTokenizer = zArg;
          175  +  }
          176  +  if( !zCopy ){
          177  +    return SQLITE_NOMEM;
          178  +  }
          179  +
          180  +  zEnd = &zCopy[strlen(zCopy)];
          181  +
          182  +  z = (char *)sqlite3Fts3NextToken(zCopy, &n);
          183  +  z[n] = '\0';
          184  +  sqlite3Fts3Dequote(z);
          185  +
          186  +  m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, z, strlen(z)+1);
          187  +  if( !m ){
          188  +    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", z);
          189  +    rc = SQLITE_ERROR;
          190  +  }else{
          191  +    char const **aArg = 0;
          192  +    int iArg = 0;
          193  +    z = &z[n+1];
          194  +    while( z<zEnd && (z = (char *)sqlite3Fts3NextToken(z, &n)) ){
          195  +      int nNew = sizeof(char *)*(iArg+1);
          196  +      char const **aNew = (const char **)sqlite3_realloc(aArg, nNew);
          197  +      if( !aNew ){
          198  +        sqlite3_free(zCopy);
          199  +        sqlite3_free(aArg);
          200  +        return SQLITE_NOMEM;
          201  +      }
          202  +      aArg = aNew;
          203  +      aArg[iArg++] = z;
          204  +      z[n] = '\0';
          205  +      sqlite3Fts3Dequote(z);
          206  +      z = &z[n+1];
          207  +    }
          208  +    rc = m->xCreate(iArg, aArg, ppTok);
          209  +    assert( rc!=SQLITE_OK || *ppTok );
          210  +    if( rc!=SQLITE_OK ){
          211  +      *pzErr = sqlite3_mprintf("unknown tokenizer: %s", z);
          212  +    }else{
          213  +      (*ppTok)->pModule = m; 
          214  +    }
          215  +    sqlite3_free(aArg);
          216  +  }
          217  +
          218  +  sqlite3_free(zCopy);
          219  +  return rc;
          220  +}
          221  +
    99    222   
   100    223   #ifdef SQLITE_TEST
   101    224   
   102    225   #include <tcl.h>
   103    226   #include <string.h>
   104    227   
   105    228   /*
................................................................................
   129    252   **   
   130    253   */
   131    254   static void testFunc(
   132    255     sqlite3_context *context,
   133    256     int argc,
   134    257     sqlite3_value **argv
   135    258   ){
   136         -  fts3Hash *pHash;
          259  +  Fts3Hash *pHash;
   137    260     sqlite3_tokenizer_module *p;
   138    261     sqlite3_tokenizer *pTokenizer = 0;
   139    262     sqlite3_tokenizer_cursor *pCsr = 0;
   140    263   
   141    264     const char *zErr = 0;
   142    265   
   143    266     const char *zName;
................................................................................
   162    285     nInput = sqlite3_value_bytes(argv[argc-1]);
   163    286     zInput = (const char *)sqlite3_value_text(argv[argc-1]);
   164    287   
   165    288     if( argc==3 ){
   166    289       zArg = (const char *)sqlite3_value_text(argv[1]);
   167    290     }
   168    291   
   169         -  pHash = (fts3Hash *)sqlite3_user_data(context);
          292  +  pHash = (Fts3Hash *)sqlite3_user_data(context);
   170    293     p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
   171    294   
   172    295     if( !p ){
   173    296       char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
   174    297       sqlite3_result_error(context, zErr, -1);
   175    298       sqlite3_free(zErr);
   176    299       return;
................................................................................
   331    454   ** provide read/write access to the contents of *pHash.
   332    455   **
   333    456   ** The third argument to this function, zName, is used as the name
   334    457   ** of both the scalar and, if created, the virtual table.
   335    458   */
   336    459   int sqlite3Fts3InitHashTable(
   337    460     sqlite3 *db, 
   338         -  fts3Hash *pHash, 
          461  +  Fts3Hash *pHash, 
   339    462     const char *zName
   340    463   ){
   341    464     int rc = SQLITE_OK;
   342    465     void *p = (void *)pHash;
   343    466     const int any = SQLITE_ANY;
   344    467     char *zTest = 0;
   345    468     char *zTest2 = 0;

Added ext/fts3/fts3_write.c.

            1  +/*
            2  +** 2009 Oct 23
            3  +**
            4  +** The author disclaims copyright to this source code.  In place of
            5  +** a legal notice, here is a blessing:
            6  +**
            7  +**    May you do good and not evil.
            8  +**    May you find forgiveness for yourself and forgive others.
            9  +**    May you share freely, never taking more than you give.
           10  +**
           11  +******************************************************************************
           12  +**
           13  +** This file is part of the SQLite FTS3 extension module. Specifically,
           14  +** this file contains code to insert, update and delete rows from FTS3
           15  +** tables. It also contains code to merge FTS3 b-tree segments. Some
           16  +** of the sub-routines used to merge segments are also used by the query 
           17  +** code in fts3.c.
           18  +*/
           19  +
           20  +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
           21  +
           22  +#include "fts3Int.h"
           23  +#include <string.h>
           24  +#include <assert.h>
           25  +#include <stdlib.h>
           26  +
           27  +#define INTERIOR_MAX 2048         /* Soft limit for segment node size */
           28  +#define LEAF_MAX 2048             /* Soft limit for segment leaf size */
           29  +
           30  +typedef struct PendingList PendingList;
           31  +typedef struct SegmentNode SegmentNode;
           32  +typedef struct SegmentWriter SegmentWriter;
           33  +
           34  +/*
           35  +** Data structure used while accumulating terms in the pending-terms hash
           36  +** table. The hash table entry maps from term (a string) to a malloced
           37  +** instance of this structure.
           38  +*/
           39  +struct PendingList {
           40  +  int nData;                      /* NOTE(review): presumably bytes used in aData - confirm */
           41  +  char *aData;                    /* NOTE(review): presumably the accumulated list buffer - confirm */
           42  +  int nSpace;                     /* NOTE(review): presumably allocated size of aData - confirm */
           43  +  sqlite3_int64 iLastDocid;       /* NOTE(review): presumably last docid appended - confirm */
           44  +  sqlite3_int64 iLastCol;         /* NOTE(review): presumably last column appended - confirm */
           45  +  sqlite3_int64 iLastPos;         /* NOTE(review): presumably last position appended - confirm */
           46  +};
           47  +
           48  +/*
           49  +** An instance of this structure is used to iterate through the terms on
           50  +** a contiguous set of segment b-tree leaf nodes. Although the details of
           51  +** this structure are only manipulated by code in this file, opaque handles
           52  +** of type Fts3SegReader* are also used by code in fts3.c to iterate through
           53  +** terms when querying the full-text index. See functions:
           54  +**
           55  +**   sqlite3Fts3SegReaderNew()
           56  +**   sqlite3Fts3SegReaderFree()
           57  +**   sqlite3Fts3SegReaderIterate()
           58  +*/
           59  +struct Fts3SegReader {
           60  +  int iIdx;                       /* Index within level */
           61  +  sqlite3_int64 iStartBlock;
           62  +  sqlite3_int64 iEndBlock;
           63  +  sqlite3_stmt *pStmt;            /* SQL Statement to access leaf nodes */
           64  +  char *aNode;                    /* Pointer to node data (or NULL) */
           65  +  int nNode;                      /* Size of buffer at aNode (or 0) */
           66  +  int nTermAlloc;                 /* Allocated size of zTerm buffer */
           67  +
           68  +  /* Variables set by fts3SegReaderNext(). These may be read directly
           69  +  ** by the caller. They are valid from the time sqlite3Fts3SegReaderNew()
           70  +  ** returns until fts3SegReaderNext() returns something other than
           71  +  ** SQLITE_OK (i.e. SQLITE_DONE).
           72  +  */
           73  +  int nTerm;                      /* Number of bytes in current term */
           74  +  char *zTerm;                    /* Pointer to current term */
           75  +  char *aDoclist;                 /* Pointer to doclist of current entry */
           76  +  int nDoclist;                   /* Size of doclist in current entry */
           77  +