/ Check-in [48c0db0e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Changes to fts3 to avoid flushing data to disk within a SELECT statement.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:48c0db0eb2d134bb302bb5eca6beb0ec46736257
User & Date: dan 2009-12-10 16:04:26
Context
2009-12-10
18:20
Fix an OOM related problem in the snippet() and offsets() functions of fts3. check-in: 61efff41 user: dan tags: trunk
16:04
Changes to fts3 to avoid flushing data to disk within a SELECT statement. check-in: 48c0db0e user: dan tags: trunk
01:17
Only declare the sqlite3_mutex_held() and sqlite3_mutex_notheld() interfaces in the header file if NDEBUG is not defined. check-in: ee9b1c05 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

422
423
424
425
426
427
428

429
430
431
432
433
434
435
436
437
438
439
440
441


442
443
444
445
446
447

448
449
450
451

452
453
454
455

456
457
458
459
460
461
462
463
464
....
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602








1603
1604
1605
1606
1607
1608
1609
....
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
....
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
....
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007

2008


2009
2010
2011
2012
2013
2014
2015
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno

*/
void sqlite3Fts3Dequote(char *z){
  char quote;
  int i, j;

  quote = z[0];
  switch( quote ){
    case '\'':  break;
    case '"':   break;
    case '`':   break;                /* For MySQL compatibility */
    case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */
    default:    return;
  }



  i = 1;
  j = 0;
  while( ALWAYS(z[i]) ){
    if( z[i]==quote ){
      if( z[i+1]==quote ){

        z[j++] = quote;
        i += 2;
      }else{
        break;

      }
    }else{
      z[j++] = z[i++];
    }

  }
  z[j] = 0;
}

static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){
  sqlite3_int64 iVal;
  *pp += sqlite3Fts3GetVarint(*pp, &iVal);
  *pVal += iVal;
}
................................................................................
  int isReqPos,                   /* True to include position lists in output */
  int *pnOut,                     /* OUT: Size of buffer at *ppOut */
  char **ppOut                    /* OUT: Malloced result buffer */
){
  int i;
  TermSelect tsc;
  Fts3SegFilter filter;           /* Segment term filter configuration */
  Fts3SegReader **apSegment = 0;  /* Array of segments to read data from */
  int nSegment = 0;               /* Size of apSegment array */
  int nAlloc = 0;                 /* Allocated size of segment array */
  int rc;                         /* Return code */
  sqlite3_stmt *pStmt;            /* SQL statement to scan %_segdir table */
  int iAge = 0;                   /* Used to assign ages to segments */









  /* Loop through the entire %_segdir table. For each segment, create a
  ** Fts3SegReader to iterate through the subset of the segment leaves
  ** that may contain a term that matches zTerm/nTerm. For non-prefix
  ** searches, this is always a single leaf. For prefix searches, this
  ** may be a contiguous block of leaves.
  **
................................................................................
  char *pOut = 0;
  int nOut = 0;
  int rc = SQLITE_OK;
  int ii;
  int iCol = pPhrase->iColumn;
  int isTermPos = (pPhrase->nToken>1 || isReqPos);

  assert( p->nPendingData==0 );

  for(ii=0; ii<pPhrase->nToken; ii++){
    struct PhraseToken *pTok = &pPhrase->aToken[ii];
    char *z = pTok->z;            /* Next token of the phrase */
    int n = pTok->n;              /* Size of z in bytes */
    int isPrefix = pTok->isPrefix;/* True if token is a prefix */
    char *pList;                  /* Pointer to token doclist */
    int nList;                    /* Size of buffer at pList */
................................................................................
  }else if( idxNum!=FTS3_FULLSCAN_SEARCH ){
    int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
    const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);

    if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
      return SQLITE_NOMEM;
    }
    rc = sqlite3Fts3PendingTermsFlush(p);
    if( rc!=SQLITE_OK ) return rc;

    rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->nColumn, 
        iCol, zQuery, -1, &pCsr->pExpr
    );
    if( rc!=SQLITE_OK ) return rc;

    rc = evalFts3Expr(p, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist);
................................................................................
  int rc;                         /* Return Code */
  Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
  Fts3Table *p = (Fts3Table *)pCursor->pVtab;

  /* The column value supplied by SQLite must be in range. */
  assert( iCol>=0 && iCol<=p->nColumn+1 );

  rc = fts3CursorSeek(pCsr);
  if( rc==SQLITE_OK ){
    if( iCol==p->nColumn+1 ){
      /* This call is a request for the "docid" column. Since "docid" is an 
      ** alias for "rowid", use the xRowid() method to obtain the value.
      */
      sqlite3_int64 iRowid;
      rc = fts3RowidMethod(pCursor, &iRowid);
      sqlite3_result_int64(pContext, iRowid);
    }else if( iCol==p->nColumn ){
      /* The extra column whose name is the same as the table.
      ** Return a blob which is a pointer to the cursor.
      */
      sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);

    }else{


      sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1));
    }
  }
  return rc;
}

/* 







>


<
|


|
|
|
<
<
<
|
>
>

<
<
|
|
<
>
|
|

<
>

<
<

>

<







 







|

|



>
>
>
>
>
>
>
>







 







<
<







 







<
<







 







<
<
|
|
|
|
|
|
|
|
|
|
|
|
>
|
>
>







422
423
424
425
426
427
428
429
430
431

432
433
434
435
436
437



438
439
440
441


442
443

444
445
446
447

448
449


450
451
452

453
454
455
456
457
458
459
....
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
....
1716
1717
1718
1719
1720
1721
1722


1723
1724
1725
1726
1727
1728
1729
....
1931
1932
1933
1934
1935
1936
1937


1938
1939
1940
1941
1942
1943
1944
....
1986
1987
1988
1989
1990
1991
1992


1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
**
*/
void sqlite3Fts3Dequote(char *z){

  char quote;                     /* Quote character (if any ) */

  quote = z[0];
  if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){
    int iIn = 1;                  /* Index of next byte to read from input */
    int iOut = 0;                 /* Index of next byte to write to output */




    /* If the first byte was a '[', then the close-quote character is a ']' */
    if( quote=='[' ) quote = ']';  



    while( ALWAYS(z[iIn]) ){
      if( z[iIn]==quote ){

        if( z[iIn+1]!=quote ) break;
        z[iOut++] = quote;
        iIn += 2;
      }else{

        z[iOut++] = z[iIn++];
      }


    }
    z[iOut] = '\0';
  }

}

static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){
  sqlite3_int64 iVal;
  *pp += sqlite3Fts3GetVarint(*pp, &iVal);
  *pVal += iVal;
}
................................................................................
  int isReqPos,                   /* True to include position lists in output */
  int *pnOut,                     /* OUT: Size of buffer at *ppOut */
  char **ppOut                    /* OUT: Malloced result buffer */
){
  int i;
  TermSelect tsc;
  Fts3SegFilter filter;           /* Segment term filter configuration */
  Fts3SegReader **apSegment;      /* Array of segments to read data from */
  int nSegment = 0;               /* Size of apSegment array */
  int nAlloc = 16;                /* Allocated size of segment array */
  int rc;                         /* Return code */
  sqlite3_stmt *pStmt;            /* SQL statement to scan %_segdir table */
  int iAge = 0;                   /* Used to assign ages to segments */

  apSegment = (Fts3SegReader **)sqlite3_malloc(sizeof(Fts3SegReader*)*nAlloc);
  if( !apSegment ) return SQLITE_NOMEM;
  rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &apSegment[0]);
  if( rc!=SQLITE_OK ) return rc;
  if( apSegment[0] ){
    nSegment = 1;
  }

  /* Loop through the entire %_segdir table. For each segment, create a
  ** Fts3SegReader to iterate through the subset of the segment leaves
  ** that may contain a term that matches zTerm/nTerm. For non-prefix
  ** searches, this is always a single leaf. For prefix searches, this
  ** may be a contiguous block of leaves.
  **
................................................................................
  char *pOut = 0;
  int nOut = 0;
  int rc = SQLITE_OK;
  int ii;
  int iCol = pPhrase->iColumn;
  int isTermPos = (pPhrase->nToken>1 || isReqPos);



  for(ii=0; ii<pPhrase->nToken; ii++){
    struct PhraseToken *pTok = &pPhrase->aToken[ii];
    char *z = pTok->z;            /* Next token of the phrase */
    int n = pTok->n;              /* Size of z in bytes */
    int isPrefix = pTok->isPrefix;/* True if token is a prefix */
    char *pList;                  /* Pointer to token doclist */
    int nList;                    /* Size of buffer at pList */
................................................................................
  }else if( idxNum!=FTS3_FULLSCAN_SEARCH ){
    int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
    const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);

    if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
      return SQLITE_NOMEM;
    }



    rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->nColumn, 
        iCol, zQuery, -1, &pCsr->pExpr
    );
    if( rc!=SQLITE_OK ) return rc;

    rc = evalFts3Expr(p, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist);
................................................................................
  int rc;                         /* Return Code */
  Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
  Fts3Table *p = (Fts3Table *)pCursor->pVtab;

  /* The column value supplied by SQLite must be in range. */
  assert( iCol>=0 && iCol<=p->nColumn+1 );



  if( iCol==p->nColumn+1 ){
    /* This call is a request for the "docid" column. Since "docid" is an 
    ** alias for "rowid", use the xRowid() method to obtain the value.
    */
    sqlite3_int64 iRowid;
    rc = fts3RowidMethod(pCursor, &iRowid);
    sqlite3_result_int64(pContext, iRowid);
  }else if( iCol==p->nColumn ){
    /* The extra column whose name is the same as the table.
    ** Return a blob which is a pointer to the cursor.
    */
    sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
    rc = SQLITE_OK;
  }else{
    rc = fts3CursorSeek(pCsr);
    if( rc==SQLITE_OK ){
      sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1));
    }
  }
  return rc;
}

/* 

Changes to ext/fts3/fts3Int.h.

221
222
223
224
225
226
227

228
229
230
231
232
233
234
/* fts3_write.c */
int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
int sqlite3Fts3PendingTermsFlush(Fts3Table *);
void sqlite3Fts3PendingTermsClear(Fts3Table *);
int sqlite3Fts3Optimize(Fts3Table *);
int sqlite3Fts3SegReaderNew(Fts3Table *,int, sqlite3_int64,
  sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);

void sqlite3Fts3SegReaderFree(Fts3Table *, Fts3SegReader *);
int sqlite3Fts3SegReaderIterate(
  Fts3Table *, Fts3SegReader **, int, Fts3SegFilter *,
  int (*)(Fts3Table *, void *, char *, int, char *, int),  void *
);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char const**, int*);
int sqlite3Fts3AllSegdirs(Fts3Table*, sqlite3_stmt **);







>







221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/* fts3_write.c */
int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
int sqlite3Fts3PendingTermsFlush(Fts3Table *);
void sqlite3Fts3PendingTermsClear(Fts3Table *);
int sqlite3Fts3Optimize(Fts3Table *);
int sqlite3Fts3SegReaderNew(Fts3Table *,int, sqlite3_int64,
  sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
int sqlite3Fts3SegReaderPending(Fts3Table*,const char*,int,int,Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3Table *, Fts3SegReader *);
int sqlite3Fts3SegReaderIterate(
  Fts3Table *, Fts3SegReader **, int, Fts3SegFilter *,
  int (*)(Fts3Table *, void *, char *, int, char *, int),  void *
);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char const**, int*);
int sqlite3Fts3AllSegdirs(Fts3Table*, sqlite3_stmt **);

Changes to ext/fts3/fts3_hash.c.

275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296











297
298
299
300
301
302
303
304
  if( pH->count<=0 ){
    assert( pH->first==0 );
    assert( pH->count==0 );
    fts3HashClear(pH);
  }
}

/* Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey.  Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){
  int h;                 /* A hash on key */
  Fts3HashElem *elem;    /* The element that matches key */
  int (*xHash)(const void*,int);  /* The hash function */

  if( pH==0 || pH->ht==0 ) return 0;
  xHash = ftsHashFunction(pH->keyClass);
  assert( xHash!=0 );
  h = (*xHash)(pKey,nKey);
  assert( (pH->htsize & (pH->htsize-1))==0 );
  elem = fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1));











  return elem ? elem->data : 0;
}

/* Insert an element into the hash table pH.  The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created.  A copy of the key is made if the copyKey







|
|
|
|
|
|
<







|
>
>
>
>
>
>
>
>
>
>
>
|







275
276
277
278
279
280
281
282
283
284
285
286
287

288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
  if( pH->count<=0 ){
    assert( pH->first==0 );
    assert( pH->count==0 );
    fts3HashClear(pH);
  }
}

Fts3HashElem *sqlite3Fts3HashFindElem(
  const Fts3Hash *pH, 
  const void *pKey, 
  int nKey
){
  int h;                          /* A hash on key */

  int (*xHash)(const void*,int);  /* The hash function */

  if( pH==0 || pH->ht==0 ) return 0;
  xHash = ftsHashFunction(pH->keyClass);
  assert( xHash!=0 );
  h = (*xHash)(pKey,nKey);
  assert( (pH->htsize & (pH->htsize-1))==0 );
  return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1));
}

/* 
** Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey.  Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){
  Fts3HashElem *pElem;            /* The element that matches key (if any) */

  pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey);
  return pElem ? pElem->data : 0;
}

/* Insert an element into the hash table pH.  The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created.  A copy of the key is made if the copyKey

Changes to ext/fts3/fts3_hash.h.

71
72
73
74
75
76
77

78
79
80
81
82
83
84
85

86
87
88
89
90
91
92
/*
** Access routines.  To delete, insert a NULL pointer.
*/
void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey);
void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData);
void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey);
void sqlite3Fts3HashClear(Fts3Hash*);


/*
** Shorthand for the functions above
*/
#define fts3HashInit   sqlite3Fts3HashInit
#define fts3HashInsert sqlite3Fts3HashInsert
#define fts3HashFind   sqlite3Fts3HashFind
#define fts3HashClear  sqlite3Fts3HashClear


/*
** Macros for looping over all elements of a hash table.  The idiom is
** like this:
**
**   Fts3Hash h;
**   Fts3HashElem *p;







>




|
|
|
|
>







71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
/*
** Access routines.  To delete, insert a NULL pointer.
*/
void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey);
void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData);
void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey);
void sqlite3Fts3HashClear(Fts3Hash*);
Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const void *, int);

/*
** Shorthand for the functions above
*/
#define fts3HashInit     sqlite3Fts3HashInit
#define fts3HashInsert   sqlite3Fts3HashInsert
#define fts3HashFind     sqlite3Fts3HashFind
#define fts3HashClear    sqlite3Fts3HashClear
#define fts3HashFindElem sqlite3Fts3HashFindElem

/*
** Macros for looping over all elements of a hash table.  The idiom is
** like this:
**
**   Fts3Hash h;
**   Fts3HashElem *p;

Changes to ext/fts3/fts3_write.c.

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
..
48
49
50
51
52
53
54






55
56
57
58
59
60
61
62
63

64
65
66
67
68
69
70
..
72
73
74
75
76
77
78


79
80
81
82
83
84
85
...
724
725
726
727
728
729
730














731
732
733
734
735
736
737
...
833
834
835
836
837
838
839

840

841
842
843
844
845
846
847
...
930
931
932
933
934
935
936



























































































937
938
939
940
941
942
943
...
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
....
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
....
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
....
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
....
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080

typedef struct PendingList PendingList;
typedef struct SegmentNode SegmentNode;
typedef struct SegmentWriter SegmentWriter;

/*
** Data structure used while accumulating terms in the pending-terms hash
** table. The hash table entry maps from term (a string) to a malloced
** instance of this structure.
*/
struct PendingList {
  int nData;
  char *aData;
  int nSpace;
  sqlite3_int64 iLastDocid;
................................................................................
** this structure are only manipulated by code in this file, opaque handles
** of type Fts3SegReader* are also used by code in fts3.c to iterate through
** terms when querying the full-text index. See functions:
**
**   sqlite3Fts3SegReaderNew()
**   sqlite3Fts3SegReaderFree()
**   sqlite3Fts3SegReaderIterate()






*/
struct Fts3SegReader {
  int iIdx;                       /* Index within level */
  sqlite3_int64 iStartBlock;
  sqlite3_int64 iEndBlock;
  sqlite3_stmt *pStmt;            /* SQL Statement to access leaf nodes */
  char *aNode;                    /* Pointer to node data (or NULL) */
  int nNode;                      /* Size of buffer at aNode (or 0) */
  int nTermAlloc;                 /* Allocated size of zTerm buffer */


  /* Variables set by fts3SegReaderNext(). These may be read directly
  ** by the caller. They are valid from the time SegmentReaderNew() returns
  ** until SegmentReaderNext() returns something other than SQLITE_OK
  ** (i.e. SQLITE_DONE).
  */
  int nTerm;                      /* Number of bytes in current term */
................................................................................
  char *aDoclist;                 /* Pointer to doclist of current entry */
  int nDoclist;                   /* Size of doclist in current entry */

  /* The following variables are used to iterate through the current doclist */
  char *pOffsetList;
  sqlite3_int64 iDocid;
};



/*
** An instance of this structure is used to create a segment b-tree in the
** database. The internal details of this type are only accessed by the
** following functions:
**
**   fts3SegWriterAdd()
................................................................................
    pNext = pReader->aNode;
  }else{
    pNext = &pReader->aDoclist[pReader->nDoclist];
  }

  if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){
    int rc;














    if( !pReader->pStmt ){
      pReader->aNode = 0;
      return SQLITE_OK;
    }
    rc = sqlite3_step(pReader->pStmt);
    if( rc!=SQLITE_ROW ){
      pReader->aNode = 0;
................................................................................
      /* Move the leaf-range SELECT statement to the aLeavesStmt[] array,
      ** so that it can be reused when required by another query.
      */
      assert( p->nLeavesStmt<p->nLeavesTotal );
      sqlite3_reset(pReader->pStmt);
      p->aLeavesStmt[p->nLeavesStmt++] = pReader->pStmt;
    }

    sqlite3_free(pReader->zTerm);

    sqlite3_free(pReader);
  }
}

/*
** Allocate a new SegReader object.
*/
................................................................................
    *ppReader = pReader;
  }else{
    sqlite3Fts3SegReaderFree(p, pReader);
  }
  return rc;
}





























































































/*
** The second argument to this function is expected to be a statement of
** the form:
**
**   SELECT 
**     idx,                  -- col 0
................................................................................

/*
** Compare the entries pointed to by two Fts3SegReader structures. 
** Comparison is as follows:
**
**   1) EOF is greater than not EOF.
**
**   2) The current terms (if any) are compared with memcmp(). If one
**      term is a prefix of another, the longer term is considered the
**      larger.
**
**   3) By segment age. An older segment is considered larger.
*/
static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
  int rc;
................................................................................
  if( pFilter->zTerm ){
    int nTerm = pFilter->nTerm;
    const char *zTerm = pFilter->zTerm;
    for(i=0; i<nSegment; i++){
      Fts3SegReader *pSeg = apSegment[i];
      while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ){
        rc = fts3SegReaderNext(pSeg);
        if( rc!=SQLITE_OK ) goto finished;
      }
    }
  }

  fts3SegReaderSort(apSegment, nSegment, nSegment, fts3SegReaderCmp);
  while( apSegment[0]->aNode ){
    int nTerm = apSegment[0]->nTerm;
    char *zTerm = apSegment[0]->zTerm;
................................................................................
        int j;                    /* Number of segments that share a docid */
        char *pList;
        int nList;
        int nByte;
        sqlite3_int64 iDocid = apSegment[0]->iDocid;
        fts3SegReaderNextDocid(apSegment[0], &pList, &nList);
        j = 1;
        while( j<nMerge 
            && apSegment[j]->pOffsetList 
            && apSegment[j]->iDocid==iDocid 
        ){
          fts3SegReaderNextDocid(apSegment[j], 0, 0);
          j++;
        }

        if( isColFilter ){
          fts3ColumnFilter(pFilter->iCol, &pList, &nList);
................................................................................
    }
    sqlite3_free(apSegment);
  }
  sqlite3_reset(pStmt);
  return rc;
}

/*
** This is a comparison function used as a qsort() callback when sorting
** an array of pending terms by term. This occurs as part of flushing
** the contents of the pending-terms hash table to the database.
*/
static int qsortCompare(const void *lhs, const void *rhs){
  char *z1 = fts3HashKey(*(Fts3HashElem **)lhs);
  char *z2 = fts3HashKey(*(Fts3HashElem **)rhs);
  int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs);
  int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs);

  int n = (n1<n2 ? n1 : n2);
  int c = memcmp(z1, z2, n);
  if( c==0 ){
    c = n1 - n2;
  }
  return c;
}


/* 
** Flush the contents of pendingTerms to a level 0 segment.
*/
int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
  Fts3HashElem *pElem;
  int idx, rc, i;
................................................................................
  assert( i==nElem );

  /* TODO(shess) Should we allow user-defined collation sequences,
  ** here?  I think we only need that once we support prefix searches.
  ** Also, should we be using qsort()?
  */
  if( nElem>1 ){
    qsort(apElem, nElem, sizeof(Fts3HashElem *), qsortCompare);
  }


  /* Write the segment tree into the database. */
  for(i=0; rc==SQLITE_OK && i<nElem; i++){
    const char *z = fts3HashKey(apElem[i]);
    int n = fts3HashKeysize(apElem[i]);







|







 







>
>
>
>
>
>


|






>







 







>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
|
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|







 







|
<







 







|
|
|







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
..
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
..
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
...
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
...
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
...
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
....
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
....
1902
1903
1904
1905
1906
1907
1908
1909

1910
1911
1912
1913
1914
1915
1916
....
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
....
2120
2121
2122
2123
2124
2125
2126



















2127
2128
2129
2130
2131
2132
2133
....
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176

typedef struct PendingList PendingList;
typedef struct SegmentNode SegmentNode;
typedef struct SegmentWriter SegmentWriter;

/*
** Data structure used while accumulating terms in the pending-terms hash
** table. The hash table entry maps from term (a string) to a malloc'd
** instance of this structure.
*/
struct PendingList {
  int nData;
  char *aData;
  int nSpace;
  sqlite3_int64 iLastDocid;
................................................................................
** this structure are only manipulated by code in this file, opaque handles
** of type Fts3SegReader* are also used by code in fts3.c to iterate through
** terms when querying the full-text index. See functions:
**
**   sqlite3Fts3SegReaderNew()
**   sqlite3Fts3SegReaderFree()
**   sqlite3Fts3SegReaderIterate()
**
** Methods used to manipulate Fts3SegReader structures:
**
**   fts3SegReaderNext()
**   fts3SegReaderFirstDocid()
**   fts3SegReaderNextDocid()
*/
struct Fts3SegReader {
  int iIdx;                       /* Index within level, or 0x7FFFFFFF for PT */
  sqlite3_int64 iStartBlock;
  sqlite3_int64 iEndBlock;
  sqlite3_stmt *pStmt;            /* SQL Statement to access leaf nodes */
  char *aNode;                    /* Pointer to node data (or NULL) */
  int nNode;                      /* Size of buffer at aNode (or 0) */
  int nTermAlloc;                 /* Allocated size of zTerm buffer */
  Fts3HashElem **ppNextElem;

  /* Variables set by fts3SegReaderNext(). These may be read directly
  ** by the caller. They are valid from the time SegmentReaderNew() returns
  ** until SegmentReaderNext() returns something other than SQLITE_OK
  ** (i.e. SQLITE_DONE).
  */
  int nTerm;                      /* Number of bytes in current term */
................................................................................
  char *aDoclist;                 /* Pointer to doclist of current entry */
  int nDoclist;                   /* Size of doclist in current entry */

  /* The following variables are used to iterate through the current doclist */
  char *pOffsetList;
  sqlite3_int64 iDocid;
};

#define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0)

/*
** An instance of this structure is used to create a segment b-tree in the
** database. The internal details of this type are only accessed by the
** following functions:
**
**   fts3SegWriterAdd()
................................................................................
    pNext = pReader->aNode;
  }else{
    pNext = &pReader->aDoclist[pReader->nDoclist];
  }

  if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){
    int rc;
    if( fts3SegReaderIsPending(pReader) ){
      Fts3HashElem *pElem = *(pReader->ppNextElem);
      if( pElem==0 ){
        pReader->aNode = 0;
      }else{
        PendingList *pList = (PendingList *)fts3HashData(pElem);
        pReader->zTerm = (char *)fts3HashKey(pElem);
        pReader->nTerm = fts3HashKeysize(pElem);
        pReader->nNode = pReader->nDoclist = pList->nData;
        pReader->aNode = pReader->aDoclist = pList->aData;
        pReader->ppNextElem++;
      }
      return SQLITE_OK;
    }
    if( !pReader->pStmt ){
      pReader->aNode = 0;
      return SQLITE_OK;
    }
    rc = sqlite3_step(pReader->pStmt);
    if( rc!=SQLITE_ROW ){
      pReader->aNode = 0;
................................................................................
      /* Move the leaf-range SELECT statement to the aLeavesStmt[] array,
      ** so that it can be reused when required by another query.
      */
      assert( p->nLeavesStmt<p->nLeavesTotal );
      sqlite3_reset(pReader->pStmt);
      p->aLeavesStmt[p->nLeavesStmt++] = pReader->pStmt;
    }
    if( !fts3SegReaderIsPending(pReader) ){
      sqlite3_free(pReader->zTerm);
    }
    sqlite3_free(pReader);
  }
}

/*
** Allocate a new SegReader object.
*/
................................................................................
    *ppReader = pReader;
  }else{
    sqlite3Fts3SegReaderFree(p, pReader);
  }
  return rc;
}

/*
** This is a comparison function used as a qsort() callback when sorting
** an array of pending terms by term. This occurs as part of flushing
** the contents of the pending-terms hash table to the database.
*/
static int fts3CompareElemByTerm(const void *lhs, const void *rhs){
  char *z1 = fts3HashKey(*(Fts3HashElem **)lhs);
  char *z2 = fts3HashKey(*(Fts3HashElem **)rhs);
  int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs);
  int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs);

  int n = (n1<n2 ? n1 : n2);
  int c = memcmp(z1, z2, n);
  if( c==0 ){
    c = n1 - n2;
  }
  return c;
}

/*
** This function is used to allocate an Fts3SegReader that iterates through
** a subset of the terms stored in the Fts3Table.pendingTerms array.
*/
int sqlite3Fts3SegReaderPending(
  Fts3Table *p,                   /* Virtual table handle */
  const char *zTerm,              /* Term to search for */
  int nTerm,                      /* Size of buffer zTerm */
  int isPrefix,                   /* True for a term-prefix query */
  Fts3SegReader **ppReader        /* OUT: SegReader for pending-terms */
){
  Fts3SegReader *pReader = 0;     /* Fts3SegReader object to return */
  Fts3HashElem **aElem = 0;       /* Array of term hash entries to scan */
  int nElem = 0;                  /* Size of array at aElem */
  int rc = SQLITE_OK;             /* Return Code */

  if( isPrefix ){
    Fts3HashElem *pE = 0;         /* Iterator variable */

    for(pE=fts3HashFirst(&p->pendingTerms); pE; pE=fts3HashNext(pE)){
      char *zKey = (char *)fts3HashKey(pE);
      int nKey = fts3HashKeysize(pE);
      if( nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm) ){
        int nByte = (1+nElem * sizeof(Fts3HashElem *));
        Fts3HashElem **aElem2 = (Fts3HashElem **)sqlite3_realloc(aElem, nByte);
        if( !aElem2 ){
          rc = SQLITE_NOMEM;
          nElem = 0;
          break;
        }
        aElem = aElem2;
        aElem[nElem++] = pE;
      }
    }

    /* If more than one term matches the prefix, sort the Fts3HashElem
    ** objects in term order using qsort(). This uses the same comparison
    ** callback as is used when flushing terms to disk.
    */
    if( nElem>1 ){
      qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm);
    }

  }else{
    Fts3HashElem *pE = fts3HashFindElem(&p->pendingTerms, zTerm, nTerm);
    if( pE ){
      aElem = &pE;
      nElem = 1;
    }
  }

  if( nElem>0 ){
    int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *);
    pReader = (Fts3SegReader *)sqlite3_malloc(nByte);
    if( !pReader ){
      rc = SQLITE_NOMEM;
    }else{
      memset(pReader, 0, nByte);
      pReader->iIdx = 0x7FFFFFFF;
      pReader->ppNextElem = (Fts3HashElem **)&pReader[1];
      memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *));
      fts3SegReaderNext(pReader);
    }
  }

  if( isPrefix ){
    sqlite3_free(aElem);
  }
  *ppReader = pReader;
  return rc;
}


/*
** The second argument to this function is expected to be a statement of
** the form:
**
**   SELECT 
**     idx,                  -- col 0
................................................................................

/*
** Compare the entries pointed to by two Fts3SegReader structures. 
** Comparison is as follows:
**
**   1) EOF is greater than not EOF.
**
**   2) The current terms (if any) are compared using memcmp(). If one
**      term is a prefix of another, the longer term is considered the
**      larger.
**
**   3) By segment age. An older segment is considered larger.
*/
static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
  int rc;
................................................................................
  if( pFilter->zTerm ){
    int nTerm = pFilter->nTerm;
    const char *zTerm = pFilter->zTerm;
    for(i=0; i<nSegment; i++){
      Fts3SegReader *pSeg = apSegment[i];
      while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ){
        rc = fts3SegReaderNext(pSeg);
        if( rc!=SQLITE_OK ) goto finished; }

    }
  }

  fts3SegReaderSort(apSegment, nSegment, nSegment, fts3SegReaderCmp);
  while( apSegment[0]->aNode ){
    int nTerm = apSegment[0]->nTerm;
    char *zTerm = apSegment[0]->zTerm;
................................................................................
        int j;                    /* Number of segments that share a docid */
        char *pList;
        int nList;
        int nByte;
        sqlite3_int64 iDocid = apSegment[0]->iDocid;
        fts3SegReaderNextDocid(apSegment[0], &pList, &nList);
        j = 1;
        while( j<nMerge
            && apSegment[j]->pOffsetList
            && apSegment[j]->iDocid==iDocid
        ){
          fts3SegReaderNextDocid(apSegment[j], 0, 0);
          j++;
        }

        if( isColFilter ){
          fts3ColumnFilter(pFilter->iCol, &pList, &nList);
................................................................................
    }
    sqlite3_free(apSegment);
  }
  sqlite3_reset(pStmt);
  return rc;
}





















/* 
** Flush the contents of pendingTerms to a level 0 segment.
*/
int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
  Fts3HashElem *pElem;
  int idx, rc, i;
................................................................................
  assert( i==nElem );

  /* TODO(shess) Should we allow user-defined collation sequences,
  ** here?  I think we only need that once we support prefix searches.
  ** Also, should we be using qsort()?
  */
  if( nElem>1 ){
    qsort(apElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm);
  }


  /* Write the segment tree into the database. */
  for(i=0; rc==SQLITE_OK && i<nElem; i++){
    const char *z = fts3HashKey(apElem[i]);
    int n = fts3HashKeysize(apElem[i]);

Changes to test/fts3_common.tcl.

321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339


340
341
342
343
344
345
346
347
  if {![info exists ::DO_MALLOC_TEST]} { set ::DO_MALLOC_TEST 1 }

  if {$::DO_MALLOC_TEST} {
    set answers [list {1 {out of memory}} $catchres]
    set modes [list 100000 transient 1 persistent]
  } else {
    set answers [list $catchres]
    set modes [list 0 nofail]
  }
  set str [join $answers " OR "]

  foreach {nRepeat zName} $modes {
    for {set iFail 1} 1 {incr iFail} {
      if {$::DO_MALLOC_TEST} {sqlite3_memdebug_fail $iFail -repeat $nRepeat}

      set res [uplevel [list catchsql $sql]]
      if {[lsearch -exact $answers $res]>=0} {
        set res $str
      }


      do_test $name.$zName.$iFail [list set {} $res] $str
      set nFail [sqlite3_memdebug_fail -1 -benigncnt nBenign]
      if {$nFail==0} break
    }
  }
}









|




|






>
>
|







321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
  if {![info exists ::DO_MALLOC_TEST]} { set ::DO_MALLOC_TEST 1 }

  if {$::DO_MALLOC_TEST} {
    set answers [list {1 {out of memory}} $catchres]
    set modes [list 100000 transient 1 persistent]
  } else {
    set answers [list $catchres]
    set modes [list 0 ""]
  }
  set str [join $answers " OR "]

  foreach {nRepeat zName} $modes {
    for {set iFail 48} 1 {incr iFail} {
      if {$::DO_MALLOC_TEST} {sqlite3_memdebug_fail $iFail -repeat $nRepeat}

      set res [uplevel [list catchsql $sql]]
      if {[lsearch -exact $answers $res]>=0} {
        set res $str
      }
      set testname "$name.$zName.$iFail"
      if {$zName == ""} { set testname $name }
      do_test $testname [list set {} $res] $str
      set nFail [sqlite3_memdebug_fail -1 -benigncnt nBenign]
      if {$nFail==0} break
    }
  }
}


Changes to test/fts3rnd.test.

15
16
17
18
19
20
21



22
23
24
25
26
27
28
...
145
146
147
148
149
150
151

152
153
154
155
156
157
158
...
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
...
289
290
291
292
293
294
295


296
297
298
299
# If this build does not include FTS3, skip the tests in this file.
#
ifcapable !fts3 { finish_test ; return }
source $testdir/fts3_common.tcl

set nVocab 100
set lVocab [list]




# Generate a vocabulary of nVocab words. Each word is 3 characters long.
#
set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z}
for {set i 0} {$i < $nVocab} {incr i} {
  set    word [lindex $lChar [expr int(rand()*26)]]
  append word [lindex $lChar [expr int(rand()*26)]]
................................................................................
  db transaction {
    catchsql { DROP TABLE t1 }
    execsql "CREATE VIRTUAL TABLE t1 USING fts3(a, b, c, test:$nodesize)"
    for {set i 0} {$i < 100} {incr i} { insert_row $i }
  }
  
  for {set iTest 0} {$iTest <= 100} {incr iTest} {


    set DO_MALLOC_TEST 0
    set nRep 10
    if {$iTest==100 && $nodesize==50} { 
      set DO_MALLOC_TEST 1 
      set nRep 2
    }
................................................................................
    while {$iDelete == $iUpdate} {
      set iDelete [lindex $rows [expr {int(rand()*$nRow)}]]
    }
    set iInsert $iUpdate
    while {[info exists ::t1($iInsert)]} {
      set iInsert [expr {int(rand()*1000000)}]
    }
    db transaction {
      insert_row $iInsert
      update_row $iUpdate
      delete_row $iDelete
    }

    # Pick 10 terms from the vocabulary. Check that the results of querying
    # the database for the set of documents containing each of these terms
    # is the same as the result obtained by scanning the contents of the Tcl 
    # array for each term.
    #
    for {set i 0} {$i < 10} {incr i} {
................................................................................
          SELECT docid FROM t1 WHERE t1 MATCH $match
        } [$proc                                  \
            [simple_near [list $term1 $term2] 10] \
            [simple_near [list $term3 $term4] 10]
          ]
      }
    }


  }
}

finish_test







>
>
>







 







>







 







|



|







 







>
>




15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
...
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
...
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
...
293
294
295
296
297
298
299
300
301
302
303
304
305
# If this build does not include FTS3, skip the tests in this file.
#
ifcapable !fts3 { finish_test ; return }
source $testdir/fts3_common.tcl

set nVocab 100
set lVocab [list]

expr srand(0)


# Generate a vocabulary of nVocab words. Each word is 3 characters long.
#
set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z}
for {set i 0} {$i < $nVocab} {incr i} {
  set    word [lindex $lChar [expr int(rand()*26)]]
  append word [lindex $lChar [expr int(rand()*26)]]
................................................................................
  db transaction {
    catchsql { DROP TABLE t1 }
    execsql "CREATE VIRTUAL TABLE t1 USING fts3(a, b, c, test:$nodesize)"
    for {set i 0} {$i < 100} {incr i} { insert_row $i }
  }
  
  for {set iTest 0} {$iTest <= 100} {incr iTest} {
    catchsql COMMIT

    set DO_MALLOC_TEST 0
    set nRep 10
    if {$iTest==100 && $nodesize==50} { 
      set DO_MALLOC_TEST 1 
      set nRep 2
    }
................................................................................
    while {$iDelete == $iUpdate} {
      set iDelete [lindex $rows [expr {int(rand()*$nRow)}]]
    }
    set iInsert $iUpdate
    while {[info exists ::t1($iInsert)]} {
      set iInsert [expr {int(rand()*1000000)}]
    }
    execsql BEGIN
      insert_row $iInsert
      update_row $iUpdate
      delete_row $iDelete
    if {0==($iTest%2)} { execsql COMMIT }

    # Pick 10 terms from the vocabulary. Check that the results of querying
    # the database for the set of documents containing each of these terms
    # is the same as the result obtained by scanning the contents of the Tcl 
    # array for each term.
    #
    for {set i 0} {$i < 10} {incr i} {
................................................................................
          SELECT docid FROM t1 WHERE t1 MATCH $match
        } [$proc                                  \
            [simple_near [list $term1 $term2] 10] \
            [simple_near [list $term3 $term4] 10]
          ]
      }
    }

    catchsql COMMIT
  }
}

finish_test