/ Check-in [cae844a0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Propagate prefix flag through implementation of doclist query code. Also implement correct prefix-handling for traversal of interior nodes of segment tree. A given prefix can span multiple children of an interior node, and from there the branches need to be followed in parallel. (CVS 3889)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:cae844a01a1d87ffb00bba8b4e7b62a92e633aa9
User & Date: shess 2007-04-30 22:09:36
Context
2007-05-01
16:59
The pager takes the sector size to be the larger of the sector size reported by sqlite3OsSectorSize() and the page size. (CVS 3890) check-in: e5e6af55 user: drh tags: trunk
2007-04-30
22:09
Propagate prefix flag through implementation of doclist query code. Also implement correct prefix-handling for traversal of interior nodes of segment tree. A given prefix can span multiple children of an interior node, and from there the branches need to be followed in parallel. (CVS 3889) check-in: cae844a0 user: shess tags: trunk
21:39
Fix a potential segfault following a malloc() failure during a call to sqlite3_prepare() where the nBytes parameter is positive but less than the length of the input SQL string. (CVS 3888) check-in: 27bf3fc3 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts2/fts2.c.

1516
1517
1518
1519
1520
1521
1522

1523
1524
1525
1526
1527
1528
1529
....
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
....
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
....
3293
3294
3295
3296
3297
3298
3299

3300
3301
3302
3303
3304
3305
3306
....
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200

4201
4202
4203
4204
4205
4206
4207
....
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155


5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172

5173
5174
5175
5176
5177
5178
5179

5180
5181
5182
5183
5184
5185










5186




5187
5188
5189
5190
5191
5192


5193
5194

5195
5196
5197
5198
5199
5200

5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
....
5225
5226
5227
5228
5229
5230
5231
5232

5233
5234

5235
5236
5237
5238
5239
5240
5241
5242
5243

5244
5245



5246
5247






5248









5249
5250
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263

5264
5265
5266
5267
5268
5269
5270
5271
5272
5273

5274
5275
5276
5277
5278
5279
5280
....
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
....
5313
5314
5315
5316
5317
5318
5319
5320

5321
5322
5323
5324
5325
5326
5327
*/
typedef struct QueryTerm {
  short int nPhrase; /* How many following terms are part of the same phrase */
  short int iPhrase; /* This is the i-th term of a phrase. */
  short int iColumn; /* Column of the index that must match this term */
  signed char isOr;  /* this term is preceded by "OR" */
  signed char isNot; /* this term is preceded by "-" */

  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
  int nTerm;         /* Number of bytes in pTerm[] */
} QueryTerm;


/* A query string is parsed into a Query structure.
 *
................................................................................


/* TODO(shess) If we pushed LeafReader to the top of the file, or to
** another file, term_select() could be pushed above
** docListOfTerm().
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm,
                      DocListType iType, DataBuffer *out);

/* Return a DocList corresponding to the query term *pTerm.  If *pTerm
** is the first term of a phrase query, go ahead and evaluate the phrase
** query and return the doclist for the entire phrase query.
**
** The resulting DL_DOCIDS doclist is stored in pResult, which is
................................................................................
  /* No phrase search if no position info. */
  assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS );

  /* This code should never be called with buffered updates. */
  assert( v->nPendingData<0 );

  dataBufferInit(&left, 0);
  rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm,
                  0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &left);
  if( rc ) return rc;
  for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){
    dataBufferInit(&right, 0);
    rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
                    DL_POSITIONS, &right);
    if( rc ){
      dataBufferDestroy(&left);
      return rc;
    }
    dataBufferInit(&new, 0);
    docListPhraseMerge(left.pData, left.nData, right.pData, right.nData,
                       i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new);
................................................................................
  t = &q->pTerms[q->nTerms - 1];
  CLEAR(t);
  t->pTerm = malloc(nTerm+1);
  memcpy(t->pTerm, pTerm, nTerm);
  t->pTerm[nTerm] = 0;
  t->nTerm = nTerm;
  t->isOr = q->nextIsOr;

  q->nextIsOr = 0;
  t->iColumn = q->nextColumn;
  q->nextColumn = q->dfltColumn;
}

/*
** Check to see if the string zToken[0...nToken-1] matches any
................................................................................
    pReader->pData += n+nSuffix;
    pReader->nData -= n+nSuffix;
  }
  pReader->iBlockid++;
}

/* Compare the current term to pTerm[nTerm], returning strcmp-style
** results.
*/
static int interiorReaderTermCmp(InteriorReader *pReader,
                                 const char *pTerm, int nTerm){
  const char *pReaderTerm = interiorReaderTerm(pReader);
  int nReaderTerm = interiorReaderTermBytes(pReader);
  int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm;

  if( n==0 ){
    if( nReaderTerm>0 ) return -1;
    if( nTerm>0 ) return 1;
    return 0;
  }

  c = memcmp(pReaderTerm, pTerm, n);
  if( c!=0 ) return c;

  return nReaderTerm - nTerm;
}

/****************************************************************/
/* LeafWriter is used to collect terms and associated doclist data
** into leaf blocks in %_segments (see top of file for format info).
** Expected usage is:
................................................................................

/* Call loadSegmentLeavesInt() with the leaf nodes from iStartLeaf to
** iEndLeaf (inclusive) as input, and merge the resulting doclist into
** out.
*/
static int loadSegmentLeaves(fulltext_vtab *v,
                             sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf,
                             const char *pTerm, int nTerm,
                             DataBuffer *out){
  LeavesReader reader;


  int rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader);
  if( rc!=SQLITE_OK ) return rc;

  rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, out);
  leavesReaderReset(&reader);
  leavesReaderDestroy(&reader);
  return rc;
}

/* Taking pData/nData as an interior node, find the child node which
** could include pTerm/nTerm.  Note that the interior node terms
** logically come between the blocks, so there is one more blockid
** than there are terms (that block contains terms >= the last
** interior-node term).
*/
static void getChildContaining(const char *pData, int nData,
                               const char *pTerm, int nTerm,

                               sqlite_int64 *piBlockid){
  InteriorReader reader;

  assert( nData>1 );
  assert( *pData!='\0' );
  interiorReaderInit(pData, nData, &reader);


  while( !interiorReaderAtEnd(&reader) ){
    if( interiorReaderTermCmp(&reader, pTerm, nTerm)>0 ) break;
    interiorReaderStep(&reader);
  }
  *piBlockid = interiorReaderCurrentBlockid(&reader);











  interiorReaderDestroy(&reader);




}

/* Read block at iBlockid and pass it with other params to
** getChildContaining().
*/
static int loadAndGetChildContaining(fulltext_vtab *v, sqlite_int64 iBlockid,


                                     const char *pTerm, int nTerm,
                                     sqlite_int64 *piBlockid){

  sqlite3_stmt *s = NULL;
  int rc;

  assert( iBlockid!=0 );
  assert( pTerm!=NULL );
  assert( nTerm!=0 );        /* TODO(shess) Why not allow this? */

  assert( piBlockid!=NULL );

  rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_int64(s, 1, iBlockid);
  if( rc!=SQLITE_OK ) return rc;

  rc = sql_step_statement(v, BLOCK_SELECT_STMT, &s);
  if( rc==SQLITE_DONE ) return SQLITE_ERROR;
  if( rc!=SQLITE_ROW ) return rc;

  getChildContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),
                     pTerm, nTerm, piBlockid);

  /* We expect only one row.  We must execute another sqlite3_step()
   * to complete the iteration; otherwise the table will remain
   * locked. */
  rc = sqlite3_step(s);
  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  if( rc!=SQLITE_DONE ) return rc;
................................................................................

/* Traverse the tree represented by pData[nData] looking for
** pTerm[nTerm], placing its doclist into *out.  This is internal to
** loadSegment() to make error-handling cleaner.
*/
static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData,
                          sqlite_int64 iLeavesEnd,
                          const char *pTerm, int nTerm, DataBuffer *out){

  /* Special case where root is a leaf. */
  if( *pData=='\0' ){

    return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, out);
  }else{
    int rc;
    sqlite_int64 iBlockid;

    /* Process pData as an interior node, then loop down the tree
    ** until we find a leaf node to scan for the term.
    */
    getChildContaining(pData, nData, pTerm, nTerm, &iBlockid);

    while( iBlockid>iLeavesEnd ){
      rc = loadAndGetChildContaining(v, iBlockid, pTerm, nTerm, &iBlockid);



      if( rc!=SQLITE_OK ) return rc;
    }
















    return loadSegmentLeaves(v, iBlockid, iBlockid, pTerm, nTerm, out);
  }
}

/* Call loadSegmentInt() to collect the doclist for pTerm/nTerm, then
** merge its doclist over *out (any duplicate doclists read from the
** segment rooted at pData will overwrite those in *out).
*/
/* NOTE(shess) Previous code passed out down to sub-routines for use
** in docListMerge().  This version deoptimizes things slightly, but
** prefix searches require a different merge function entirely.
*/
static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
                       sqlite_int64 iLeavesEnd,
                       const char *pTerm, int nTerm, DataBuffer *out){

  DataBuffer result;
  int rc;

  assert( nData>1 );

  /* This code should never be called with buffered updates. */
  assert( v->nPendingData<0 );

  dataBufferInit(&result, 0);
  rc = loadSegmentInt(v, pData, nData, iLeavesEnd, pTerm, nTerm, &result);

  if( rc==SQLITE_OK && result.nData>0 ){
    if( out->nData==0 ){
      DataBuffer tmp = *out;
      *out = result;
      result = tmp;
    }else{
      DataBuffer merged;
................................................................................
  return rc;
}

/* Scan the database and merge together the posting lists for the term
** into *out.
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm,
                      DocListType iType, DataBuffer *out){
  DataBuffer doclist;
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  /* This code should never be called with buffered updates. */
................................................................................
  /* Traverse the segments from oldest to newest so that newer doclist
  ** elements for given docids overwrite older elements.
  */
  while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
    const char *pData = sqlite3_column_blob(s, 0);
    const int nData = sqlite3_column_bytes(s, 0);
    const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1);
    rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, &doclist);

    if( rc!=SQLITE_OK ) goto err;
  }
  if( rc==SQLITE_DONE ){
    if( doclist.nData!=0 ){
      /* TODO(shess) The old term_select_all() code applied the column
      ** restrict as we merged segments, leading to smaller buffers.
      ** This is probably worthwhile to bring back, once the new storage







>







 







|







 







|





|







 







>







 







|


|












>







 







|
|

>
>
|








|
|
|
|
|

|
|
>
|






>

|


|

>
>
>
>
>
>
>
>
>
>

>
>
>
>



|

|
>
>
|
|
>






>
|











|
|







 







|
>


>



|


|

|
>
|
<
>
>
>

|
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
|













|
>









|
>







 







|







 







|
>







1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
....
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
....
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
....
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
....
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
....
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244
5245
5246
....
5250
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264
5265
5266
5267
5268
5269
5270
5271
5272

5273
5274
5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327
....
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
....
5360
5361
5362
5363
5364
5365
5366
5367
5368
5369
5370
5371
5372
5373
5374
5375
*/
typedef struct QueryTerm {
  short int nPhrase; /* How many following terms are part of the same phrase */
  short int iPhrase; /* This is the i-th term of a phrase. */
  short int iColumn; /* Column of the index that must match this term */
  signed char isOr;  /* this term is preceded by "OR" */
  signed char isNot; /* this term is preceded by "-" */
  signed char isPrefix; /* this term is followed by "*" */
  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
  int nTerm;         /* Number of bytes in pTerm[] */
} QueryTerm;


/* A query string is parsed into a Query structure.
 *
................................................................................


/* TODO(shess) If we pushed LeafReader to the top of the file, or to
** another file, term_select() could be pushed above
** docListOfTerm().
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm, int isPrefix,
                      DocListType iType, DataBuffer *out);

/* Return a DocList corresponding to the query term *pTerm.  If *pTerm
** is the first term of a phrase query, go ahead and evaluate the phrase
** query and return the doclist for the entire phrase query.
**
** The resulting DL_DOCIDS doclist is stored in pResult, which is
................................................................................
  /* No phrase search if no position info. */
  assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS );

  /* This code should never be called with buffered updates. */
  assert( v->nPendingData<0 );

  dataBufferInit(&left, 0);
  rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix,
                  0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &left);
  if( rc ) return rc;
  for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){
    dataBufferInit(&right, 0);
    rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
                    pQTerm[i].isPrefix, DL_POSITIONS, &right);
    if( rc ){
      dataBufferDestroy(&left);
      return rc;
    }
    dataBufferInit(&new, 0);
    docListPhraseMerge(left.pData, left.nData, right.pData, right.nData,
                       i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new);
................................................................................
  t = &q->pTerms[q->nTerms - 1];
  CLEAR(t);
  t->pTerm = malloc(nTerm+1);
  memcpy(t->pTerm, pTerm, nTerm);
  t->pTerm[nTerm] = 0;
  t->nTerm = nTerm;
  t->isOr = q->nextIsOr;
  t->isPrefix = 0;
  q->nextIsOr = 0;
  t->iColumn = q->nextColumn;
  q->nextColumn = q->dfltColumn;
}

/*
** Check to see if the string zToken[0...nToken-1] matches any
................................................................................
    pReader->pData += n+nSuffix;
    pReader->nData -= n+nSuffix;
  }
  pReader->iBlockid++;
}

/* Compare the current term to pTerm[nTerm], returning strcmp-style
** results.  If isPrefix, equality means equal through nTerm bytes.
*/
static int interiorReaderTermCmp(InteriorReader *pReader,
                                 const char *pTerm, int nTerm, int isPrefix){
  const char *pReaderTerm = interiorReaderTerm(pReader);
  int nReaderTerm = interiorReaderTermBytes(pReader);
  int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm;

  if( n==0 ){
    if( nReaderTerm>0 ) return -1;
    if( nTerm>0 ) return 1;
    return 0;
  }

  c = memcmp(pReaderTerm, pTerm, n);
  if( c!=0 ) return c;
  if( isPrefix && n==nTerm ) return 0;
  return nReaderTerm - nTerm;
}

/****************************************************************/
/* LeafWriter is used to collect terms and associated doclist data
** into leaf blocks in %_segments (see top of file for format info).
** Expected usage is:
................................................................................

/* Call loadSegmentLeavesInt() with the leaf nodes from iStartLeaf to
** iEndLeaf (inclusive) as input, and merge the resulting doclist into
** out.
*/
static int loadSegmentLeaves(fulltext_vtab *v,
                             sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf,
                             const char *pTerm, int nTerm, DataBuffer *out){
  int rc;
  LeavesReader reader;

  assert( iStartLeaf<=iEndLeaf );
  rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader);
  if( rc!=SQLITE_OK ) return rc;

  rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, out);
  leavesReaderReset(&reader);
  leavesReaderDestroy(&reader);
  return rc;
}

/* Taking pData/nData as an interior node, find the sequence of child
** nodes which could include pTerm/nTerm/isPrefix.  Note that the
** interior node terms logically come between the blocks, so there is
** one more blockid than there are terms (that block contains terms >=
** the last interior-node term).
*/
static void getChildrenContaining(const char *pData, int nData,
                                  const char *pTerm, int nTerm, int isPrefix,
                                  sqlite_int64 *piStartChild,
                                  sqlite_int64 *piEndChild){
  InteriorReader reader;

  assert( nData>1 );
  assert( *pData!='\0' );
  interiorReaderInit(pData, nData, &reader);

  /* Scan for the first child which could contain pTerm/nTerm. */
  while( !interiorReaderAtEnd(&reader) ){
    if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break;
    interiorReaderStep(&reader);
  }
  *piStartChild = interiorReaderCurrentBlockid(&reader);

  /* Keep scanning to find a term greater than our term, using prefix
  ** comparison if indicated.  If isPrefix is false, this will be the
  ** same blockid as the starting block.
  */
  while( !interiorReaderAtEnd(&reader) ){
    if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break;
    interiorReaderStep(&reader);
  }
  *piEndChild = interiorReaderCurrentBlockid(&reader);

  interiorReaderDestroy(&reader);

  /* Children must ascend, and if !prefix, both must be the same. */
  assert( *piEndChild>=*piStartChild );
  assert( isPrefix || *piStartChild==*piEndChild );
}

/* Read block at iBlockid and pass it with other params to
** getChildrenContaining().
*/
static int loadAndGetChildrenContaining(
  fulltext_vtab *v,
  sqlite_int64 iBlockid,
  const char *pTerm, int nTerm, int isPrefix,
  sqlite_int64 *piStartChild, sqlite_int64 *piEndChild
){
  sqlite3_stmt *s = NULL;
  int rc;

  assert( iBlockid!=0 );
  assert( pTerm!=NULL );
  assert( nTerm!=0 );        /* TODO(shess) Why not allow this? */
  assert( piStartChild!=NULL );
  assert( piEndChild!=NULL );

  rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_int64(s, 1, iBlockid);
  if( rc!=SQLITE_OK ) return rc;

  rc = sql_step_statement(v, BLOCK_SELECT_STMT, &s);
  if( rc==SQLITE_DONE ) return SQLITE_ERROR;
  if( rc!=SQLITE_ROW ) return rc;

  getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),
                        pTerm, nTerm, isPrefix, piStartChild, piEndChild);

  /* We expect only one row.  We must execute another sqlite3_step()
   * to complete the iteration; otherwise the table will remain
   * locked. */
  rc = sqlite3_step(s);
  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  if( rc!=SQLITE_DONE ) return rc;
................................................................................

/* Traverse the tree represented by pData[nData] looking for
** pTerm[nTerm], placing its doclist into *out.  This is internal to
** loadSegment() to make error-handling cleaner.
*/
static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData,
                          sqlite_int64 iLeavesEnd,
                          const char *pTerm, int nTerm, int isPrefix,
                          DataBuffer *out){
  /* Special case where root is a leaf. */
  if( *pData=='\0' ){
    assert( !isPrefix );   /* TODO(shess) Add prefix support. */
    return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, out);
  }else{
    int rc;
    sqlite_int64 iStartChild, iEndChild;

    /* Process pData as an interior node, then loop down the tree
    ** until we find the set of leaf nodes to scan for the term.
    */
    getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix,
                          &iStartChild, &iEndChild);
    while( iStartChild>iLeavesEnd ){

      sqlite_int64 iNextStart, iNextEnd;
      rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix,
                                        &iNextStart, &iNextEnd);
      if( rc!=SQLITE_OK ) return rc;

      /* If we've branched, follow the end branch, too. */
      if( iStartChild!=iEndChild ){
        sqlite_int64 iDummy;
        rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix,
                                          &iDummy, &iNextEnd);
        if( rc!=SQLITE_OK ) return rc;
      }

      assert( iNextStart<=iNextEnd );
      iStartChild = iNextStart;
      iEndChild = iNextEnd;
    }
    assert( iStartChild<=iLeavesEnd );
    assert( iEndChild<=iLeavesEnd );

    assert( !isPrefix );   /* TODO(shess) Add prefix support. */
    return loadSegmentLeaves(v, iStartChild, iEndChild, pTerm, nTerm, out);
  }
}

/* Call loadSegmentInt() to collect the doclist for pTerm/nTerm, then
** merge its doclist over *out (any duplicate doclists read from the
** segment rooted at pData will overwrite those in *out).
*/
/* NOTE(shess) Previous code passed out down to sub-routines for use
** in docListMerge().  This version deoptimizes things slightly, but
** prefix searches require a different merge function entirely.
*/
static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
                       sqlite_int64 iLeavesEnd,
                       const char *pTerm, int nTerm, int isPrefix,
                       DataBuffer *out){
  DataBuffer result;
  int rc;

  assert( nData>1 );

  /* This code should never be called with buffered updates. */
  assert( v->nPendingData<0 );

  dataBufferInit(&result, 0);
  rc = loadSegmentInt(v, pData, nData, iLeavesEnd,
                      pTerm, nTerm, isPrefix, &result);
  if( rc==SQLITE_OK && result.nData>0 ){
    if( out->nData==0 ){
      DataBuffer tmp = *out;
      *out = result;
      result = tmp;
    }else{
      DataBuffer merged;
................................................................................
  return rc;
}

/* Scan the database and merge together the posting lists for the term
** into *out.
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm, int isPrefix,
                      DocListType iType, DataBuffer *out){
  DataBuffer doclist;
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  /* This code should never be called with buffered updates. */
................................................................................
  /* Traverse the segments from oldest to newest so that newer doclist
  ** elements for given docids overwrite older elements.
  */
  while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
    const char *pData = sqlite3_column_blob(s, 0);
    const int nData = sqlite3_column_bytes(s, 0);
    const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1);
    rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix,
                     &doclist);
    if( rc!=SQLITE_OK ) goto err;
  }
  if( rc==SQLITE_DONE ){
    if( doclist.nData!=0 ){
      /* TODO(shess) The old term_select_all() code applied the column
      ** restrict as we merged segments, leading to smaller buffers.
      ** This is probably worthwhile to bring back, once the new storage