/ Check-in [9466367d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Break interior-node and leaf-node readers apart in loadSegment(). Previously, the code looped until the block was a leaf node as indicated by a leading NUL. Now the code loops until it finds a block in the range of leaf nodes for this segment, then reads it using LeavesReader. This will make it easier to traverse a range of leaves when doing a prefix search. (CVS 3884)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:9466367d65f43d58020e709428268dc2ff98aa35
User & Date: shess 2007-04-27 22:02:58
Context
2007-04-28
15:47
Add some tests (and 2 resulting bug fixes) to incr vacuum mode. (CVS 3885) check-in: 89b1b3f8 user: danielk1977 tags: trunk
2007-04-27
22:02
Break interior-node and leaf-node readers apart in loadSegment(). Previously, the code looped until the block was a leaf node as indicated by a leading NUL. Now the code loops until it finds a block in the range of leaf nodes for this segment, then reads it using LeavesReader. This will make it easier to traverse a range of leaves when doing a prefix search. (CVS 3884) check-in: 9466367d user: shess tags: trunk
21:59
Internationalize the TRIM functions. Ticket #2323. (CVS 3883) check-in: ff1f4e74 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts2/fts2.c.

1630
1631
1632
1633
1634
1635
1636

1637
1638
1639
1640
1641
1642
1643
1644
....
4802
4803
4804
4805
4806
4807
4808

















4809
4810
4811
4812
4813
4814
4815
....
5129
5130
5131
5132
5133
5134
5135



















5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
....
5156
5157
5158
5159
5160
5161
5162





































5163
5164
5165
5166
5167
5168
5169

5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180



5181
5182
5183
5184
5185
5186

5187




5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216
5217
....
5226
5227
5228
5229
5230
5231
5232

5233

5234
5235
5236
5237
5238
5239
5240
5241
  /* SEGDIR_SELECT */
  "select start_block, leaves_end_block, root from %_segdir "
  " where level = ? order by idx",
  /* SEGDIR_SPAN */
  "select min(start_block), max(end_block) from %_segdir "
  " where level = ? and start_block <> 0",
  /* SEGDIR_DELETE */ "delete from %_segdir where level = ?",

  /* SEGDIR_SELECT_ALL */ "select root from %_segdir order by level desc, idx",
};

/*
** A connection to a fulltext index is an instance of the following
** structure.  The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
................................................................................
  assert( !pReader->eof );
  return leafReaderData(&pReader->leafReader);
}

static int leavesReaderAtEnd(LeavesReader *pReader){
  return pReader->eof;
}


















static void leavesReaderDestroy(LeavesReader *pReader){
  leafReaderDestroy(&pReader->leafReader);
  dataBufferDestroy(&pReader->rootData);
  SCRAMBLE(pReader);
}

................................................................................

  assert( nData>1 );
  assert( *pData=='\0' );
  rc = leavesReaderInit(v, 0, 0, 0, pData, nData, &reader);
  if( rc!=SQLITE_OK ) return rc;

  rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, out);



















  leavesReaderDestroy(&reader);
  return rc;
}

/* Taking pData/nData as an interior node, find the child node which
** could include pTerm/nTerm.  Note that the interior node terms
** logically come between the blocks, so there is one more blockid
** than there are terms (that block contains terms >= the last
** interior-node term).
*/
static void getNextChild(const char *pData, int nData,
                         const char *pTerm, int nTerm,
                         sqlite_int64 *piBlockid){
  InteriorReader reader;

  assert( nData>1 );
  assert( *pData!='\0' );
  interiorReaderInit(pData, nData, &reader);

  while( !interiorReaderAtEnd(&reader) ){
................................................................................
    if( interiorReaderTermCmp(&reader, pTerm, nTerm)>0 ) break;
    interiorReaderStep(&reader);
  }
  *piBlockid = interiorReaderCurrentBlockid(&reader);

  interiorReaderDestroy(&reader);
}






































/* Traverse the tree represented by pData[nData] looking for
** pTerm[nTerm], merging its doclist over *out if found (any duplicate
** doclists read from the segment rooted at pData will overwrite those
** in *out).
*/
static int loadSegment(fulltext_vtab *v, const char *pData, int nData,

                       const char *pTerm, int nTerm, DataBuffer *out){
  int rc;
  sqlite3_stmt *s = NULL;

  assert( nData>1 );

  /* This code should never be called with buffered updates. */
  assert( v->nPendingData<0 );

  /* Process data as an interior node until we reach a leaf. */
  while( *pData!='\0' ){



    sqlite_int64 iBlockid;
    getNextChild(pData, nData, pTerm, nTerm, &iBlockid);

    rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s);
    if( rc!=SQLITE_OK ) return rc;


    rc = sqlite3_bind_int64(s, 1, iBlockid);




    if( rc!=SQLITE_OK ) return rc;

    rc = sql_step_statement(v, BLOCK_SELECT_STMT, &s);
    if( rc==SQLITE_DONE ) return SQLITE_ERROR;
    if( rc!=SQLITE_ROW ) return rc;

    pData = sqlite3_column_blob(s, 0);
    nData = sqlite3_column_bytes(s, 0);
  }

  rc = loadSegmentLeaf(v, pData, nData, pTerm, nTerm, out);
  if( rc!=SQLITE_OK ) return rc;

  /* If we selected a child node, we need to finish that select. */
  if( s!=NULL ){
    /* We expect only one row.  We must execute another sqlite3_step()
     * to complete the iteration; otherwise the table will remain
     * locked. */
    rc = sqlite3_step(s);
    if( rc==SQLITE_ROW ) return SQLITE_ERROR;
    if( rc!=SQLITE_DONE ) return rc;
  }
  return SQLITE_OK;
}

/* Scan the database and merge together the posting lists for the term
** into *out.
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm,
................................................................................

  dataBufferInit(&doclist, 0);

  /* Traverse the segments from oldest to newest so that newer doclist
  ** elements for given docids overwrite older elements.
  */
  while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){

    rc = loadSegment(v, sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),

                     pTerm, nTerm, &doclist);
    if( rc!=SQLITE_OK ) goto err;
  }
  if( rc==SQLITE_DONE ){
    if( doclist.nData!=0 ){
      /* TODO(shess) The old term_select_all() code applied the column
      ** restrict as we merged segments, leading to smaller buffers.
      ** This is probably worthwhile to bring back, once the new storage







>
|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>










|
|
|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







>

<
<
<





|
|
>
>
>

<

<
<
<
>
|
>
>
>
>
|
|
<
<
<

<
|

<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







>
|
>
|







1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
....
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
....
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
....
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244
5245



5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
5256

5257



5258
5259
5260
5261
5262
5263
5264
5265



5266

5267
5268














5269
5270
5271
5272
5273
5274
5275
....
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
  /* SEGDIR_SELECT */
  "select start_block, leaves_end_block, root from %_segdir "
  " where level = ? order by idx",
  /* SEGDIR_SPAN */
  "select min(start_block), max(end_block) from %_segdir "
  " where level = ? and start_block <> 0",
  /* SEGDIR_DELETE */ "delete from %_segdir where level = ?",
  /* SEGDIR_SELECT_ALL */
  "select root, leaves_end_block from %_segdir order by level desc, idx",
};

/*
** A connection to a fulltext index is an instance of the following
** structure.  The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
................................................................................
  assert( !pReader->eof );
  return leafReaderData(&pReader->leafReader);
}

static int leavesReaderAtEnd(LeavesReader *pReader){
  return pReader->eof;
}

/* loadSegmentLeaves() may not read all the way to SQLITE_DONE, thus
** leaving the statement handle open, which locks the table.
*/
/* TODO(shess) This "solution" is not satisfactory.  Really, there
** should be check-in function for all statement handles which
** arranges to call sqlite3_reset().  This most likely will require
** modification to control flow all over the place, though, so for now
** just punt.
**
** Note the the current system assumes that segment merges will run to
** completion, which is why this particular probably hasn't arisen in
** this case.  Probably a brittle assumption.
*/
static int leavesReaderReset(LeavesReader *pReader){
  return sqlite3_reset(pReader->pStmt);
}

static void leavesReaderDestroy(LeavesReader *pReader){
  leafReaderDestroy(&pReader->leafReader);
  dataBufferDestroy(&pReader->rootData);
  SCRAMBLE(pReader);
}

................................................................................

  assert( nData>1 );
  assert( *pData=='\0' );
  rc = leavesReaderInit(v, 0, 0, 0, pData, nData, &reader);
  if( rc!=SQLITE_OK ) return rc;

  rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, out);
  leavesReaderReset(&reader);
  leavesReaderDestroy(&reader);
  return rc;
}

/* Call loadSegmentLeavesInt() with the leaf nodes from iStartLeaf to
** iEndLeaf (inclusive) as input, and merge the resulting doclist into
** out.
*/
static int loadSegmentLeaves(fulltext_vtab *v,
                             sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf,
                             const char *pTerm, int nTerm,
                             DataBuffer *out){
  LeavesReader reader;
  int rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader);
  if( rc!=SQLITE_OK ) return rc;

  rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, out);
  leavesReaderReset(&reader);
  leavesReaderDestroy(&reader);
  return rc;
}

/* Taking pData/nData as an interior node, find the child node which
** could include pTerm/nTerm.  Note that the interior node terms
** logically come between the blocks, so there is one more blockid
** than there are terms (that block contains terms >= the last
** interior-node term).
*/
static void getChildContaining(const char *pData, int nData,
                               const char *pTerm, int nTerm,
                               sqlite_int64 *piBlockid){
  InteriorReader reader;

  assert( nData>1 );
  assert( *pData!='\0' );
  interiorReaderInit(pData, nData, &reader);

  while( !interiorReaderAtEnd(&reader) ){
................................................................................
    if( interiorReaderTermCmp(&reader, pTerm, nTerm)>0 ) break;
    interiorReaderStep(&reader);
  }
  *piBlockid = interiorReaderCurrentBlockid(&reader);

  interiorReaderDestroy(&reader);
}

/* Read block at iBlockid and pass it with other params to
** getChildContaining().
*/
static int loadAndGetChildContaining(fulltext_vtab *v, sqlite_int64 iBlockid,
                                     const char *pTerm, int nTerm,
                                     sqlite_int64 *piBlockid){
  sqlite3_stmt *s = NULL;
  int rc;

  assert( iBlockid!=0 );
  assert( pTerm!=NULL );
  assert( nTerm!=0 );        /* TODO(shess) Why not allow this? */
  assert( piBlockid!=NULL );

  rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_int64(s, 1, iBlockid);
  if( rc!=SQLITE_OK ) return rc;

  rc = sql_step_statement(v, BLOCK_SELECT_STMT, &s);
  if( rc==SQLITE_DONE ) return SQLITE_ERROR;
  if( rc!=SQLITE_ROW ) return rc;

  getChildContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),
                     pTerm, nTerm, piBlockid);

  /* We expect only one row.  We must execute another sqlite3_step()
   * to complete the iteration; otherwise the table will remain
   * locked. */
  rc = sqlite3_step(s);
  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  if( rc!=SQLITE_DONE ) return rc;

  return SQLITE_OK;
}

/* Traverse the tree represented by pData[nData] looking for
** pTerm[nTerm], merging its doclist over *out if found (any duplicate
** doclists read from the segment rooted at pData will overwrite those
** in *out).
*/
static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
                       sqlite_int64 iLeavesEnd,
                       const char *pTerm, int nTerm, DataBuffer *out){



  assert( nData>1 );

  /* This code should never be called with buffered updates. */
  assert( v->nPendingData<0 );

  /* Special case where root is a leaf. */
  if( *pData=='\0' ){
    return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, out);
  }else{
    int rc;
    sqlite_int64 iBlockid;





    /* Process pData as an interior node, then loop down the tree
    ** until we find a leaf node to scan for the term.
    */
    getChildContaining(pData, nData, pTerm, nTerm, &iBlockid);
    while( iBlockid>iLeavesEnd ){
      rc = loadAndGetChildContaining(v, iBlockid, pTerm, nTerm, &iBlockid);
      if( rc!=SQLITE_OK ) return rc;
    }





    return loadSegmentLeaves(v, iBlockid, iBlockid, pTerm, nTerm, out);
  }














}

/* Scan the database and merge together the posting lists for the term
** into *out.
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm,
................................................................................

  dataBufferInit(&doclist, 0);

  /* Traverse the segments from oldest to newest so that newer doclist
  ** elements for given docids overwrite older elements.
  */
  while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
    const char *pData = sqlite3_column_blob(s, 0);
    const int nData = sqlite3_column_bytes(s, 0);
    const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1);
    rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, &doclist);
    if( rc!=SQLITE_OK ) goto err;
  }
  if( rc==SQLITE_DONE ){
    if( doclist.nData!=0 ){
      /* TODO(shess) The old term_select_all() code applied the column
      ** restrict as we merged segments, leading to smaller buffers.
      ** This is probably worthwhile to bring back, once the new storage