SQLite

Check-in [fdcea7b1ff]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add the option to omit offset information from posting lists in FTS1. (CVS 3456)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: fdcea7b1ffd821f3f2b6d30997d3957f705a6d0c
User & Date: drh 2006-10-03 11:42:29.000
Context
2006-10-03
12:04
Fix sqlite3_analyzer so that it works on databases containing virtual tables. (CVS 3457) (check-in: 47c8567fcb user: drh tags: trunk)
11:42
Add the option to omit offset information from posting lists in FTS1. (CVS 3456) (check-in: fdcea7b1ff user: drh tags: trunk)
2006-10-01
20:41
Another typo in the Porter stemmer check-in. (CVS 3455) (check-in: 6696bda11c user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts1/fts1.c.
175
176
177
178
179
180
181












182
183
184
185
186
187
188

typedef enum DocListType {
  DL_DOCIDS,              /* docids only */
  DL_POSITIONS,           /* docids + positions */
  DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
} DocListType;













typedef struct DocList {
  char *pData;
  int nData;
  DocListType iType;
  int iLastColumn;    /* the last column written */
  int iLastPos;       /* the last position written */
  int iLastOffset;    /* the last start offset written */







>
>
>
>
>
>
>
>
>
>
>
>







175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200

typedef enum DocListType {
  DL_DOCIDS,              /* docids only */
  DL_POSITIONS,           /* docids + positions */
  DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
} DocListType;

/*
** By default, positions and offsets are stored in the doclists.
** To change this so that only positions are stored, compile
** with
**
**          -DDL_DEFAULT=DL_POSITIONS
**
*/
#ifndef DL_DEFAULT
# define DL_DEFAULT DL_POSITIONS_OFFSETS
#endif

typedef struct DocList {
  char *pData;
  int nData;
  DocListType iType;
  int iLastColumn;    /* the last column written */
  int iLastPos;       /* the last position written */
  int iLastOffset;    /* the last start offset written */
269
270
271
272
273
274
275






276



277


278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
/* Add a position to the last position list in a doclist. */
static void docListAddPos(DocList *d, int iColumn, int iPos){
  assert( d->iType==DL_POSITIONS );
  addPos(d, iColumn, iPos);
  appendVarint(d, POS_END);  /* add new terminator */
}







static void docListAddPosOffset(DocList *d, int iColumn, int iPos,



                                int iStartOffset, int iEndOffset){


  assert( d->iType==DL_POSITIONS_OFFSETS );
  addPos(d, iColumn, iPos);

  assert( iStartOffset>=d->iLastOffset );
  appendVarint(d, iStartOffset-d->iLastOffset);
  d->iLastOffset = iStartOffset;

  assert( iEndOffset>=iStartOffset );
  appendVarint(d, iEndOffset-iStartOffset);

  appendVarint(d, POS_END);  /* add new terminator */
}

/*
** A DocListReader object is a cursor into a doclist.  Initialize
** the cursor to the beginning of the doclist by calling readerInit().
** Then use routines







>
>
>
>
>
>
|
>
>
>
|
>
>
|

|
|
|
|
<
|
|
|







281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306

307
308
309
310
311
312
313
314
315
316
/* Add a position to the last position list in a doclist. */
static void docListAddPos(DocList *d, int iColumn, int iPos){
  assert( d->iType==DL_POSITIONS );
  addPos(d, iColumn, iPos);
  appendVarint(d, POS_END);  /* add new terminator */
}

/*
** Add a position and starting and ending offsets to a doclist.
**
** If the doclist is setup to handle only positions, then insert
** the position only and ignore the offsets.
*/
static void docListAddPosOffset(
  DocList *d,             /* Doclist under construction */
  int iColumn,            /* Column the inserted term is part of */
  int iPos,               /* Position of the inserted term */
  int iStartOffset,       /* Starting offset of inserted term */
  int iEndOffset          /* Ending offset of inserted term */
){
  assert( d->iType>=DL_POSITIONS );
  addPos(d, iColumn, iPos);
  if( d->iType==DL_POSITIONS_OFFSETS ){
    assert( iStartOffset>=d->iLastOffset );
    appendVarint(d, iStartOffset-d->iLastOffset);
    d->iLastOffset = iStartOffset;

    assert( iEndOffset>=iStartOffset );
    appendVarint(d, iEndOffset-iStartOffset);
  }
  appendVarint(d, POS_END);  /* add new terminator */
}

/*
** A DocListReader object is a cursor into a doclist.  Initialize
** the cursor to the beginning of the doclist by calling readerInit().
** Then use routines
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
  rc = sqlite3_bind_int(s, 2, iSegment);
  if( rc!=SQLITE_OK ) return rc;

  rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
  if( rc!=SQLITE_ROW ) return rc;

  *rowid = sqlite3_column_int64(s, 0);
  docListInit(out, DL_POSITIONS_OFFSETS,
              sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));

  /* We expect only one row.  We must execute another sqlite3_step()
   * to complete the iteration; otherwise the table will remain locked. */
  rc = sqlite3_step(s);
  return rc==SQLITE_DONE ? SQLITE_ROW : rc;
}







|







1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
  rc = sqlite3_bind_int(s, 2, iSegment);
  if( rc!=SQLITE_OK ) return rc;

  rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
  if( rc!=SQLITE_ROW ) return rc;

  *rowid = sqlite3_column_int64(s, 0);
  docListInit(out, DL_DEFAULT,
              sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));

  /* We expect only one row.  We must execute another sqlite3_step()
   * to complete the iteration; otherwise the table will remain locked. */
  rc = sqlite3_step(s);
  return rc==SQLITE_DONE ? SQLITE_ROW : rc;
}
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
  if( rc!=SQLITE_OK ) return rc;

  docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0);

  /* TODO(shess) Handle schema and busy errors. */
  while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
    DocList old;

    /* TODO(shess) If we processed doclists from oldest to newest, we
    ** could skip the malloc() involved with the following call.  For







|







1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
  if( rc!=SQLITE_OK ) return rc;

  docListInit(&doclist, DL_DEFAULT, 0, 0);

  /* TODO(shess) Handle schema and busy errors. */
  while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
    DocList old;

    /* TODO(shess) If we processed doclists from oldest to newest, we
    ** could skip the malloc() involved with the following call.  For
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
    if( iPosition<0 ){
      pTokenizer->pModule->xClose(pCursor);
      return SQLITE_ERROR;
    }

    p = fts1HashFind(terms, pToken, nTokenBytes);
    if( p==NULL ){
      p = docListNew(DL_POSITIONS_OFFSETS);
      docListAddDocid(p, iDocid);
      fts1HashInsert(terms, pToken, nTokenBytes, p);
    }
    if( iColumn>=0 ){
      docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
    }
  }







|







2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
    if( iPosition<0 ){
      pTokenizer->pModule->xClose(pCursor);
      return SQLITE_ERROR;
    }

    p = fts1HashFind(terms, pToken, nTokenBytes);
    if( p==NULL ){
      p = docListNew(DL_DEFAULT);
      docListAddDocid(p, iDocid);
      fts1HashInsert(terms, pToken, nTokenBytes, p);
    }
    if( iColumn>=0 ){
      docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
    }
  }
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
                             DocList *d){
  sqlite_int64 iIndexRow;
  DocList doclist;
  int iSegment = 0, rc;

  rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
  if( rc==SQLITE_DONE ){
    docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0);
    docListUpdate(&doclist, d);
    /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
    rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
    goto err;
  }
  if( rc!=SQLITE_ROW ) return SQLITE_ERROR;








|







2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
                             DocList *d){
  sqlite_int64 iIndexRow;
  DocList doclist;
  int iSegment = 0, rc;

  rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
  if( rc==SQLITE_DONE ){
    docListInit(&doclist, DL_DEFAULT, 0, 0);
    docListUpdate(&doclist, d);
    /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
    rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
    goto err;
  }
  if( rc!=SQLITE_ROW ) return SQLITE_ERROR;