/ Check-in [e98b0cf2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Miscellaneous restructuring and cleanup based on suggestions from shess. (CVS 3382)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:e98b0cf292f6dc9deb6ae9b773c52b16867f7556
User & Date: adamd 2006-09-02 00:23:02
Context
2006-09-02
13:22
Construct the .def files before calling dllwrap to build the .dll files. Ticket #1951. (CVS 3383) check-in: e6e49a38 user: drh tags: trunk
00:23
Miscellaneous restructuring and cleanup based on suggestions from shess. (CVS 3382) check-in: e98b0cf2 user: adamd tags: trunk
2006-09-01
17:06
Automatically compute the sqlite3.def and tclsqlite3.def files when building windows DLLs. This will (hopefully) keep the .def files in perfect synchronization with the DLLs. Ticket #1951. (CVS 3381) check-in: 1f6d7926 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts1/fts1.c.

190
191
192
193
194
195
196


197







198
199
200
201
202



203
204
205
206
207
208
209


210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242

243
244
245
246
247
248
249
250

251

252

253
254
255
256
257
258
259
260
261
262

263
264
265
266
267
268
269
...
282
283
284
285
286
287
288
289
290
291
292










293
294
295
296
297
298
299
300
301
302
303
304
305

306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
...
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
...
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411

412


413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428













429

430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449

450
451
452
453
454


455
456
457
458
459
460
461
462
463

464
465
466
467

468
469
470
471
472
473
474
....
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
....
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177

1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194


1195






1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
....
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
....
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
....
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
....
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
  d->pData = realloc(d->pData, d->nData + n);
  memcpy(d->pData + d->nData, c, n);
  d->nData += n;
}

static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
  appendVarint(d, iDocid);


  d->iLastPos = 0;







}

/* Add a position to the last position list in a doclist. */
static void docListAddPos(DocList *d, int iPos){
  assert( d->iType>=DL_POSITIONS );



  appendVarint(d, iPos-d->iLastPos+1);
  d->iLastPos = iPos;
}

static void docListAddPosOffset(DocList *d, int iPos,
                                int iStartOffset, int iEndOffset){
  assert( d->iType==DL_POSITIONS_OFFSETS );


  docListAddPos(d, iPos);
  appendVarint(d, iStartOffset-d->iLastOffset);
  d->iLastOffset = iStartOffset;
  appendVarint(d, iEndOffset-iStartOffset);
}

/* Terminate the last position list in the given doclist. */
static void docListAddEndPos(DocList *d){
  appendVarint(d, 0);
}

typedef struct DocListReader {
  DocList *pDoclist;
  char *p;
  int iLastPos;    /* the last position read */
} DocListReader;

static void readerInit(DocListReader *r, DocList *pDoclist){
  r->pDoclist = pDoclist;
  if( pDoclist!=NULL ){
    r->p = pDoclist->pData;
  }
  r->iLastPos = 0;
}

static int readerAtEnd(DocListReader *pReader){
  return pReader->p >= docListEnd(pReader->pDoclist);
}

/* Peek at the next docid without advancing the read pointer. */
static sqlite_int64 peekDocid(DocListReader *pReader){
  sqlite_int64 ret;
  assert( !readerAtEnd(pReader) );

  getVarint(pReader->p, &ret);
  return ret;
}

/* Read the next docid. */
static sqlite_int64 readDocid(DocListReader *pReader){
  sqlite_int64 ret;
  assert( !readerAtEnd(pReader) );

  pReader->p += getVarint(pReader->p, &ret);

  pReader->iLastPos = 0;

  return ret;
}

/* Read the next position from a position list.
 * Returns the position, or -1 at the end of the list. */
static int readPosition(DocListReader *pReader){
  int i;
  int iType = pReader->pDoclist->iType;
  assert( iType>=DL_POSITIONS );
  assert( !readerAtEnd(pReader) );


  pReader->p += getVarint32(pReader->p, &i);
  if( i==0 ){
    pReader->iLastPos = -1;
    return -1;
  }
  pReader->iLastPos += ((int) i)-1;
................................................................................
    ;
}

/* Skip over a docid, including its position list if the doclist has
 * positions. */
static void skipDocument(DocListReader *pReader){
  readDocid(pReader);
  if( pReader->pDoclist->iType >= DL_POSITIONS ){
    skipPositionList(pReader);
  }
}











static sqlite_int64 firstDocid(DocList *d){
  DocListReader r;
  readerInit(&r, d);
  return readDocid(&r);
}

/* Doclist multi-tool.  Pass pUpdate==NULL to delete the indicated docid;
 * otherwise pUpdate, which must contain only the single docid [iDocid], is
 * inserted (if not present) or updated (if already present). */
static int docListUpdate(DocList *d, sqlite_int64 iDocid, DocList *pUpdate){
  int modified = 0;
  DocListReader reader;

  char *p;

  if( pUpdate!=NULL ){
    assert( d->iType==pUpdate->iType);
    assert( iDocid==firstDocid(pUpdate) );
  }

  readerInit(&reader, d);
  while( !readerAtEnd(&reader) && peekDocid(&reader)<iDocid ){
    skipDocument(&reader);
  }

  p = reader.p;
  /* Delete if there is a matching element. */
  if( !readerAtEnd(&reader) && iDocid==peekDocid(&reader) ){
    skipDocument(&reader);
    memmove(p, reader.p, docListEnd(d) - reader.p);
    d->nData -= (reader.p - p);
    modified = 1;
  }

  /* Insert if indicated. */
  if( pUpdate!=NULL ){
    int iDoclist = p-d->pData;
    docListAddEndPos(pUpdate);

    d->pData = realloc(d->pData, d->nData+pUpdate->nData);
    p = d->pData + iDoclist;

    memmove(p+pUpdate->nData, p, docListEnd(d) - p);
    memcpy(p, pUpdate->pData, pUpdate->nData);
    d->nData += pUpdate->nData;
................................................................................
  const char *pSplitPoint = d->pData + d->nData / 2;
  DocListReader reader;

  readerInit(&reader, d);
  while( reader.p<pSplitPoint ){
    skipDocument(&reader);
  }
  if( readerAtEnd(&reader) ) return 0;
  docListInit(d2, d->iType, reader.p, docListEnd(d) - reader.p);
  d->nData = reader.p - d->pData;
  d->pData = realloc(d->pData, d->nData);
  return 1;
}

/* A DocListMerge computes the AND of an in-memory DocList [in] and a chunked
................................................................................
 * If [in] has positions, then the merge output contains only documents with
 * matching positions in the two input doclists.  If [in] does not have
 * positions, then the merge output contains all documents common to the two
 * input doclists.
 *
 * If [in] is NULL, then the on-disk doclist is copied to [out] directly.
 *
 * A merge is performed using an integer [iOffset] provided by the caller.
 * [iOffset] is subtracted from each position in the on-disk doclist for the
 * purpose of position comparison; this is helpful in implementing phrase
 * searches.
 *
 * A DocListMerge is not yet able to propagate offsets through query
 * processing; we should add that capability soon.
*/
typedef struct DocListMerge {
  DocListReader in;
  DocList *pOut;
  int iOffset;
} DocListMerge;

static void mergeInit(DocListMerge *m,
                      DocList *pIn, int iOffset, DocList *pOut){
  readerInit(&m->in, pIn);
  m->pOut = pOut;
  m->iOffset = iOffset;

  /* can't handle offsets yet */
  assert( pIn==NULL || pIn->iType <= DL_POSITIONS );
  assert( pOut->iType <= DL_POSITIONS );
}

/* A helper function for mergeBlock(), below.  Merge the position lists
 * pointed to by m->in and pBlockReader.
 * If the merge matches, write [iDocid] to m->pOut; if m->pOut
 * has positions then write all matching positions as well. */
static void mergePosList(DocListMerge *m, sqlite_int64 iDocid,
                  DocListReader *pBlockReader){
  int block_pos = readPosition(pBlockReader);
  int in_pos = readPosition(&m->in);
  int match = 0;
  while( block_pos!=-1 || in_pos!=-1 ){

    if( block_pos-m->iOffset==in_pos ){


      if( !match ){
        docListAddDocid(m->pOut, iDocid);
        match = 1;
      }
      if( m->pOut->iType >= DL_POSITIONS ){
        docListAddPos(m->pOut, in_pos);
      }
      block_pos = readPosition(pBlockReader);
      in_pos = readPosition(&m->in);
    } else if( in_pos==-1 || (block_pos!=-1 && block_pos-m->iOffset<in_pos) ){
      block_pos = readPosition(pBlockReader);
    } else {
      in_pos = readPosition(&m->in);
    }
  }
  if( m->pOut->iType >= DL_POSITIONS && match ){













    docListAddEndPos(m->pOut);

  }
}

/* Merge one block of an on-disk doclist into a DocListMerge. */
static void mergeBlock(DocListMerge *m, DocList *pBlock){
  DocListReader blockReader;
  assert( pBlock->iType >= DL_POSITIONS );
  readerInit(&blockReader, pBlock);
  while( !readerAtEnd(&blockReader) ){
    sqlite_int64 iDocid = readDocid(&blockReader);
    if( m->in.pDoclist!=NULL ){
      while( 1 ){
        if( readerAtEnd(&m->in) ) return;  /* nothing more to merge */
        if( peekDocid(&m->in)>=iDocid ) break;
        skipDocument(&m->in);
      }
      if( peekDocid(&m->in)>iDocid ){  /* [pIn] has no match with iDocid */
        skipPositionList(&blockReader);  /* skip this docid in the block */
        continue;
      }

      readDocid(&m->in);
    }
    /* We have a document match. */
    if( m->in.pDoclist==NULL || m->in.pDoclist->iType < DL_POSITIONS ){
      /* We don't need to do a poslist merge. */


      docListAddDocid(m->pOut, iDocid);
      if( m->pOut->iType >= DL_POSITIONS ){
        /* Copy all positions to the output doclist. */
        while( 1 ){
          int pos = readPosition(&blockReader);
          if( pos==-1 ) break;
          docListAddPos(m->pOut, pos);
        }
        docListAddEndPos(m->pOut);

      } else skipPositionList(&blockReader);
      continue;
    }
    mergePosList(m, iDocid, &blockReader);

  }
}

/* Duplicate a string; the caller must free() the returned string.
 * (We don't use strdup() since it's not part of the standard C library and
 * may not be available everywhere.) */
static char *string_dup(const char *s){
................................................................................
          c->eof = 1;
          return rc;
      }
    case QUERY_FULLTEXT:
      rc = sqlite3_reset(c->pStmt);
      if( rc!=SQLITE_OK ) return rc;

      if( readerAtEnd(&c->result)){
        c->eof = 1;
        return SQLITE_OK;
      }
      iDocid = readDocid(&c->result);
      rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
      if( rc!=SQLITE_OK ) return rc;
      /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
................................................................................

  rc = sqlite3_bind_text(*ppStmt, 1, pTerm, nTerm, SQLITE_TRANSIENT);
  if( rc!=SQLITE_OK ) return rc;

  return sqlite3_step(*ppStmt);   /* TODO(adamd): handle schema error */
}

/* Read the posting list for [zTerm]; AND it with the doclist [in] to
 * produce the doclist [out], using the given offset [iOffset] for phrase
 * matching.
 * (*pSelect) is used to hold an SQLite statement used inside this function;
 * the caller should initialize *pSelect to NULL before the first call.
 */
static int query_merge(fulltext_vtab *v, sqlite3_stmt **pSelect,
                       const char *pTerm, int nTerm,
                       DocList *pIn, int iOffset, DocList *out){
  int rc;
  DocListMerge merge;

  if( pIn!=NULL && !pIn->nData ){
    /* If [pIn] is already empty, there's no point in reading the
     * posting list to AND it in; return immediately. */
      return SQLITE_OK;
  }

  rc = term_select_doclist(v, pTerm, nTerm, pSelect);
  if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc;

  mergeInit(&merge, pIn, iOffset, out);
  while( rc==SQLITE_ROW ){
    DocList block;
    docListInit(&block, DL_POSITIONS_OFFSETS,
                sqlite3_column_blob(*pSelect, 0),
                sqlite3_column_bytes(*pSelect, 0));
    mergeBlock(&merge, &block);
    docListDestroy(&block);

    rc = sqlite3_step(*pSelect);
    if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ){
      return rc;
    }
  }
  
  return SQLITE_OK;
}

typedef struct QueryTerm {
  int is_phrase;    /* true if this term begins a new phrase */
  char *pTerm;
  int nTerm;
} QueryTerm;

/* A parsed query.
 *
 * As an example, parsing the query ["four score" years "new nation"] will
 * yield a Query with 5 terms:
 *   "four",   is_phrase = 1
 *   "score",  is_phrase = 0
 *   "years",  is_phrase = 1
 *   "new",    is_phrase = 1
 *   "nation", is_phrase = 0
 */
typedef struct Query {
  int nTerms;
  QueryTerm *pTerm;
} Query;

static void query_add(Query *q, int is_phrase, const char *pTerm, int nTerm){
  QueryTerm *t;
  ++q->nTerms;
  q->pTerm = realloc(q->pTerm, q->nTerms * sizeof(q->pTerm[0]));
  t = &q->pTerm[q->nTerms - 1];
  t->is_phrase = is_phrase;
  t->pTerm = malloc(nTerm);
  memcpy(t->pTerm, pTerm, nTerm);
  t->nTerm = nTerm;
}
    
static void query_free(Query *q){
  int i;
  for(i = 0; i < q->nTerms; ++i){
    free(q->pTerm[i].pTerm);
  }
  free(q->pTerm);
}

static int tokenize_segment(sqlite3_tokenizer *pTokenizer,
                            const char *pSegment, int nSegment, int in_phrase,
                            Query *pQuery){
  sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
  sqlite3_tokenizer_cursor *pCursor;
  int is_first = 1;
  
  int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
  if( rc!=SQLITE_OK ) return rc;
  pCursor->pTokenizer = pTokenizer;

  while( 1 ){
    const char *pToken;
    int nToken, iStartOffset, iEndOffset, dummy_pos;

    rc = pModule->xNext(pCursor,
                        &pToken, &nToken,
                        &iStartOffset, &iEndOffset,
                        &dummy_pos);
    if( rc!=SQLITE_OK ) break;
    query_add(pQuery, !in_phrase || is_first, pToken, nToken);
    is_first = 0;
  }

  return pModule->xClose(pCursor);
}

/* Parse a query string, yielding a Query object. */

static int parse_query(fulltext_vtab *v, const char *pInput, int nInput,
                       Query *pQuery){
  int iInput, in_phrase = 0;

  if( nInput<0 ) nInput = strlen(pInput);
  pQuery->nTerms = 0;
  pQuery->pTerm = NULL;

  for(iInput=0; iInput<nInput; ++iInput){
    int i;
    for(i=iInput; i<nInput && pInput[i]!='"'; ++i)
      ;
    if( i>iInput ){
      tokenize_segment(v->pTokenizer, pInput+iInput, i-iInput, in_phrase,
                       pQuery);
    }
    iInput = i;


    in_phrase = !in_phrase;






  }
  return SQLITE_OK;
}

/* Perform a full-text query; return a list of documents in [pResult]. */
static int fulltext_query(fulltext_vtab *v, const char *pInput, int nInput,
                          DocList **pResult){
  Query q;
  int phrase_start = -1;
  int i;
  sqlite3_stmt *pSelect = NULL;
  DocList *d = NULL;

  int rc = parse_query(v, pInput, nInput, &q);
  if( rc!=SQLITE_OK ) return rc;

  /* Merge terms. */
  for(i = 0 ; i < q.nTerms ; ++i){
    /* In each merge step, we need to generate positions whenever we're
     * processing a phrase which hasn't ended yet. */
    int need_positions = i<q.nTerms-1 && !q.pTerm[i+1].is_phrase;
    DocList *next = docListNew(need_positions ? DL_POSITIONS : DL_DOCIDS);
    if( q.pTerm[i].is_phrase ){
      phrase_start = i;
    }
    rc = query_merge(v, &pSelect, q.pTerm[i].pTerm, q.pTerm[i].nTerm,
                     d, i-phrase_start, next);
    if( rc!=SQLITE_OK ) break;
    if( d!=NULL ){
      docListDelete(d);
    }
    d = next;
  }

  sqlite3_finalize(pSelect);
  query_free(&q);
  *pResult = d;
  return rc;
}

static int fulltextFilter(sqlite3_vtab_cursor *pCursor,
                          int idxNum, const char *idxStr,
                          int argc, sqlite3_value **argv){
................................................................................
      break;

    case QUERY_FULLTEXT:   /* full-text search */
    {
      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
      DocList *pResult;
      assert( argc==1 );
      rc = fulltext_query(v, zQuery, -1, &pResult);
      if( rc!=SQLITE_OK ) return rc;
      readerInit(&c->result, pResult);
      zStatement = "select rowid, content from %_content where rowid = ?";
      break;
    }

    default:
................................................................................
  fulltext_cursor *c = (fulltext_cursor *) pCursor;

  *pRowid = sqlite3_column_int64(c->pStmt, 0);
  return SQLITE_OK;
}

/* Build a hash table containing all terms in pText. */
static int build_terms(fts1Hash *terms, sqlite3_tokenizer *pTokenizer,
                       const char *pText, int nText, sqlite_int64 iDocid){
  sqlite3_tokenizer_cursor *pCursor;
  const char *pToken;
  int nTokenBytes;
  int iStartOffset, iEndOffset, iPosition;
  int rc;

................................................................................

  rc = content_insert(v, pRequestRowid, pText, nText);
  if( rc!=SQLITE_OK ) return rc;
  *piRowid = sqlite3_last_insert_rowid(v->db);

  if( !pText || !nText ) return SQLITE_OK;   /* nothing to index */

  rc = build_terms(&terms, v->pTokenizer, pText, nText, *piRowid);
  if( rc!=SQLITE_OK ) return rc;

  for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
    DocList *p = fts1HashData(e);
    rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), *piRowid, p);
    if( rc!=SQLITE_OK ) break;
  }
................................................................................
  int nText;
  fts1Hash terms;
  fts1HashElem *e;

  int rc = content_select(v, iRow, &pText, &nText);
  if( rc!=SQLITE_OK ) return rc;

  rc = build_terms(&terms, v->pTokenizer, pText, nText, iRow);
  free(pText);
  if( rc!=SQLITE_OK ) return rc;

  for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
    rc = index_delete_term(v, fts1HashKey(e), fts1HashKeysize(e), iRow);
    if( rc!=SQLITE_OK ) break;
  }







>
>
|
>
>
>
>
>
>
>




|
>
>
>
|
<





>
>
|



<
<
<
<
|





|







|


|






|
>







|
>

>
|
>









|
>







 







|



>
>
>
>
>
>
>
>
>
>













>








|
<
<



|









<







 







|







 







|
|









|



|


|


|
|







|
|
|

<
>
|
>
>




|
|

|
|
|
|

|


<
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>






|

|

|
|
<
<
<
<
<
<
|
|
>

<
<
|
<
>
>
|
<
<
<
<
<
<
|
<
>
|
<
<
<
>







 







|







 







|
|
<



|

|



<
|
|
|
<




|









|
<
<






|








|
|
|
|
|



|


|


|
|
|





|


|

|


|
|











|



|
|

|






|
>
|
|
|



|






|



>
>
|
>
>
>
>
>
>





|







|






|
|
|


|









|







 







|







 







|







 







|







 







|







190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215

216
217
218
219
220
221
222
223
224
225
226




227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
...
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339


340
341
342
343
344
345
346
347
348
349
350
351
352

353
354
355
356
357
358
359
...
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
...
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432

433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451

452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478






479
480
481
482


483

484
485
486






487

488
489



490
491
492
493
494
495
496
497
....
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
....
1087
1088
1089
1090
1091
1092
1093
1094
1095

1096
1097
1098
1099
1100
1101
1102
1103
1104

1105
1106
1107

1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122


1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
....
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
....
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
....
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
....
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
  d->pData = realloc(d->pData, d->nData + n);
  memcpy(d->pData + d->nData, c, n);
  d->nData += n;
}

static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
  appendVarint(d, iDocid);
  if( d->iType>=DL_POSITIONS ){
    appendVarint(d, 0);  /* initially empty position list */
    d->iLastPos = 0;
  }
}

/* helper function for docListAddPos and docListAddPosOffset */
static void addPos(DocList *d, int iPos) {
  appendVarint(d, iPos-d->iLastPos+1);
  d->iLastPos = iPos;
}

/* Add a position to the last position list in a doclist. */
static void docListAddPos(DocList *d, int iPos){
  assert( d->iType==DL_POSITIONS );
  assert( d->nData>0 );
  --d->nData;  /* remove previous terminator */
  addPos(d, iPos);
  appendVarint(d, 0);  /* add new terminator */

}

static void docListAddPosOffset(DocList *d, int iPos,
                                int iStartOffset, int iEndOffset){
  assert( d->iType==DL_POSITIONS_OFFSETS );
  assert( d->nData>0 );
  --d->nData;  /* remove previous terminator */
  addPos(d, iPos);
  appendVarint(d, iStartOffset-d->iLastOffset);
  d->iLastOffset = iStartOffset;
  appendVarint(d, iEndOffset-iStartOffset);




  appendVarint(d, 0);  /* add new terminator */
}

typedef struct DocListReader {
  DocList *pDoclist;
  char *p;
  int iLastPos;  /* the last position read, or -1 when not in a position list */
} DocListReader;

static void readerInit(DocListReader *r, DocList *pDoclist){
  r->pDoclist = pDoclist;
  if( pDoclist!=NULL ){
    r->p = pDoclist->pData;
  }
  r->iLastPos = -1;
}

static int atEnd(DocListReader *pReader){
  return pReader->p >= docListEnd(pReader->pDoclist);
}

/* Peek at the next docid without advancing the read pointer. */
static sqlite_int64 peekDocid(DocListReader *pReader){
  sqlite_int64 ret;
  assert( !atEnd(pReader) );
  assert( pReader->iLastPos==-1 );
  getVarint(pReader->p, &ret);
  return ret;
}

/* Read the next docid. */
static sqlite_int64 readDocid(DocListReader *pReader){
  sqlite_int64 ret;
  assert( !atEnd(pReader) );
  assert( pReader->iLastPos==-1 );
  pReader->p += getVarint(pReader->p, &ret);
  if( pReader->pDoclist->iType>=DL_POSITIONS ){
    pReader->iLastPos = 0;
  }
  return ret;
}

/* Read the next position from a position list.
 * Returns the position, or -1 at the end of the list. */
static int readPosition(DocListReader *pReader){
  int i;
  int iType = pReader->pDoclist->iType;
  assert( iType>=DL_POSITIONS );
  assert( !atEnd(pReader) );
  assert( pReader->iLastPos!=-1 );

  pReader->p += getVarint32(pReader->p, &i);
  if( i==0 ){
    pReader->iLastPos = -1;
    return -1;
  }
  pReader->iLastPos += ((int) i)-1;
................................................................................
    ;
}

/* Skip over a docid, including its position list if the doclist has
 * positions. */
static void skipDocument(DocListReader *pReader){
  readDocid(pReader);
  if( pReader->pDoclist->iType>=DL_POSITIONS ){
    skipPositionList(pReader);
  }
}

/* Skip past all docids which are less than [iDocid].  Returns 1 if a docid
 * matching [iDocid] was found.  */
static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){
  sqlite_int64 d = 0;
  while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){
    skipDocument(pReader);
  }
  return !atEnd(pReader) && d==iDocid;
}

static sqlite_int64 firstDocid(DocList *d){
  DocListReader r;
  readerInit(&r, d);
  return readDocid(&r);
}

/* Doclist multi-tool.  Pass pUpdate==NULL to delete the indicated docid;
 * otherwise pUpdate, which must contain only the single docid [iDocid], is
 * inserted (if not present) or updated (if already present). */
static int docListUpdate(DocList *d, sqlite_int64 iDocid, DocList *pUpdate){
  int modified = 0;
  DocListReader reader;
  int found;
  char *p;

  if( pUpdate!=NULL ){
    assert( d->iType==pUpdate->iType);
    assert( iDocid==firstDocid(pUpdate) );
  }

  readerInit(&reader, d);
  found = skipToDocid(&reader, iDocid);



  p = reader.p;
  /* Delete if there is a matching element. */
  if( found ){
    skipDocument(&reader);
    memmove(p, reader.p, docListEnd(d) - reader.p);
    d->nData -= (reader.p - p);
    modified = 1;
  }

  /* Insert if indicated. */
  if( pUpdate!=NULL ){
    int iDoclist = p-d->pData;


    d->pData = realloc(d->pData, d->nData+pUpdate->nData);
    p = d->pData + iDoclist;

    memmove(p+pUpdate->nData, p, docListEnd(d) - p);
    memcpy(p, pUpdate->pData, pUpdate->nData);
    d->nData += pUpdate->nData;
................................................................................
  const char *pSplitPoint = d->pData + d->nData / 2;
  DocListReader reader;

  readerInit(&reader, d);
  while( reader.p<pSplitPoint ){
    skipDocument(&reader);
  }
  if( atEnd(&reader) ) return 0;
  docListInit(d2, d->iType, reader.p, docListEnd(d) - reader.p);
  d->nData = reader.p - d->pData;
  d->pData = realloc(d->pData, d->nData);
  return 1;
}

/* A DocListMerge computes the AND of an in-memory DocList [in] and a chunked
................................................................................
 * If [in] has positions, then the merge output contains only documents with
 * matching positions in the two input doclists.  If [in] does not have
 * positions, then the merge output contains all documents common to the two
 * input doclists.
 *
 * If [in] is NULL, then the on-disk doclist is copied to [out] directly.
 *
 * A merge is performed using an integer [iPhrasePos] provided by the caller.
 * [iPhrasePos] is subtracted from each position in the on-disk doclist for the
 * purpose of position comparison; this is helpful in implementing phrase
 * searches.
 *
 * A DocListMerge is not yet able to propagate offsets through query
 * processing; we should add that capability soon.
*/
typedef struct DocListMerge {
  DocListReader in;
  DocList *pOut;
  int iPhrasePos;
} DocListMerge;

static void mergeInit(DocListMerge *m,
                      DocList *pIn, int iPhrasePos, DocList *pOut){
  readerInit(&m->in, pIn);
  m->pOut = pOut;
  m->iPhrasePos = iPhrasePos;

  /* can't handle offsets yet */
  assert( pIn==NULL || pIn->iType<=DL_POSITIONS );
  assert( pOut->iType<=DL_POSITIONS );
}

/* A helper function for mergeBlock(), below.  Merge the position lists
 * pointed to by m->in and pBlockReader.
 * If the merge matches, write [iDocid] to m->pOut; if m->pOut
 * has positions then write all matching positions as well. */
static void mergePosList(DocListMerge *m, sqlite_int64 iDocid,
                         DocListReader *pBlockReader){
  int iBlockPos = readPosition(pBlockReader);
  int iInPos = readPosition(&m->in);
  int match = 0;


  /* Loop until we've reached the end of both position lists. */
  while( iBlockPos!=-1 || iInPos!=-1 ){
    if( iBlockPos-m->iPhrasePos==iInPos ){
      if( !match ){
        docListAddDocid(m->pOut, iDocid);
        match = 1;
      }
      if( m->pOut->iType>=DL_POSITIONS ){
        docListAddPos(m->pOut, iInPos);
      }
      iBlockPos = readPosition(pBlockReader);
      iInPos = readPosition(&m->in);
    } else if( iInPos==-1 || (iBlockPos!=-1 && iBlockPos-m->iPhrasePos<iInPos) ){
      iBlockPos = readPosition(pBlockReader);
    } else {
      iInPos = readPosition(&m->in);
    }
  }

}

/* A helper function for mergeBlock(), below.  Copy the docid and
 * position list (if wanted) from pBlockReader to pOut. */
static void copyDocument(DocList *pOut, sqlite_int64 iDocid,
                         DocListReader *pBlockReader){
  docListAddDocid(pOut, iDocid);
  if( pOut->iType<DL_POSITIONS ){
    skipPositionList(pBlockReader);
  } else {
    /* Copy all positions to the output doclist. */
    int pos;
    while( (pos = readPosition(pBlockReader))!=-1 ){
      docListAddPos(pOut, pos);
    }
  }
}

/* Merge one block of an on-disk doclist into a DocListMerge. */
static void mergeBlock(DocListMerge *m, DocList *pBlock){
  DocListReader blockReader;
  assert( pBlock->iType>=DL_POSITIONS );
  readerInit(&blockReader, pBlock);
  while( !atEnd(&blockReader) ){
    sqlite_int64 iDocid = readDocid(&blockReader);
    if( m->in.pDoclist==NULL ){
      copyDocument(m->pOut, iDocid, &blockReader);






      continue;
    }
    if( skipToDocid(&m->in, iDocid) ){  /* we have a docid match */
      readDocid(&m->in);


      if( m->in.pDoclist->iType>=DL_POSITIONS ){

        mergePosList(m, iDocid, &blockReader);
      } else {
        copyDocument(m->pOut, iDocid, &blockReader);






      }

    } else if( !atEnd(&m->in) ){
      skipPositionList(&blockReader);  /* skip this docid in the block */



    } else return;  /* nothing more to merge */
  }
}

/* Duplicate a string; the caller must free() the returned string.
 * (We don't use strdup() since it's not part of the standard C library and
 * may not be available everywhere.) */
static char *string_dup(const char *s){
................................................................................
          c->eof = 1;
          return rc;
      }
    case QUERY_FULLTEXT:
      rc = sqlite3_reset(c->pStmt);
      if( rc!=SQLITE_OK ) return rc;

      if( atEnd(&c->result)){
        c->eof = 1;
        return SQLITE_OK;
      }
      iDocid = readDocid(&c->result);
      rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
      if( rc!=SQLITE_OK ) return rc;
      /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
................................................................................

  rc = sqlite3_bind_text(*ppStmt, 1, pTerm, nTerm, SQLITE_TRANSIENT);
  if( rc!=SQLITE_OK ) return rc;

  return sqlite3_step(*ppStmt);   /* TODO(adamd): handle schema error */
}

/* Read the posting list for [pTerm]; AND it with the doclist [pIn] to
 * produce the doclist [out], using the given phrase position [iPhrasePos].

 * (*pSelect) is used to hold an SQLite statement used inside this function;
 * the caller should initialize *pSelect to NULL before the first call.
 */
static int mergeQuery(fulltext_vtab *v, sqlite3_stmt **pSelect,
                       const char *pTerm, int nTerm,
                       DocList *pIn, int iPhrasePos, DocList *out){
  int rc;
  DocListMerge merge;


  /* If [pIn] is already empty, there's no point in reading the
   * posting list to AND it in; return immediately. */
  if( pIn!=NULL && !pIn->nData ) return SQLITE_OK;


  rc = term_select_doclist(v, pTerm, nTerm, pSelect);
  if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc;

  mergeInit(&merge, pIn, iPhrasePos, out);
  while( rc==SQLITE_ROW ){
    DocList block;
    docListInit(&block, DL_POSITIONS_OFFSETS,
                sqlite3_column_blob(*pSelect, 0),
                sqlite3_column_bytes(*pSelect, 0));
    mergeBlock(&merge, &block);
    docListDestroy(&block);

    rc = sqlite3_step(*pSelect);
    if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc;


  }
  
  return SQLITE_OK;
}

typedef struct QueryTerm {
  int isPhrase;    /* true if this term begins a new phrase */
  char *pTerm;
  int nTerm;
} QueryTerm;

/* A parsed query.
 *
 * As an example, parsing the query ["four score" years "new nation"] will
 * yield a Query with 5 terms:
 *   "four",   isPhrase = 1
 *   "score",  isPhrase = 0
 *   "years",  isPhrase = 1
 *   "new",    isPhrase = 1
 *   "nation", isPhrase = 0
 */
typedef struct Query {
  int nTerms;
  QueryTerm *pTerms;
} Query;

static void queryAdd(Query *q, int isPhrase, const char *pTerm, int nTerm){
  QueryTerm *t;
  ++q->nTerms;
  q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
  t = &q->pTerms[q->nTerms - 1];
  t->isPhrase = isPhrase;
  t->pTerm = malloc(nTerm);
  memcpy(t->pTerm, pTerm, nTerm);
  t->nTerm = nTerm;
}
    
static void queryDestroy(Query *q){
  int i;
  for(i = 0; i < q->nTerms; ++i){
    free(q->pTerms[i].pTerm);
  }
  free(q->pTerms);
}

static int tokenizeSegment(sqlite3_tokenizer *pTokenizer,
                            const char *pSegment, int nSegment, int inPhrase,
                            Query *pQuery){
  sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
  sqlite3_tokenizer_cursor *pCursor;
  int is_first = 1;
  
  int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
  if( rc!=SQLITE_OK ) return rc;
  pCursor->pTokenizer = pTokenizer;

  while( 1 ){
    const char *pToken;
    int nToken, iDummyOffset, iDummyPos;

    rc = pModule->xNext(pCursor,
                        &pToken, &nToken,
                        &iDummyOffset, &iDummyOffset,
                        &iDummyPos);
    if( rc!=SQLITE_OK ) break;
    queryAdd(pQuery, !inPhrase || is_first, pToken, nToken);
    is_first = 0;
  }

  return pModule->xClose(pCursor);
}

/* Parse a query string, yielding a Query object [pQuery], which the caller
 * must free. */
static int parseQuery(fulltext_vtab *v, const char *pInput, int nInput,
                      Query *pQuery){
  int iInput, inPhrase = 0;

  if( nInput<0 ) nInput = strlen(pInput);
  pQuery->nTerms = 0;
  pQuery->pTerms = NULL;

  for(iInput=0; iInput<nInput; ++iInput){
    int i;
    for(i=iInput; i<nInput && pInput[i]!='"'; ++i)
      ;
    if( i>iInput ){
      tokenizeSegment(v->pTokenizer, pInput+iInput, i-iInput, inPhrase,
                       pQuery);
    }
    iInput = i;
    if( i<nInput ){
      assert( pInput[i]=='"' );
      inPhrase = !inPhrase;
    }
  }

  if(inPhrase) {  /* unmatched quote */
    queryDestroy(pQuery);
    return SQLITE_ERROR;
  }
  return SQLITE_OK;
}

/* Perform a full-text query; return a list of documents in [pResult]. */
static int fulltextQuery(fulltext_vtab *v, const char *pInput, int nInput,
                          DocList **pResult){
  Query q;
  int phrase_start = -1;
  int i;
  sqlite3_stmt *pSelect = NULL;
  DocList *d = NULL;

  int rc = parseQuery(v, pInput, nInput, &q);
  if( rc!=SQLITE_OK ) return rc;

  /* Merge terms. */
  for(i = 0 ; i < q.nTerms ; ++i){
    /* In each merge step, we need to generate positions whenever we're
     * processing a phrase which hasn't ended yet. */
    int needPositions = i<q.nTerms-1 && !q.pTerms[i+1].isPhrase;
    DocList *next = docListNew(needPositions ? DL_POSITIONS : DL_DOCIDS);
    if( q.pTerms[i].isPhrase ){
      phrase_start = i;
    }
    rc = mergeQuery(v, &pSelect, q.pTerms[i].pTerm, q.pTerms[i].nTerm,
                     d, i-phrase_start, next);
    if( rc!=SQLITE_OK ) break;
    if( d!=NULL ){
      docListDelete(d);
    }
    d = next;
  }

  sqlite3_finalize(pSelect);
  queryDestroy(&q);
  *pResult = d;
  return rc;
}

static int fulltextFilter(sqlite3_vtab_cursor *pCursor,
                          int idxNum, const char *idxStr,
                          int argc, sqlite3_value **argv){
................................................................................
      break;

    case QUERY_FULLTEXT:   /* full-text search */
    {
      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
      DocList *pResult;
      assert( argc==1 );
      rc = fulltextQuery(v, zQuery, -1, &pResult);
      if( rc!=SQLITE_OK ) return rc;
      readerInit(&c->result, pResult);
      zStatement = "select rowid, content from %_content where rowid = ?";
      break;
    }

    default:
................................................................................
  fulltext_cursor *c = (fulltext_cursor *) pCursor;

  *pRowid = sqlite3_column_int64(c->pStmt, 0);
  return SQLITE_OK;
}

/* Build a hash table containing all terms in pText. */
static int buildTerms(fts1Hash *terms, sqlite3_tokenizer *pTokenizer,
                       const char *pText, int nText, sqlite_int64 iDocid){
  sqlite3_tokenizer_cursor *pCursor;
  const char *pToken;
  int nTokenBytes;
  int iStartOffset, iEndOffset, iPosition;
  int rc;

................................................................................

  rc = content_insert(v, pRequestRowid, pText, nText);
  if( rc!=SQLITE_OK ) return rc;
  *piRowid = sqlite3_last_insert_rowid(v->db);

  if( !pText || !nText ) return SQLITE_OK;   /* nothing to index */

  rc = buildTerms(&terms, v->pTokenizer, pText, nText, *piRowid);
  if( rc!=SQLITE_OK ) return rc;

  for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
    DocList *p = fts1HashData(e);
    rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), *piRowid, p);
    if( rc!=SQLITE_OK ) break;
  }
................................................................................
  int nText;
  fts1Hash terms;
  fts1HashElem *e;

  int rc = content_select(v, iRow, &pText, &nText);
  if( rc!=SQLITE_OK ) return rc;

  rc = buildTerms(&terms, v->pTokenizer, pText, nText, iRow);
  free(pText);
  if( rc!=SQLITE_OK ) return rc;

  for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
    rc = index_delete_term(v, fts1HashKey(e), fts1HashKeysize(e), iRow);
    if( rc!=SQLITE_OK ) break;
  }