SQLite

Check-in [1c20c1c28b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Further tweaks to improve fts5 prefix query performance.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 1c20c1c28b56411f106cf2f6961b3ad4b4d6f6c8
User & Date: dan 2015-10-12 19:12:29.091
Context
2015-10-12
22:20
Fix a couple harmless compiler warnings. (check-in: 7f896a971c user: mistachkin tags: trunk)
19:12
Further tweaks to improve fts5 prefix query performance. (check-in: 1c20c1c28b user: dan tags: trunk)
04:56
Change all references to 3.8.12 into 3.9.0. Comment changes only - no changes to code. (check-in: 6f2858f681 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5Int.h.
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276

#define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32)
#define FTS5_POS2OFFSET(iPos) (int)(iPos & 0xFFFFFFFF)

typedef struct Fts5PoslistReader Fts5PoslistReader;
struct Fts5PoslistReader {
  /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
  int iCol;                       /* If (iCol>=0), this column only */
  const u8 *a;                    /* Position list to iterate through */
  int n;                          /* Size of buffer at a[] in bytes */
  int i;                          /* Current offset in a[] */

  u8 bFlag;                       /* For client use (any custom purpose) */

  /* Output variables */
  u8 bEof;                        /* Set to true at EOF */
  i64 iPos;                       /* (iCol<<32) + iPos */
};
int sqlite3Fts5PoslistReaderInit(
  int iCol,                       /* If (iCol>=0), this column only */
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
);
int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);

typedef struct Fts5PoslistWriter Fts5PoslistWriter;
struct Fts5PoslistWriter {







<











<







250
251
252
253
254
255
256

257
258
259
260
261
262
263
264
265
266
267

268
269
270
271
272
273
274

#define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32)
#define FTS5_POS2OFFSET(iPos) (int)(iPos & 0xFFFFFFFF)

typedef struct Fts5PoslistReader Fts5PoslistReader;
struct Fts5PoslistReader {
  /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */

  const u8 *a;                    /* Position list to iterate through */
  int n;                          /* Size of buffer at a[] in bytes */
  int i;                          /* Current offset in a[] */

  u8 bFlag;                       /* For client use (any custom purpose) */

  /* Output variables */
  u8 bEof;                        /* Set to true at EOF */
  i64 iPos;                       /* (iCol<<32) + iPos */
};
int sqlite3Fts5PoslistReaderInit(

  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
);
int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);

typedef struct Fts5PoslistWriter Fts5PoslistWriter;
struct Fts5PoslistWriter {
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
int sqlite3Fts5IterNext(Fts5IndexIter*);
int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
i64 sqlite3Fts5IterRowid(Fts5IndexIter*);
int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn, i64 *pi);
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);








|







341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
int sqlite3Fts5IterNext(Fts5IndexIter*);
int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
i64 sqlite3Fts5IterRowid(Fts5IndexIter*);
int sqlite3Fts5IterPoslist(Fts5IndexIter*,Fts5Colset*, const u8**, int*, i64*);
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);

Changes to ext/fts5/fts5_buffer.c.
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232


/*
** Advance the iterator object passed as the only argument. Return true
** if the iterator reaches EOF, or false otherwise.
*/
int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
  if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) 
   || (pIter->iCol>=0 && (pIter->iPos >> 32) > pIter->iCol)
  ){
    pIter->bEof = 1;
  }
  return pIter->bEof;
}

int sqlite3Fts5PoslistReaderInit(
  int iCol,                       /* If (iCol>=0), this column only */
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
){
  memset(pIter, 0, sizeof(*pIter));
  pIter->a = a;
  pIter->n = n;
  pIter->iCol = iCol;
  do {
    sqlite3Fts5PoslistReaderNext(pIter);
  }while( pIter->bEof==0 && (pIter->iPos >> 32)<iCol );
  return pIter->bEof;
}

int sqlite3Fts5PoslistWriterAppend(
  Fts5Buffer *pBuf, 
  Fts5PoslistWriter *pWriter,
  i64 iPos







|
<
<






<






<
<
|
<







199
200
201
202
203
204
205
206


207
208
209
210
211
212

213
214
215
216
217
218


219

220
221
222
223
224
225
226


/*
** Advance the iterator object passed as the only argument. Return true
** if the iterator reaches EOF, or false otherwise.
*/
int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
  if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){


    pIter->bEof = 1;
  }
  return pIter->bEof;
}

int sqlite3Fts5PoslistReaderInit(

  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
){
  memset(pIter, 0, sizeof(*pIter));
  pIter->a = a;
  pIter->n = n;


  sqlite3Fts5PoslistReaderNext(pIter);

  return pIter->bEof;
}

int sqlite3Fts5PoslistWriterAppend(
  Fts5Buffer *pBuf, 
  Fts5PoslistWriter *pWriter,
  i64 iPos
Changes to ext/fts5/fts5_expr.c.
305
306
307
308
309
310
311

312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
}

/*
** Argument pTerm must be a synonym iterator.
*/
static int fts5ExprSynonymPoslist(
  Fts5ExprTerm *pTerm, 

  i64 iRowid,
  int *pbDel,                     /* OUT: Caller should sqlite3_free(*pa) */
  u8 **pa, int *pn
){
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
  int nIter = 0;
  int nAlloc = 4;
  int rc = SQLITE_OK;
  Fts5ExprTerm *p;

  assert( pTerm->pSynonym );
  for(p=pTerm; p; p=p->pSynonym){
    Fts5IndexIter *pIter = p->pIter;
    if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
      const u8 *a;
      int n;
      i64 dummy;
      rc = sqlite3Fts5IterPoslist(pIter, &a, &n, &dummy);
      if( rc!=SQLITE_OK ) goto synonym_poslist_out;
      if( nIter==nAlloc ){
        int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
        Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
        if( aNew==0 ){
          rc = SQLITE_NOMEM;
          goto synonym_poslist_out;
        }
        memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
        nAlloc = nAlloc*2;
        if( aIter!=aStatic ) sqlite3_free(aIter);
        aIter = aNew;
      }
      sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[nIter]);
      assert( aIter[nIter].bEof==0 );
      nIter++;
    }
  }

  assert( *pbDel==0 );
  if( nIter==1 ){







>


















|













|







305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
}

/*
** Argument pTerm must be a synonym iterator.
*/
static int fts5ExprSynonymPoslist(
  Fts5ExprTerm *pTerm, 
  Fts5Colset *pColset,
  i64 iRowid,
  int *pbDel,                     /* OUT: Caller should sqlite3_free(*pa) */
  u8 **pa, int *pn
){
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
  int nIter = 0;
  int nAlloc = 4;
  int rc = SQLITE_OK;
  Fts5ExprTerm *p;

  assert( pTerm->pSynonym );
  for(p=pTerm; p; p=p->pSynonym){
    Fts5IndexIter *pIter = p->pIter;
    if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
      const u8 *a;
      int n;
      i64 dummy;
      rc = sqlite3Fts5IterPoslist(pIter, pColset, &a, &n, &dummy);
      if( rc!=SQLITE_OK ) goto synonym_poslist_out;
      if( nIter==nAlloc ){
        int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
        Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
        if( aNew==0 ){
          rc = SQLITE_NOMEM;
          goto synonym_poslist_out;
        }
        memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
        nAlloc = nAlloc*2;
        if( aIter!=aStatic ) sqlite3_free(aIter);
        aIter = aNew;
      }
      sqlite3Fts5PoslistReaderInit(a, n, &aIter[nIter]);
      assert( aIter[nIter].bEof==0 );
      nIter++;
    }
  }

  assert( *pbDel==0 );
  if( nIter==1 ){
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438


439
440
441
442
443
444
445
446
447
448
449
450
  int *pbMatch                    /* OUT: Set to true if really a match */
){
  Fts5PoslistWriter writer = {0};
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
  int i;
  int rc = SQLITE_OK;
  int iCol = -1;
  
  if( pColset && pColset->nCol==1 ){
    iCol = pColset->aiCol[0];
    pColset = 0;
  }

  fts5BufferZero(&pPhrase->poslist);

  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    if( !aIter ) return SQLITE_NOMEM;
  }
  memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){
    Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
    i64 dummy;
    int n = 0;
    int bFlag = 0;
    const u8 *a = 0;
    if( pTerm->pSynonym ){
      rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, &bFlag, (u8**)&a, &n);


    }else{
      rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy);
    }
    if( rc!=SQLITE_OK ) goto ismatch_out;
    sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]);
    aIter[i].bFlag = bFlag;
    if( aIter[i].bEof ) goto ismatch_out;
  }

  while( 1 ){
    int bMatch;
    i64 iPos = aIter[0].iPos;







<

<
<
<
<
<



















|
>
>

|


|







406
407
408
409
410
411
412

413





414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
  int *pbMatch                    /* OUT: Set to true if really a match */
){
  Fts5PoslistWriter writer = {0};
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
  int i;
  int rc = SQLITE_OK;

  





  fts5BufferZero(&pPhrase->poslist);

  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    if( !aIter ) return SQLITE_NOMEM;
  }
  memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){
    Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
    i64 dummy;
    int n = 0;
    int bFlag = 0;
    const u8 *a = 0;
    if( pTerm->pSynonym ){
      rc = fts5ExprSynonymPoslist(
          pTerm, pColset, pNode->iRowid, &bFlag, (u8**)&a, &n
      );
    }else{
      rc = sqlite3Fts5IterPoslist(pTerm->pIter, pColset, &a, &n, &dummy);
    }
    if( rc!=SQLITE_OK ) goto ismatch_out;
    sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
    aIter[i].bFlag = bFlag;
    if( aIter[i].bEof ) goto ismatch_out;
  }

  while( 1 ){
    int bMatch;
    i64 iPos = aIter[0].iPos;
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
            if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
          }
          if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
        }
      }
    }while( bMatch==0 );

    if( pColset==0 || fts5ExprColsetTest(pColset, FTS5_POS2COLUMN(iPos)) ){
      /* Append position iPos to the output */
      rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
      if( rc!=SQLITE_OK ) goto ismatch_out;
    }

    for(i=0; i<pPhrase->nTerm; i++){
      if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
    }
  }

 ismatch_out:







<
|
|
|
<







456
457
458
459
460
461
462

463
464
465

466
467
468
469
470
471
472
            if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
          }
          if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
        }
      }
    }while( bMatch==0 );


    /* Append position iPos to the output */
    rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
    if( rc!=SQLITE_OK ) goto ismatch_out;


    for(i=0; i<pPhrase->nTerm; i++){
      if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
    }
  }

 ismatch_out:
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
    bEof = 1;
  }else{
    *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
  }
  return bEof;
}

/*
** IN/OUT parameter (*pa) points to a position list n bytes in size. If
** the position list contains entries for column iCol, then (*pa) is set
** to point to the sub-position-list for that column and the number of
** bytes in it returned. Or, if the argument position list does not
** contain any entries for column iCol, return 0.
*/
static int fts5ExprExtractCol(
  const u8 **pa,                  /* IN/OUT: Pointer to poslist */
  int n,                          /* IN: Size of poslist in bytes */
  int iCol                        /* Column to extract from poslist */
){
  int iCurrent = 0;
  const u8 *p = *pa;
  const u8 *pEnd = &p[n];         /* One byte past end of position list */
  u8 prev = 0;

  while( iCol!=iCurrent ){
    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
    ** not part of a varint */
    while( (prev & 0x80) || *p!=0x01 ){
      prev = *p++;
      if( p==pEnd ) return 0;
    }
    *pa = p++;
    p += fts5GetVarint32(p, iCurrent);
  }

  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  ** not part of a varint */
  assert( (prev & 0x80)==0 );
  while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
    prev = *p++;
  }
  return p - (*pa);
}

static int fts5ExprExtractColset (
  Fts5Colset *pColset,            /* Colset to filter on */
  const u8 *pPos, int nPos,       /* Position list */
  Fts5Buffer *pBuf                /* Output buffer */
){
  int rc = SQLITE_OK;
  int i;

  fts5BufferZero(pBuf);
  for(i=0; i<pColset->nCol; i++){
    const u8 *pSub = pPos;
    int nSub = fts5ExprExtractCol(&pSub, nPos, pColset->aiCol[i]);
    if( nSub ){
      fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
    }
  }
  return rc;
}

static int fts5ExprNearTest(
  int *pRc,
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){
  Fts5ExprNearset *pNear = pNode->pNear;







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







753
754
755
756
757
758
759























































760
761
762
763
764
765
766
    bEof = 1;
  }else{
    *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
  }
  return bEof;
}

























































static int fts5ExprNearTest(
  int *pRc,
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){
  Fts5ExprNearset *pNear = pNode->pNear;
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
  ** fts5_index.c iterator object. This is much faster than synthesizing 
  ** a new poslist the way we have to for more complicated phrase or NEAR
  ** expressions.  */
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
  Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
  Fts5Colset *pColset = pNear->pColset;
  const u8 *pPos;
  int nPos;
  int rc;

  assert( pNode->eType==FTS5_TERM );
  assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );
  assert( pPhrase->aTerm[0].pSynonym==0 );

  rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);

  /* If the term may match any column, then this must be a match. 
  ** Return immediately in this case. Otherwise, try to find the
  ** part of the poslist that corresponds to the required column.
  ** If it can be found, return. If it cannot, the next iteration
  ** of the loop will test the next rowid in the database for this
  ** term.  */
  if( pColset==0 ){
    assert( pPhrase->poslist.nSpace==0 );
    pPhrase->poslist.p = (u8*)pPos;
    pPhrase->poslist.n = nPos;
  }else if( pColset->nCol==1 ){
    assert( pPhrase->poslist.nSpace==0 );
    pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]);
    pPhrase->poslist.p = (u8*)pPos;
  }else if( rc==SQLITE_OK ){
    rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist);
  }

  pNode->bNomatch = (pPhrase->poslist.n==0);
  return rc;
}

/*
** All individual term iterators in pNear are guaranteed to be valid when
** this function is called. This function checks if all term iterators







<
<






|
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
|







800
801
802
803
804
805
806


807
808
809
810
811
812
813









814








815
816
817
818
819
820
821
822
  ** fts5_index.c iterator object. This is much faster than synthesizing 
  ** a new poslist the way we have to for more complicated phrase or NEAR
  ** expressions.  */
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
  Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
  Fts5Colset *pColset = pNear->pColset;


  int rc;

  assert( pNode->eType==FTS5_TERM );
  assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );
  assert( pPhrase->aTerm[0].pSynonym==0 );

  rc = sqlite3Fts5IterPoslist(pIter, pColset, 









      (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n, &pNode->iRowid








  );
  pNode->bNomatch = (pPhrase->poslist.n==0);
  return rc;
}

/*
** All individual term iterators in pNear are guaranteed to be valid when
** this function is called. This function checks if all term iterators
Changes to ext/fts5/fts5_index.c.
507
508
509
510
511
512
513
514
515

516
517
518
519
520
521
522
struct Fts5IndexIter {
  Fts5Index *pIndex;              /* Index that owns this iterator */
  Fts5Structure *pStruct;         /* Database structure for this iterator */
  Fts5Buffer poslist;             /* Buffer containing current poslist */

  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  int bSkipEmpty;                 /* True to skip deleted entries */
  int bEof;                       /* True at EOF */


  i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
  Fts5SegIter aSeg[1];            /* Array of segment iterators */
};









|
|
>







507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
struct Fts5IndexIter {
  Fts5Index *pIndex;              /* Index that owns this iterator */
  Fts5Structure *pStruct;         /* Database structure for this iterator */
  Fts5Buffer poslist;             /* Buffer containing current poslist */

  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  u8 bSkipEmpty;                  /* True to skip deleted entries */
  u8 bEof;                        /* True at EOF */
  u8 bFiltered;                   /* True if column-filter already applied */

  i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
  Fts5SegIter aSeg[1];            /* Array of segment iterators */
};


1453
1454
1455
1456
1457
1458
1459
1460

1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480

1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
** Argument p points to a buffer containing a varint to be interpreted as a
** position list size field. Read the varint and return the number of bytes
** read. Before returning, set *pnSz to the number of bytes in the position
** list, and *pbDel to true if the delete flag is set, or false otherwise.
*/
static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
  int nSz;
  int n = fts5GetVarint32(p, nSz);

  assert_nc( nSz>=0 );
  *pnSz = nSz/2;
  *pbDel = nSz & 0x0001;
  return n;
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of a
** position-list size field. Read the value of the field and store it
** in the following variables:
**
**   Fts5SegIter.nPos
**   Fts5SegIter.bDel
**
** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 
** position list content (if any).
*/
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  if( p->rc==SQLITE_OK ){
    int iOff = pIter->iLeafOffset;  /* Offset to read at */

    ASSERT_SZLEAF_OK(pIter->pLeaf);
    if( iOff>=pIter->pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}

static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;








|
>




















>

|
|
<
|
|
<







1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486

1487
1488

1489
1490
1491
1492
1493
1494
1495
** Argument p points to a buffer containing a varint to be interpreted as a
** position list size field. Read the varint and return the number of bytes
** read. Before returning, set *pnSz to the number of bytes in the position
** list, and *pbDel to true if the delete flag is set, or false otherwise.
*/
static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
  int nSz;
  int n = 0;
  fts5FastGetVarint32(p, n, nSz);
  assert_nc( nSz>=0 );
  *pnSz = nSz/2;
  *pbDel = nSz & 0x0001;
  return n;
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of a
** position-list size field. Read the value of the field and store it
** in the following variables:
**
**   Fts5SegIter.nPos
**   Fts5SegIter.bDel
**
** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 
** position list content (if any).
*/
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  if( p->rc==SQLITE_OK ){
    int iOff = pIter->iLeafOffset;  /* Offset to read at */
    int nSz;
    ASSERT_SZLEAF_OK(pIter->pLeaf);
    fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
    pIter->bDel = (nSz & 0x0001);

    pIter->nPos = nSz>>1;
    pIter->iLeafOffset = iOff;

  }
}

static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;

2721
2722
2723
2724
2725
2726
2727

2728
2729
2730
2731
2732
2733
2734
  Fts5IndexIter **ppOut           /* New object */
){
  Fts5IndexIter *pNew;
  pNew = fts5MultiIterAlloc(p, 2);
  if( pNew ){
    Fts5SegIter *pIter = &pNew->aSeg[1];


    pIter->flags = FTS5_SEGITER_ONETERM;
    if( pData->szLeaf>0 ){
      pIter->pLeaf = pData;
      pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
      pIter->iEndofDoclist = pData->nn;
      pNew->aFirst[1].iFirst = 1;
      if( bDesc ){







>







2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
  Fts5IndexIter **ppOut           /* New object */
){
  Fts5IndexIter *pNew;
  pNew = fts5MultiIterAlloc(p, 2);
  if( pNew ){
    Fts5SegIter *pIter = &pNew->aSeg[1];

    pNew->bFiltered = 1;
    pIter->flags = FTS5_SEGITER_ONETERM;
    if( pData->szLeaf>0 ){
      pIter->pLeaf = pData;
      pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
      pIter->iEndofDoclist = pData->nn;
      pNew->aFirst[1].iFirst = 1;
      if( bDesc ){
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
static void fts5PoslistCallback(
  Fts5Index *p, 
  void *pContext, 
  const u8 *pChunk, int nChunk
){
  assert_nc( nChunk>=0 );
  if( nChunk>0 ){
    fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pContext, nChunk, pChunk);
  }
}

typedef struct PoslistCallbackCtx PoslistCallbackCtx;
struct PoslistCallbackCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to this column */







|







3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
static void fts5PoslistCallback(
  Fts5Index *p, 
  void *pContext, 
  const u8 *pChunk, int nChunk
){
  assert_nc( nChunk>=0 );
  if( nChunk>0 ){
    fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
  }
}

typedef struct PoslistCallbackCtx PoslistCallbackCtx;
struct PoslistCallbackCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to this column */
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027

4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039

4040






































4041
4042


4043





4044
4045
4046
4047
4048
4049

4050
4051
4052
4053
4054




4055

4056
4057
4058


4059
4060
4061
4062
4063
4064
4065












4066
4067

4068
4069
4070
4071
4072

4073
4074
4075
4076
4077

4078
4079
4080
4081
4082
4083
4084
    int iStart = 0;

    if( pCtx->eState==2 ){
      int iCol;
      fts5FastGetVarint32(pChunk, i, iCol);
      if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
        pCtx->eState = 1;
        fts5BufferAppendVarint(&p->rc, pCtx->pBuf, 1);
      }else{
        pCtx->eState = 0;
      }
    }

    do {
      while( i<nChunk && pChunk[i]!=0x01 ){
        while( pChunk[i] & 0x80 ) i++;
        i++;
      }
      if( pCtx->eState ){
        fts5BufferAppendBlob(&p->rc, pCtx->pBuf, i-iStart, &pChunk[iStart]);
      }
      if( i<nChunk ){
        int iCol;
        iStart = i;
        i++;
        if( i>=nChunk ){
          pCtx->eState = 2;
        }else{
          fts5FastGetVarint32(pChunk, i, iCol);
          pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
          if( pCtx->eState ){
            fts5BufferAppendBlob(&p->rc, pCtx->pBuf, i-iStart, &pChunk[iStart]);
            iStart = i;
          }
        }
      }
    }while( i<nChunk );
  }
}

/*
** Iterator pIter currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
*/
static void fts5SegiterPoslist(
  Fts5Index *p,
  Fts5SegIter *pSeg,
  Fts5Colset *pColset,
  Fts5Buffer *pBuf
){

  if( pColset==0 ){
    fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
  }else{
    PoslistCallbackCtx sCtx;
    sCtx.pBuf = pBuf;
    sCtx.pColset = pColset;
    sCtx.eState = pColset ? fts5IndexColsetTest(pColset, 0) : 1;
    assert( sCtx.eState==0 || sCtx.eState==1 );
    fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
  }
}


/*






































** Iterator pMulti currently points to a valid entry (not EOF). This
** function appends a copy of the position-list of the entry pMulti 


** currently points to to buffer pBuf.





**
** If an error occurs, an error code is left in p->rc. It is assumed
** no error has already occurred when this function is called.
*/
static int fts5MultiIterPoslist(
  Fts5Index *p,

  Fts5IndexIter *pMulti,
  Fts5Colset *pColset,
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){




    int iSz;

    int iData;

    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];


    assert( fts5MultiIterEof(p, pMulti)==0 );

    /* WRITEPOSLISTSIZE */
    iSz = pBuf->n;
    fts5BufferSafeAppendVarint(pBuf, pSeg->nPos*2);
    iData = pBuf->n;













    fts5SegiterPoslist(p, pSeg, pColset, pBuf);


    if( pColset ){
      int nActual = pBuf->n - iData;
      if( nActual!=pSeg->nPos ){
        /* WRITEPOSLISTSIZE */
        if( nActual==0 ){

          return 1;
        }else{
          int nReq = sqlite3Fts5GetVarintLen((u32)(nActual*2));
          while( iSz<(iData-nReq) ){ pBuf->p[iSz++] = 0x80; }
          sqlite3Fts5PutVarint(&pBuf->p[iSz], nActual*2);

        }
      }
    }
  }

  return 0;
}







|











|











|




















>
|
|
|
|
|
|
|
|
|
|
|
|
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
>
>
|
>
>
>
>
>

|
<

|

>





>
>
>
>
|
>
|

<
>
>
|

|
|
|
|

>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
|
|
|
<
|
>
|
|
|
|
|
>







3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094

4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111

4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138

4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
    int iStart = 0;

    if( pCtx->eState==2 ){
      int iCol;
      fts5FastGetVarint32(pChunk, i, iCol);
      if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
        pCtx->eState = 1;
        fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
      }else{
        pCtx->eState = 0;
      }
    }

    do {
      while( i<nChunk && pChunk[i]!=0x01 ){
        while( pChunk[i] & 0x80 ) i++;
        i++;
      }
      if( pCtx->eState ){
        fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
      }
      if( i<nChunk ){
        int iCol;
        iStart = i;
        i++;
        if( i>=nChunk ){
          pCtx->eState = 2;
        }else{
          fts5FastGetVarint32(pChunk, i, iCol);
          pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
          if( pCtx->eState ){
            fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
            iStart = i;
          }
        }
      }
    }while( i<nChunk );
  }
}

/*
** Iterator pIter currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
*/
static void fts5SegiterPoslist(
  Fts5Index *p,
  Fts5SegIter *pSeg,
  Fts5Colset *pColset,
  Fts5Buffer *pBuf
){
  if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){
    if( pColset==0 ){
      fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
    }else{
      PoslistCallbackCtx sCtx;
      sCtx.pBuf = pBuf;
      sCtx.pColset = pColset;
      sCtx.eState = pColset ? fts5IndexColsetTest(pColset, 0) : 1;
      assert( sCtx.eState==0 || sCtx.eState==1 );
      fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
    }
  }
}

/*
** IN/OUT parameter (*pa) points to a position list n bytes in size. If
** the position list contains entries for column iCol, then (*pa) is set
** to point to the sub-position-list for that column and the number of
** bytes in it returned. Or, if the argument position list does not
** contain any entries for column iCol, return 0.
*/
static int fts5IndexExtractCol(
  const u8 **pa,                  /* IN/OUT: Pointer to poslist */
  int n,                          /* IN: Size of poslist in bytes */
  int iCol                        /* Column to extract from poslist */
){
  int iCurrent = 0;               /* Anything before the first 0x01 is col 0 */
  const u8 *p = *pa;
  const u8 *pEnd = &p[n];         /* One byte past end of position list */
  u8 prev = 0;

  while( iCol!=iCurrent ){
    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
    ** not part of a varint */
    while( (prev & 0x80) || *p!=0x01 ){
      prev = *p++;
      if( p==pEnd ) return 0;
    }
    *pa = p++;
    p += fts5GetVarint32(p, iCurrent);
  }

  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  ** not part of a varint */
  assert( (prev & 0x80)==0 );
  while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
    prev = *p++;
  }
  return p - (*pa);
}


/*
** Iterator pMulti currently points to a valid entry (not EOF). This
** function appends the following to buffer pBuf:
**
**   * The varint iDelta, and
**   * the position list that currently points to, including the size field.
**
** If argument pColset is NULL, then the position list is filtered according
** to pColset before being appended to the buffer. If this means there are
** no entries in the position list, nothing is appended to the buffer (not
** even iDelta).
**
** If an error occurs, an error code is left in p->rc. 

*/
static int fts5AppendPoslist(
  Fts5Index *p,
  i64 iDelta,
  Fts5IndexIter *pMulti,
  Fts5Colset *pColset,
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );
    assert( pSeg->nPos>0 );
    if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+9+9) ){
      int iSv1;
      int iSv2;
      int iData;


      /* Append iDelta */
      iSv1 = pBuf->n;
      fts5BufferSafeAppendVarint(pBuf, iDelta);

      /* WRITEPOSLISTSIZE */
      iSv2 = pBuf->n;
      fts5BufferSafeAppendVarint(pBuf, pSeg->nPos*2);
      iData = pBuf->n;

      if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf 
       && (pColset==0 || pColset->nCol==1)
      ){
        const u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
        int nPos;
        if( pColset ){
          nPos = fts5IndexExtractCol(&pPos, pSeg->nPos, pColset->aiCol[0]);
        }else{
          nPos = pSeg->nPos;
        }
        fts5BufferSafeAppendBlob(pBuf, pPos, nPos);
      }else{
        fts5SegiterPoslist(p, pSeg, pColset, pBuf);
      }

      if( pColset ){
        int nActual = pBuf->n - iData;
        if( nActual!=pSeg->nPos ){

          if( nActual==0 ){
            pBuf->n = iSv1;
            return 1;
          }else{
            int nReq = sqlite3Fts5GetVarintLen((u32)(nActual*2));
            while( iSv2<(iData-nReq) ){ pBuf->p[iSv2++] = 0x80; }
            sqlite3Fts5PutVarint(&pBuf->p[iSv2], nActual*2);
          }
        }
      }
    }
  }

  return 0;
}
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
            fts5MergePrefixLists(p, &doclist, &aBuf[i]);
            fts5BufferZero(&aBuf[i]);
          }
        }
        iLastRowid = 0;
      }

      if( 0==sqlite3Fts5BufferGrow(&p->rc, &doclist, 9) ){
        int iSave = doclist.n;
        assert( doclist.n!=0 || iLastRowid==0 );
        fts5BufferSafeAppendVarint(&doclist, iRowid - iLastRowid);
        if( fts5MultiIterPoslist(p, p1, pColset, &doclist) ){
          doclist.n = iSave;
        }else{
          iLastRowid = iRowid;
        }
      }
    }

    for(i=0; i<nBuf; i++){
      if( p->rc==SQLITE_OK ){
        fts5MergePrefixLists(p, &doclist, &aBuf[i]);
      }







<
<
<
<
|
<
<
|
<







4347
4348
4349
4350
4351
4352
4353




4354


4355

4356
4357
4358
4359
4360
4361
4362
            fts5MergePrefixLists(p, &doclist, &aBuf[i]);
            fts5BufferZero(&aBuf[i]);
          }
        }
        iLastRowid = 0;
      }





      if( !fts5AppendPoslist(p, iRowid-iLastRowid, p1, pColset, &doclist) ){


        iLastRowid = iRowid;

      }
    }

    for(i=0; i<nBuf; i++){
      if( p->rc==SQLITE_OK ){
        fts5MergePrefixLists(p, &doclist, &aBuf[i]);
      }
4643
4644
4645
4646
4647
4648
4649




















4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660

4661
4662
4663
4664
4665
4666
4667
4668
4669
4670






4671
4672






4673
4674

4675
4676
4677
4678
4679
4680
4681
const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){
  int n;
  const char *z = (const char*)fts5MultiIterTerm(pIter, &n);
  *pn = n-1;
  return &z[1];
}






















/*
** Return a pointer to a buffer containing a copy of the position list for
** the current entry. Output variable *pn is set to the size of the buffer 
** in bytes before returning.
**
** The returned position list does not include the "number of bytes" varint
** field that starts the position list on disk.
*/
int sqlite3Fts5IterPoslist(
  Fts5IndexIter *pIter, 

  const u8 **pp,                  /* OUT: Pointer to position-list data */
  int *pn,                        /* OUT: Size of position-list in bytes */
  i64 *piRowid                    /* OUT: Current rowid */
){
  Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  assert( pIter->pIndex->rc==SQLITE_OK );
  *piRowid = pSeg->iRowid;
  *pn = pSeg->nPos;
  if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){
    *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];






  }else{
    fts5BufferZero(&pIter->poslist);






    fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
    *pp = pIter->poslist.p;

  }
  return fts5IndexReturn(pIter->pIndex);
}

/*
** This function is similar to sqlite3Fts5IterPoslist(), except that it
** copies the position list into the buffer supplied as the second 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>











>







<
|
|
>
>
>
>
>
>
|
|
>
>
>
>
>
>
|

>







4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750

4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){
  int n;
  const char *z = (const char*)fts5MultiIterTerm(pIter, &n);
  *pn = n-1;
  return &z[1];
}


static int fts5IndexExtractColset (
  Fts5Colset *pColset,            /* Colset to filter on */
  const u8 *pPos, int nPos,       /* Position list */
  Fts5Buffer *pBuf                /* Output buffer */
){
  int rc = SQLITE_OK;
  int i;

  fts5BufferZero(pBuf);
  for(i=0; i<pColset->nCol; i++){
    const u8 *pSub = pPos;
    int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]);
    if( nSub ){
      fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
    }
  }
  return rc;
}


/*
** Return a pointer to a buffer containing a copy of the position list for
** the current entry. Output variable *pn is set to the size of the buffer 
** in bytes before returning.
**
** The returned position list does not include the "number of bytes" varint
** field that starts the position list on disk.
*/
int sqlite3Fts5IterPoslist(
  Fts5IndexIter *pIter, 
  Fts5Colset *pColset,            /* Column filter (or NULL) */
  const u8 **pp,                  /* OUT: Pointer to position-list data */
  int *pn,                        /* OUT: Size of position-list in bytes */
  i64 *piRowid                    /* OUT: Current rowid */
){
  Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  assert( pIter->pIndex->rc==SQLITE_OK );
  *piRowid = pSeg->iRowid;

  if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
    u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
    if( pColset==0 || pIter->bFiltered ){
      *pn = pSeg->nPos;
      *pp = pPos;
    }else if( pColset->nCol==1 ){
      *pp = pPos;
      *pn = fts5IndexExtractCol(pp, pSeg->nPos, pColset->aiCol[0]);
    }else{
      fts5BufferZero(&pIter->poslist);
      fts5IndexExtractColset(pColset, pPos, pSeg->nPos, &pIter->poslist);
      *pp = pIter->poslist.p;
      *pn = pIter->poslist.n;
    }
  }else{
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
    *pp = pIter->poslist.p;
    *pn = pIter->poslist.n;
  }
  return fts5IndexReturn(pIter->pIndex);
}

/*
** This function is similar to sqlite3Fts5IterPoslist(), except that it
** copies the position list into the buffer supplied as the second 
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
    i64 dummy;
    const u8 *pPos;
    int nPos;
    i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
    rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos, &dummy);
    if( rc==SQLITE_OK ){
      Fts5PoslistReader sReader;
      for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
          sReader.bEof==0;
          sqlite3Fts5PoslistReaderNext(&sReader)
      ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
      }







|


|







4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
    i64 dummy;
    const u8 *pPos;
    int nPos;
    i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
    rc = sqlite3Fts5IterPoslist(pIdxIter, 0, &pPos, &nPos, &dummy);
    if( rc==SQLITE_OK ){
      Fts5PoslistReader sReader;
      for(sqlite3Fts5PoslistReaderInit(pPos, nPos, &sReader);
          sReader.bEof==0;
          sqlite3Fts5PoslistReaderNext(&sReader)
      ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
      }
Changes to ext/fts5/fts5_main.c.
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
    int nInst = 0;                /* Number instances seen so far */
    int i;

    /* Initialize all iterators */
    for(i=0; i<nIter; i++){
      const u8 *a;
      int n = fts5CsrPoslist(pCsr, i, &a);
      sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[i]);
    }

    while( 1 ){
      int *aInst;
      int iBest = -1;
      for(i=0; i<nIter; i++){
        if( (aIter[i].bEof==0) 







|







1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
    int nInst = 0;                /* Number instances seen so far */
    int i;

    /* Initialize all iterators */
    for(i=0; i<nIter; i++){
      const u8 *a;
      int n = fts5CsrPoslist(pCsr, i, &a);
      sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
    }

    while( 1 ){
      int *aInst;
      int iBest = -1;
      for(i=0; i<nIter; i++){
        if( (aIter[i].bEof==0) 
Changes to ext/fts5/fts5_vocab.c.
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
      assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
      while( rc==SQLITE_OK ){
        i64 dummy;
        const u8 *pPos; int nPos;   /* Position list */
        i64 iPos = 0;               /* 64-bit position read from poslist */
        int iOff = 0;               /* Current offset within position list */

        rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos, &dummy);
        if( rc==SQLITE_OK ){
          if( pTab->eType==FTS5_VOCAB_ROW ){
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              pCsr->aVal[1]++;
            }
            pCsr->aVal[0]++;
          }else{







|







345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
      assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
      while( rc==SQLITE_OK ){
        i64 dummy;
        const u8 *pPos; int nPos;   /* Position list */
        i64 iPos = 0;               /* 64-bit position read from poslist */
        int iOff = 0;               /* Current offset within position list */

        rc = sqlite3Fts5IterPoslist(pCsr->pIter, 0, &pPos, &nPos, &dummy);
        if( rc==SQLITE_OK ){
          if( pTab->eType==FTS5_VOCAB_ROW ){
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              pCsr->aVal[1]++;
            }
            pCsr->aVal[0]++;
          }else{
Changes to ext/fts5/test/fts5simple.test.
246
247
248
249
250
251
252




253
254
255
256
257
258
259
  INSERT INTO t3 VALUES('bac aab bab', 'c bac c', 'acb aba abb'); -- 1
  INSERT INTO t3 VALUES('bab abc c', 'acb c abb', 'c aaa c');     -- 2
}

do_execsql_test 10.1 {
  SELECT rowid FROM t3('c: c*');
} {2}





#-------------------------------------------------------------------------
# Test that character 0x1A is allowed in fts5 barewords.
#
do_test 11.0 {
  execsql "CREATE VIRTUAL TABLE t4 USING fts5(x, tokenize=\"ascii tokenchars '\x1A'\")"
  execsql "







>
>
>
>







246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
  INSERT INTO t3 VALUES('bac aab bab', 'c bac c', 'acb aba abb'); -- 1
  INSERT INTO t3 VALUES('bab abc c', 'acb c abb', 'c aaa c');     -- 2
}

do_execsql_test 10.1 {
  SELECT rowid FROM t3('c: c*');
} {2}

do_execsql_test 10.2 {
  SELECT rowid FROM t3('b: acb');
} {2}

#-------------------------------------------------------------------------
# Test that character 0x1A is allowed in fts5 barewords.
#
do_test 11.0 {
  execsql "CREATE VIRTUAL TABLE t4 USING fts5(x, tokenize=\"ascii tokenchars '\x1A'\")"
  execsql "
277
278
279
280
281
282
283














284
285
286
do_test 11.4 {
  catchsql "SELECT rowid FROM t4('d\x1B')"
} {/fts5: syntax error/}
do_test 11.5 {
  catchsql "SELECT rowid FROM t4('d\x19')"
} {/fts5: syntax error/}
















finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>



281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
do_test 11.4 {
  catchsql "SELECT rowid FROM t4('d\x1B')"
} {/fts5: syntax error/}
do_test 11.5 {
  catchsql "SELECT rowid FROM t4('d\x19')"
} {/fts5: syntax error/}

#-------------------------------------------------------------------------
#
do_test 12.1 {
  execsql {
    CREATE VIRTUAL TABLE xx USING fts5(x,y);
    BEGIN;
      INSERT INTO xx VALUES('1 2 3', 'a b c');
  }
} {}

do_execsql_test 12.2 {
  SELECT rowid FROM xx('x:a');
  COMMIT;
} {}

finish_test