/ Check-in [bf1607ac]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Optimizations for fts5 expressions that filter on column. More still to come.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:bf1607ac155018573ca40fb58aca62c5fea7e60b
User & Date: dan 2015-10-06 20:53:26
Context
2015-10-06
21:07
Simplifications to the VDBE bytecode that handles LIMIT and OFFSET. check-in: 041df7c2 user: drh tags: trunk
20:53
Optimizations for fts5 expressions that filter on column. More still to come. check-in: bf1607ac user: dan tags: trunk
17:27
Fix the LIMIT and OFFSET handling for UNION ALL queries that contain a subquery with ORDER BY on the right-hand side. Fix for ticket [b65cb2c8d91f668584]. check-in: 4b631364 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

77
78
79
80
81
82
83














84
85
86
87
88
89
90
...
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
...
317
318
319
320
321
322
323

324
325
326
327
328
329
330
331
...
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
extern int sqlite3_fts5_may_be_corrupt;
# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
#else
# define assert_nc(x) assert(x)
#endif

typedef struct Fts5Global Fts5Global;















/**************************************************************************
** Interface to code in fts5_config.c. fts5_config.c contains contains code
** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
*/

typedef struct Fts5Config Fts5Config;
................................................................................
** Create/destroy an Fts5Index object.
*/
int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
int sqlite3Fts5IndexClose(Fts5Index *p);

/*
** for(
**   pIter = sqlite3Fts5IndexQuery(p, "token", 5, 0);
**   0==sqlite3Fts5IterEof(pIter);
**   sqlite3Fts5IterNext(pIter)
** ){
**   i64 iRowid = sqlite3Fts5IterRowid(pIter);
** }
*/

................................................................................
** Open a new iterator to iterate though all rowids that match the 
** specified token or token prefix.
*/
int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */

  Fts5IndexIter **ppIter
);

/*
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
................................................................................
*/
typedef struct Fts5Expr Fts5Expr;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5Parse Fts5Parse;
typedef struct Fts5Token Fts5Token;
typedef struct Fts5ExprPhrase Fts5ExprPhrase;
typedef struct Fts5ExprNearset Fts5ExprNearset;
typedef struct Fts5ExprColset Fts5ExprColset;

struct Fts5Token {
  const char *p;                  /* Token text (not NULL terminated) */
  int n;                          /* Size of buffer p in bytes */
};

/* Parse a MATCH expression. */







>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|







 







>
|







 







<







77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
...
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
...
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
...
578
579
580
581
582
583
584

585
586
587
588
589
590
591
extern int sqlite3_fts5_may_be_corrupt;
# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
#else
# define assert_nc(x) assert(x)
#endif

typedef struct Fts5Global Fts5Global;
typedef struct Fts5ExprColset Fts5ExprColset;

/* If a NEAR() clump or phrase may only match a specific set of columns, 
** then an object of the following type is used to record the set of columns.
** Each entry in the aiCol[] array is a column that may be matched.
**
** This object is used by fts5_expr.c and fts5_index.c.
*/
struct Fts5ExprColset {
  int nCol;                       /* Number of valid entries in aiCol[] */
  int aiCol[1];                   /* Column numbers that may be matched.
                                  ** NOTE(review): declared with one element;
                                  ** presumably over-allocated to hold nCol
                                  ** entries - confirm at the allocation site */
};



/**************************************************************************
** Interface to code in fts5_config.c. fts5_config.c contains code
** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
*/

typedef struct Fts5Config Fts5Config;
................................................................................
** Create/destroy an Fts5Index object.
*/
int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
int sqlite3Fts5IndexClose(Fts5Index *p);

/*
** for(
**   sqlite3Fts5IndexQuery(p, "token", 5, 0, 0, &pIter);
**   0==sqlite3Fts5IterEof(pIter);
**   sqlite3Fts5IterNext(pIter)
** ){
**   i64 iRowid = sqlite3Fts5IterRowid(pIter);
** }
*/

................................................................................
** Open a new iterator to iterate through all rowids that match the
** specified token or token prefix.
*/
int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
  Fts5ExprColset *pColset,        /* Match these columns only */
  Fts5IndexIter **ppIter          /* OUT: New iterator object */
);

/*
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
................................................................................
*/
typedef struct Fts5Expr Fts5Expr;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5Parse Fts5Parse;
typedef struct Fts5Token Fts5Token;
typedef struct Fts5ExprPhrase Fts5ExprPhrase;
typedef struct Fts5ExprNearset Fts5ExprNearset;


/* A token expressed as a pointer/length pair rather than as a C string. */
struct Fts5Token {
  const char *p;                  /* Token text (not NULL terminated) */
  int n;                          /* Size of buffer p in bytes */
};

/* Parse a MATCH expression. */

Changes to ext/fts5/fts5_expr.c.

85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
...
998
999
1000
1001
1002
1003
1004

1005
1006
1007
1008
1009
1010
1011
struct Fts5ExprPhrase {
  Fts5ExprNode *pNode;            /* FTS5_STRING node this phrase is part of */
  Fts5Buffer poslist;             /* Current position list */
  int nTerm;                      /* Number of entries in aTerm[] */
  Fts5ExprTerm aTerm[1];          /* Terms that make up this phrase */
};

/*
** If a NEAR() clump may only match a specific set of columns, then
** Fts5ExprNearset.pColset points to an object of the following type.
** Each entry in the aiCol[] array
*/
struct Fts5ExprColset {
  int nCol;
  int aiCol[1];
};

/*
** One or more phrases that must appear within a certain token distance of
** each other within each matching document.
*/
struct Fts5ExprNearset {
  int nNear;                      /* NEAR parameter */
  Fts5ExprColset *pColset;        /* Columns to search (NULL -> all columns) */
................................................................................
          sqlite3Fts5IterClose(p->pIter);
          p->pIter = 0;
        }
        rc = sqlite3Fts5IndexQuery(
            pExpr->pIndex, p->zTerm, strlen(p->zTerm),
            (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
            (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),

            &p->pIter
        );
        assert( rc==SQLITE_OK || p->pIter==0 );
        if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){
          bEof = 0;
        }
      }







<
<
<
<
<
<
<
<
<
<







 







>







85
86
87
88
89
90
91










92
93
94
95
96
97
98
...
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
/*
** A single phrase: nTerm terms held in the trailing aTerm[] array, the
** expression node the phrase belongs to, and a buffer for the current
** position list.
*/
struct Fts5ExprPhrase {
  Fts5ExprNode *pNode;            /* FTS5_STRING node this phrase is part of */
  Fts5Buffer poslist;             /* Current position list */
  int nTerm;                      /* Number of entries in aTerm[] */
  Fts5ExprTerm aTerm[1];          /* Terms that make up this phrase */
};











/*
** One or more phrases that must appear within a certain token distance of
** each other within each matching document.
*/
struct Fts5ExprNearset {
  int nNear;                      /* NEAR parameter */
  Fts5ExprColset *pColset;        /* Columns to search (NULL -> all columns) */
................................................................................
          sqlite3Fts5IterClose(p->pIter);
          p->pIter = 0;
        }
        rc = sqlite3Fts5IndexQuery(
            pExpr->pIndex, p->zTerm, strlen(p->zTerm),
            (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
            (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
            pNear->pColset,
            &p->pIter
        );
        assert( rc==SQLITE_OK || p->pIter==0 );
        if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){
          bEof = 0;
        }
      }

Changes to ext/fts5/fts5_index.c.

3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950





































































3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962

3963
3964

3965








3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978

3979
3980
3981
3982



3983
3984
3985
3986
3987

3988

3989

3990
3991
















3992
3993
3994
3995
3996
3997
3998
....
4145
4146
4147
4148
4149
4150
4151

4152
4153
4154
4155
4156
4157
4158
4159
....
4188
4189
4190
4191
4192
4193
4194
4195


4196




4197
4198
4199
4200
4201
4202
4203
....
4423
4424
4425
4426
4427
4428
4429

4430
4431
4432
4433
4434
4435
4436
....
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
....
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
....
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
....
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
....
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
  fts5StructureRelease(pStruct);

  return fts5IndexReturn(p);
}

static void fts5PoslistCallback(
  Fts5Index *p, 
  void *pCtx, 
  const u8 *pChunk, int nChunk
){
  assert_nc( nChunk>=0 );
  if( nChunk>0 ){
    fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk);





































































  }
}

/*
** Iterator pIter currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
*/
static void fts5SegiterPoslist(
  Fts5Index *p,
  Fts5SegIter *pSeg,

  Fts5Buffer *pBuf
){

  fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);








}

/*
** Iterator pMulti currently points to a valid entry (not EOF). This
** function appends a copy of the position-list of the entry pMulti 
** currently points to to buffer pBuf.
**
** If an error occurs, an error code is left in p->rc. It is assumed
** no error has already occurred when this function is called.
*/
static void fts5MultiIterPoslist(
  Fts5Index *p,
  Fts5IndexIter *pMulti,

  int bSz,                        /* Append a size field before the data */
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){



    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );

    if( bSz ){
      /* WRITEPOSLISTSIZE */

      fts5BufferAppendVarint(&p->rc, pBuf, pSeg->nPos*2);

    }

    fts5SegiterPoslist(p, pSeg, pBuf);
  }
















}

static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  u8 *p = pIter->aPoslist + pIter->nPoslist;

  assert( pIter->aPoslist );
  if( p>=pIter->aEof ){
................................................................................
}

static void fts5SetupPrefixIter(
  Fts5Index *p,                   /* Index to read from */
  int bDesc,                      /* True for "ORDER BY rowid DESC" */
  const u8 *pToken,               /* Buffer containing prefix to match */
  int nToken,                     /* Size of buffer pToken in bytes */

  Fts5IndexIter **ppIter       /* OUT: New iterator */
){
  Fts5Structure *pStruct;
  Fts5Buffer *aBuf;
  const int nBuf = 32;

  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p);
................................................................................
            fts5BufferZero(&aBuf[i]);
          }
        }
        iLastRowid = 0;
      }

      if( 0==sqlite3Fts5BufferGrow(&p->rc, &doclist, 9) ){
        fts5MergeAppendDocid(&doclist, iLastRowid, iRowid);


        fts5MultiIterPoslist(p, p1, 1, &doclist);




      }
    }

    for(i=0; i<nBuf; i++){
      if( p->rc==SQLITE_OK ){
        fts5MergePrefixLists(p, &doclist, &aBuf[i]);
      }
................................................................................
** Open a new iterator to iterate though all rowid that match the 
** specified token or token prefix.
*/
int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */

  Fts5IndexIter **ppIter          /* OUT: New iterator object */
){
  Fts5Config *pConfig = p->pConfig;
  Fts5IndexIter *pRet = 0;
  int iIdx = 0;
  Fts5Buffer buf = {0, 0, 0};

................................................................................
      if( pStruct ){
        fts5MultiIterNew(p, pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet);
        fts5StructureRelease(pStruct);
      }
    }else{
      int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
      buf.p[0] = FTS5_MAIN_PREFIX;
      fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, &pRet);
    }

    if( p->rc ){
      sqlite3Fts5IterClose(pRet);
      pRet = 0;
      fts5CloseReader(p);
    }
................................................................................
  assert( pIter->pIndex->rc==SQLITE_OK );
  *piRowid = pSeg->iRowid;
  *pn = pSeg->nPos;
  if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){
    *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  }else{
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist);
    *pp = pIter->poslist.p;
  }
  return fts5IndexReturn(pIter->pIndex);
}

/*
** This function is similar to sqlite3Fts5IterPoslist(), except that it
................................................................................
** argument.
*/
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
  Fts5Index *p = pIter->pIndex;

  assert( p->rc==SQLITE_OK );
  fts5BufferZero(pBuf);
  fts5MultiIterPoslist(p, pIter, 0, pBuf);
  return fts5IndexReturn(p);
}

/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
................................................................................
  const char *z,                  /* Index key to query for */
  int n,                          /* Size of index key in bytes */
  int flags,                      /* Flags for Fts5IndexQuery */
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  u64 cksum = *pCksum;
  Fts5IndexIter *pIdxIter = 0;
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
    i64 dummy;
    const u8 *pPos;
    int nPos;
    i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
    rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos, &dummy);
................................................................................
    i64 iRowid = fts5MultiIterRowid(pIter);
    char *z = (char*)fts5MultiIterTerm(pIter, &n);

    /* If this is a new term, query for it. Update cksum3 with the results. */
    fts5TestTerm(p, &term, z, n, cksum2, &cksum3);

    poslist.n = 0;
    fts5MultiIterPoslist(p, pIter, 0, &poslist);
    while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
      int iCol = FTS5_POS2COLUMN(iPos);
      int iTokOff = FTS5_POS2OFFSET(iPos);
      cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
    }
  }
  fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);







|




|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>












>


>
|
>
>
>
>
>
>
>
>










|


>




>
>
>





>

>

>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
|







 







|
>
>
|
>
>
>
>







 







>







 







|







 







|







 







|







 







|







 







|







3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
....
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
....
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
....
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
....
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
....
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
....
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
....
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
....
5243
5244
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
5256
5257
  fts5StructureRelease(pStruct);

  return fts5IndexReturn(p);
}

/*
** Chunk callback used when no column filter applies. pContext is the
** Fts5Buffer to append to; the nChunk bytes at pChunk are appended to it
** verbatim. Empty chunks are ignored. &p->rc is handed to the append
** routine (presumably so that a failure is recorded there).
*/
static void fts5PoslistCallback(
  Fts5Index *p, 
  void *pContext, 
  const u8 *pChunk, int nChunk
){
  Fts5Buffer *pOut = (Fts5Buffer*)pContext;
  assert_nc( nChunk>=0 );
  if( nChunk<=0 ) return;
  fts5BufferAppendBlob(&p->rc, pOut, nChunk, pChunk);
}

/*
** Context object handed to fts5PoslistFilterCallback() through its
** void* argument. The eState field carries the filter state from one
** chunk to the next. As used by that callback:
**
**   0: position data for the current column is being skipped.
**   1: position data for the current column is being copied to pBuf.
**   2: the previous chunk ended immediately after a 0x01 column marker,
**      so the column number is the first varint of the next chunk.
*/
typedef struct PoslistCallbackCtx PoslistCallbackCtx;
struct PoslistCallbackCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5ExprColset *pColset;        /* Restrict matches to this column */
  int eState;                     /* Filter state: 0, 1 or 2 (see above) */
};

/*
** Return 1 if column iCol appears among the first pColset->nCol entries
** of pColset->aiCol[], or 0 if it does not. pColset must not be NULL.
**
** TODO: Make this more efficient! (This is a linear scan.)
*/
static int fts5IndexColsetTest(Fts5ExprColset *pColset, int iCol){
  int bFound = 0;
  int ii = 0;
  while( bFound==0 && ii<pColset->nCol ){
    bFound = (pColset->aiCol[ii]==iCol);
    ii++;
  }
  return bFound;
}

/*
** Chunk callback used when a column filter applies. pContext points to a
** PoslistCallbackCtx. The chunk is scanned for 0x01 bytes at varint
** boundaries; each is treated as a marker that is followed by a column
** number. Data belonging to columns for which fts5IndexColsetTest()
** returns true is appended to pCtx->pBuf (marker and column number
** included); data for other columns is dropped. pCtx->eState carries the
** current state across calls:
**
**   0 -> skipping, 1 -> copying, 2 -> the previous chunk ended right
**   after a 0x01 marker, so this chunk starts with the column number.
*/
static void fts5PoslistFilterCallback(
  Fts5Index *p, 
  void *pContext, 
  const u8 *pChunk, int nChunk
){
  PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
  assert_nc( nChunk>=0 );
  if( nChunk>0 ){
    /* Search through to find the first varint with value 1. This is the
    ** start of the next column's hits. */
    int i = 0;                    /* Current offset within pChunk[] */
    int iStart = 0;               /* Start of the span not yet copied */

    if( pCtx->eState==2 ){
      /* The 0x01 marker was the final byte of the previous chunk and was
      ** not copied then. Read the column number now (fts5IndexGetVarint32
      ** is presumably a macro that decodes into iCol and advances i). If
      ** the column is wanted, emit the withheld 0x01. iStart is left at 0
      ** so that the column-number varint itself is copied by the first
      ** blob append in the loop below. */
      int iCol;
      fts5IndexGetVarint32(pChunk, i, iCol);
      if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
        pCtx->eState = 1;
        fts5BufferAppendVarint(&p->rc, pCtx->pBuf, 1);
      }else{
        pCtx->eState = 0;
      }
    }

    do {
      /* Advance i one varint at a time until it reaches the end of the
      ** chunk or a varint whose first byte is 0x01.
      ** NOTE(review): the inner loop that skips continuation bytes is not
      ** bounded by nChunk; it appears to rely on a varint never running
      ** past the end of the chunk - confirm. */
      while( i<nChunk && pChunk[i]!=0x01 ){
        while( pChunk[i] & 0x80 ) i++;
        i++;
      }

      /* Copy the span just scanned if the current column is wanted. */
      if( pCtx->eState ){
        fts5BufferAppendBlob(&p->rc, pCtx->pBuf, i-iStart, &pChunk[iStart]);
      }

      if( i<nChunk ){
        /* pChunk[i] is a 0x01 marker. Step over it and look at the column
        ** number that follows, if it is present in this chunk. */
        int iCol;
        iStart = i;
        i++;
        if( i>=nChunk ){
          /* Column number is in the next chunk. Defer the decision. */
          pCtx->eState = 2;
        }else{
          fts5IndexGetVarint32(pChunk, i, iCol);
          pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
          if( pCtx->eState ){
            /* Wanted column: copy the marker and the column number. */
            fts5BufferAppendBlob(&p->rc, pCtx->pBuf, i-iStart, &pChunk[iStart]);
            iStart = i;
          }
        }
      }
    }while( i<nChunk );
  }
}

/*
** Segment iterator pSeg currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
**
** If pColset is NULL, the position list is copied verbatim using
** fts5PoslistCallback(). Otherwise the data is passed through
** fts5PoslistFilterCallback(), which copies only the data for columns
** listed in pColset.
*/
static void fts5SegiterPoslist(
  Fts5Index *p,
  Fts5SegIter *pSeg,
  Fts5ExprColset *pColset,
  Fts5Buffer *pBuf
){
  if( pColset==0 ){
    fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
  }else{
    PoslistCallbackCtx sCtx;
    sCtx.pBuf = pBuf;
    sCtx.pColset = pColset;
    /* pColset is known to be non-NULL in this branch. The initial state
    ** is "copying" (1) if column 0 is part of the column set, otherwise
    ** "skipping" (0). Presumably the data that precedes the first 0x01
    ** marker belongs to column 0 - confirm against the poslist format. */
    sCtx.eState = fts5IndexColsetTest(pColset, 0);
    assert( sCtx.eState==0 || sCtx.eState==1 );
    fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
  }
}

/*
** Iterator pMulti currently points to a valid entry (not EOF). This
** function appends a copy of the position-list of the entry pMulti 
** currently points to to buffer pBuf. If pColset is not NULL, the data is
** filtered by fts5SegiterPoslist() so that only columns in pColset are
** copied.
**
** If bSz is true, a size field (the varint nPos*2) is appended before the
** data. If filtering then changes the number of data bytes, the size
** field is rewritten in place: it is padded with leading 0x80 bytes so
** that it still occupies the bytes originally reserved for it.
**
** Return 1 if bSz is true, pColset is not NULL and the filtered position
** list is empty even though the unfiltered one was not. In that case the
** size field already appended is left in pBuf, so the caller must discard
** what was appended. Return 0 in all other cases.
**
** If an error occurs, an error code is left in p->rc. If p->rc is already
** set when this function is called, it does nothing and returns 0.
*/
static int fts5MultiIterPoslist(
  Fts5Index *p,
  Fts5IndexIter *pMulti,
  Fts5ExprColset *pColset,
  int bSz,                        /* Append a size field before the data */
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){
    /* Both offsets are only meaningful when bSz is true. They are given
    ** initial values so that no path reads an indeterminate value. */
    int iSz = 0;                  /* Offset of the size field in pBuf */
    int iData = 0;                /* Offset of the first data byte in pBuf */

    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );

    if( bSz ){
      /* WRITEPOSLISTSIZE */
      iSz = pBuf->n;
      fts5BufferAppendVarint(&p->rc, pBuf, pSeg->nPos*2);
      iData = pBuf->n;
    }

    fts5SegiterPoslist(p, pSeg, pColset, pBuf);

    if( bSz && pColset ){
      int nActual = pBuf->n - iData;
      if( nActual!=pSeg->nPos ){
        /* WRITEPOSLISTSIZE */
        if( nActual==0 ){
          /* Every position was filtered out. */
          return 1;
        }else{
          /* Overwrite the size field with the filtered size. Pad with
          ** 0x80 bytes first so the new varint ends exactly at iData. */
          int nReq = sqlite3Fts5GetVarintLen((u32)(nActual*2));
          while( iSz<(iData-nReq) ){ pBuf->p[iSz++] = 0x80; }
          sqlite3Fts5PutVarint(&pBuf->p[iSz], nActual*2);
        }
      }
    }
  }

  return 0;
}

static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  u8 *p = pIter->aPoslist + pIter->nPoslist;

  assert( pIter->aPoslist );
  if( p>=pIter->aEof ){
................................................................................
}

static void fts5SetupPrefixIter(
  Fts5Index *p,                   /* Index to read from */
  int bDesc,                      /* True for "ORDER BY rowid DESC" */
  const u8 *pToken,               /* Buffer containing prefix to match */
  int nToken,                     /* Size of buffer pToken in bytes */
  Fts5ExprColset *pColset,        /* Restrict matches to these columns */
  Fts5IndexIter **ppIter          /* OUT: New iterator */
){
  Fts5Structure *pStruct;
  Fts5Buffer *aBuf;
  const int nBuf = 32;

  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p);
................................................................................
            fts5BufferZero(&aBuf[i]);
          }
        }
        iLastRowid = 0;
      }

      if( 0==sqlite3Fts5BufferGrow(&p->rc, &doclist, 9) ){
        int iSave = doclist.n;
        assert( doclist.n!=0 || iLastRowid==0 );
        fts5BufferSafeAppendVarint(&doclist, iRowid - iLastRowid);
        if( fts5MultiIterPoslist(p, p1, pColset, 1, &doclist) ){
          doclist.n = iSave;
        }else{
          iLastRowid = iRowid;
        }
      }
    }

    for(i=0; i<nBuf; i++){
      if( p->rc==SQLITE_OK ){
        fts5MergePrefixLists(p, &doclist, &aBuf[i]);
      }
................................................................................
** Open a new iterator to iterate through all rowids that match the
** specified token or token prefix.
*/
int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
  Fts5ExprColset *pColset,        /* Match these columns only */
  Fts5IndexIter **ppIter          /* OUT: New iterator object */
){
  Fts5Config *pConfig = p->pConfig;
  Fts5IndexIter *pRet = 0;
  int iIdx = 0;
  Fts5Buffer buf = {0, 0, 0};

................................................................................
      if( pStruct ){
        fts5MultiIterNew(p, pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet);
        fts5StructureRelease(pStruct);
      }
    }else{
      int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
      buf.p[0] = FTS5_MAIN_PREFIX;
      fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
    }

    if( p->rc ){
      sqlite3Fts5IterClose(pRet);
      pRet = 0;
      fts5CloseReader(p);
    }
................................................................................
  assert( pIter->pIndex->rc==SQLITE_OK );
  *piRowid = pSeg->iRowid;
  *pn = pSeg->nPos;
  if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){
    *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  }else{
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
    *pp = pIter->poslist.p;
  }
  return fts5IndexReturn(pIter->pIndex);
}

/*
** This function is similar to sqlite3Fts5IterPoslist(), except that it
................................................................................
** argument.
*/
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
  Fts5Index *pIdx = pIter->pIndex;

  assert( pIdx->rc==SQLITE_OK );

  /* Reset the output buffer, then copy the current entry's position list
  ** into it with no column filter (pColset==0) and no size field
  ** (bSz==0). The return value is not used here. */
  fts5BufferZero(pBuf);
  (void)fts5MultiIterPoslist(pIdx, pIter, 0, 0, pBuf);

  return fts5IndexReturn(pIdx);
}

/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
................................................................................
  const char *z,                  /* Index key to query for */
  int n,                          /* Size of index key in bytes */
  int flags,                      /* Flags for Fts5IndexQuery */
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  u64 cksum = *pCksum;
  Fts5IndexIter *pIdxIter = 0;
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
    i64 dummy;
    const u8 *pPos;
    int nPos;
    i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
    rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos, &dummy);
................................................................................
    i64 iRowid = fts5MultiIterRowid(pIter);
    char *z = (char*)fts5MultiIterTerm(pIter, &n);

    /* If this is a new term, query for it. Update cksum3 with the results. */
    fts5TestTerm(p, &term, z, n, cksum2, &cksum3);

    poslist.n = 0;
    fts5MultiIterPoslist(p, pIter, 0, 0, &poslist);
    while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
      int iCol = FTS5_POS2COLUMN(iPos);
      int iTokOff = FTS5_POS2OFFSET(iPos);
      cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
    }
  }
  fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);

Changes to ext/fts5/fts5_vocab.c.

398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
  sqlite3_value **apVal           /* Arguments for the indexing scheme */
){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  int rc;
  const int flags = FTS5INDEX_QUERY_SCAN;

  fts5VocabResetCursor(pCsr);
  rc = sqlite3Fts5IndexQuery(pCsr->pIndex, 0, 0, flags, &pCsr->pIter);
  if( rc==SQLITE_OK ){
    rc = fts5VocabNextMethod(pCursor);
  }

  return rc;
}








|







398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
  sqlite3_value **apVal           /* Arguments for the indexing scheme */
){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  int rc;
  const int flags = FTS5INDEX_QUERY_SCAN;

  fts5VocabResetCursor(pCsr);
  rc = sqlite3Fts5IndexQuery(pCsr->pIndex, 0, 0, flags, 0, &pCsr->pIter);
  if( rc==SQLITE_OK ){
    rc = fts5VocabNextMethod(pCursor);
  }

  return rc;
}

Changes to ext/fts5/test/fts5prefix.test.

58
59
60
61
62
63
64


65
















































































66
67

foreach {tn q res} {
  1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1
  2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2
} {
  do_execsql_test 2.3.$tn $q $res
}




















































































finish_test









>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


>
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
foreach {tn q res} {
  1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1
  2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2
} {
  do_execsql_test 2.3.$tn $q $res
}

#-------------------------------------------------------------------------
# Check that prefix queries with:
#
#   * a column filter, and
#   * no prefix index.
#
# work Ok.
#
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t3 USING fts5(a, b, c);
  INSERT INTO t3(t3, rank) VALUES('pgsz', 32);
  BEGIN;
    INSERT INTO t3 VALUES('acb ccc bba', 'cca bba bca', 'bbc ccc bca'); -- 1
    INSERT INTO t3 VALUES('cbb cac cab', 'abb aac bba', 'aab ccc cac'); -- 2
    INSERT INTO t3 VALUES('aac bcb aac', 'acb bcb caa', 'aca bab bca'); -- 3
    INSERT INTO t3 VALUES('aab ccb ccc', 'aca cba cca', 'aca aac cbb'); -- 4
    INSERT INTO t3 VALUES('bac aab bab', 'ccb bac cba', 'acb aba abb'); -- 5
    INSERT INTO t3 VALUES('bab abc ccb', 'acb cba abb', 'cbb aaa cab'); -- 6
    INSERT INTO t3 VALUES('cbb bbc baa', 'aab aca baa', 'bcc cca aca'); -- 7
    INSERT INTO t3 VALUES('abc bba abb', 'cac abc cba', 'acc aac cac'); -- 8
    INSERT INTO t3 VALUES('bbc bbc cab', 'bcb ccb cba', 'bcc cac acb'); -- 9
  COMMIT;
}

foreach {tn match res} {
  1 "a : c*" {1 2 4 6 7 9}
  2 "b : c*" {1 3 4 5 6 8 9}
  3 "c : c*" {1 2 4 6 7 8 9}
  4 "a : b*" {1 3 5 6 7 8 9}
  5 "b : b*" {1 2 3 5 7 9}
  6 "c : b*" {1 3 7 9}
  7 "a : a*" {1 3 4 5 6 8}
  8 "b : a*" {2 3 4 6 7 8}
  9 "c : a*" {2 3 4 5 6 7 8 9}
} {
  do_execsql_test 3.1.$tn {
    SELECT rowid FROM t3($match)
  } $res
}

do_test 3.2 {
  expr srand(0)
  execsql { DELETE FROM t3 }
  for {set i 0} {$i < 1000} {incr i} {
    set a [fts5_rnddoc 3]
    set b [fts5_rnddoc 8]
    set c [fts5_rnddoc 20]
    execsql { INSERT INTO t3 VALUES($a, $b, $c) }
  }
  execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
} {}

# Return 1 if any element of the list $col matches the glob $pattern,
# or 0 otherwise.
proc gmatch {col pattern} {
  set idx [lsearch -glob $col $pattern]
  return [expr {$idx>=0}]
}
db func gmatch gmatch

# For each prefix pattern, compute the expected rowids with the gmatch
# SQL function applied to column b, then check that the fts5 query
# "b : <pattern>" returns the same rowids.
#
# The outer loop runs twice. At the end of each pass the 'optimize' and
# 'integrity-check' commands are issued, so the second pass runs the same
# queries after 'optimize' has been applied.
#
for {set x 0} {$x<2} {incr x} {
  foreach {tn pattern} {
    1  {xa*}
    2  {xb*}
    3  {xc*}
    4  {xd*}
    5  {xe*}
    6  {xf*}
    7  {xg*}
    8  {xh*}
    9  {xi*}
    10 {xj*}
  } {
    foreach col {b} {
      set res [db eval "SELECT rowid FROM t3 WHERE gmatch($col, '$pattern')"]
      set query "$col : $pattern"
      do_execsql_test 3.3.$x.$tn.$col {
        SELECT rowid FROM t3($query);
      } $res
    }
  }
  execsql { INSERT INTO t3(t3) VALUES('optimize') }
  execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
}


finish_test


Changes to ext/fts5/test/fts5simple.test.

235
236
237
238
239
240
241











242
243
244
245
  SELECT rowid FROM ft2('a');
} {1 2}

do_execsql_test 9.3 {
  SELECT rowid FROM ft2('b AND c');
} {2}














finish_test








>
>
>
>
>
>
>
>
>
>
>




235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
  SELECT rowid FROM ft2('a');
} {1 2}

do_execsql_test 9.3 {
  SELECT rowid FROM ft2('b AND c');
} {2}

#-------------------------------------------------------------------------
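# Check that a prefix query restricted by a column filter only matches
# rows in which the prefix occurs in the named column.
#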
do_execsql_test 10.0 {
  CREATE VIRTUAL TABLE t3 USING fts5(a, b, c);
  INSERT INTO t3 VALUES('bac aab bab', 'c bac c', 'acb aba abb'); -- 1
  INSERT INTO t3 VALUES('bab abc c', 'acb c abb', 'c aaa c');     -- 2
}

do_execsql_test 10.1 {
  SELECT rowid FROM t3('c: c*');
} {2}


finish_test