SQLite

Check-in [ea543f081d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Allow multi-token phrases to load doclists from the database incrementally. This allows queries that feature such phrases to benefit from the "docid<?" optimization.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts4-docid-range-constraints
Files: files | file ages | folders
SHA1: ea543f081d93ed1bf66c21ce2108ec94e349f4c5
User & Date: dan 2013-10-01 20:02:32.683
Context
2013-10-01
20:10
Merge trunk changes with this branch. (check-in: 65d9c6fafb user: dan tags: fts4-docid-range-constraints)
20:02
Allow multi-token phrases to load doclists from the database incrementally. This allows queries that feature such phrases to benefit from the "docid<?" optimization. (check-in: ea543f081d user: dan tags: fts4-docid-range-constraints)
2013-09-30
18:16
Merge trunk changes with this branch. (check-in: e294a9c7c5 user: dan tags: fts4-docid-range-constraints)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
4011
4012
4013
4014
4015
4016
4017






4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033


4034






4035
4036
4037

4038

4039



4040
4041
4042
4043


4044
4045
4046
4047
4048
4049
4050
4051
4052

4053
4054
4055
4056
4057
4058
4059
      sqlite3_free(aPoslist);
    }
  }

  return SQLITE_OK;
}







/*
** This function is called for each Fts3Phrase in a full-text query 
** expression to initialize the mechanism for returning rows. Once this
** function has been called successfully on an Fts3Phrase, it may be
** used with fts3EvalPhraseNext() to iterate through the matching docids.
**
** If parameter bOptOk is true, then the phrase may (or may not) use the
** incremental loading strategy. Otherwise, the entire doclist is loaded into
** memory within this call.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
  int rc;                         /* Error code */
  Fts3PhraseToken *pFirst = &p->aToken[0];
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;









  if( pCsr->bDesc==pTab->bDescIdx 
   && bOptOk==1 
   && p->nToken==1 

   && pFirst->pSegcsr 

   && pFirst->pSegcsr->bLookup 



   && pFirst->bFirst==0
  ){
    /* Use the incremental approach. */
    int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);


    rc = sqlite3Fts3MsrIncrStart(
        pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
    p->bIncr = 1;

  }else{
    /* Load the full doclist for the phrase into memory. */
    rc = fts3EvalPhraseLoad(pCsr, p);
    p->bIncr = 0;
  }


  assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
  return rc;
}

/*
** This function is used to iterate backwards (from the end to start) 







>
>
>
>
>
>













<
<

>
>

>
>
>
>
>
>
|
<
|
>
|
>
|
>
>
>
|
|


>
>
|
<
<
|



<

>







4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036


4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047

4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062


4063
4064
4065
4066

4067
4068
4069
4070
4071
4072
4073
4074
4075
      sqlite3_free(aPoslist);
    }
  }

  return SQLITE_OK;
}

/*
** Maximum number of tokens a phrase may have to be considered for the
** incremental doclists strategy.
*/
#define MAX_INCR_PHRASE_TOKENS 4

/*
** This function is called for each Fts3Phrase in a full-text query 
** expression to initialize the mechanism for returning rows. Once this
** function has been called successfully on an Fts3Phrase, it may be
** used with fts3EvalPhraseNext() to iterate through the matching docids.
**
** If parameter bOptOk is true, then the phrase may (or may not) use the
** incremental loading strategy. Otherwise, the entire doclist is loaded into
** memory within this call.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){


  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc = SQLITE_OK;             /* Error code */
  int i;

  /* Determine if doclists may be loaded from disk incrementally. This is
  ** possible if the bOptOk argument is true, the FTS doclists will be
  ** scanned in forward order, and the phrase consists of 
  ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first"
  ** tokens or prefix tokens that cannot use a prefix-index.  */
  int bIncrOk = (bOptOk 
   && pCsr->bDesc==pTab->bDescIdx 

   && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
  );
  for(i=0; bIncrOk==1 && i<p->nToken; i++){
    Fts3PhraseToken *pToken = &p->aToken[i];
    if( pToken->bFirst || !pToken->pSegcsr || !pToken->pSegcsr->bLookup ){
      bIncrOk = 0;
    }
  }

  if( bIncrOk ){
    /* Use the incremental approach. */
    int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
    for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
      Fts3PhraseToken *pTok = &p->aToken[i];
      rc = sqlite3Fts3MsrIncrStart(pTab, pTok->pSegcsr, iCol, pTok->z, pTok->n);


    }
  }else{
    /* Load the full doclist for the phrase into memory. */
    rc = fts3EvalPhraseLoad(pCsr, p);

  }
  p->bIncr = bIncrOk;

  assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
  return rc;
}

/*
** This function is used to iterate backwards (from the end to start) 
4148
4149
4150
4151
4152
4153
4154































































































































4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
      p += sqlite3Fts3GetVarint(p, &iVar);
      *piDocid += ((bDescIdx ? -1 : 1) * iVar);
    }
  }

  *ppIter = p;
}
































































































































/*
** Attempt to move the phrase iterator to point to the next matching docid. 
** If an error occurs, return an SQLite error code. Otherwise, return 
** SQLITE_OK.
**
** If there is no "next" entry and no error occurs, then *pbEof is set to
** 1 before returning. Otherwise, if no error occurs and the iterator is
** successfully advanced, *pbEof is set to 0.
*/
static int fts3EvalPhraseNext(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
  u8 *pbEof                       /* OUT: Set to 1 if EOF */
){
  int rc = SQLITE_OK;
  Fts3Doclist *pDL = &p->doclist;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;

  if( p->bIncr ){
    assert( p->nToken==1 );
    assert( pDL->pNextDocid==0 );
    rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, 
        &pDL->iDocid, &pDL->pList, &pDL->nList
    );
    if( rc==SQLITE_OK && !pDL->pList ){
      *pbEof = 1;
    }
  }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
    sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, 
        &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
    );
    pDL->pList = pDL->pNextDocid;
  }else{
    char *pIter;                            /* Used to iterate through aAll */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




















<
<
<
<
<
<
|
<







4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317






4318

4319
4320
4321
4322
4323
4324
4325
      p += sqlite3Fts3GetVarint(p, &iVar);
      *piDocid += ((bDescIdx ? -1 : 1) * iVar);
    }
  }

  *ppIter = p;
}

/*
** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext().
*/
typedef struct TokenDoclist TokenDoclist;
struct TokenDoclist {
  sqlite3_int64 iDocid;
  char *pList;
  int nList;
};

/*
** Token pToken is an incrementally loaded token that is part of a 
** multi-token phrase. Advance it to the next matching document in the
** database and populate output variable *p with the details of the new
** entry. Or, if the iterator has reached EOF, set *pbEof to true.
**
** If an error occurs, return an SQLite error code. Otherwise, return 
** SQLITE_OK.
*/
static int incrPhraseTokenNext(
  Fts3Table *pTab,                /* Virtual table handle */
  Fts3PhraseToken *pToken,        /* Advance the iterator for this token */
  TokenDoclist *p,                /* OUT: Docid and doclist for new entry */
  int *pbEof                      /* OUT: True if iterator is at EOF */
){
  int rc;
  assert( pToken->pDeferred==0 );
  rc = sqlite3Fts3MsrIncrNext(
      pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList
  );
  if( p->pList==0 ) *pbEof = 1;
  return rc;
}


/*
** The phrase iterator passed as the second argument uses the incremental
** doclist strategy. Advance it to the next matching documnent in the
** database. If an error occurs, return an SQLite error code. Otherwise, 
** return SQLITE_OK.
**
** If there is no "next" entry and no error occurs, then *pbEof is set to
** 1 before returning. Otherwise, if no error occurs and the iterator is
** successfully advanced, *pbEof is set to 0.
*/
static int fts3EvalIncrPhraseNext(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
  u8 *pbEof                       /* OUT: Set to 1 if EOF */
){
  int rc = SQLITE_OK;
  Fts3Doclist *pDL = &p->doclist;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int bEof = 0;

  assert( p->bIncr==1 );
  assert( pDL->pNextDocid==0 );

  if( p->nToken==1 ){
    rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, 
        &pDL->iDocid, &pDL->pList, &pDL->nList
    );
    if( pDL->pList==0 ) bEof = 1;
  }else{
    int bDescDoclist = pCsr->bDesc;
    struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS];

    assert( p->nToken<=MAX_INCR_PHRASE_TOKENS );

    while( bEof==0 ){
      sqlite3_int64 iMax;         /* Largest docid for all iterators */
      int i;                      /* Used to iterate through tokens */

      /* Advance the iterator for each token in the phrase once. */
      for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
        rc = incrPhraseTokenNext(pTab, &p->aToken[i], &a[i], &bEof);
        if( i==0 || DOCID_CMP(iMax, a[i].iDocid)<0 ){
          iMax = a[i].iDocid;
        }
      }

      /* Keep advancing iterators until they all point to the same document */
      if( bEof==0 && rc==SQLITE_OK ){
        for(i=0; i<p->nToken; i++){
          while( DOCID_CMP(a[i].iDocid, iMax)<0 && rc==SQLITE_OK && bEof==0 ){
            rc = incrPhraseTokenNext(pTab, &p->aToken[i], &a[i], &bEof);
            if( DOCID_CMP(a[i].iDocid, iMax)>0 ){
              iMax = a[i].iDocid;
              i = 0;
            }
          }
        }
      }

      /* Check if the current entries really are a phrase match */
      if( bEof==0 ){
        int nByte = a[p->nToken-1].nList;
        char *aDoclist = sqlite3_malloc(nByte+1);
        if( !aDoclist ) return SQLITE_NOMEM;
        memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);

        int nList;
        for(i=0; i<(p->nToken-1); i++){
          char *pLeft = a[i].pList;
          char *pRight = aDoclist;
          char *pOut = aDoclist;
          int nDist = p->nToken-1-i;
          int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pLeft, &pRight);
          if( res==0 ) break;
          nList = (pOut - aDoclist);
        }
        if( i==(p->nToken-1) ){
          pDL->iDocid = a[0].iDocid;
          pDL->pList = aDoclist;
          pDL->nList = nList;
          pDL->bFreeList = 1;
          break;
        }
        sqlite3_free(aDoclist);
      }
    }
  }

  *pbEof = bEof;
  return rc;
}

/*
** Attempt to move the phrase iterator to point to the next matching docid. 
** If an error occurs, return an SQLite error code. Otherwise, return 
** SQLITE_OK.
**
** If there is no "next" entry and no error occurs, then *pbEof is set to
** 1 before returning. Otherwise, if no error occurs and the iterator is
** successfully advanced, *pbEof is set to 0.
*/
static int fts3EvalPhraseNext(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
  u8 *pbEof                       /* OUT: Set to 1 if EOF */
){
  int rc = SQLITE_OK;
  Fts3Doclist *pDL = &p->doclist;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;

  if( p->bIncr ){






    rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof);

  }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
    sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, 
        &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
    );
    pDL->pList = pDL->pNextDocid;
  }else{
    char *pIter;                            /* Used to iterate through aAll */
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
**
** If an error occurs within this function, *pRc is set to an SQLite error
** code before returning.
*/
static void fts3EvalStartReaders(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Expr *pExpr,                /* Expression to initialize phrases in */
  int bOptOk,                     /* True to enable incremental loading */
  int *pRc                        /* IN/OUT: Error code */
){
  if( pExpr && SQLITE_OK==*pRc ){
    if( pExpr->eType==FTSQUERY_PHRASE ){
      int i;
      int nToken = pExpr->pPhrase->nToken;
      for(i=0; i<nToken; i++){
        if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
      }
      pExpr->bDeferred = (i==nToken);
      *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase);
    }else{
      fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
      fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
      pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
    }
  }
}

/*
** An array of the following structures is assembled as part of the process







<










|

|
|







4377
4378
4379
4380
4381
4382
4383

4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
**
** If an error occurs within this function, *pRc is set to an SQLite error
** code before returning.
*/
static void fts3EvalStartReaders(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Expr *pExpr,                /* Expression to initialize phrases in */

  int *pRc                        /* IN/OUT: Error code */
){
  if( pExpr && SQLITE_OK==*pRc ){
    if( pExpr->eType==FTSQUERY_PHRASE ){
      int i;
      int nToken = pExpr->pPhrase->nToken;
      for(i=0; i<nToken; i++){
        if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
      }
      pExpr->bDeferred = (i==nToken);
      *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase);
    }else{
      fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc);
      fts3EvalStartReaders(pCsr, pExpr->pRight, pRc);
      pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
    }
  }
}

/*
** An array of the following structures is assembled as part of the process
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
      }

      sqlite3_free(aTC);
    }
  }
#endif

  fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
  return rc;
}

/*
** Invalidate the current position list for phrase pPhrase.
*/
static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){







|







4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
      }

      sqlite3_free(aTC);
    }
  }
#endif

  fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc);
  return rc;
}

/*
** Invalidate the current position list for phrase pPhrase.
*/
static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
5093
5094
5095
5096
5097
5098
5099

5100
5101
5102

5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
){
  if( pExpr && *pRc==SQLITE_OK ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;

    if( pPhrase ){
      fts3EvalInvalidatePoslist(pPhrase);
      if( pPhrase->bIncr ){

        assert( pPhrase->nToken==1 );
        assert( pPhrase->aToken[0].pSegcsr );
        sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr);

        *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
      }

      pPhrase->doclist.pNextDocid = 0;
      pPhrase->doclist.iDocid = 0;
    }

    pExpr->iDocid = 0;
    pExpr->bEof = 0;
    pExpr->bStart = 0;







>
|
|
|
>


<







5228
5229
5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241

5242
5243
5244
5245
5246
5247
5248
){
  if( pExpr && *pRc==SQLITE_OK ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;

    if( pPhrase ){
      fts3EvalInvalidatePoslist(pPhrase);
      if( pPhrase->bIncr ){
        int i;
        for(i=0; i<pPhrase->nToken; i++){
          assert( pPhrase->aToken[i].pSegcsr );
          sqlite3Fts3MsrIncrRestart(pPhrase->aToken[i].pSegcsr);
        }
        *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
      }

      pPhrase->doclist.pNextDocid = 0;
      pPhrase->doclist.iDocid = 0;
    }

    pExpr->iDocid = 0;
    pExpr->bEof = 0;
    pExpr->bStart = 0;