SQLite

Check-in [8f939723f7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Avoid loading doclists for infrequent terms that are part of phrases twice.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 8f939723f742329cedba8930f71dff42004f3d0d
User & Date: dan 2011-06-17 17:37:31.284
Context
2011-06-17
18:52
Fix a header dependency in nmake Makefile. (check-in: 54492212af user: shaneh tags: trunk)
17:37
Avoid loading doclists for infrequent terms that are part of phrases twice. (check-in: 8f939723f7 user: dan tags: trunk)
16:04
Add a missing declaration to fts3Int.h. (check-in: 3bfd4466f5 user: dan tags: trunk)
Changes
Unified Diff Show Whitespace Changes Patch
Changes to ext/fts3/fts3.c.
3178
3179
3180
3181
3182
3183
3184


3185
3186
3187
3188
3189
3190
3191






















































3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232

3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
            pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
        );
        if( rc!=SQLITE_OK ){
          *pRc = rc;
          return;
        }
      }


    }else{
      *pnOr += (pExpr->eType==FTSQUERY_OR);
      fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
      fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
    }
  }
}























































static int fts3EvalPhraseLoad(
  Fts3Cursor *pCsr, 
  Fts3Phrase *p
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int iToken;
  int rc = SQLITE_OK;

  char *aDoclist = 0;
  int nDoclist = 0;
  int iPrev = -1;

  for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){
    Fts3PhraseToken *pToken = &p->aToken[iToken];
    assert( pToken->pSegcsr || pToken->pDeferred );

    if( pToken->pDeferred==0 ){
      int nThis = 0;
      char *pThis = 0;
      rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis);
      if( rc==SQLITE_OK ){
        if( pThis==0 ){
          sqlite3_free(aDoclist);
          aDoclist = 0;
          nDoclist = 0;
          break;
        }else if( aDoclist==0 ){
          aDoclist = pThis;
          nDoclist = nThis;
        }else{
          assert( iPrev>=0 );
          fts3DoclistPhraseMerge(pTab->bDescIdx,
              iToken-iPrev, aDoclist, nDoclist, pThis, &nThis
          );
          sqlite3_free(aDoclist);
          aDoclist = pThis;
          nDoclist = nThis;
        }
        iPrev = iToken;
      }

    }
  }

  if( rc==SQLITE_OK ){
    p->doclist.aAll = aDoclist;
    p->doclist.nAll = nDoclist;
  }else{
    sqlite3_free(aDoclist);
  }
  return rc;
}

static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
  int iToken;
  int rc = SQLITE_OK;

  int nMaxUndeferred = -1;
  char *aPoslist = 0;
  int nPoslist = 0;
  int iPrev = -1;

  assert( pPhrase->doclist.bFreeList==0 );

  for(iToken=0; rc==SQLITE_OK && iToken<pPhrase->nToken; iToken++){







>
>







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>









<
<
<
<


|

|




<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<

<

>

|
<
<
<
<
<
<
<







|







3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256




3257
3258
3259
3260
3261
3262
3263
3264
3265










3266





3267

3268
3269
3270
3271







3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
            pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
        );
        if( rc!=SQLITE_OK ){
          *pRc = rc;
          return;
        }
      }
      assert( pExpr->pPhrase->iDoclistToken==0 );
      pExpr->pPhrase->iDoclistToken = -1;
    }else{
      *pnOr += (pExpr->eType==FTSQUERY_OR);
      fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
      fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
    }
  }
}

static void fts3EvalPhraseMergeToken(
  Fts3Table *pTab,
  Fts3Phrase *p,
  int iToken,
  char *pList,
  int nList
){
  assert( iToken!=p->iDoclistToken );

  if( pList==0 ){
    sqlite3_free(p->doclist.aAll);
    p->doclist.aAll = 0;
    p->doclist.nAll = 0;
  }

  else if( p->iDoclistToken<0 ){
    p->doclist.aAll = pList;
    p->doclist.nAll = nList;
  }

  else if( p->doclist.aAll==0 ){
    sqlite3_free(pList);
  }

  else {
    char *pLeft;
    char *pRight;
    int nLeft;
    int nRight;
    int nDiff;

    if( p->iDoclistToken<iToken ){
      pLeft = p->doclist.aAll;
      nLeft = p->doclist.nAll;
      pRight = pList;
      nRight = nList;
      nDiff = iToken - p->iDoclistToken;
    }else{
      pRight = p->doclist.aAll;
      nRight = p->doclist.nAll;
      pLeft = pList;
      nLeft = nList;
      nDiff = p->iDoclistToken - iToken;
    }

    fts3DoclistPhraseMerge(pTab->bDescIdx, nDiff, pLeft, nLeft, pRight,&nRight);
    sqlite3_free(pLeft);
    p->doclist.aAll = pRight;
    p->doclist.nAll = nRight;
  }

  if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
}

static int fts3EvalPhraseLoad(
  Fts3Cursor *pCsr, 
  Fts3Phrase *p
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int iToken;
  int rc = SQLITE_OK;





  for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){
    Fts3PhraseToken *pToken = &p->aToken[iToken];
    assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );

    if( pToken->pSegcsr ){
      int nThis = 0;
      char *pThis = 0;
      rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis);
      if( rc==SQLITE_OK ){










        fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);





        }

      }
    assert( pToken->pSegcsr==0 );
    }








  return rc;
}

static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
  int iToken;
  int rc = SQLITE_OK;

  int nMaxUndeferred = pPhrase->iDoclistToken;
  char *aPoslist = 0;
  int nPoslist = 0;
  int iPrev = -1;

  assert( pPhrase->doclist.bFreeList==0 );

  for(iToken=0; rc==SQLITE_OK && iToken<pPhrase->nToken; iToken++){
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
          sqlite3_free(aPoslist);
          pPhrase->doclist.pList = 0;
          pPhrase->doclist.nList = 0;
          return SQLITE_OK;
        }
      }
      iPrev = iToken;
    }else{
      nMaxUndeferred = iToken;
    }
  }

  if( iPrev>=0 ){
    if( nMaxUndeferred<0 ){
      pPhrase->doclist.pList = aPoslist;
      pPhrase->doclist.nList = nPoslist;







<
<







3317
3318
3319
3320
3321
3322
3323


3324
3325
3326
3327
3328
3329
3330
          sqlite3_free(aPoslist);
          pPhrase->doclist.pList = 0;
          pPhrase->doclist.nList = 0;
          return SQLITE_OK;
        }
      }
      iPrev = iToken;


    }
  }

  if( iPrev>=0 ){
    if( nMaxUndeferred<0 ){
      pPhrase->doclist.pList = aPoslist;
      pPhrase->doclist.nList = nPoslist;
3347
3348
3349
3350
3351
3352
3353
3354
3355


3356

3357
3358
3359
3360
3361
3362
3363
** used with fts3EvalPhraseNext() to iterate through the matching docids.
*/
static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
  int rc;
  Fts3PhraseToken *pFirst = &p->aToken[0];
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;

  assert( p->doclist.aAll==0 );
  if( pCsr->bDesc==pTab->bDescIdx && bOptOk==1 && p->nToken==1 


   && pFirst->pSegcsr && pFirst->pSegcsr->bLookup 

  ){
    /* Use the incremental approach. */
    int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
    rc = sqlite3Fts3MsrIncrStart(
        pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
    p->bIncr = 1;








<
|
>
>
|
>







3375
3376
3377
3378
3379
3380
3381

3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
** used with fts3EvalPhraseNext() to iterate through the matching docids.
*/
static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
  int rc;
  Fts3PhraseToken *pFirst = &p->aToken[0];
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;


  if( pCsr->bDesc==pTab->bDescIdx 
   && bOptOk==1 
   && p->nToken==1 
   && pFirst->pSegcsr 
   && pFirst->pSegcsr->bLookup 
  ){
    /* Use the incremental approach. */
    int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
    rc = sqlite3Fts3MsrIncrStart(
        pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
    p->bIncr = 1;

3520
3521
3522
3523
3524
3525
3526
3527
3528
3529


3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549


3550
3551
3552
3553
3554
3555
3556
      fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
      fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
      pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
    }
  }
}


typedef struct Fts3TokenAndCost Fts3TokenAndCost;
struct Fts3TokenAndCost {


  Fts3PhraseToken *pToken;
  Fts3Expr *pRoot;
  int nOvfl;
  int iCol;
};

static void fts3EvalTokenCosts(
  Fts3Cursor *pCsr, 
  Fts3Expr *pRoot, 
  Fts3Expr *pExpr, 
  Fts3TokenAndCost **ppTC,
  Fts3Expr ***ppOr,
  int *pRc
){
  if( *pRc==SQLITE_OK && pExpr ){
    if( pExpr->eType==FTSQUERY_PHRASE ){
      Fts3Phrase *pPhrase = pExpr->pPhrase;
      int i;
      for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
        Fts3TokenAndCost *pTC = (*ppTC)++;


        pTC->pRoot = pRoot;
        pTC->pToken = &pPhrase->aToken[i];
        pTC->iCol = pPhrase->iColumn;
        *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
      }
    }else if( pExpr->eType!=FTSQUERY_NOT ){
      if( pExpr->eType==FTSQUERY_OR ){







<


>
>
|


|
















>
>







3550
3551
3552
3553
3554
3555
3556

3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
      fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
      fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
      pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
    }
  }
}


typedef struct Fts3TokenAndCost Fts3TokenAndCost;
struct Fts3TokenAndCost {
  Fts3Phrase *pPhrase;            /* The phrase the token belongs to */
  int iToken;                     /* Position of token in phrase */
  Fts3PhraseToken *pToken;        /* The token itself */
  Fts3Expr *pRoot;
  int nOvfl;
  int iCol;                       /* The column the token must match */
};

static void fts3EvalTokenCosts(
  Fts3Cursor *pCsr, 
  Fts3Expr *pRoot, 
  Fts3Expr *pExpr, 
  Fts3TokenAndCost **ppTC,
  Fts3Expr ***ppOr,
  int *pRc
){
  if( *pRc==SQLITE_OK && pExpr ){
    if( pExpr->eType==FTSQUERY_PHRASE ){
      Fts3Phrase *pPhrase = pExpr->pPhrase;
      int i;
      for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
        Fts3TokenAndCost *pTC = (*ppTC)++;
        pTC->pPhrase = pPhrase;
        pTC->iToken = i;
        pTC->pRoot = pRoot;
        pTC->pToken = &pPhrase->aToken[i];
        pTC->iCol = pPhrase->iColumn;
        *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
      }
    }else if( pExpr->eType!=FTSQUERY_NOT ){
      if( pExpr->eType==FTSQUERY_OR ){
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674


3675
3676
3677
3678
3679
3680
3681
    assert( pTC );

    /* At this point pTC points to the cheapest remaining token. */
    if( ii==0 ){
      if( pTC->nOvfl ){
        nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
      }else{
        /* TODO: Fix this so that the doclist need not be read twice. */
        Fts3PhraseToken *pToken = pTC->pToken;
        int nList = 0;
        char *pList = 0;
        rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList);
        if( rc==SQLITE_OK ){
          nDocEst = fts3DoclistCountDocids(1, pList, nList);
        }
        sqlite3_free(pList);
        if( rc==SQLITE_OK ){
          rc = sqlite3Fts3TermSegReaderCursor(pCsr, 
              pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
          );


        }
      }
    }else{
      if( pTC->nOvfl>=(nDocEst*nDocSize) ){
        Fts3PhraseToken *pToken = pTC->pToken;
        rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
        fts3SegReaderCursorFree(pToken->pSegcsr);







<




|
<
|
<

<
<
<
>
>







3688
3689
3690
3691
3692
3693
3694

3695
3696
3697
3698
3699

3700

3701



3702
3703
3704
3705
3706
3707
3708
3709
3710
    assert( pTC );

    /* At this point pTC points to the cheapest remaining token. */
    if( ii==0 ){
      if( pTC->nOvfl ){
        nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
      }else{

        Fts3PhraseToken *pToken = pTC->pToken;
        int nList = 0;
        char *pList = 0;
        rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList);
        assert( rc==SQLITE_OK || pList==0 );



        if( rc==SQLITE_OK ){



          nDocEst = fts3DoclistCountDocids(1, pList, nList);
          fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
        }
      }
    }else{
      if( pTC->nOvfl>=(nDocEst*nDocSize) ){
        Fts3PhraseToken *pToken = pTC->pToken;
        rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
        fts3SegReaderCursorFree(pToken->pSegcsr);
Changes to ext/fts3/fts3Int.h.
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317

318
319
320
321
322
323
324
  char *z;                        /* Text of the token */
  int n;                          /* Number of bytes in buffer z */
  int isPrefix;                   /* True if token ends with a "*" character */

  /* Variables above this point are populated when the expression is
  ** parsed (by code in fts3_expr.c). Below this point the variables are
  ** used when evaluating the expression. */
  int bFulltext;                  /* True if full-text index was used */
  Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
  Fts3MultiSegReader *pSegcsr;    /* Segment-reader for this token */
};

struct Fts3Phrase {
  /* Cache of doclist for this phrase. */
  Fts3Doclist doclist;
  int bIncr;                 /* True if doclist is loaded incrementally */


  /* Variables below this point are populated by fts3_expr.c when parsing 
  ** a MATCH expression. Everything above is part of the evaluation phase. 
  */
  int nToken;                /* Number of tokens in the phrase */
  int iColumn;               /* Index of column this phrase must match */
  Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */







<








>







302
303
304
305
306
307
308

309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
  char *z;                        /* Text of the token */
  int n;                          /* Number of bytes in buffer z */
  int isPrefix;                   /* True if token ends with a "*" character */

  /* Variables above this point are populated when the expression is
  ** parsed (by code in fts3_expr.c). Below this point the variables are
  ** used when evaluating the expression. */

  Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
  Fts3MultiSegReader *pSegcsr;    /* Segment-reader for this token */
};

struct Fts3Phrase {
  /* Cache of doclist for this phrase. */
  Fts3Doclist doclist;
  int bIncr;                 /* True if doclist is loaded incrementally */
  int iDoclistToken;

  /* Variables below this point are populated by fts3_expr.c when parsing 
  ** a MATCH expression. Everything above is part of the evaluation phase. 
  */
  int nToken;                /* Number of tokens in the phrase */
  int iColumn;               /* Index of column this phrase must match */
  Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */