/ Check-in [245e8730]
Login
Overview
Comment:Improve the performance of fts3/4 queries that use the OR operator and at least one auxiliary fts function.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:245e8730451fbdc1c729beff7295c452df604009
User & Date: dan 2015-01-27 18:43:02
References
2015-02-12
17:15
Improve the performance of fts3/4 queries that use the OR operator and at least one auxiliary fts function. Cherrypick of [245e8730451f]. check-in: b2082462 user: dan tags: branch-3.8.8
Context
2015-01-27
19:01
Fix a bug in the fts3 snippet() function causing it to omit leading separator characters from snippets that begin with the first token in a column. check-in: adc9283d user: dan tags: trunk
18:43
Improve the performance of fts3/4 queries that use the OR operator and at least one auxiliary fts function. check-in: 245e8730 user: dan tags: trunk
13:17
Fix a (almost always harmless) read past the end of a memory allocation that comes about because the Expr.pTab field is checked on an EXPR_REDUCEDSIZE Expr object before checking the Expr.op field to know that the Expr.pTab field is meaningless. check-in: e098de69 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

5016
5017
5018
5019
5020
5021
5022
















5023
5024
5025
5026
5027
5028
5029
....
5388
5389
5390
5391
5392
5393
5394

5395
5396
5397
5398
5399
5400
5401
....
5633
5634
5635
5636
5637
5638
5639

5640
5641
5642
5643
5644
5645
5646
....
5658
5659
5660
5661
5662
5663
5664
5665
5666
5667
5668
5669
5670
5671
5672
5673
5674
5675
5676
5677
5678
5679
5680
5681
5682
5683
5684
5685
5686
5687
5688
5689
5690
5691

5692
5693


5694
5695
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
5709
5710
5711

5712
5713
5714
5715
5716
5717
5718
5719
5720
5721
5722

5723

5724
5725
5726
5727
5728
5729
5730


5731
5732
5733
5734
5735
5736
5737
5738
5739
5740
5741
5742
5743
5744
5745
5746



5747
5748
5749
5750
5751
5752
5753
              fts3EvalNextRow(pCsr, pLeft, pRc);
            }else{
              fts3EvalNextRow(pCsr, pRight, pRc);
            }
          }
          pExpr->iDocid = pLeft->iDocid;
          pExpr->bEof = (pLeft->bEof || pRight->bEof);
















        }
        break;
      }
  
      case FTSQUERY_OR: {
        Fts3Expr *pLeft = pExpr->pLeft;
        Fts3Expr *pRight = pExpr->pRight;
................................................................................
            sqlite3Fts3MsrIncrRestart(pToken->pSegcsr);
          }
        }
        *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
      }
      pPhrase->doclist.pNextDocid = 0;
      pPhrase->doclist.iDocid = 0;

    }

    pExpr->iDocid = 0;
    pExpr->bEof = 0;
    pExpr->bStart = 0;

    fts3EvalRestart(pCsr, pExpr->pLeft, pRc);
................................................................................
  if( (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){
    return SQLITE_OK;
  }

  iDocid = pExpr->iDocid;
  pIter = pPhrase->doclist.pList;
  if( iDocid!=pCsr->iPrevId || pExpr->bEof ){

    int bDescDoclist = pTab->bDescIdx;      /* For DOCID_CMP macro */
    int iMul;                     /* +1 if csr dir matches index dir, else -1 */
    int bOr = 0;
    u8 bEof = 0;
    u8 bTreeEof = 0;
    Fts3Expr *p;                  /* Used to iterate from pExpr to root */
    Fts3Expr *pNear;              /* Most senior NEAR ancestor (or pExpr) */
................................................................................
    }
    if( bOr==0 ) return SQLITE_OK;

    /* This is the descendent of an OR node. In this case we cannot use
    ** an incremental phrase. Load the entire doclist for the phrase
    ** into memory in this case.  */
    if( pPhrase->bIncr ){
      int rc = SQLITE_OK;
      int bEofSave = pExpr->bEof;
      fts3EvalRestart(pCsr, pExpr, &rc);
      while( rc==SQLITE_OK && !pExpr->bEof ){
        fts3EvalNextRow(pCsr, pExpr, &rc);
        if( bEofSave==0 && pExpr->iDocid==iDocid ) break;
      }
      pIter = pPhrase->doclist.pList;
      assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
      if( rc!=SQLITE_OK ) return rc;
    }
    
    iMul = ((pCsr->bDesc==bDescDoclist) ? 1 : -1);
    while( bTreeEof==1 
        && pNear->bEof==0
        && (DOCID_CMP(pNear->iDocid, pCsr->iPrevId) * iMul)<0
    ){
      int rc = SQLITE_OK;
      fts3EvalNextRow(pCsr, pExpr, &rc);
      if( rc!=SQLITE_OK ) return rc;
      iDocid = pExpr->iDocid;
      pIter = pPhrase->doclist.pList;
    }

    bEof = (pPhrase->doclist.nAll==0);
    assert( bDescDoclist==0 || bDescDoclist==1 );
    assert( pCsr->bDesc==0 || pCsr->bDesc==1 );


    if( bEof==0 ){


      if( pCsr->bDesc==bDescDoclist ){
        int dummy;
        if( pNear->bEof ){
          /* This expression is already at EOF. So position it to point to the
          ** last entry in the doclist at pPhrase->doclist.aAll[]. Variable
          ** iDocid is already set for this entry, so all that is required is
          ** to set pIter to point to the first byte of the last position-list
          ** in the doclist. 
          **
          ** It would also be correct to set pIter and iDocid to zero. In
          ** this case, the first call to sqltie3Fts4DoclistPrev() below
          ** would also move the iterator to point to the last entry in the 
          ** doclist. However, this is expensive, as to do so it has to 
          ** iterate through the entire doclist from start to finish (since
          ** it does not know the docid for the last entry).  */
          pIter = &pPhrase->doclist.aAll[pPhrase->doclist.nAll-1];
          fts3ReversePoslist(pPhrase->doclist.aAll, &pIter);
        }

        while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
          sqlite3Fts3DoclistPrev(
              bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, 
              &pIter, &iDocid, &dummy, &bEof
          );
        }
      }else{
        if( pNear->bEof ){
          pIter = 0;
          iDocid = 0;
        }

        while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){

          sqlite3Fts3DoclistNext(
              bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, 
              &pIter, &iDocid, &bEof
          );
        }
      }
    }



    if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0;
  }
  if( pIter==0 ) return SQLITE_OK;

  if( *pIter==0x01 ){
    pIter++;
    pIter += fts3GetVarint32(pIter, &iThis);
  }else{
    iThis = 0;
  }
  while( iThis<iCol ){
    fts3ColumnlistCopy(0, &pIter);
    if( *pIter==0x00 ) return 0;
    pIter++;
    pIter += fts3GetVarint32(pIter, &iThis);



  }

  *ppOut = ((iCol==iThis)?pIter:0);
  return SQLITE_OK;
}

/*







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>







 







>







 







<
|
|
|
|
|

<

<

<
<
|
|
<
<
<
|
<
<
<
|
|
<
<
<
>

<
>
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
>
|
|
|
|
|
|
|
<
<
<
<
>
|
>
|
|
|
|
|
|
<
>
>













|


>
>
>







5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
....
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414
5415
5416
5417
5418
....
5650
5651
5652
5653
5654
5655
5656
5657
5658
5659
5660
5661
5662
5663
5664
....
5676
5677
5678
5679
5680
5681
5682

5683
5684
5685
5686
5687
5688

5689

5690


5691
5692



5693



5694
5695



5696
5697

5698
5699
5700

















5701
5702
5703
5704
5705
5706
5707
5708




5709
5710
5711
5712
5713
5714
5715
5716
5717

5718
5719
5720
5721
5722
5723
5724
5725
5726
5727
5728
5729
5730
5731
5732
5733
5734
5735
5736
5737
5738
5739
5740
5741
5742
5743
5744
5745
              fts3EvalNextRow(pCsr, pLeft, pRc);
            }else{
              fts3EvalNextRow(pCsr, pRight, pRc);
            }
          }
          pExpr->iDocid = pLeft->iDocid;
          pExpr->bEof = (pLeft->bEof || pRight->bEof);
          if( pExpr->eType==FTSQUERY_NEAR && pExpr->bEof ){
            if( pRight->pPhrase && pRight->pPhrase->doclist.aAll ){
              Fts3Doclist *pDl = &pRight->pPhrase->doclist;
              while( *pRc==SQLITE_OK && pRight->bEof==0 ){
                memset(pDl->pList, 0, pDl->nList);
                fts3EvalNextRow(pCsr, pRight, pRc);
              }
            }
            if( pLeft->pPhrase && pLeft->pPhrase->doclist.aAll ){
              Fts3Doclist *pDl = &pLeft->pPhrase->doclist;
              while( *pRc==SQLITE_OK && pLeft->bEof==0 ){
                memset(pDl->pList, 0, pDl->nList);
                fts3EvalNextRow(pCsr, pLeft, pRc);
              }
            }
          }
        }
        break;
      }
  
      case FTSQUERY_OR: {
        Fts3Expr *pLeft = pExpr->pLeft;
        Fts3Expr *pRight = pExpr->pRight;
................................................................................
            sqlite3Fts3MsrIncrRestart(pToken->pSegcsr);
          }
        }
        *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
      }
      pPhrase->doclist.pNextDocid = 0;
      pPhrase->doclist.iDocid = 0;
      pPhrase->pOrPoslist = 0;
    }

    pExpr->iDocid = 0;
    pExpr->bEof = 0;
    pExpr->bStart = 0;

    fts3EvalRestart(pCsr, pExpr->pLeft, pRc);
................................................................................
  if( (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){
    return SQLITE_OK;
  }

  iDocid = pExpr->iDocid;
  pIter = pPhrase->doclist.pList;
  if( iDocid!=pCsr->iPrevId || pExpr->bEof ){
    int rc = SQLITE_OK;
    int bDescDoclist = pTab->bDescIdx;      /* For DOCID_CMP macro */
    int iMul;                     /* +1 if csr dir matches index dir, else -1 */
    int bOr = 0;
    u8 bEof = 0;
    u8 bTreeEof = 0;
    Fts3Expr *p;                  /* Used to iterate from pExpr to root */
    Fts3Expr *pNear;              /* Most senior NEAR ancestor (or pExpr) */
................................................................................
    }
    if( bOr==0 ) return SQLITE_OK;

    /* This is the descendent of an OR node. In this case we cannot use
    ** an incremental phrase. Load the entire doclist for the phrase
    ** into memory in this case.  */
    if( pPhrase->bIncr ){

      int bEofSave = pNear->bEof;
      fts3EvalRestart(pCsr, pNear, &rc);
      while( rc==SQLITE_OK && !pNear->bEof ){
        fts3EvalNextRow(pCsr, pNear, &rc);
        if( bEofSave==0 && pNear->iDocid==iDocid ) break;
      }

      assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );

    }


    if( bTreeEof ){
      while( rc==SQLITE_OK && !pNear->bEof ){



        fts3EvalNextRow(pCsr, pNear, &rc);



      }
    }



    if( rc!=SQLITE_OK ) return rc;


    pIter = pPhrase->pOrPoslist;
    iDocid = pPhrase->iOrDocid;
    if( pCsr->bDesc==bDescDoclist ){

















      bEof = (pIter >= (pPhrase->doclist.aAll + pPhrase->doclist.nAll));
      while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
        sqlite3Fts3DoclistNext(
            bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, 
            &pIter, &iDocid, &bEof
        );
      }
    }else{




      bEof = !pPhrase->doclist.nAll || (pIter && pIter<=pPhrase->doclist.aAll);
      while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
        int dummy;
        sqlite3Fts3DoclistPrev(
            bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, 
            &pIter, &iDocid, &dummy, &bEof
        );
      }
    }

    pPhrase->pOrPoslist = pIter;
    pPhrase->iOrDocid = iDocid;

    if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0;
  }
  if( pIter==0 ) return SQLITE_OK;

  if( *pIter==0x01 ){
    pIter++;
    pIter += fts3GetVarint32(pIter, &iThis);
  }else{
    iThis = 0;
  }
  while( iThis<iCol ){
    fts3ColumnlistCopy(0, &pIter);
    if( *pIter==0x00 ) return SQLITE_OK;
    pIter++;
    pIter += fts3GetVarint32(pIter, &iThis);
  }
  if( *pIter==0x00 ){
    pIter = 0;
  }

  *ppOut = ((iCol==iThis)?pIter:0);
  return SQLITE_OK;
}

/*

Changes to ext/fts3/fts3Int.h.

370
371
372
373
374
375
376





377
378
379
380
381
382
383
};

struct Fts3Phrase {
  /* Cache of doclist for this phrase. */
  Fts3Doclist doclist;
  int bIncr;                 /* True if doclist is loaded incrementally */
  int iDoclistToken;






  /* Variables below this point are populated by fts3_expr.c when parsing 
  ** a MATCH expression. Everything above is part of the evaluation phase. 
  */
  int nToken;                /* Number of tokens in the phrase */
  int iColumn;               /* Index of column this phrase must match */
  Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */







>
>
>
>
>







370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
};

struct Fts3Phrase {
  /* Cache of doclist for this phrase. */
  Fts3Doclist doclist;
  int bIncr;                 /* True if doclist is loaded incrementally */
  int iDoclistToken;

  /* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendent of an
  ** OR condition.  */
  char *pOrPoslist;
  i64 iOrDocid;

  /* Variables below this point are populated by fts3_expr.c when parsing 
  ** a MATCH expression. Everything above is part of the evaluation phase. 
  */
  int nToken;                /* Number of tokens in the phrase */
  int iColumn;               /* Index of column this phrase must match */
  Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */

Changes to ext/fts3/fts3_snippet.c.

438
439
440
441
442
443
444
445

446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474


475
476
477
478
479
480
481
482
  ** the set of phrases in the expression to populate the aPhrase[] array.
  */
  sIter.pCsr = pCsr;
  sIter.iCol = iCol;
  sIter.nSnippet = nSnippet;
  sIter.nPhrase = nList;
  sIter.iCurrent = -1;
  (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);


  /* Set the *pmSeen output variable. */
  for(i=0; i<nList; i++){
    if( sIter.aPhrase[i].pHead ){
      *pmSeen |= (u64)1 << i;
    }
  }

  /* Loop through all candidate snippets. Store the best snippet in 
  ** *pFragment. Store its associated 'score' in iBestScore.
  */
  pFragment->iCol = iCol;
  while( !fts3SnippetNextCandidate(&sIter) ){
    int iPos;
    int iScore;
    u64 mCover;
    u64 mHighlight;
    fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight);
    assert( iScore>=0 );
    if( iScore>iBestScore ){
      pFragment->iPos = iPos;
      pFragment->hlmask = mHighlight;
      pFragment->covered = mCover;
      iBestScore = iScore;
    }
  }

  sqlite3_free(sIter.aPhrase);
  *piScore = iBestScore;


  return SQLITE_OK;
}


/*
** Append a string to the string-buffer passed as the first argument.
**
** If nAppend is negative, then the length of the string zAppend is







|
>

|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

<
|
>
>
|







438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473

474
475
476
477
478
479
480
481
482
483
484
  ** the set of phrases in the expression to populate the aPhrase[] array.
  */
  sIter.pCsr = pCsr;
  sIter.iCol = iCol;
  sIter.nSnippet = nSnippet;
  sIter.nPhrase = nList;
  sIter.iCurrent = -1;
  rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
  if( rc==SQLITE_OK ){

    /* Set the *pmSeen output variable. */
    for(i=0; i<nList; i++){
      if( sIter.aPhrase[i].pHead ){
        *pmSeen |= (u64)1 << i;
      }
    }

    /* Loop through all candidate snippets. Store the best snippet in 
     ** *pFragment. Store its associated 'score' in iBestScore.
     */
    pFragment->iCol = iCol;
    while( !fts3SnippetNextCandidate(&sIter) ){
      int iPos;
      int iScore;
      u64 mCover;
      u64 mHighlite;
      fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite);
      assert( iScore>=0 );
      if( iScore>iBestScore ){
        pFragment->iPos = iPos;
        pFragment->hlmask = mHighlite;
        pFragment->covered = mCover;
        iBestScore = iScore;
      }
    }


    *piScore = iBestScore;
  }
  sqlite3_free(sIter.aPhrase);
  return rc;
}


/*
** Append a string to the string-buffer passed as the first argument.
**
** If nAppend is negative, then the length of the string zAppend is