SQLite

Check-in [9d10a6846b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Have NEAR queries use incremental merging. Fix issues surrounding the deferred token optimization.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts3-prefix-search
Files: files | file ages | folders
SHA1: 9d10a6846b12a9cc8fd4fdc3affd931a27218b5a
User & Date: dan 2011-06-07 18:35:45.780
Context
2011-06-08
18:39
Fix various issues to do with deferred tokens, NEAR expressions and matchinfo(). (check-in: 3972a787df user: dan tags: fts3-prefix-search)
2011-06-07
18:35
Have NEAR queries use incremental merging. Fix issues surrounding the deferred token optimization. (check-in: 9d10a6846b user: dan tags: fts3-prefix-search)
2011-06-06
18:14
Merge the latest trunk changes into the fts3-prefix-search branch. (check-in: 567dd84359 user: drh tags: fts3-prefix-search)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
  }
  memset(p, 0, nByte);
  p->db = db;
  p->nColumn = nCol;
  p->nPendingData = 0;
  p->azColumn = (char **)&p[1];
  p->pTokenizer = pTokenizer;
  p->nNodeSize = 1000;
  p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
  p->bHasDocsize = (isFts4 && bNoDocsize==0);
  p->bHasStat = isFts4;
  p->bDescIdx = bDescIdx;
  TESTONLY( p->inTransaction = -1 );
  TESTONLY( p->mxSavepoint = -1 );








<







1065
1066
1067
1068
1069
1070
1071

1072
1073
1074
1075
1076
1077
1078
  }
  memset(p, 0, nByte);
  p->db = db;
  p->nColumn = nCol;
  p->nPendingData = 0;
  p->azColumn = (char **)&p[1];
  p->pTokenizer = pTokenizer;

  p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
  p->bHasDocsize = (isFts4 && bNoDocsize==0);
  p->bHasStat = isFts4;
  p->bDescIdx = bDescIdx;
  TESTONLY( p->inTransaction = -1 );
  TESTONLY( p->mxSavepoint = -1 );

1123
1124
1125
1126
1127
1128
1129

1130
1131
1132
1133
1134
1135
1136
  }

  /* Figure out the page-size for the database. This is required in order to
  ** estimate the cost of loading large doclists from the database (see 
  ** function sqlite3Fts3SegReaderCost() for details).
  */
  fts3DatabasePageSize(&rc, p);


  /* Declare the table schema to SQLite. */
  fts3DeclareVtab(&rc, p);

fts3_init_out:
  sqlite3_free(zPrefix);
  sqlite3_free(aFree);







>







1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
  }

  /* Figure out the page-size for the database. This is required in order to
  ** estimate the cost of loading large doclists from the database (see 
  ** function sqlite3Fts3SegReaderCost() for details).
  */
  fts3DatabasePageSize(&rc, p);
  p->nNodeSize = p->nPgsz-35;

  /* Declare the table schema to SQLite. */
  fts3DeclareVtab(&rc, p);

fts3_init_out:
  sqlite3_free(zPrefix);
  sqlite3_free(aFree);
1858
1859
1860
1861
1862
1863
1864
1865












1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
  }
  *p++ = 0x00;
  *pp = p;
  return 1;
}

/*
** Merge two position-lists as required by the NEAR operator.












*/
static int fts3PoslistNearMerge(
  char **pp,                      /* Output buffer */
  char *aTmp,                     /* Temporary buffer space */
  int nRight,                     /* Maximum difference in token positions */
  int nLeft,                      /* Maximum difference in token positions */
  char **pp1,                     /* IN/OUT: Left input list */
  char **pp2                      /* IN/OUT: Right input list */
){
  char *p1 = *pp1;
  char *p2 = *pp2;

  if( !pp ){
    if( fts3PoslistPhraseMerge(0, nRight, 0, 0, pp1, pp2) ) return 1;
    *pp1 = p1;
    *pp2 = p2;
    return fts3PoslistPhraseMerge(0, nLeft, 0, 0, pp2, pp1);
  }else{
    char *pTmp1 = aTmp;
    char *pTmp2;
    char *aTmp2;
    int res = 1;

    fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2);
    aTmp2 = pTmp2 = pTmp1;
    *pp1 = p1;
    *pp2 = p2;
    fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1);
    if( pTmp1!=aTmp && pTmp2!=aTmp2 ){
      fts3PoslistMerge(pp, &aTmp, &aTmp2);
    }else if( pTmp1!=aTmp ){
      fts3PoslistCopy(pp, &aTmp);
    }else if( pTmp2!=aTmp2 ){
      fts3PoslistCopy(pp, &aTmp2);
    }else{
      res = 0;
    }

    return res;
  }
}

/* 
** A pointer to an instance of this structure is used as the context 
** argument to sqlite3Fts3SegReaderIterate()
*/
typedef struct TermSelect TermSelect;







|
>
>
>
>
>
>
>
>
>
>
>
>












<
<
<
<
<
<
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
<







1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889






1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910

1911
1912
1913
1914
1915
1916
1917
  }
  *p++ = 0x00;
  *pp = p;
  return 1;
}

/*
** Merge two position-lists as required by the NEAR operator. The argument
** position lists correspond to the left and right phrases of an expression 
** like:
**
**     "phrase 1" NEAR "phrase number 2"
**
** Position list *pp1 corresponds to the left-hand side of the NEAR 
** expression and *pp2 to the right. As usual, the indexes in the position 
** lists are the offsets of the last token in each phrase (tokens "1" and "2" 
** in the example above).
**
** The output position list - written to *pp - is a copy of *pp2 with those
** entries that are not sufficiently NEAR entries in *pp1 removed.
*/
static int fts3PoslistNearMerge(
  char **pp,                      /* Output buffer */
  char *aTmp,                     /* Temporary buffer space */
  int nRight,                     /* Maximum difference in token positions */
  int nLeft,                      /* Maximum difference in token positions */
  char **pp1,                     /* IN/OUT: Left input list */
  char **pp2                      /* IN/OUT: Right input list */
){
  char *p1 = *pp1;
  char *p2 = *pp2;







  char *pTmp1 = aTmp;
  char *pTmp2;
  char *aTmp2;
  int res = 1;

  fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2);
  aTmp2 = pTmp2 = pTmp1;
  *pp1 = p1;
  *pp2 = p2;
  fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1);
  if( pTmp1!=aTmp && pTmp2!=aTmp2 ){
    fts3PoslistMerge(pp, &aTmp, &aTmp2);
  }else if( pTmp1!=aTmp ){
    fts3PoslistCopy(pp, &aTmp);
  }else if( pTmp2!=aTmp2 ){
    fts3PoslistCopy(pp, &aTmp2);
  }else{
    res = 0;
  }

  return res;

}

/* 
** A pointer to an instance of this structure is used as the context 
** argument to sqlite3Fts3SegReaderIterate()
*/
typedef struct TermSelect TermSelect;
3240
3241
3242
3243
3244
3245
3246

3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264

3265
3266
3267
3268
3269
3270
3271
3272
3273
** and merged incrementally. Otherwise, it has to be merged into an in-memory 
** doclist and then traversed.
*/
static void fts3EvalAllocateReaders(
  Fts3Cursor *pCsr, 
  Fts3Expr *pExpr, 
  int *pnToken,                   /* OUT: Total number of tokens in phrase. */

  int *pRc
){
  if( pExpr && SQLITE_OK==*pRc ){
    if( pExpr->eType==FTSQUERY_PHRASE ){
      int i;
      int nToken = pExpr->pPhrase->nToken;
      *pnToken += nToken;
      for(i=0; i<nToken; i++){
        Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
        int rc = sqlite3Fts3TermSegReaderCursor(pCsr, 
            pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
        );
        if( rc!=SQLITE_OK ){
          *pRc = rc;
          return;
        }
      }
    }else{

      fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pRc);
      fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pRc);
    }
  }
}

static int fts3EvalPhraseLoad(
  Fts3Cursor *pCsr, 
  Fts3Phrase *p







>


















>
|
|







3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
** and merged incrementally. Otherwise, it has to be merged into an in-memory 
** doclist and then traversed.
*/
static void fts3EvalAllocateReaders(
  Fts3Cursor *pCsr, 
  Fts3Expr *pExpr, 
  int *pnToken,                   /* OUT: Total number of tokens in phrase. */
  int *pnOr,                      /* OUT: Total number of OR nodes in expr. */
  int *pRc
){
  if( pExpr && SQLITE_OK==*pRc ){
    if( pExpr->eType==FTSQUERY_PHRASE ){
      int i;
      int nToken = pExpr->pPhrase->nToken;
      *pnToken += nToken;
      for(i=0; i<nToken; i++){
        Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
        int rc = sqlite3Fts3TermSegReaderCursor(pCsr, 
            pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
        );
        if( rc!=SQLITE_OK ){
          *pRc = rc;
          return;
        }
      }
    }else{
      *pnOr += (pExpr->eType==FTSQUERY_OR);
      fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
      fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
    }
  }
}

static int fts3EvalPhraseLoad(
  Fts3Cursor *pCsr, 
  Fts3Phrase *p
3541
3542
3543
3544
3545
3546
3547

3548
3549
3550
3551
3552
3553
3554
){
  int rc = SQLITE_OK;
  Fts3Doclist *pDL = &p->doclist;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;

  if( p->bIncr ){
    assert( p->nToken==1 );

    rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, 
        &pDL->iDocid, &pDL->pList, &pDL->nList
    );
    if( rc==SQLITE_OK && !pDL->pList ){
      *pbEof = 1;
    }
  }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){







>







3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
){
  int rc = SQLITE_OK;
  Fts3Doclist *pDL = &p->doclist;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;

  if( p->bIncr ){
    assert( p->nToken==1 );
    assert( pDL->pNextDocid==0 );
    rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, 
        &pDL->iDocid, &pDL->pList, &pDL->nList
    );
    if( rc==SQLITE_OK && !pDL->pList ){
      *pbEof = 1;
    }
  }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
      int nToken = pExpr->pPhrase->nToken;
      for(i=0; i<nToken; i++){
        if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
      }
      pExpr->bDeferred = (i==nToken);
      *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase);
    }else{
      if( pExpr->eType==FTSQUERY_NEAR ){
        bOptOk = 0;
      }
      fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
      fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
      pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
    }
  }
}








<
<
<







3606
3607
3608
3609
3610
3611
3612



3613
3614
3615
3616
3617
3618
3619
      int nToken = pExpr->pPhrase->nToken;
      for(i=0; i<nToken; i++){
        if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
      }
      pExpr->bDeferred = (i==nToken);
      *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase);
    }else{



      fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
      fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
      pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
    }
  }
}

3696
3697
3698
3699
3700
3701
3702

3703
3704
3705
3706
3707
3708

3709
3710

3711
3712
3713
3714
3715
3716
3717
3718

3719
3720
3721
3722
3723





3724
3725
3726
3727
3728
3729
3730
3731
3732
    }
  }
}

typedef struct Fts3TokenAndCost Fts3TokenAndCost;
struct Fts3TokenAndCost {
  Fts3PhraseToken *pToken;

  int nOvfl;
  int iCol;
};

static void fts3EvalTokenCosts(
  Fts3Cursor *pCsr, 

  Fts3Expr *pExpr, 
  Fts3TokenAndCost **ppTC,

  int *pRc
){
  if( *pRc==SQLITE_OK && pExpr ){
    if( pExpr->eType==FTSQUERY_PHRASE ){
      Fts3Phrase *pPhrase = pExpr->pPhrase;
      int i;
      for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
        Fts3TokenAndCost *pTC = (*ppTC)++;

        pTC->pToken = &pPhrase->aToken[i];
        pTC->iCol = pPhrase->iColumn;
        *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
      }
    }else if( pExpr->eType==FTSQUERY_AND ){





      fts3EvalTokenCosts(pCsr, pExpr->pLeft, ppTC, pRc);
      fts3EvalTokenCosts(pCsr, pExpr->pRight, ppTC, pRc);
    }
  }
}

static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
  if( pCsr->nRowAvg==0 ){
    /* The average document size, which is required to calculate the cost







>






>


>








>




|
>
>
>
>
>
|
|







3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
    }
  }
}

typedef struct Fts3TokenAndCost Fts3TokenAndCost;
struct Fts3TokenAndCost {
  Fts3PhraseToken *pToken;
  Fts3Expr *pRoot;
  int nOvfl;
  int iCol;
};

static void fts3EvalTokenCosts(
  Fts3Cursor *pCsr, 
  Fts3Expr *pRoot, 
  Fts3Expr *pExpr, 
  Fts3TokenAndCost **ppTC,
  Fts3Expr ***ppOr,
  int *pRc
){
  if( *pRc==SQLITE_OK && pExpr ){
    if( pExpr->eType==FTSQUERY_PHRASE ){
      Fts3Phrase *pPhrase = pExpr->pPhrase;
      int i;
      for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
        Fts3TokenAndCost *pTC = (*ppTC)++;
        pTC->pRoot = pRoot;
        pTC->pToken = &pPhrase->aToken[i];
        pTC->iCol = pPhrase->iColumn;
        *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
      }
    }else if( pExpr->eType!=FTSQUERY_NOT ){
      if( pExpr->eType==FTSQUERY_OR ){
        pRoot = pExpr;
        **ppOr = pExpr;
        (*ppOr)++;
      }
      fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc);
      fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc);
    }
  }
}

static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
  if( pCsr->nRowAvg==0 ){
    /* The average document size, which is required to calculate the cost
3768
3769
3770
3771
3772
3773
3774
3775









































































3776
3777
3778
3779

3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805

3806





3807
3808
3809
3810
3811
3812
3813

3814
3815
3816
3817

3818






3819
3820
3821
3822
3823
3824
3825
3826
3827

3828
3829
3830
3831
3832
3833
3834
    rc = sqlite3_reset(pStmt);
    if( rc!=SQLITE_OK ) return rc;
  }

  *pnPage = pCsr->nRowAvg;
  return SQLITE_OK;
}










































































int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc = SQLITE_OK;
  int nToken = 0;


  /* Allocate a MultiSegReader for each token in the expression. */
  fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &rc);

  /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO:
  ** This call will eventually also be responsible for determining which
  ** tokens are 'deferred' until the document text is loaded into memory.
  **
  ** Each token in each phrase is dealt with using one of the following
  ** three strategies:
  **
  **   1. Entire doclist loaded into memory as part of the
  **      fts3EvalStartReaders() call.
  **
  **   2. Doclist loaded into memory incrementally, as part of each
  **      sqlite3Fts3EvalNext() call.
  **
  **   3. Token doclist is never loaded. Instead, documents are loaded into
  **      memory and scanned for the token as part of the sqlite3Fts3EvalNext()
  **      call. This is known as a "deferred" token.
  */

  /* If bOptOk is true, check if there are any tokens that can be 
  ** deferred (strategy 3). */
  if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){
    Fts3TokenAndCost *aTC;

    aTC = (Fts3TokenAndCost *)sqlite3_malloc(sizeof(Fts3TokenAndCost) * nToken);





    if( !aTC ){
      rc = SQLITE_NOMEM;
    }else{
      int ii;
      int nDocEst = 0;
      int nDocSize;
      Fts3TokenAndCost *pTC = aTC;


      rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
      fts3EvalTokenCosts(pCsr, pExpr, &pTC, &rc);
      nToken = pTC-aTC;








      for(ii=0; rc==SQLITE_OK && ii<nToken; ii++){
        int jj;
        pTC = 0;
        for(jj=0; jj<nToken; jj++){
          if( aTC[jj].pToken && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) ){
            pTC = &aTC[jj];
          }
        }
        assert( pTC );


        /* At this point pTC points to the cheapest remaining token. */
        if( ii==0 ){
          if( pTC->nOvfl ){
            nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
          }else{
            /* TODO: Fix this so that the doclist need not be read twice. */








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




>


|



















|
|


>
|
>
>
>
>
>




<


>

<
|

>

>
>
>
>
>
>









>







3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904

3905
3906
3907
3908

3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
    rc = sqlite3_reset(pStmt);
    if( rc!=SQLITE_OK ) return rc;
  }

  *pnPage = pCsr->nRowAvg;
  return SQLITE_OK;
}

static int fts3EvalSelectDeferred(
  Fts3Cursor *pCsr,
  Fts3Expr *pRoot,
  Fts3TokenAndCost *aTC,
  int nTC
){
  int nDocSize = 0;
  int nDocEst = 0;
  int rc = SQLITE_OK;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int ii;

  int nOvfl = 0;
  int nTerm = 0;

  for(ii=0; ii<nTC; ii++){
    if( aTC[ii].pRoot==pRoot ){
      nOvfl += aTC[ii].nOvfl;
      nTerm++;
    }
  }
  if( nOvfl==0 || nTerm<2 ) return SQLITE_OK;

  rc = fts3EvalAverageDocsize(pCsr, &nDocSize);

  for(ii=0; ii<nTerm && rc==SQLITE_OK; ii++){
    int jj;
    Fts3TokenAndCost *pTC = 0;

    for(jj=0; jj<nTC; jj++){
      if( aTC[jj].pToken && aTC[jj].pRoot==pRoot 
       && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) 
      ){
        pTC = &aTC[jj];
      }
    }
    assert( pTC );

    /* At this point pTC points to the cheapest remaining token. */
    if( ii==0 ){
      if( pTC->nOvfl ){
        nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
      }else{
        /* TODO: Fix this so that the doclist need not be read twice. */
        Fts3PhraseToken *pToken = pTC->pToken;
        int nList = 0;
        char *pList = 0;
        rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList);
        if( rc==SQLITE_OK ){
          nDocEst = fts3DoclistCountDocids(1, pList, nList);
        }
        sqlite3_free(pList);
        if( rc==SQLITE_OK ){
          rc = sqlite3Fts3TermSegReaderCursor(pCsr, 
              pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
          );
        }
      }
    }else{
      if( pTC->nOvfl>=(nDocEst*nDocSize) ){
        Fts3PhraseToken *pToken = pTC->pToken;
        rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
        fts3SegReaderCursorFree(pToken->pSegcsr);
        pToken->pSegcsr = 0;
      }
      nDocEst = 1 + (nDocEst/4);
    }
    pTC->pToken = 0;
  }

  return rc;
}

int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc = SQLITE_OK;
  int nToken = 0;
  int nOr = 0;

  /* Allocate a MultiSegReader for each token in the expression. */
  fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &nOr, &rc);

  /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO:
  ** This call will eventually also be responsible for determining which
  ** tokens are 'deferred' until the document text is loaded into memory.
  **
  ** Each token in each phrase is dealt with using one of the following
  ** three strategies:
  **
  **   1. Entire doclist loaded into memory as part of the
  **      fts3EvalStartReaders() call.
  **
  **   2. Doclist loaded into memory incrementally, as part of each
  **      sqlite3Fts3EvalNext() call.
  **
  **   3. Token doclist is never loaded. Instead, documents are loaded into
  **      memory and scanned for the token as part of the sqlite3Fts3EvalNext()
  **      call. This is known as a "deferred" token.
  */

  /* If bOptOk is true, check if there are any tokens that should be deferred.
  */
  if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){
    Fts3TokenAndCost *aTC;
    Fts3Expr **apOr;
    aTC = (Fts3TokenAndCost *)sqlite3_malloc(
        sizeof(Fts3TokenAndCost) * nToken
      + sizeof(Fts3Expr *) * nOr
    );
    apOr = (Fts3Expr **)&aTC[nToken];

    if( !aTC ){
      rc = SQLITE_NOMEM;
    }else{
      int ii;

      int nDocSize;
      Fts3TokenAndCost *pTC = aTC;
      Fts3Expr **ppOr = apOr;


      fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc);
      nToken = pTC-aTC;
      nOr = ppOr-apOr;

      rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
      for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){
        rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken);
      }

#if 0
      for(ii=0; rc==SQLITE_OK && ii<nToken; ii++){
        int jj;
        pTC = 0;
        for(jj=0; jj<nToken; jj++){
          if( aTC[jj].pToken && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) ){
            pTC = &aTC[jj];
          }
        }
        assert( pTC );


        /* At this point pTC points to the cheapest remaining token. */
        if( ii==0 ){
          if( pTC->nOvfl ){
            nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
          }else{
            /* TODO: Fix this so that the doclist need not be read twice. */
3853
3854
3855
3856
3857
3858
3859


3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879




























































































3880
3881
3882
3883
3884
3885
3886
            fts3SegReaderCursorFree(pToken->pSegcsr);
            pToken->pSegcsr = 0;
          }
          nDocEst = 1 + (nDocEst/4);
        }
        pTC->pToken = 0;
      }


      sqlite3_free(aTC);
    }
  }

  fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc);

  /* Fix the results of NEAR expressions. */
  fts3EvalNearTrim(pCsr, pExpr, &rc);

  return rc;
}

static void fts3EvalFreeDeferredDoclist(Fts3Phrase *pPhrase){
  if( pPhrase->doclist.bFreeList ){
    sqlite3_free(pPhrase->doclist.pList);
    pPhrase->doclist.pList = 0;
    pPhrase->doclist.nList = 0;
    pPhrase->doclist.bFreeList = 0;
  }
}





























































































/*
** This macro is used by the fts3EvalNext() function. The two arguments are
** 64-bit docid values. If the current query is "ORDER BY docid ASC", then
** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then
** it returns (i2 - i1). This allows the same code to be used for merging
** doclists in ascending or descending order.







>
>





<
<
<
<











>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967




3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
            fts3SegReaderCursorFree(pToken->pSegcsr);
            pToken->pSegcsr = 0;
          }
          nDocEst = 1 + (nDocEst/4);
        }
        pTC->pToken = 0;
      }
#endif

      sqlite3_free(aTC);
    }
  }

  fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc);




  return rc;
}

static void fts3EvalFreeDeferredDoclist(Fts3Phrase *pPhrase){
  if( pPhrase->doclist.bFreeList ){
    sqlite3_free(pPhrase->doclist.pList);
    pPhrase->doclist.pList = 0;
    pPhrase->doclist.nList = 0;
    pPhrase->doclist.bFreeList = 0;
  }
}

static int fts3EvalNearTrim2(
  int nNear,
  char *aTmp,                     /* Temporary space to use */
  char **paPoslist,               /* IN/OUT: Position list */
  int *pnToken,                   /* IN/OUT: Tokens in phrase of *paPoslist */
  Fts3Phrase *pPhrase             /* The phrase object to trim the doclist of */
){
  int nParam1 = nNear + pPhrase->nToken;
  int nParam2 = nNear + *pnToken;
  char *p2; 
  char *pOut; 
  int res;

  p2 = pOut = pPhrase->doclist.pList;
  res = fts3PoslistNearMerge(
    &pOut, aTmp, nParam1, nParam2, paPoslist, &p2
  );
  pPhrase->doclist.nList = pOut - pPhrase->doclist.pList;
  *paPoslist = pPhrase->doclist.pList;
  *pnToken = pPhrase->nToken;

  return res;
}

static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
  int res = 1;

  /* The following block runs if pExpr is the root of a NEAR query.
  ** For example, the query:
  **
  **         "w" NEAR "x" NEAR "y" NEAR "z"
  **
  ** which is represented in tree form as:
  **
  **                               |
  **                          +--NEAR--+      <-- root of NEAR query
  **                          |        |
  **                     +--NEAR--+   "z"
  **                     |        |
  **                +--NEAR--+   "y"
  **                |        |
  **               "w"      "x"
  **
  ** The right-hand child of a NEAR node is always a phrase. The 
  ** left-hand child may be either a phrase or a NEAR node. There are
  ** no exceptions to this.
  */
  if( *pRc==SQLITE_OK 
   && pExpr->eType==FTSQUERY_NEAR 
   && pExpr->bEof==0
   && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
  ){
    Fts3Expr *p; 
    int nTmp = 0;                 /* Bytes of temp space */
    char *aTmp;                   /* Temp space for PoslistNearMerge() */

    /* Allocate temporary working space. */
    for(p=pExpr; p->pLeft; p=p->pLeft){
      nTmp += p->pRight->pPhrase->doclist.nList;
    }
    nTmp += p->pPhrase->doclist.nList;
    aTmp = sqlite3_malloc(nTmp*2);
    if( !aTmp ){
      *pRc = SQLITE_NOMEM;
      res = 0;
    }else{
      char *aPoslist = p->pPhrase->doclist.pList;
      int nToken = p->pPhrase->nToken;

      for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
        Fts3Phrase *pPhrase = p->pRight->pPhrase;
        int nNear = p->nNear;
        res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
      }
  
      aPoslist = pExpr->pRight->pPhrase->doclist.pList;
      nToken = pExpr->pRight->pPhrase->nToken;
      for(p=pExpr->pLeft; p && res; p=p->pLeft){
        int nNear = p->pParent->nNear;
        Fts3Phrase *pPhrase = (
            p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
        );
        res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
      }
    }

    sqlite3_free(aTmp);
  }

  return res;
}

/*
** This macro is used by the fts3EvalNext() function. The two arguments are
** 64-bit docid values. If the current query is "ORDER BY docid ASC", then
** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then
** it returns (i2 - i1). This allows the same code to be used for merging
** doclists in ascending or descending order.
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
    pExpr->bStart = 1;

    switch( pExpr->eType ){
      case FTSQUERY_NEAR:
      case FTSQUERY_AND: {
        Fts3Expr *pLeft = pExpr->pLeft;
        Fts3Expr *pRight = pExpr->pRight;

        assert( !pLeft->bDeferred || !pRight->bDeferred );
        if( pLeft->bDeferred ){
          fts3EvalNext(pCsr, pRight, pRc);
          pExpr->iDocid = pRight->iDocid;
          pExpr->bEof = pRight->bEof;
        }else if( pRight->bDeferred ){
          fts3EvalNext(pCsr, pLeft, pRc);







<







4088
4089
4090
4091
4092
4093
4094

4095
4096
4097
4098
4099
4100
4101
    pExpr->bStart = 1;

    switch( pExpr->eType ){
      case FTSQUERY_NEAR:
      case FTSQUERY_AND: {
        Fts3Expr *pLeft = pExpr->pLeft;
        Fts3Expr *pRight = pExpr->pRight;

        assert( !pLeft->bDeferred || !pRight->bDeferred );
        if( pLeft->bDeferred ){
          fts3EvalNext(pCsr, pRight, pRc);
          pExpr->iDocid = pRight->iDocid;
          pExpr->bEof = pRight->bEof;
        }else if( pRight->bDeferred ){
          fts3EvalNext(pCsr, pLeft, pRc);
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
            if( iDiff==0 ) break;
            if( iDiff<0 ){
              fts3EvalNext(pCsr, pLeft, pRc);
            }else{
              fts3EvalNext(pCsr, pRight, pRc);
            }
          }
    
          pExpr->iDocid = pLeft->iDocid;
          pExpr->bEof = (pLeft->bEof || pRight->bEof);
        }
        break;
      }
  
      case FTSQUERY_OR: {







|







4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
            if( iDiff==0 ) break;
            if( iDiff<0 ){
              fts3EvalNext(pCsr, pLeft, pRc);
            }else{
              fts3EvalNext(pCsr, pRight, pRc);
            }
          }

          pExpr->iDocid = pLeft->iDocid;
          pExpr->bEof = (pLeft->bEof || pRight->bEof);
        }
        break;
      }
  
      case FTSQUERY_OR: {
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010

4011
























4012
4013
4014
4015
4016
4017
4018

4019

4020
4021
4022
4023
4024
4025
4026
4027
4028

4029
4030
4031
4032
4033



4034
4035
4036
4037
4038
4039
4040
      }

      default: {
        Fts3Phrase *pPhrase = pExpr->pPhrase;
        fts3EvalFreeDeferredDoclist(pPhrase);
        *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
        pExpr->iDocid = pPhrase->doclist.iDocid;
#if 0
        printf("token \"%.*s\" docid=%lld\n", 
            pPhrase->aToken[0].n, pPhrase->aToken[0].z, pExpr->iDocid
        );
#endif
        
        break;
      }
    }
  }
}

static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){
  int bHit = 0;
  if( *pRc==SQLITE_OK ){
    switch( pExpr->eType ){
      case FTSQUERY_NEAR:
      case FTSQUERY_AND:
        bHit = (
            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
         && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)

        );
























        break;

      case FTSQUERY_OR:
        bHit = (
            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
         || fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
        );

        break;


      case FTSQUERY_NOT:
        bHit = (
            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
         && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
        );
        break;

      default: {

        Fts3Phrase *pPhrase = pExpr->pPhrase;
        fts3EvalFreeDeferredDoclist(pPhrase);
        *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
        bHit = (pPhrase->doclist.pList!=0);
        pExpr->iDocid = pCsr->iPrevId;



        break;
      }
    }
  }
  return bHit;
}








<
<
<
<
<
<







|







>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


|
<
|
|
<
>

>









>
|
|
|
|
|
>
>
>







4173
4174
4175
4176
4177
4178
4179






4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223

4224
4225

4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
      }

      default: {
        Fts3Phrase *pPhrase = pExpr->pPhrase;
        fts3EvalFreeDeferredDoclist(pPhrase);
        *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
        pExpr->iDocid = pPhrase->doclist.iDocid;






        break;
      }
    }
  }
}

static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){
  int bHit = 1;
  if( *pRc==SQLITE_OK ){
    switch( pExpr->eType ){
      case FTSQUERY_NEAR:
      case FTSQUERY_AND:
        bHit = (
            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
         && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
         && fts3EvalNearTest(pExpr, pRc)
        );

        /* If this is a NEAR node and the NEAR expression does not match
        ** any rows, zero the doclist for all phrases involved in the NEAR.
        ** This is because the snippet(), offsets() and matchinfo() functions
        ** are not supposed to recognize any instances of phrases that are
        ** part of unmatched NEAR queries. For example if this expression:
        **
        **    ... MATCH 'a OR (b NEAR c)'
        **
        ** is matched against a row containing:
        **
        **        'a b d e'
        **
        ** then any snippet() should ony highlight the "a" term, not the "b"
        ** (as "b" is part of a non-matching NEAR clause).
        */
        if( pExpr->eType==FTSQUERY_NEAR && bHit==0 ){
          Fts3Expr *p;
          for(p=pExpr; p->pPhrase==0; p=p->pLeft){
            p->pRight->pPhrase->doclist.pList = 0;
          }
          p->pPhrase->doclist.pList = 0;
        }

        break;

      case FTSQUERY_OR: {

        int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc);
        int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc);

        bHit = bHit1 || bHit2;
        break;
      }

      case FTSQUERY_NOT:
        bHit = (
            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
         && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
        );
        break;

      default: {
        if( pCsr->pDeferred ){
          Fts3Phrase *pPhrase = pExpr->pPhrase;
          fts3EvalFreeDeferredDoclist(pPhrase);
          *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
          bHit = (pPhrase->doclist.pList!=0);
          pExpr->iDocid = pCsr->iPrevId;
        }else{
          bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId);
        }
        break;
      }
    }
  }
  return bHit;
}

4048
4049
4050
4051
4052
4053
4054
4055

4056
4057
4058
4059

4060
4061
4062
4063
4064
4065
4066
**
** Or, if no error occurs and it seems the current row does match the FTS
** query, return 0.
*/
static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){
  int rc = *pRc;
  int bMiss = 0;
  if( rc==SQLITE_OK && pCsr->pDeferred ){

    rc = fts3CursorSeek(0, pCsr);
    if( rc==SQLITE_OK ){
      sqlite3Fts3FreeDeferredDoclists(pCsr);
      rc = sqlite3Fts3CacheDeferredDoclists(pCsr);

    }
    bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc));
    sqlite3Fts3FreeDeferredDoclists(pCsr);
    *pRc = rc;
  }
  return (rc==SQLITE_OK && bMiss);
}







|
>
|
|
<
|
>







4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271

4272
4273
4274
4275
4276
4277
4278
4279
4280
**
** Or, if no error occurs and it seems the current row does match the FTS
** query, return 0.
*/
static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){
  int rc = *pRc;
  int bMiss = 0;
  if( rc==SQLITE_OK ){
    if( pCsr->pDeferred ){
      rc = fts3CursorSeek(0, pCsr);
      if( rc==SQLITE_OK ){

        rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
      }
    }
    bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc));
    sqlite3Fts3FreeDeferredDoclists(pCsr);
    *pRc = rc;
  }
  return (rc==SQLITE_OK && bMiss);
}
4145
4146
4147
4148
4149
4150
4151







4152
4153
4154
4155
4156
4157
4158
    if( pExpr->bStart && !pExpr->bEof ){
      pExpr->bStart = 0;
      while( rc==SQLITE_OK && (pExpr->bStart==0 || pExpr->iDocid!=iDocid) ){
        fts3EvalNext(pCsr, pExpr, &rc);
        assert( !pExpr->bEof );
      }
    }







  }

  *pnList = pPhrase->doclist.nAll;
  *ppList = pPhrase->doclist.aAll;
  return rc;
}








>
>
>
>
>
>
>







4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
    if( pExpr->bStart && !pExpr->bEof ){
      pExpr->bStart = 0;
      while( rc==SQLITE_OK && (pExpr->bStart==0 || pExpr->iDocid!=iDocid) ){
        fts3EvalNext(pCsr, pExpr, &rc);
        assert( !pExpr->bEof );
      }
    }
  }

  if( rc==SQLITE_OK 
   && pExpr->pParent 
   && pExpr->pParent->eType==FTSQUERY_NEAR 
  ){

  }

  *pnList = pPhrase->doclist.nAll;
  *ppList = pPhrase->doclist.aAll;
  return rc;
}

Changes to test/fts3defer2.test.
44
45
46
47
48
49
50




51
52
53
54

55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
  INSERT INTO t1(t1) VALUES('optimize');
}
do_execsql_test 1.1.4 {
  SELECT count(*) FROM t1_segments WHERE length(block)>10000;
  UPDATE t1_segments SET block = zeroblob(length(block)) WHERE length(block)>10000;
} {2}





do_execsql_test 1.2.1 {
  SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} {{a b c d e f a x y}}


do_execsql_test 1.2.2 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal'))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} [list                              \
   {a b c d [e] [f] [a] x y}         \
   {0 1 8 1 0 0 10 1 0 2 12 1}       \
   [list 3 1   1 1 1   1 8 8   1 8 8   8 5001 9]
]

do_execsql_test 1.2.3 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal'))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/3 a)';
} [list                                 \
   {[a] b c d [e] [f] [a] x y}          \
   {0 2 0 1 0 1 8 1 0 0 10 1 0 2 12 1}  \
   [list 3 1   1 1 1   1 8 8   2 8 8   8 5001 9]
]

do_execsql_test 1.3.1 { DROP TABLE t1 }

#-----------------------------------------------------------------------------
# Test cases fts3defer2-2.* focus specifically on the matchinfo function.
# 







>
>
>
>




>






|








|







44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
  INSERT INTO t1(t1) VALUES('optimize');
}
do_execsql_test 1.1.4 {
  SELECT count(*) FROM t1_segments WHERE length(block)>10000;
  UPDATE t1_segments SET block = zeroblob(length(block)) WHERE length(block)>10000;
} {2}

do_execsql_test 1.2.0 {
  SELECT content FROM t1 WHERE t1 MATCH 'f (e a)';
} {{a b c d e f a x y}}

do_execsql_test 1.2.1 {
  SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} {{a b c d e f a x y}}

breakpoint
do_execsql_test 1.2.2 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal'))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} [list                              \
   {a b c d [e] [f] [a] x y}         \
   {0 1 8 1 0 0 10 1 0 2 12 1}       \
   [list 3 1   1 1 1   1 1 1   1 8 8   8 5001 9]
]

do_execsql_test 1.2.3 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal'))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/3 a)';
} [list                                 \
   {[a] b c d [e] [f] [a] x y}          \
   {0 2 0 1 0 1 8 1 0 0 10 1 0 2 12 1}  \
   [list 3 1   1 1 1   1 1 1   2 8 8   8 5001 9]
]

do_execsql_test 1.3.1 { DROP TABLE t1 }

#-----------------------------------------------------------------------------
# Test cases fts3defer2-2.* focus specifically on the matchinfo function.
# 
95
96
97
98
99
100
101






102
103
104
105
106
107
108
109
110
  do_execsql_test 2.2.$tn.1 {
    SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b';
  } [list                                          \
    [list 2 1  1 54 54  1 3 3  54 372 8]        \
    [list 2 1  1 54 54  1 3 3  54 372 7]        \
  ]







  set sqlite_fts3_enable_parentheses 1
  do_execsql_test 2.2.$tn.2 {
    SELECT mit(matchinfo(t2, 'x')) FROM t2 WHERE t2 MATCH 'g OR (g z)';
  } [list                                       \
    [list 1 2 2  1 2 2   1 54 54]               \
    [list 1 2 2  1 2 2   0 54 54]               \
  ]
  set sqlite_fts3_enable_parentheses 0
}







>
>
>
>
>
>

|







100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
  do_execsql_test 2.2.$tn.1 {
    SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b';
  } [list                                          \
    [list 2 1  1 54 54  1 3 3  54 372 8]        \
    [list 2 1  1 54 54  1 3 3  54 372 7]        \
  ]

  do_execsql_test 2.2.$tn.2 {
    SELECT mit(matchinfo(t2, 'x')) FROM t2 WHERE t2 MATCH 'g z';
  } [list                                       \
    [list 1 2 2  1 54 54]                       \
  ]

  set sqlite_fts3_enable_parentheses 1
  do_execsql_test 2.2.$tn.3 {
    SELECT mit(matchinfo(t2, 'x')) FROM t2 WHERE t2 MATCH 'g OR (g z)';
  } [list                                       \
    [list 1 2 2  1 2 2   1 54 54]               \
    [list 1 2 2  1 2 2   0 54 54]               \
  ]
  set sqlite_fts3_enable_parentheses 0
}
Changes to test/fts3matchinfo.test.
64
65
66
67
68
69
70


71


72
73
74
75
76
77
78
  CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3);
  INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees');
  SELECT mtchinfo FROM t3;
} {{Beside the lake, beneath the trees}}

do_execsql_test 3.2 {
  CREATE VIRTUAL TABLE xx USING FTS4;


  SELECT * FROM xx WHERE xx MATCH 'abc';


  SELECT * FROM xx WHERE xx MATCH 'a b c';
}


#--------------------------------------------------------------------------
# Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function.
#







>
>

>
>







64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
  CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3);
  INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees');
  SELECT mtchinfo FROM t3;
} {{Beside the lake, beneath the trees}}

do_execsql_test 3.2 {
  CREATE VIRTUAL TABLE xx USING FTS4;
}
do_execsql_test 3.3 {
  SELECT * FROM xx WHERE xx MATCH 'abc';
}
do_execsql_test 3.4 {
  SELECT * FROM xx WHERE xx MATCH 'a b c';
}


#--------------------------------------------------------------------------
# Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function.
#