SQLite

Check-in [ad7feaed4c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Begin changes to allow synonym support by adding multiple terms to a query (an alternative to adding multiple terms to the FTS index).
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5-incompatible
Files: files | file ages | folders
SHA1: ad7feaed4cd6b1d6e6376bb82d1f5664ddd083f3
User & Date: dan 2015-08-31 20:06:06.235
Context
2015-09-01
18:08
Add tests for fts5 synonyms implemented by adding extra terms to queries. And fixes for the same. (check-in: dbcb73802b user: dan tags: fts5-incompatible)
2015-08-31
20:06
Begin changes to allow synonym support by adding multiple terms to a query (an alternative to adding multiple terms to the FTS index). (check-in: ad7feaed4c user: dan tags: fts5-incompatible)
2015-08-29
18:46
Add a test for an fts5 tokenizer that supports synonyms by adding multiple entries to the fts index. (check-in: 98d07d16ca user: dan tags: fts5-incompatible)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5Int.h.
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
  /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
  int iCol;                       /* If (iCol>=0), this column only */
  const u8 *a;                    /* Position list to iterate through */
  int n;                          /* Size of buffer at a[] in bytes */
  int i;                          /* Current offset in a[] */

  /* Output variables */
  int bEof;                       /* Set to true at EOF */
  i64 iPos;                       /* (iCol<<32) + iPos */
};
int sqlite3Fts5PoslistReaderInit(
  int iCol,                       /* If (iCol>=0), this column only */
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
);







|







232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
  /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
  int iCol;                       /* If (iCol>=0), this column only */
  const u8 *a;                    /* Position list to iterate through */
  int n;                          /* Size of buffer at a[] in bytes */
  int i;                          /* Current offset in a[] */

  /* Output variables */
  u8 bEof;                        /* Set to true at EOF */
  i64 iPos;                       /* (iCol<<32) + iPos */
};
int sqlite3Fts5PoslistReaderInit(
  int iCol,                       /* If (iCol>=0), this column only */
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
);
Changes to ext/fts5/fts5_expr.c.
69
70
71
72
73
74
75

76
77
78
79
80
81
82
** An instance of the following structure represents a single search term
** or term prefix.
*/
struct Fts5ExprTerm {
  int bPrefix;                    /* True for a prefix term */
  char *zTerm;                    /* nul-terminated term */
  Fts5IndexIter *pIter;           /* Iterator for this term */

};

/*
** A phrase. One or more terms that must appear in a contiguous sequence
** within a document for it to match.
*/
struct Fts5ExprPhrase {







>







69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
** An instance of the following structure represents a single search term
** or term prefix.
*/
struct Fts5ExprTerm {
  int bPrefix;                    /* True for a prefix term */
  char *zTerm;                    /* nul-terminated term */
  Fts5IndexIter *pIter;           /* Iterator for this term */
  Fts5ExprTerm *pSynonym;         /* Pointer to first in list of synonyms */
};

/*
** A phrase. One or more terms that must appear in a contiguous sequence
** within a document for it to match.
*/
struct Fts5ExprPhrase {
177
178
179
180
181
182
183




184
185
186
187
188
189
190
      }
      pToken->n = (z2 - z);
      break;
    }

    default: {
      const char *z2;




      tok = FTS5_STRING;
      for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
      pToken->n = (z2 - z);
      if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 )  tok = FTS5_OR;
      if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
      if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
      break;







>
>
>
>







178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
      }
      pToken->n = (z2 - z);
      break;
    }

    default: {
      const char *z2;
      if( sqlite3Fts5IsBareword(z[0])==0 ){
        sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
        return FTS5_EOF;
      }
      tok = FTS5_STRING;
      for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
      pToken->n = (z2 - z);
      if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 )  tok = FTS5_OR;
      if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
      if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
      break;
345
346
347
348
349
350
351






















































352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
  int i;
  for(i=0; i<pColset->nCol; i++){
    if( pColset->aiCol[i]==iCol ) return 1;
  }
  return 0;
}























































/*
** All individual term iterators in pPhrase are guaranteed to be valid and
** pointing to the same rowid when this function is called. This function 
** checks if the current rowid really is a match, and if so populates
** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
** is set to true if this is really a match, or false otherwise.
**
** SQLITE_OK is returned if an error occurs, or an SQLite error code 
** otherwise. It is not considered an error code if the current rowid is 
** not a match.
*/
static int fts5ExprPhraseIsMatch(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprColset *pColset,        /* Restrict matches to these columns */
  Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
  int *pbMatch                    /* OUT: Set to true if really a match */
){
  Fts5PoslistWriter writer = {0};
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>













|







350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
  int i;
  for(i=0; i<pColset->nCol; i++){
    if( pColset->aiCol[i]==iCol ) return 1;
  }
  return 0;
}

/*
** Argument pTerm must be a synonym iterator. Return the current rowid
** that it points to.
*/
static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc){
  i64 iRet;
  int bRetValid = 0;
  Fts5ExprTerm *p;

  assert( pTerm->pSynonym );
  assert( bDesc==0 || bDesc==1 );
  for(p=pTerm; p; p=p->pSynonym){
    if( 0==sqlite3Fts5IterEof(p->pIter) ){
      i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
      if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
        iRet = iRowid;
        bRetValid = 1;
      }
    }
  }

  assert( bRetValid );
  return iRet;
}

/*
** Argument pTerm must be a synonym iterator.
*/
static int fts5ExprSynonymPoslist(
  Fts5ExprTerm *pTerm, 
  i64 iRowid,
  u8 **pa, int *pn
){
  Fts5PoslistWriter writer = {0};
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
  int nIter = 0;
  Fts5ExprTerm *p;

  assert( pTerm->pSynonym );
  for(p=pTerm; p; p=p->pSynonym){
    Fts5IndexIter *pIter = p->pIter;
    if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
      i64 dummy;
      int rc = sqlite3Fts5IterPoslist(pIter, (const u8**)pa, pn, &dummy);
      return rc;
    }
  }

  assert( 0 );
  return SQLITE_ERROR;
}


/*
** All individual term iterators in pPhrase are guaranteed to be valid and
** pointing to the same rowid when this function is called. This function 
** checks if the current rowid really is a match, and if so populates
** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
** is set to true if this is really a match, or false otherwise.
**
** SQLITE_OK is returned if an error occurs, or an SQLite error code 
** otherwise. It is not considered an error code if the current rowid is 
** not a match.
*/
static int fts5ExprPhraseIsMatch(
  Fts5ExprNode *pNode,            /* Node pPhrase belongs to */
  Fts5ExprColset *pColset,        /* Restrict matches to these columns */
  Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
  int *pbMatch                    /* OUT: Set to true if really a match */
){
  Fts5PoslistWriter writer = {0};
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
387
388
389
390
391
392
393

394
395
396



397

398
399
400
401
402
403
404
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    if( !aIter ) return SQLITE_NOMEM;
  }

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){

    i64 dummy;
    int n;
    const u8 *a;



    rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy);

    if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
      goto ismatch_out;
    }
  }

  while( 1 ){
    int bMatch;







>



>
>
>
|
>







446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    if( !aIter ) return SQLITE_NOMEM;
  }

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){
    Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
    i64 dummy;
    int n;
    const u8 *a;
    if( pTerm->pSynonym ){
      rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, (u8**)&a, &n);
    }else{
      rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy);
    }
    if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
      goto ismatch_out;
    }
  }

  while( 1 ){
    int bMatch;
594
595
596
597
598
599
600
601
602
603
























604
605
606
607
608
609
610
611


612
613
614
615
616
617
618
*/
static int fts5ExprNearAdvanceFirst(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode,            /* FTS5_STRING or FTS5_TERM node */
  int bFromValid,
  i64 iFrom 
){
  Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
  int rc;

























  assert( Fts5NodeIsString(pNode) );
  if( bFromValid ){
    rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
  }else{
    rc = sqlite3Fts5IterNext(pIter);
  }

  pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));


  return rc;
}

/*
** Advance iterator pIter until it points to a value equal to or laster
** than the initial value of *piLast. If this means the iterator points
** to a value laster than *piLast, update *piLast to the new lastest value.







|


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|

|
>
>







658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
*/
static int fts5ExprNearAdvanceFirst(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode,            /* FTS5_STRING or FTS5_TERM node */
  int bFromValid,
  i64 iFrom 
){
  Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
  int rc;

  if( pTerm->pSynonym ){
    int bEof = 1;
    Fts5ExprTerm *p;

    /* Find the firstest rowid any synonym points to. */
    i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc);

    /* Advance each iterator that currently points to iRowid */
    for(p=pTerm; p; p=p->pSynonym){
      if( sqlite3Fts5IterEof(p->pIter)==0 ){
        bEof = 0;
        if( sqlite3Fts5IterRowid(p->pIter)==iRowid ){
          rc = sqlite3Fts5IterNext(p->pIter);
          if( rc!=SQLITE_OK ) break;
        }
      }
    }

    /* Set the EOF flag if either all synonym iterators are at EOF or an
    ** error has occurred.  */
    pNode->bEof = (rc || bEof);
  }else{
    Fts5IndexIter *pIter = pTerm->pIter;

    assert( Fts5NodeIsString(pNode) );
    if( bFromValid ){
      rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
    }else{
      rc = sqlite3Fts5IterNext(pIter);
    }

    pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
  }

  return rc;
}

/*
** Advance iterator pIter until it points to a value equal to or laster
** than the initial value of *piLast. If this means the iterator points
** to a value laster than *piLast, update *piLast to the new lastest value.
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
  /* Check that each phrase in the nearset matches the current row.
  ** Populate the pPhrase->poslist buffers at the same time. If any
  ** phrase is not a match, break out of the loop early.  */
  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    if( pPhrase->nTerm>1 || pNear->pColset ){
      int bMatch = 0;
      rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
      if( bMatch==0 ) break;
    }else{
      rc = sqlite3Fts5IterPoslistBuffer(
          pPhrase->aTerm[0].pIter, &pPhrase->poslist
      );
    }
  }







|







805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
  /* Check that each phrase in the nearset matches the current row.
  ** Populate the pPhrase->poslist buffers at the same time. If any
  ** phrase is not a match, break out of the loop early.  */
  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    if( pPhrase->nTerm>1 || pNear->pColset ){
      int bMatch = 0;
      rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch);
      if( bMatch==0 ) break;
    }else{
      rc = sqlite3Fts5IterPoslistBuffer(
          pPhrase->aTerm[0].pIter, &pPhrase->poslist
      );
    }
  }
751
752
753
754
755
756
757

758
759
760
761
762
763
764
  Fts5ExprColset *pColset = pNear->pColset;
  const u8 *pPos;
  int nPos;
  int rc;

  assert( pNode->eType==FTS5_TERM );
  assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );


  rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);

  /* If the term may match any column, then this must be a match. 
  ** Return immediately in this case. Otherwise, try to find the
  ** part of the poslist that corresponds to the required column.
  ** If it can be found, return. If it cannot, the next iteration







>







841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
  Fts5ExprColset *pColset = pNear->pColset;
  const u8 *pPos;
  int nPos;
  int rc;

  assert( pNode->eType==FTS5_TERM );
  assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );
  assert( pPhrase->aTerm[0].pSynonym==0 );

  rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);

  /* If the term may match any column, then this must be a match. 
  ** Return immediately in this case. Otherwise, try to find the
  ** part of the poslist that corresponds to the required column.
  ** If it can be found, return. If it cannot, the next iteration
797
798
799
800
801
802
803

804

805



806
807
808
809
810



811

812
813
814
815
816
817




















818
819

820
821
822

823
824
825
826
827
828
829

830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852




853
854
855
856
857
858
859
860
861
862
863
864





865
866
867
868
869
870
871
872
873
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
  int rc = SQLITE_OK;
  i64 iLast;                      /* Lastest rowid any iterator points to */
  int i, j;                       /* Phrase and token index, respectively */
  int bMatch;                     /* True if all terms are at the same rowid */



  assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 );




  /* Initialize iLast, the "lastest" rowid any iterator points to. If the
  ** iterator skips through rowids in the default ascending order, this means
  ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
  ** means the minimum rowid.  */



  iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);


  do {
    bMatch = 1;
    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      for(j=0; j<pPhrase->nTerm; j++){




















        Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
        i64 iRowid = sqlite3Fts5IterRowid(pIter);

        if( iRowid!=iLast ) bMatch = 0;
        if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){
          return rc;

        }
      }
    }
  }while( bMatch==0 );

  pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
  pNode->iRowid = iLast;


  return rc;
}

/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, set *pbEof to true and return immediately,
** without initializing any further iterators.
*/
static int fts5ExprNearInitAll(
  Fts5Expr *pExpr,
  Fts5ExprNode *pNode
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprTerm *pTerm;
  Fts5ExprPhrase *pPhrase;
  int i, j;
  int rc = SQLITE_OK;

  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    pPhrase = pNear->apPhrase[i];
    for(j=0; j<pPhrase->nTerm; j++){
      pTerm = &pPhrase->aTerm[j];




      if( pTerm->pIter ){
        sqlite3Fts5IterClose(pTerm->pIter);
        pTerm->pIter = 0;
      }
      rc = sqlite3Fts5IndexQuery(
          pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm),
          (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
          (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
          &pTerm->pIter
      );
      assert( rc==SQLITE_OK || pTerm->pIter==0 );
      if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){





        pNode->bEof = 1;
        break;
      }
    }
  }

  return rc;
}








>

>
|
>
>
>





>
>
>
|
>






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
|
|
|
>





<

>






|
|






<
<




|

|
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>

|







888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949

950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965


966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
  int rc = SQLITE_OK;
  i64 iLast;                      /* Lastest rowid any iterator points to */
  int i, j;                       /* Phrase and token index, respectively */
  int bMatch;                     /* True if all terms are at the same rowid */
  const int bDesc = pExpr->bDesc;

  /* Check that this node should not be FTS5_TERM */
  assert( pNear->nPhrase>1 
       || pNear->apPhrase[0]->nTerm>1 
       || pNear->apPhrase[0]->aTerm[0].pSynonym
  );

  /* Initialize iLast, the "lastest" rowid any iterator points to. If the
  ** iterator skips through rowids in the default ascending order, this means
  ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
  ** means the minimum rowid.  */
  if( pLeft->aTerm[0].pSynonym ){
    iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc);
  }else{
    iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);
  }

  do {
    bMatch = 1;
    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      for(j=0; j<pPhrase->nTerm; j++){
        Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
        if( pTerm->pSynonym ){
          Fts5ExprTerm *p;
          int bEof = 1;
          i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc);
          if( iRowid==iLast ) continue;
          for(p=pTerm; p; p=p->pSynonym){
            Fts5IndexIter *pIter = p->pIter;
            int dummy;
            if( 0==sqlite3Fts5IterEof(pIter)
             && 0==fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &dummy)==0 
            ){
              bEof = 0;
            }
          }
          if( bEof || rc ){
            pNode->bEof = 1;
            return rc;
          }
        }else{
          Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
          i64 iRowid = sqlite3Fts5IterRowid(pIter);
          if( iRowid==iLast ) continue;
          bMatch = 0;
          if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
            return rc;
          }
        }
      }
    }
  }while( bMatch==0 );


  pNode->iRowid = iLast;
  pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));

  return rc;
}

/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, return immediately without initializing any
** further iterators.
*/
static int fts5ExprNearInitAll(
  Fts5Expr *pExpr,
  Fts5ExprNode *pNode
){
  Fts5ExprNearset *pNear = pNode->pNear;


  int i, j;
  int rc = SQLITE_OK;

  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    for(j=0; j<pPhrase->nTerm; j++){
      Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
      Fts5ExprTerm *p;
      int bEof = 1;

      for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){
        if( p->pIter ){
          sqlite3Fts5IterClose(p->pIter);
          p->pIter = 0;
        }
        rc = sqlite3Fts5IndexQuery(
            pExpr->pIndex, p->zTerm, strlen(p->zTerm),
            (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
            (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
            &p->pIter
        );
        assert( rc==SQLITE_OK || p->pIter==0 );
        if( p->pIter && 0==sqlite3Fts5IterEof(pTerm->pIter) ){
          bEof = 0;
        }
      }

      if( bEof ){
        pNode->bEof = 1;
        return rc;
      }
    }
  }

  return rc;
}

1262
1263
1264
1265
1266
1267
1268


1269
1270
1271



1272

1273
1274
1275
1276
1277
1278
1279
/*
** Free the phrase object passed as the only argument.
*/
static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
  if( pPhrase ){
    int i;
    for(i=0; i<pPhrase->nTerm; i++){


      Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
      sqlite3_free(pTerm->zTerm);
      if( pTerm->pIter ){



        sqlite3Fts5IterClose(pTerm->pIter);

      }
    }
    if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
    sqlite3_free(pPhrase);
  }
}








>
>


|
>
>
>
|
>







1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
/*
** Free the phrase object passed as the only argument.
*/
static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
  if( pPhrase ){
    int i;
    for(i=0; i<pPhrase->nTerm; i++){
      Fts5ExprTerm *pSyn;
      Fts5ExprTerm *pNext;
      Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
      sqlite3_free(pTerm->zTerm);
      sqlite3Fts5IterClose(pTerm->pIter);

      for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
        pNext = pSyn->pSynonym;
        sqlite3Fts5IterClose(pSyn->pIter);
        sqlite3_free(pSyn);
      }
    }
    if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
    sqlite3_free(pPhrase);
  }
}

1344
1345
1346
1347
1348
1349
1350
1351
1352

1353
1354










1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
  Fts5ExprTerm *pTerm;


  if( tflags & FTS5_TOKEN_COLOCATED ) return rc;











  if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
    Fts5ExprPhrase *pNew;
    int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

    pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
        sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
    );
    if( pNew==0 ) return SQLITE_NOMEM;
    if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
    pCtx->pPhrase = pPhrase = pNew;
    pNew->nTerm = nNew - SZALLOC;
  }

  pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
  memset(pTerm, 0, sizeof(Fts5ExprTerm));
  pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);

  return rc;
}


/*
** Free the phrase object passed as the only argument.
*/







<

>
|
|
>
>
>
>
>
>
>
>
>
>
|
|
|

|
|
|
|
|
|
|
|

|
|
|
|







1479
1480
1481
1482
1483
1484
1485

1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;


  assert( pPhrase==0 || pPhrase->nTerm>0 );
  if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
    Fts5ExprTerm *pSyn;
    int nByte = sizeof(Fts5ExprTerm) + nToken+1;
    pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
    if( pSyn==0 ) return SQLITE_NOMEM;
    memset(pSyn, 0, nByte);
    pSyn->zTerm = (char*)&pSyn[1];
    memcpy(pSyn->zTerm, pToken, nToken);
    pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
    pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
  }else{
    Fts5ExprTerm *pTerm;
    if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
      Fts5ExprPhrase *pNew;
      int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

      pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
          sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
          );
      if( pNew==0 ) return SQLITE_NOMEM;
      if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
      pCtx->pPhrase = pPhrase = pNew;
      pNew->nTerm = nNew - SZALLOC;
    }

    pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
    memset(pTerm, 0, sizeof(Fts5ExprTerm));
    pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
  }
  return rc;
}


/*
** Free the phrase object passed as the only argument.
*/
1632
1633
1634
1635
1636
1637
1638
1639



1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658






1659



1660
1661

1662
1663
1664
1665
1666
1667
1668


1669
1670
1671
1672
1673
1674
1675
      pRet->eType = eType;
      pRet->pNear = pNear;
      if( eType==FTS5_STRING ){
        int iPhrase;
        for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
          pNear->apPhrase[iPhrase]->pNode = pRet;
        }
        if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){



          pRet->eType = FTS5_TERM;
        }
      }else{
        fts5ExprAddChildren(pRet, pLeft);
        fts5ExprAddChildren(pRet, pRight);
      }
    }
  }

  if( pRet==0 ){
    assert( pParse->rc!=SQLITE_OK );
    sqlite3Fts5ParseNodeFree(pLeft);
    sqlite3Fts5ParseNodeFree(pRight);
    sqlite3Fts5ParseNearsetFree(pNear);
  }
  return pRet;
}

static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){






  char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2);



  if( zQuoted ){
    int i = 0;

    char *zIn = pTerm->zTerm;
    zQuoted[i++] = '"';
    while( *zIn ){
      if( *zIn=='"' ) zQuoted[i++] = '"';
      zQuoted[i++] = *zIn++;
    }
    zQuoted[i++] = '"';


    if( pTerm->bPrefix ){
      zQuoted[i++] = ' ';
      zQuoted[i++] = '*';
    }
    zQuoted[i++] = '\0';
  }
  return zQuoted;







|
>
>
>



















>
>
>
>
>
>
|
>
>
>


>
|
|
|
|
|
|
|
>
>







1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
      pRet->eType = eType;
      pRet->pNear = pNear;
      if( eType==FTS5_STRING ){
        int iPhrase;
        for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
          pNear->apPhrase[iPhrase]->pNode = pRet;
        }
        if( pNear->nPhrase==1 
         && pNear->apPhrase[0]->nTerm==1 
         && pNear->apPhrase[0]->aTerm[0].pSynonym==0
        ){
          pRet->eType = FTS5_TERM;
        }
      }else{
        fts5ExprAddChildren(pRet, pLeft);
        fts5ExprAddChildren(pRet, pRight);
      }
    }
  }

  if( pRet==0 ){
    assert( pParse->rc!=SQLITE_OK );
    sqlite3Fts5ParseNodeFree(pLeft);
    sqlite3Fts5ParseNodeFree(pRight);
    sqlite3Fts5ParseNearsetFree(pNear);
  }
  return pRet;
}

static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
  int nByte = 0;
  Fts5ExprTerm *p;
  char *zQuoted;

  /* Determine the maximum amount of space required. */
  for(p=pTerm; p; p=p->pSynonym){
    nByte += strlen(pTerm->zTerm) * 2 + 3 + 2;
  }
  zQuoted = sqlite3_malloc(nByte);

  if( zQuoted ){
    int i = 0;
    for(p=pTerm; p; p=p->pSynonym){
      char *zIn = p->zTerm;
      zQuoted[i++] = '"';
      while( *zIn ){
        if( *zIn=='"' ) zQuoted[i++] = '"';
        zQuoted[i++] = *zIn++;
      }
      zQuoted[i++] = '"';
      if( p->pSynonym ) zQuoted[i++] = '|';
    }
    if( pTerm->bPrefix ){
      zQuoted[i++] = ' ';
      zQuoted[i++] = '*';
    }
    zQuoted[i++] = '\0';
  }
  return zQuoted;
Changes to ext/fts5/test/fts5aa.test.
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
} {}

do_execsql_test 13.5 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'o';
} {1}

do_execsql_test 13.6 {
  SELECT rowid FROM t1 WHERE t1 MATCH '.';
} {}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 14.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, y);







|







339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
} {}

do_execsql_test 13.5 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'o';
} {1}

do_execsql_test 13.6 {
  SELECT rowid FROM t1 WHERE t1 MATCH '""';
} {}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 14.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, y);
Changes to ext/fts5/test/fts5ea.test.
83
84
85
86
87
88
89






90
91
92
93
#-------------------------------------------------------------------------
# Experiment with a tokenizer that considers " to be a token character.
#
do_execsql_test 4.0 {
  SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"');
} {{"a" AND """"}}










finish_test







>
>
>
>
>
>




83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#-------------------------------------------------------------------------
# Experiment with a tokenizer that considers " to be a token character.
#
do_execsql_test 4.0 {
  SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"');
} {{"a" AND """"}}

#-------------------------------------------------------------------------
# Experiment with a tokenizer that considers " to be a token character.
#
do_catchsql_test 5.0 {
  SELECT fts5_expr('abc | def');
} {1 {fts5: syntax error near "|"}}



finish_test
Changes to ext/fts5/test/fts5eb.test.
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

proc do_syntax_test {tn expr res} {
  set ::se_expr $expr
  do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res]
}

foreach {tn expr res} {
  1  {abc}                           {"abc"}
  2  {abc .}                         {"abc"}
  3  {.}                             {}
  4  {abc OR .}                      {"abc"}
  5  {abc NOT .}                     {"abc"}
  6  {abc AND .}                     {"abc"}
  7  {. OR abc}                      {"abc"}
  8  {. NOT abc}                     {"abc"}
  9  {. AND abc}                     {"abc"}
  10 {abc + . + def}                 {"abc" + "def"}
  11 {abc . def}                     {"abc" AND "def"}
  12 {r+e OR w}                      {"r" + "e" OR "w"}
} {
  do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
}

do_catchsql_test 2.1 {
  SELECT fts5_expr()
} {1 {wrong number of arguments to function fts5_expr}}







|
|
|
|
|
|
|
|
|
|
|
|







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

proc do_syntax_test {tn expr res} {
  set ::se_expr $expr
  do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res]
}

foreach {tn expr res} {
  1  {abc}                            {"abc"}
  2  {abc ""}                         {"abc"}
  3  {""}                             {}
  4  {abc OR ""}                      {"abc"}
  5  {abc NOT ""}                     {"abc"}
  6  {abc AND ""}                     {"abc"}
  7  {"" OR abc}                      {"abc"}
  8  {"" NOT abc}                     {"abc"}
  9  {"" AND abc}                     {"abc"}
  10 {abc + "" + def}                 {"abc" + "def"}
  11 {abc "" def}                     {"abc" AND "def"}
  12 {r+e OR w}                       {"r" + "e" OR "w"}
} {
  do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
}

do_catchsql_test 2.1 {
  SELECT fts5_expr()
} {1 {wrong number of arguments to function fts5_expr}}
Changes to ext/fts5/test/fts5synonym.test.
17
18
19
20
21
22
23


















24
25
26
27
28
29
30

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}




















proc gobble_whitespace {textvar} {
  upvar $textvar t
  regexp {([ ]*)(.*)} $t -> space t
  return [string length $space]
}








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

foreach S {
  {zero 0}
  {one 1}
  {two 2}
  {three 3 iii}
  {four 4}
  {five 5}
  {six 6}
  {seven 7}
  {eight 8}
  {nine 9}
} {
  foreach s $S {
    set o [list]
    foreach x $S {if {$x!=$s} {lappend o $x}}
    set ::syn($s) $o
  }
}

proc gobble_whitespace {textvar} {
  upvar $textvar t
  regexp {([ ]*)(.*)} $t -> space t
  return [string length $space]
}

71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
  SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
} {1 {abc def ghi} {jkl mno pqr} 2}

#-------------------------------------------------------------------------
# Test a tokenizer that supports synonyms by adding extra entries to the
# FTS index.
#
foreach S {
  {zero 0}
  {one 1}
  {two 2}
  {three 3 iii}
  {four 4}
  {five 5}
  {six 6}
  {seven 7}
  {eight 8}
  {nine 9}
} {
  foreach s $S {
    set o [list]
    foreach x $S {if {$x!=$s} {lappend o $x}}
    set ::syn($s) $o
  }
}

proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [do_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="document" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







89
90
91
92
93
94
95


















96
97
98
99
100
101
102
  SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
} {1 {abc def ghi} {jkl mno pqr} 2}

#-------------------------------------------------------------------------
# Test a tokenizer that supports synonyms by adding extra entries to the
# FTS index.
#



















proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [do_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="document" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd
189
190
191
192
193
194
195
196

























197
198
  SELECT rowid FROM ft WHERE ft MATCH 'one two three';
  SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
} {1 1}
do_execsql_test 3.2.2 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
  SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
} {1}


























finish_test









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
  SELECT rowid FROM ft WHERE ft MATCH 'one two three';
  SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
} {1 1}
do_execsql_test 3.2.2 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
  SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
} {1}

#-------------------------------------------------------------------------
# Check that expressions with synonyms can be parsed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [do_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="query" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd
      }
    }
  }
}

foreach {tn expr res} {
  1  {abc}                           {"abc"}
  2  {one}                           {"one"|"1"}
  3  {3}                             {"3"|"iii"|"three"}
  4  {3*}                            {"3"|"iii"|"three" *}
} {
  do_execsql_test 4.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res]
}

finish_test