Check-in [f8e9c445]

Overview
Comment: Add the xLanguageid method to sqlite3_fts3_tokenizer versions 1 and greater.
SHA1: f8e9c445dd358c40e5a7bf3756b9f291909dbea7
User & Date: dan 2012-03-03 18:46:41
Context
2012-03-05 15:33  Merge the fts4-languageid branch with the trunk. check-in: 99a9073b user: dan tags: trunk
2012-03-03 18:46  Add the xLanguageid method to sqlite3_fts3_tokenizer versions 1 and greater. Closed-Leaf check-in: f8e9c445 user: dan tags: fts4-languageid
2012-03-02 19:53  Fix problems with combining content= and languageid= in a single fts4 table. check-in: 22491e7b user: dan tags: fts4-languageid
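The feature this branch completes is easiest to see end-to-end. The Tcl cases added to test/fts4langid.test (diffed below) drive it from script; the equivalent flow from C looks roughly like the following sketch. It is illustrative only: the tokenizer name 'mytok', the helper setup_langid_table(), and myTokenizerModule are hypothetical, the registration pattern (binding a pointer to the tokenizer module as a blob) is the one described in fts3_tokenizer.h, and an FTS4-enabled build is assumed.

  #include "sqlite3.h"
  #include "fts3_tokenizer.h"

  /* Register the (hypothetical) version-1 tokenizer module pMod under the
  ** name "mytok", then build an FTS4 table whose hidden "lid" column carries
  ** a per-row language id. With this check-in, that id is also handed to the
  ** tokenizer's xLanguageid() method before documents or queries are
  ** tokenized. */
  int setup_langid_table(sqlite3 *db, const sqlite3_tokenizer_module *pMod){
    sqlite3_stmt *pStmt;
    int rc = sqlite3_prepare_v2(db, "SELECT fts3_tokenizer(?, ?)", -1, &pStmt, 0);
    if( rc!=SQLITE_OK ) return rc;
    sqlite3_bind_text(pStmt, 1, "mytok", -1, SQLITE_STATIC);
    sqlite3_bind_blob(pStmt, 2, &pMod, sizeof(pMod), SQLITE_STATIC);
    sqlite3_step(pStmt);
    rc = sqlite3_finalize(pStmt);
    if( rc!=SQLITE_OK ) return rc;

    /* Each row is indexed under the language id stored in "lid". A query
    ** such as "SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1"
    ** is then tokenized and answered using language 1 only. */
    return sqlite3_exec(db,
        "CREATE VIRTUAL TABLE t4 USING fts4(tokenize=mytok, languageid=lid);"
        "INSERT INTO t4(docid, content, lid) VALUES(0, 'The Quick Brown Fox', 0);"
        "INSERT INTO t4(docid, content, lid) VALUES(1, 'The Quick Brown Fox', 1);",
        0, 0, 0);
  }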
Changes

Changes to ext/fts3/fts3.c.

Old version (lines 2954-2981):
    int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
    const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);

    if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
      return SQLITE_NOMEM;
    }




    rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->bHasStat, 
        p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
    );
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_ERROR ){
        static const char *zErr = "malformed MATCH expression: [%s]";
        p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery);
      }
      return rc;
    }

    pCsr->iLangid = 0;
    if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);

    rc = sqlite3Fts3ReadLock(p);
    if( rc!=SQLITE_OK ) return rc;

    rc = fts3EvalStart(pCsr);

    sqlite3Fts3SegmentsClose(p);
    if( rc!=SQLITE_OK ) return rc;

New version (lines 2954-2981):
    int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
    const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);

    if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
      return SQLITE_NOMEM;
    }

    pCsr->iLangid = 0;
    if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);

    rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
        p->azColumn, p->bHasStat, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
    );
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_ERROR ){
        static const char *zErr = "malformed MATCH expression: [%s]";
        p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery);
      }
      return rc;
    }




    rc = sqlite3Fts3ReadLock(p);
    if( rc!=SQLITE_OK ) return rc;

    rc = fts3EvalStart(pCsr);

    sqlite3Fts3SegmentsClose(p);
    if( rc!=SQLITE_OK ) return rc;

Changes to ext/fts3/fts3Int.h.

Old version (lines 494-515):
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
  const char *, const char *, int, int
);
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);

/* fts3_expr.c */
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, 
  char **, int, int, int, const char *, int, Fts3Expr **
);
void sqlite3Fts3ExprFree(Fts3Expr *);
#ifdef SQLITE_TEST
int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
int sqlite3Fts3InitTerm(sqlite3 *db);
#endif





/* fts3_aux.c */
int sqlite3Fts3InitAux(sqlite3 *db);

void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);

int sqlite3Fts3MsrIncrStart(

New version (lines 494-519):
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
  const char *, const char *, int, int
);
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);

/* fts3_expr.c */
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
  char **, int, int, int, const char *, int, Fts3Expr **
);
void sqlite3Fts3ExprFree(Fts3Expr *);
#ifdef SQLITE_TEST
int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
int sqlite3Fts3InitTerm(sqlite3 *db);
#endif

int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
  sqlite3_tokenizer_cursor **
);

/* fts3_aux.c */
int sqlite3Fts3InitAux(sqlite3 *db);

void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);

int sqlite3Fts3MsrIncrStart(

Changes to ext/fts3/fts3_expr.c.

Old version (hunks at lines 88, 123, 150, 264, 741, 946):
**   FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
**   FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
**   zero.
*/
typedef struct ParseContext ParseContext;
struct ParseContext {
  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */

  const char **azCol;                 /* Array of column names for fts3 table */
  int bFts4;                          /* True to allow FTS4-only syntax */
  int nCol;                           /* Number of entries in azCol[] */
  int iDefaultCol;                    /* Default column to query */
  int isNot;                          /* True if getNextNode() sees a unary - */
  sqlite3_context *pCtx;              /* Write error message here */
  int nNest;                          /* Number of nested brackets */
................................................................................
*/
static void *fts3MallocZero(int nByte){
  void *pRet = sqlite3_malloc(nByte);
  if( pRet ) memset(pRet, 0, nByte);
  return pRet;
}





























/*
** Extract the next token from buffer z (length n) using the tokenizer
** and other information (column names etc.) in pParse. Create an Fts3Expr
** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
** single token and set *ppExpr to point to it. If the end of the buffer is
** reached before a token is found, set *ppExpr to zero. It is the
................................................................................
  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  sqlite3_tokenizer_cursor *pCursor;
  Fts3Expr *pRet = 0;
  int nConsumed = 0;

  rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
  if( rc==SQLITE_OK ){
    const char *zToken;
    int nToken, iStart, iEnd, iPosition;
    int nByte;                               /* total space to allocate */

    pCursor->pTokenizer = pTokenizer;
    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);

    if( rc==SQLITE_OK ){
      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)fts3MallocZero(nByte);
      if( !pRet ){
        rc = SQLITE_NOMEM;
      }else{
        pRet->eType = FTSQUERY_PHRASE;
................................................................................
  **
  **   Buffer zTemp: Contains copies of all tokens.
  **
  ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
  ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
  ** structures.
  */

  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
  if( rc==SQLITE_OK ){
    int ii;
    pCursor->pTokenizer = pTokenizer;
    for(ii=0; rc==SQLITE_OK; ii++){
      const char *zByte;
      int nByte, iBegin, iEnd, iPos;
      rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
      if( rc==SQLITE_OK ){
        Fts3PhraseToken *pToken;

................................................................................
** that appears on the left-hand-side of the MATCH operator (the default
** column to match against for tokens for which a column name is not explicitly
** specified as part of the query string), or -1 if tokens may by default
** match any table column.
*/
int sqlite3Fts3ExprParse(
  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */

  char **azCol,                       /* Array of column names for fts3 table */
  int bFts4,                          /* True to allow FTS4-only syntax */
  int nCol,                           /* Number of entries in azCol[] */
  int iDefaultCol,                    /* Default column to query */
  const char *z, int n,               /* Text of MATCH query */
  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
){
  int nParsed;
  int rc;
  ParseContext sParse;


  sParse.pTokenizer = pTokenizer;

  sParse.azCol = (const char **)azCol;
  sParse.nCol = nCol;
  sParse.iDefaultCol = iDefaultCol;
  sParse.nNest = 0;
  sParse.bFts4 = bFts4;
  if( z==0 ){
    *ppExpr = 0;
    return SQLITE_OK;
  }
  if( n<0 ){
    n = (int)strlen(z);
................................................................................
    goto exprtest_out;
  }
  for(ii=0; ii<nCol; ii++){
    azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
  }

  rc = sqlite3Fts3ExprParse(
      pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
  );
  if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
    sqlite3_result_error(context, "Error parsing expression", -1);
  }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
    sqlite3_result_error_nomem(context);
  }else{
    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);

New version (hunks at lines 88, 124, 178, 290, 767, 975):
**   FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
**   FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
**   zero.
*/
typedef struct ParseContext ParseContext;
struct ParseContext {
  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
  int iLangid;                        /* Language id used with tokenizer */
  const char **azCol;                 /* Array of column names for fts3 table */
  int bFts4;                          /* True to allow FTS4-only syntax */
  int nCol;                           /* Number of entries in azCol[] */
  int iDefaultCol;                    /* Default column to query */
  int isNot;                          /* True if getNextNode() sees a unary - */
  sqlite3_context *pCtx;              /* Write error message here */
  int nNest;                          /* Number of nested brackets */
................................................................................
*/
static void *fts3MallocZero(int nByte){
  void *pRet = sqlite3_malloc(nByte);
  if( pRet ) memset(pRet, 0, nByte);
  return pRet;
}

int sqlite3Fts3OpenTokenizer(
  sqlite3_tokenizer *pTokenizer,
  int iLangid,
  const char *z,
  int n,
  sqlite3_tokenizer_cursor **ppCsr
){
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  sqlite3_tokenizer_cursor *pCsr = 0;
  int rc;

  rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
  assert( rc==SQLITE_OK || pCsr==0 );
  if( rc==SQLITE_OK ){
    pCsr->pTokenizer = pTokenizer;
    if( pModule->iVersion>=1 ){
      rc = pModule->xLanguageid(pCsr, iLangid);
      if( rc!=SQLITE_OK ){
        pModule->xClose(pCsr);
        pCsr = 0;
      }
    }
  }
  *ppCsr = pCsr;
  return rc;
}


/*
** Extract the next token from buffer z (length n) using the tokenizer
** and other information (column names etc.) in pParse. Create an Fts3Expr
** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
** single token and set *ppExpr to point to it. If the end of the buffer is
** reached before a token is found, set *ppExpr to zero. It is the
................................................................................
  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  sqlite3_tokenizer_cursor *pCursor;
  Fts3Expr *pRet = 0;
  int nConsumed = 0;

  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
  if( rc==SQLITE_OK ){
    const char *zToken;
    int nToken, iStart, iEnd, iPosition;
    int nByte;                               /* total space to allocate */


    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);

    if( rc==SQLITE_OK ){
      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)fts3MallocZero(nByte);
      if( !pRet ){
        rc = SQLITE_NOMEM;
      }else{
        pRet->eType = FTSQUERY_PHRASE;
................................................................................
  **
  **   Buffer zTemp: Contains copies of all tokens.
  **
  ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
  ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
  ** structures.
  */
  rc = sqlite3Fts3OpenTokenizer(
      pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
  if( rc==SQLITE_OK ){
    int ii;

    for(ii=0; rc==SQLITE_OK; ii++){
      const char *zByte;
      int nByte, iBegin, iEnd, iPos;
      rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
      if( rc==SQLITE_OK ){
        Fts3PhraseToken *pToken;

................................................................................
** that appears on the left-hand-side of the MATCH operator (the default
** column to match against for tokens for which a column name is not explicitly
** specified as part of the query string), or -1 if tokens may by default
** match any table column.
*/
int sqlite3Fts3ExprParse(
  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
  int iLangid,                        /* Language id for tokenizer */
  char **azCol,                       /* Array of column names for fts3 table */
  int bFts4,                          /* True to allow FTS4-only syntax */
  int nCol,                           /* Number of entries in azCol[] */
  int iDefaultCol,                    /* Default column to query */
  const char *z, int n,               /* Text of MATCH query */
  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
){
  int nParsed;
  int rc;
  ParseContext sParse;

  memset(&sParse, 0, sizeof(ParseContext));
  sParse.pTokenizer = pTokenizer;
  sParse.iLangid = iLangid;
  sParse.azCol = (const char **)azCol;
  sParse.nCol = nCol;
  sParse.iDefaultCol = iDefaultCol;

  sParse.bFts4 = bFts4;
  if( z==0 ){
    *ppExpr = 0;
    return SQLITE_OK;
  }
  if( n<0 ){
    n = (int)strlen(z);
................................................................................
    goto exprtest_out;
  }
  for(ii=0; ii<nCol; ii++){
    azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
  }

  rc = sqlite3Fts3ExprParse(
      pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
  );
  if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
    sqlite3_result_error(context, "Error parsing expression", -1);
  }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
    sqlite3_result_error_nomem(context);
  }else{
    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);

Changes to ext/fts3/fts3_snippet.c.

Old version (hunks at lines 528, 563, 627, 653, 1386):
** This is done as part of extracting the snippet text, not when selecting
** the snippet. Snippet selection is done based on doclists only, so there
** is no way for fts3BestSnippet() to know whether or not the document 
** actually contains terms that follow the final highlighted term. 
*/
static int fts3SnippetShift(
  Fts3Table *pTab,                /* FTS3 table snippet comes from */

  int nSnippet,                   /* Number of tokens desired for snippet */
  const char *zDoc,               /* Document text to extract snippet from */
  int nDoc,                       /* Size of buffer zDoc in bytes */
  int *piPos,                     /* IN/OUT: First token of snippet */
  u64 *pHlmask                    /* IN/OUT: Mask of tokens to highlight */
){
  u64 hlmask = *pHlmask;          /* Local copy of initial highlight-mask */
................................................................................
      sqlite3_tokenizer_module *pMod;
      sqlite3_tokenizer_cursor *pC;
      pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;

      /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
      ** or more tokens in zDoc/nDoc.
      */
      rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      pC->pTokenizer = pTab->pTokenizer;
      while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
        const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
        rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
      }
      pMod->xClose(pC);
      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }

................................................................................
    }
    return SQLITE_OK;
  }
  nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);

  /* Open a token cursor on the document. */
  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
  rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pC->pTokenizer = pTab->pTokenizer;

  while( rc==SQLITE_OK ){
    int iBegin;                   /* Offset in zDoc of start of token */
    int iFin;                     /* Offset in zDoc of end of token */
    int isHighlight;              /* True for highlighted terms */

    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
................................................................................
      }
      break;
    }
    if( iCurrent<iPos ){ continue; }

    if( !isShiftDone ){
      int n = nDoc - iBegin;

      rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);

      isShiftDone = 1;

      /* Now that the shift has been done, check if the initial "..." are
      ** required. They are required if (a) this is not the first fragment,
      ** or (b) this fragment does not begin at position 0 of its column. 
      */
      if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
................................................................................
        continue;
      }
      rc = SQLITE_NOMEM;
      goto offsets_out;
    }

    /* Initialize a tokenizer iterator to iterate through column iCol. */

    rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);

    if( rc!=SQLITE_OK ) goto offsets_out;
    pC->pTokenizer = pTab->pTokenizer;

    rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
    while( rc==SQLITE_OK ){
      int i;                      /* Used to loop through terms */
      int iMinPos = 0x7FFFFFFF;   /* Position of next token */
      TermOffset *pTerm = 0;      /* TermOffset associated with next token */

New version (hunks at lines 528, 564, 627, 652, 1387):
** This is done as part of extracting the snippet text, not when selecting
** the snippet. Snippet selection is done based on doclists only, so there
** is no way for fts3BestSnippet() to know whether or not the document 
** actually contains terms that follow the final highlighted term. 
*/
static int fts3SnippetShift(
  Fts3Table *pTab,                /* FTS3 table snippet comes from */
  int iLangid,                    /* Language id to use in tokenizing */
  int nSnippet,                   /* Number of tokens desired for snippet */
  const char *zDoc,               /* Document text to extract snippet from */
  int nDoc,                       /* Size of buffer zDoc in bytes */
  int *piPos,                     /* IN/OUT: First token of snippet */
  u64 *pHlmask                    /* IN/OUT: Mask of tokens to highlight */
){
  u64 hlmask = *pHlmask;          /* Local copy of initial highlight-mask */
................................................................................
      sqlite3_tokenizer_module *pMod;
      sqlite3_tokenizer_cursor *pC;
      pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;

      /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
      ** or more tokens in zDoc/nDoc.
      */
      rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
      if( rc!=SQLITE_OK ){
        return rc;
      }

      while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
        const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
        rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
      }
      pMod->xClose(pC);
      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }

................................................................................
    }
    return SQLITE_OK;
  }
  nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);

  /* Open a token cursor on the document. */
  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
  rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
  if( rc!=SQLITE_OK ){
    return rc;
  }


  while( rc==SQLITE_OK ){
    int iBegin;                   /* Offset in zDoc of start of token */
    int iFin;                     /* Offset in zDoc of end of token */
    int isHighlight;              /* True for highlighted terms */

    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
................................................................................
      }
      break;
    }
    if( iCurrent<iPos ){ continue; }

    if( !isShiftDone ){
      int n = nDoc - iBegin;
      rc = fts3SnippetShift(
          pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
      );
      isShiftDone = 1;

      /* Now that the shift has been done, check if the initial "..." are
      ** required. They are required if (a) this is not the first fragment,
      ** or (b) this fragment does not begin at position 0 of its column. 
      */
      if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
................................................................................
        continue;
      }
      rc = SQLITE_NOMEM;
      goto offsets_out;
    }

    /* Initialize a tokenizer iterator to iterate through column iCol. */
    rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
        zDoc, nDoc, &pC
    );
    if( rc!=SQLITE_OK ) goto offsets_out;


    rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
    while( rc==SQLITE_OK ){
      int i;                      /* Used to loop through terms */
      int iMinPos = 0x7FFFFFFF;   /* Position of next token */
      TermOffset *pTerm = 0;      /* TermOffset associated with next token */

Changes to ext/fts3/fts3_test.c.

Old version (hunks at lines 9, 310):
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file is not part of the production FTS code. It is only used for
** testing. It contains a Tcl command that can be used to test if a document
** matches an FTS NEAR expression.



*/

#include <tcl.h>
#include <string.h>
#include <assert.h>

#ifdef SQLITE_TEST
................................................................................

  Tcl_SetObjResult(interp, pRet);
  Tcl_DecrRefCount(pRet);
#endif
  return TCL_OK;
}


































































































































































































int Sqlitetestfts3_Init(Tcl_Interp *interp){
  Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
  Tcl_CreateObjCommand(interp, 
      "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
  );



  return TCL_OK;
}
#endif                  /* ifdef SQLITE_TEST */

New version (hunks at lines 9, 313):
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file is not part of the production FTS code. It is only used for
** testing. It contains a Tcl command that can be used to test if a document
** matches an FTS NEAR expression.
**
** As of March 2012, it also contains a version 1 tokenizer used for testing
** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly.
*/

#include <tcl.h>
#include <string.h>
#include <assert.h>

#ifdef SQLITE_TEST
................................................................................

  Tcl_SetObjResult(interp, pRet);
  Tcl_DecrRefCount(pRet);
#endif
  return TCL_OK;
}

/**************************************************************************
** Beginning of test tokenizer code.
**
** For language 0, this tokenizer is similar to the default 'simple' 
** tokenizer. For other languages L, the following:
**
**   * Odd numbered languages are case-sensitive. Even numbered 
**     languages are not.
**
**   * Language ids 100 or greater are considered an error.
**
** The implementation assumes that the input contains only ASCII characters
** (i.e. those that may be encoded in UTF-8 using a single byte).
*/
typedef struct test_tokenizer {
  sqlite3_tokenizer base;
} test_tokenizer;

typedef struct test_tokenizer_cursor {
  sqlite3_tokenizer_cursor base;
  const char *aInput;          /* Input being tokenized */
  int nInput;                  /* Size of the input in bytes */
  int iInput;                  /* Current offset in aInput */
  int iToken;                  /* Index of next token to be returned */
  char *aBuffer;               /* Buffer containing current token */
  int nBuffer;                 /* Number of bytes allocated at pToken */
  int iLangid;                 /* Configured language id */
} test_tokenizer_cursor;

static int testTokenizerCreate(
  int argc, const char * const *argv,
  sqlite3_tokenizer **ppTokenizer
){
  test_tokenizer *pNew;

  pNew = sqlite3_malloc(sizeof(test_tokenizer));
  if( !pNew ) return SQLITE_NOMEM;
  memset(pNew, 0, sizeof(test_tokenizer));

  *ppTokenizer = (sqlite3_tokenizer *)pNew;
  return SQLITE_OK;
}

static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
  test_tokenizer *p = (test_tokenizer *)pTokenizer;
  sqlite3_free(p);
  return SQLITE_OK;
}

static int testTokenizerOpen(
  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
  const char *pInput, int nBytes,        /* String to be tokenized */
  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
){
  int rc = SQLITE_OK;                    /* Return code */
  test_tokenizer_cursor *pCsr;           /* New cursor object */

  UNUSED_PARAMETER(pTokenizer);

  pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
  if( pCsr==0 ){
    rc = SQLITE_NOMEM;
  }else{
    memset(pCsr, 0, sizeof(test_tokenizer_cursor));
    pCsr->aInput = pInput;
    if( nBytes<0 ){
      pCsr->nInput = strlen(pInput);
    }else{
      pCsr->nInput = nBytes;
    }
  }

  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
  return rc;
}

static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
  test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
  sqlite3_free(pCsr->aBuffer);
  sqlite3_free(pCsr);
  return SQLITE_OK;
}

static int testIsTokenChar(char c){
  return (c>='a' && c<='z') || (c>='A' && c<='Z');
}
static int testTolower(char c){
  char ret = c;
  if( ret>='A' && ret<='Z') ret = ret - ('A'-'a');
  return ret;
}

static int testTokenizerNext(
  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by testTokenizerOpen */
  const char **ppToken,               /* OUT: *ppToken is the token text */
  int *pnBytes,                       /* OUT: Number of bytes in token */
  int *piStartOffset,                 /* OUT: Starting offset of token */
  int *piEndOffset,                   /* OUT: Ending offset of token */
  int *piPosition                     /* OUT: Position integer of token */
){
  test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
  int rc = SQLITE_OK;
  const char *p;
  const char *pEnd;

  p = &pCsr->aInput[pCsr->iInput];
  pEnd = &pCsr->aInput[pCsr->nInput];

  /* Skip past any white-space */
  assert( p<=pEnd );
  while( p<pEnd && testIsTokenChar(*p)==0 ) p++;

  if( p==pEnd ){
    rc = SQLITE_DONE;
  }else{
    /* Advance to the end of the token */
    const char *pToken = p;
    int nToken;
    while( p<pEnd && testIsTokenChar(*p) ) p++;
    nToken = p-pToken;

    /* Copy the token into the buffer */
    if( nToken>pCsr->nBuffer ){
      sqlite3_free(pCsr->aBuffer);
      pCsr->aBuffer = sqlite3_malloc(nToken);
    }
    if( pCsr->aBuffer==0 ){
      rc = SQLITE_NOMEM;
    }else{
      int i;

      if( pCsr->iLangid & 0x00000001 ){
        for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
      }else{
        for(i=0; i<nToken; i++) pCsr->aBuffer[i] = testTolower(pToken[i]);
      }
      pCsr->iToken++;
      pCsr->iInput = p - pCsr->aInput;

      *ppToken = pCsr->aBuffer;
      *pnBytes = nToken;
      *piStartOffset = pToken - pCsr->aInput;
      *piEndOffset = p - pCsr->aInput;
      *piPosition = pCsr->iToken;
    }
  }

  return rc;
}

static int testTokenizerLanguage(
  sqlite3_tokenizer_cursor *pCursor,
  int iLangid
){
  int rc = SQLITE_OK;
  test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
  pCsr->iLangid = iLangid;
  if( pCsr->iLangid>=100 ){
    rc = SQLITE_ERROR;
  }
  return rc;
}

static int fts3_test_tokenizer_cmd(
  ClientData clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  static const sqlite3_tokenizer_module testTokenizerModule = {
    1,
    testTokenizerCreate,
    testTokenizerDestroy,
    testTokenizerOpen,
    testTokenizerClose,
    testTokenizerNext,
    testTokenizerLanguage
  };
  const sqlite3_tokenizer_module *pPtr = &testTokenizerModule;
  if( objc!=1 ){
    Tcl_WrongNumArgs(interp, 1, objv, "");
    return TCL_ERROR;
  }
  Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
    (const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *)
  ));
  return TCL_OK;
}

/* 
** End of tokenizer code.
**************************************************************************/ 

int Sqlitetestfts3_Init(Tcl_Interp *interp){
  Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
  Tcl_CreateObjCommand(interp, 
      "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
  );
  Tcl_CreateObjCommand(
      interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
  );
  return TCL_OK;
}
#endif                  /* ifdef SQLITE_TEST */

Changes to ext/fts3/fts3_tokenizer.c.

Old version (lines 284-302):
  Tcl_IncrRefCount(pRet);

  if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
    zErr = "error in xCreate()";
    goto finish;
  }
  pTokenizer->pModule = p;
  if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
    zErr = "error in xOpen()";
    goto finish;
  }
  pCsr->pTokenizer = pTokenizer;

  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
    zToken = &zInput[iStart];
    nToken = iEnd-iStart;
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));

New version (lines 284-301):
  Tcl_IncrRefCount(pRet);

  if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
    zErr = "error in xCreate()";
    goto finish;
  }
  pTokenizer->pModule = p;
  if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
    zErr = "error in xOpen()";
    goto finish;
  }


  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
    zToken = &zInput[iStart];
    nToken = iEnd-iStart;
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));

Changes to ext/fts3/fts3_tokenizer.h.

Old version (hunks at lines 48, 129):
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;

struct sqlite3_tokenizer_module {

  /*
  ** Structure version. Should always be set to 0.
  */
  int iVersion;

  /*
  ** Create a new tokenizer. The values in the argv[] array are the
  ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
  ** TABLE statement that created the fts3 table. For example, if
................................................................................
  int (*xNext)(
    sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
    const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
    int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
    int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
    int *piPosition      /* OUT: Number of tokens returned before this one */
  );









};

struct sqlite3_tokenizer {
  const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
  /* Tokenizer implementations will typically add additional fields */
};

New version (hunks at lines 48, 129):
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;

struct sqlite3_tokenizer_module {

  /*
  ** Structure version. Should always be set to 0 or 1.
  */
  int iVersion;

  /*
  ** Create a new tokenizer. The values in the argv[] array are the
  ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
  ** TABLE statement that created the fts3 table. For example, if
................................................................................
  int (*xNext)(
    sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
    const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
    int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
    int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
    int *piPosition      /* OUT: Number of tokens returned before this one */
  );

  /***********************************************************************
  ** Methods below this point are only available if iVersion>=1.
  */

  /* 
  ** Configure the language id of a tokenizer cursor.
  */
  int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
};

struct sqlite3_tokenizer {
  const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
  /* Tokenizer implementations will typically add additional fields */
};

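For a tokenizer implementation, adopting the extended interface amounts to setting iVersion to 1 and supplying an xLanguageid callback; the test tokenizer added to fts3_test.c above does exactly this. The fragment below is a minimal illustrative sketch of that pattern, not part of this check-in: my_cursor, myLanguageid, myCreate, myDestroy, myOpen, myClose and myNext are hypothetical names standing in for a real tokenizer's routines.

  #include "sqlite3.h"
  #include "fts3_tokenizer.h"

  /* Hypothetical tokenizer routines, assumed to be defined elsewhere. */
  int myCreate(int, const char *const*, sqlite3_tokenizer**);
  int myDestroy(sqlite3_tokenizer*);
  int myOpen(sqlite3_tokenizer*, const char*, int, sqlite3_tokenizer_cursor**);
  int myClose(sqlite3_tokenizer_cursor*);
  int myNext(sqlite3_tokenizer_cursor*, const char**, int*, int*, int*, int*);

  /* Hypothetical cursor: a real tokenizer adds its own state after "base". */
  typedef struct my_cursor {
    sqlite3_tokenizer_cursor base;     /* Base class, must come first */
    int iLangid;                       /* Language id set by xLanguageid() */
  } my_cursor;

  /* New in version 1: record the language id on the cursor so that xNext()
  ** can vary its behaviour per language. FTS3 calls this through
  ** sqlite3Fts3OpenTokenizer() immediately after xOpen(). */
  int myLanguageid(sqlite3_tokenizer_cursor *pCursor, int iLangid){
    ((my_cursor *)pCursor)->iLangid = iLangid;
    return SQLITE_OK;
  }

  const sqlite3_tokenizer_module myTokenizerModule = {
    1,                                 /* iVersion: 1 => xLanguageid available */
    myCreate, myDestroy, myOpen, myClose, myNext,
    myLanguageid                       /* Never invoked if iVersion==0 */
  };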
Changes to ext/fts3/fts3_write.c.

Old version (hunks at lines 653, 682, 779, 929, 3098, 3223, 3463):
** pending-terms hash-table. The docid used is that currently stored in
** p->iPrevDocid, and the column is specified by argument iCol.
**
** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
*/
static int fts3PendingTermsAdd(
  Fts3Table *p,                   /* Table into which text will be inserted */

  const char *zText,              /* Text of document to be inserted */
  int iCol,                       /* Column into which text is being inserted */
  u32 *pnWord                     /* OUT: Number of tokens inserted */
){
  int rc;
  int iStart;
  int iEnd;
................................................................................
  ** zText==0. In this case, add zero token entries to the hash table and 
  ** return early. */
  if( zText==0 ){
    *pnWord = 0;
    return SQLITE_OK;
  }

  rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pCsr->pTokenizer = pTokenizer;

  xNext = pModule->xNext;
  while( SQLITE_OK==rc
      && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
  ){
    int i;
    if( iPos>=nWord ) nWord = iPos+1;
................................................................................
** This function is called by the xUpdate() method as part of an INSERT
** operation. It adds entries for each term in the new record to the
** pendingTerms hash table.
**
** Argument apVal is the same as the similarly named argument passed to
** fts3InsertData(). Parameter iDocid is the docid of the new row.
*/
static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){





  int i;                          /* Iterator variable */
  for(i=2; i<p->nColumn+2; i++){
    const char *zText = (const char *)sqlite3_value_text(apVal[i]);
    int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
  }
  return SQLITE_OK;
}
................................................................................
  sqlite3_stmt *pSelect;

  if( *pRC ) return;
  rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
  if( rc==SQLITE_OK ){
    if( SQLITE_ROW==sqlite3_step(pSelect) ){
      int i;
      rc = fts3PendingTermsDocid(p, 
          langidFromSelect(p, pSelect), 
          sqlite3_column_int64(pSelect, 0)
      );
      for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
        const char *zText = (const char *)sqlite3_column_text(pSelect, i);
        rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
        aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_reset(pSelect);
        *pRC = rc;
        return;
      }
................................................................................
        aSzIns = &aSz[p->nColumn+1];
        aSzDel = &aSzIns[p->nColumn+1];
      }
    }

    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
      int iCol;
      rc = fts3PendingTermsDocid(p, 
          langidFromSelect(p, pStmt), sqlite3_column_int64(pStmt, 0)
      );
      aSz[p->nColumn] = 0;
      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
        const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
        rc = fts3PendingTermsAdd(p, z, iCol, &aSz[iCol]);
        aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
      }
      if( p->bHasDocsize ){
        fts3InsertDocsize(&rc, p, aSz);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_finalize(pStmt);
................................................................................
    assert( pCsr->isRequireSeek==0 );
    iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
  
    for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
      const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
      sqlite3_tokenizer_cursor *pTC = 0;
  
      rc = pModule->xOpen(pT, zText, -1, &pTC);
      while( rc==SQLITE_OK ){
        char const *zToken;       /* Buffer containing token */
        int nToken;               /* Number of bytes in token */
        int iDum1, iDum2;         /* Dummy variables */
        int iPos;                 /* Position of token in zText */
  
        pTC->pTokenizer = pT;
        rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
        for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
          Fts3PhraseToken *pPT = pDef->pToken;
          if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
           && (pPT->bFirst==0 || iPos==0)
           && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
           && (0==memcmp(zToken, pPT->z, pPT->n))
................................................................................
    assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER );
    rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel);
    isRemove = 1;
  }
  
  /* If this is an INSERT or UPDATE operation, insert the new record. */
  if( nArg>1 && rc==SQLITE_OK ){

    if( bInsertDone==0 ){
      rc = fts3InsertData(p, apVal, pRowid);
      if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
        rc = FTS_CORRUPT_VTAB;
      }
    }
    if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
      rc = fts3PendingTermsDocid(p, 
          sqlite3_value_int(apVal[2 + p->nColumn + 2]),
          *pRowid
      );
    }
    if( rc==SQLITE_OK ){
      assert( p->iPrevDocid==*pRowid );
      rc = fts3InsertTerms(p, apVal, aSzIns);
    }
    if( p->bHasDocsize ){
      fts3InsertDocsize(&rc, p, aSzIns);
    }
    nChng++;
  }

New version (hunks at lines 653, 683, 779, 934, 3101, 3225, 3464):
** pending-terms hash-table. The docid used is that currently stored in
** p->iPrevDocid, and the column is specified by argument iCol.
**
** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
*/
static int fts3PendingTermsAdd(
  Fts3Table *p,                   /* Table into which text will be inserted */
  int iLangid,                    /* Language id to use */
  const char *zText,              /* Text of document to be inserted */
  int iCol,                       /* Column into which text is being inserted */
  u32 *pnWord                     /* OUT: Number of tokens inserted */
){
  int rc;
  int iStart;
  int iEnd;
................................................................................
  ** zText==0. In this case, add zero token entries to the hash table and 
  ** return early. */
  if( zText==0 ){
    *pnWord = 0;
    return SQLITE_OK;
  }

  rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr);
  if( rc!=SQLITE_OK ){
    return rc;
  }


  xNext = pModule->xNext;
  while( SQLITE_OK==rc
      && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
  ){
    int i;
    if( iPos>=nWord ) nWord = iPos+1;
................................................................................
** This function is called by the xUpdate() method as part of an INSERT
** operation. It adds entries for each term in the new record to the
** pendingTerms hash table.
**
** Argument apVal is the same as the similarly named argument passed to
** fts3InsertData(). Parameter iDocid is the docid of the new row.
*/
static int fts3InsertTerms(
  Fts3Table *p, 
  int iLangid, 
  sqlite3_value **apVal, 
  u32 *aSz
){
  int i;                          /* Iterator variable */
  for(i=2; i<p->nColumn+2; i++){
    const char *zText = (const char *)sqlite3_value_text(apVal[i]);
    int rc = fts3PendingTermsAdd(p, iLangid, zText, i-2, &aSz[i-2]);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
  }
  return SQLITE_OK;
}
................................................................................
  sqlite3_stmt *pSelect;

  if( *pRC ) return;
  rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
  if( rc==SQLITE_OK ){
    if( SQLITE_ROW==sqlite3_step(pSelect) ){
      int i;

      int iLangid = langidFromSelect(p, pSelect);
      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));

      for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
        const char *zText = (const char *)sqlite3_column_text(pSelect, i);
        rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]);
        aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_reset(pSelect);
        *pRC = rc;
        return;
      }
................................................................................
        aSzIns = &aSz[p->nColumn+1];
        aSzDel = &aSzIns[p->nColumn+1];
      }
    }

    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
      int iCol;
      int iLangid = langidFromSelect(p, pStmt);
      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));

      aSz[p->nColumn] = 0;
      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
        const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
        rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
        aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
      }
      if( p->bHasDocsize ){
        fts3InsertDocsize(&rc, p, aSz);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_finalize(pStmt);
................................................................................
    assert( pCsr->isRequireSeek==0 );
    iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
  
    for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
      const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
      sqlite3_tokenizer_cursor *pTC = 0;
  
      rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
      while( rc==SQLITE_OK ){
        char const *zToken;       /* Buffer containing token */
        int nToken;               /* Number of bytes in token */
        int iDum1, iDum2;         /* Dummy variables */
        int iPos;                 /* Position of token in zText */
  

        rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
        for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
          Fts3PhraseToken *pPT = pDef->pToken;
          if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
           && (pPT->bFirst==0 || iPos==0)
           && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
           && (0==memcmp(zToken, pPT->z, pPT->n))
................................................................................
    assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER );
    rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel);
    isRemove = 1;
  }
  
  /* If this is an INSERT or UPDATE operation, insert the new record. */
  if( nArg>1 && rc==SQLITE_OK ){
    int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]);
    if( bInsertDone==0 ){
      rc = fts3InsertData(p, apVal, pRowid);
      if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
        rc = FTS_CORRUPT_VTAB;
      }
    }
    if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
      rc = fts3PendingTermsDocid(p, iLangid, *pRowid);



    }
    if( rc==SQLITE_OK ){
      assert( p->iPrevDocid==*pRowid );
      rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
    }
    if( p->bHasDocsize ){
      fts3InsertDocsize(&rc, p, aSzIns);
    }
    nChng++;
  }

Changes to test/fts4langid.test.

Old version (hunks at lines 36, 338):
#   2.2.* - Same as 2.1.*, after an 'optimize' command.
#
#   2.3.* - Same as 2.1.*, after a 'rebuild' command.
#
#   3.* - Tests with content= tables. Both where there is a real 
#         underlying content table and where there is not.
#
#
#   4.* - Test that if one is provided, the tokenizer xLanguage method
#         is called to configure the tokenizer before tokenizing query
#         or document text.
#
#   5.* - Test the fts4aux table when the associated FTS4 table contains
#         multiple languages.
#
................................................................................
do_test_query1 3.3.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 3.3.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}


















finish_test

New version (hunks at lines 36, 337):
#   2.2.* - Same as 2.1.*, after an 'optimize' command.
#
#   2.3.* - Same as 2.1.*, after a 'rebuild' command.
#
#   3.* - Tests with content= tables. Both where there is a real 
#         underlying content table and where there is not.
#

#   4.* - Test that if one is provided, the tokenizer xLanguage method
#         is called to configure the tokenizer before tokenizing query
#         or document text.
#
#   5.* - Test the fts4aux table when the associated FTS4 table contains
#         multiple languages.
#
................................................................................
do_test_query1 3.3.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 3.3.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

#-------------------------------------------------------------------------
# Test cases 4.*
#
proc build_multilingual_db_2 {db} {
  $db eval {
    CREATE VIRTUAL TABLE t4 USING fts4(
        tokenize=testtokenizer, 
        languageid=lid
    );
  }
  for {set i 0} {$i < 50} {incr i} {
    execsql { 
      INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i) 
    }
  }
}

do_test 4.1.0 {
  reset_db
  set ptr [fts3_test_tokenizer]
  execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) }
  build_multilingual_db_2 db
} {}
do_execsql_test 4.1.1 {
  SELECT docid FROM t4 WHERE t4 MATCH 'quick';
} {0}
do_execsql_test 4.1.2 {
  SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1;
} {}
do_execsql_test 4.1.3 {
  SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1;
} {1}
for {set i 0} {$i < 50} {incr i} {
  do_execsql_test 4.1.4.$i {
    SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i;
  } [expr 0==($i%2)]
}
do_catchsql_test 4.1.5 {
  INSERT INTO t4(content, lid) VALUES('hello world', 101)
} {1 {SQL logic error or missing database}}

finish_test

Changes to test/permutations.test.

Old version (lines 180-195):
  fts3defer.test fts3defer2.test fts3e.test fts3expr.test fts3expr2.test 
  fts3near.test fts3query.test fts3shared.test fts3snippet.test 
  fts3sort.test
  fts3fault.test fts3malloc.test fts3matchinfo.test
  fts3aux1.test fts3comp1.test fts3auto.test
  fts4aa.test fts4content.test
  fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
  fts3corrupt2.test
  fts3first.test
}


lappend ::testsuitelist xxx
#-------------------------------------------------------------------------
# Define the coverage related test suites:
#

New version (lines 180-194):
  fts3defer.test fts3defer2.test fts3e.test fts3expr.test fts3expr2.test 
  fts3near.test fts3query.test fts3shared.test fts3snippet.test 
  fts3sort.test
  fts3fault.test fts3malloc.test fts3matchinfo.test
  fts3aux1.test fts3comp1.test fts3auto.test
  fts4aa.test fts4content.test
  fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
  fts3corrupt2.test fts3first.test fts4langid.test

}


lappend ::testsuitelist xxx
#-------------------------------------------------------------------------
# Define the coverage related test suites:
#