/ Check-in [11fa9808]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a problem handling OOM conditions within fts5 queries that feature synonyms.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5-incompatible
Files: files | file ages | folders
SHA1: 11fa980897c6c7be218bbd9c4cd8253272d2c300
User & Date: dan 2015-09-02 14:17:38
Context
2015-09-02
17:34
Fix a problem with fts5 synonyms and phrase queries. Also fix an OOM handling bug in fts5. check-in: a4c35fa2 user: dan tags: fts5-incompatible
14:17
Fix a problem handling OOM conditions within fts5 queries that feature synonyms. check-in: 11fa9808 user: dan tags: fts5-incompatible
08:22
Fix a problem with fts5 synonyms and the xQueryPhrase() auxiliary function API. check-in: cf3e45e7 user: dan tags: fts5-incompatible
Changes
Hide Diffs Unified Diffs Show Whitespace Changes Patch

Changes to ext/fts5/fts5_expr.c.

1463
1464
1465
1466
1467
1468
1469

1470
1471
1472
1473
1474
1475
1476
....
1481
1482
1483
1484
1485
1486
1487



1488
1489
1490
1491
1492
1493


1494
1495
1496
1497
1498

1499
1500
1501
1502
1503
1504
1505
1506
1507
1508


1509
1510
1511
1512
1513


1514
1515
1516
1517



1518
1519
1520
1521
1522
1523
1524
....
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
....
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
  }
  return pRet;
}

/*
** Context object handed (as the void* pContext argument) to the
** fts5ParseTokenize() callback.
*/
typedef struct TokenCtx TokenCtx;
struct TokenCtx {
  Fts5ExprPhrase *pPhrase;        /* Phrase built so far; NULL until the first term */

};

/*
** Callback for tokenizing terms used by ParseTerm().
**
** Adds the token (pToken/nToken) to the phrase held in the TokenCtx that
** pContext points to, creating or enlarging the phrase as required:
**
**   * If a phrase already exists and tflags contains FTS5_TOKEN_COLOCATED,
**     the token is linked into the pSynonym list of the term most recently
**     appended to the phrase.
**
**   * Otherwise the token is appended to the phrase as a new term.
**
** Returns SQLITE_OK on success. Returns SQLITE_NOMEM immediately if one of
** the allocations made directly by this function fails; otherwise returns
** whatever value sqlite3Fts5Strndup() leaves in rc.
*/
static int fts5ParseTokenize(
  void *pContext,                 /* Pointer to TokenCtx object */
................................................................................
  int iUnused2                    /* End offset of token */
){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;          /* aTerm[] grows by this many slots at a time */
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;




  assert( pPhrase==0 || pPhrase->nTerm>0 );
  if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
    /* Synonym. The Fts5ExprTerm and a copy of the token text share a single
    ** allocation; the extra byte, zeroed by the memset(), nul-terminates
    ** the text. */
    Fts5ExprTerm *pSyn;
    int nByte = sizeof(Fts5ExprTerm) + nToken+1;
    pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
    if( pSyn==0 ) return SQLITE_NOMEM;


    memset(pSyn, 0, nByte);
    pSyn->zTerm = (char*)&pSyn[1];
    memcpy(pSyn->zTerm, pToken, nToken);
    /* Push the new synonym onto the head of the previous term's list */
    pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
    pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;

  }else{
    Fts5ExprTerm *pTerm;
    /* Space is always allocated in multiples of SZALLOC terms, so aTerm[]
    ** is full whenever nTerm is a multiple of SZALLOC. In that case, or if
    ** there is no phrase yet, make room for SZALLOC more terms. */
    if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
      Fts5ExprPhrase *pNew;
      int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

      pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
          sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
      );
      /* pCtx->pPhrase is not modified on this error path */
      if( pNew==0 ) return SQLITE_NOMEM;


      /* A brand new phrase starts with a zeroed header */
      if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
      pCtx->pPhrase = pPhrase = pNew;
      /* nNew-SZALLOC is the number of terms already stored (0 if new) */
      pNew->nTerm = nNew - SZALLOC;
    }



    /* Claim the next slot, zero it and store a copy of the token text.
    ** sqlite3Fts5Strndup() is handed &rc, through which it can report
    ** failure. */
    pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
    memset(pTerm, 0, sizeof(Fts5ExprTerm));
    pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
  }



  return rc;
}


/*
** Free the phrase object passed as the only argument.
*/
................................................................................
    int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0);
    int n;
    sqlite3Fts5Dequote(z);
    n = strlen(z);
    rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
  }
  sqlite3_free(z);
  if( rc ){
    pParse->rc = rc;
    fts5ExprPhraseFree(sCtx.pPhrase);
    sCtx.pPhrase = 0;
  }else if( sCtx.pPhrase ){

    if( pAppend==0 ){
      if( (pParse->nPhrase % 8)==0 ){
................................................................................
  int i;                          /* Used to iterate through phrase terms */

  Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */
  Fts5ExprPhrase **apPhrase;      /* pNew->apPhrase */
  Fts5ExprNode *pNode;            /* pNew->pRoot */
  Fts5ExprNearset *pNear;         /* pNew->pRoot->pNear */

  TokenCtx sCtx = {0};            /* Context object for fts5ParseTokenize */


  pOrig = pExpr->apExprPhrase[iPhrase];

  pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
  if( rc==SQLITE_OK ){
    pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 







>







 







>
>
>





|
>
>





>









|
>
>




|
>
>




>
>
>







 







|







 







|







1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
....
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
....
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
....
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
  }
  return pRet;
}

/*
** Context object handed (as the void* pContext argument) to the
** fts5ParseTokenize() callback.
*/
typedef struct TokenCtx TokenCtx;
struct TokenCtx {
  Fts5ExprPhrase *pPhrase;        /* Phrase built so far; NULL until the first term */
  int rc;                         /* Result of last callback; once non-zero, later calls do nothing */
};

/*
** Callback for tokenizing terms used by ParseTerm().
**
** Adds the token (pToken/nToken) to the phrase held in the TokenCtx that
** pContext points to, creating or enlarging the phrase as required:
**
**   * If a phrase already exists and tflags contains FTS5_TOKEN_COLOCATED,
**     the token is linked into the pSynonym list of the term most recently
**     appended to the phrase.
**
**   * Otherwise the token is appended to the phrase as a new term.
**
** The result code is both returned and saved in TokenCtx.rc. Once
** TokenCtx.rc is non-zero every later invocation returns that value
** without touching the phrase, so the code that started the tokenizer can
** detect the failure by inspecting TokenCtx.rc, independent of the value
** returned by the tokenizer itself.
*/
static int fts5ParseTokenize(
  void *pContext,                 /* Pointer to TokenCtx object */
................................................................................
  int iUnused2                    /* End offset of token */
){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;          /* aTerm[] grows by this many slots at a time */
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;

  /* If an error has already occurred, this is a no-op */
  if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;

  assert( pPhrase==0 || pPhrase->nTerm>0 );
  if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
    /* Synonym. The Fts5ExprTerm and a copy of the token text share a single
    ** allocation; the extra byte, zeroed by the memset(), nul-terminates
    ** the text. */
    Fts5ExprTerm *pSyn;
    int nByte = sizeof(Fts5ExprTerm) + nToken+1;
    pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
    if( pSyn==0 ){
      rc = SQLITE_NOMEM;
    }else{
      memset(pSyn, 0, nByte);
      pSyn->zTerm = (char*)&pSyn[1];
      memcpy(pSyn->zTerm, pToken, nToken);
      /* Push the new synonym onto the head of the previous term's list */
      pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
      pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
    }
  }else{
    Fts5ExprTerm *pTerm;
    /* Space is always allocated in multiples of SZALLOC terms, so aTerm[]
    ** is full whenever nTerm is a multiple of SZALLOC. In that case, or if
    ** there is no phrase yet, make room for SZALLOC more terms. */
    if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
      Fts5ExprPhrase *pNew;
      int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

      pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
          sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
      );
      if( pNew==0 ){
        /* pCtx->pPhrase is not modified on this error path */
        rc = SQLITE_NOMEM;
      }else{
        /* A brand new phrase starts with a zeroed header */
        if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
        pCtx->pPhrase = pPhrase = pNew;
        /* nNew-SZALLOC is the number of terms already stored (0 if new) */
        pNew->nTerm = nNew - SZALLOC;
      }
    }

    /* Only reached with rc==SQLITE_OK if a free slot is available. Claim
    ** it, zero it and store a copy of the token text. sqlite3Fts5Strndup()
    ** is handed &rc, through which it can report failure. */
    if( rc==SQLITE_OK ){
      pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
      memset(pTerm, 0, sizeof(Fts5ExprTerm));
      pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
    }
  }

  /* Remember the outcome so that subsequent invocations become no-ops
  ** after a failure. */
  pCtx->rc = rc;
  return rc;
}


/*
** Free the phrase object passed as the only argument.
*/
................................................................................
    int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0);
    int n;
    sqlite3Fts5Dequote(z);
    n = strlen(z);
    rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
  }
  sqlite3_free(z);
  if( rc || (rc = sCtx.rc) ){
    pParse->rc = rc;
    fts5ExprPhraseFree(sCtx.pPhrase);
    sCtx.pPhrase = 0;
  }else if( sCtx.pPhrase ){

    if( pAppend==0 ){
      if( (pParse->nPhrase % 8)==0 ){
................................................................................
  int i;                          /* Used to iterate through phrase terms */

  Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */
  Fts5ExprPhrase **apPhrase;      /* pNew->apPhrase */
  Fts5ExprNode *pNode;            /* pNew->pRoot */
  Fts5ExprNearset *pNear;         /* pNew->pRoot->pNear */

  TokenCtx sCtx = {0,0};          /* Context object for fts5ParseTokenize */


  pOrig = pExpr->apExprPhrase[iPhrase];

  pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
  if( rc==SQLITE_OK ){
    pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 

Changes to ext/fts5/fts5_test_mi.c.

348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365



366
367
368
369


370

371
372
373
374
375
376
377
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const char *zArg;
  Fts5MatchinfoCtx *p;
  int rc;

  if( nVal>0 ){
    zArg = (const char*)sqlite3_value_text(apVal[0]);
  }else{
    zArg = "pcx";
  }

  p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
  if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
    p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);



    pApi->xSetAuxdata(pFts, p, sqlite3_free);
    if( p==0 ) return;
  }



  rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);

  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }else{
    /* No error has occurred, so return a copy of the array of integers. */
    int nByte = p->nRet * sizeof(u32);
    sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
  }







|










>
>
>
|
<
|
|
>
>
|
>







348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369

370
371
372
373
374
375
376
377
378
379
380
381
382
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const char *zArg;
  Fts5MatchinfoCtx *p;
  int rc = SQLITE_OK;

  if( nVal>0 ){
    zArg = (const char*)sqlite3_value_text(apVal[0]);
  }else{
    zArg = "pcx";
  }

  p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
  if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
    p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
    if( p==0 ){
      rc = SQLITE_NOMEM;
    }else{
      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);

    }
  }

  if( rc==SQLITE_OK ){
    rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
  }
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }else{
    /* No error has occurred, so return a copy of the array of integers. */
    int nByte = p->nRet * sizeof(u32);
    sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
  }

Changes to ext/fts5/test/fts5_common.tcl.

290
291
292
293
294
295
296
297

































# Concatenate all of the argument lists into one list and return the result
# of passing that list to [sort_poslist].
proc OR {args} {
  set combined [concat {*}$args]
  return [sort_poslist $combined]
}
# Return list $a unchanged when list $b is empty; otherwise return an
# empty list.
proc NOT {a b} {
  if {[llength $b]==0} {
    return $a
  }
  return {}
}










































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
# Concatenate all of the argument lists into one list and return the result
# of passing that list to [sort_poslist].
proc OR {args} {
  set combined [concat {*}$args]
  return [sort_poslist $combined]
}
# Return list $a unchanged when list $b is empty; otherwise return an
# empty list.
proc NOT {a b} {
  if {[llength $b]==0} {
    return $a
  }
  return {}
}

#-------------------------------------------------------------------------
# This command is similar to [split], except that it also provides the
# start and end offsets of each token. For example:
#
#   [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
#

# Strip the leading run of space characters from the variable named by
# $textvar (in the caller's scope) and return how many were removed.
proc gobble_whitespace {textvar} {
  upvar $textvar remaining
  regexp {([ ]*)(.*)} $remaining -> blanks remaining
  set nBlank [string length $blanks]
  return $nBlank
}

# Strip the leading run of non-space characters from the variable named by
# $textvar, store that run in the variable named by $wordvar (both in the
# caller's scope) and return its length.
proc gobble_text {textvar wordvar} {
  upvar $textvar remaining
  upvar $wordvar token
  regexp {([^ ]*)(.*)} $remaining -> token remaining
  set nChar [string length $token]
  return $nChar
}

# Split $text into tokens separated by runs of space characters. Returns a
# flat list with three elements per token: the token text, its start offset
# and its end offset (start offset plus token length) within $text.
#
#   [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
#
# Relies on [gobble_whitespace] and [gobble_text] to consume the input.
proc fts5_tokenize_split {text} {
  set ret [list]
  set iOff [gobble_whitespace text]
  # gobble_text returns 0 once no token remains, which ends the loop.
  while {[set nToken [gobble_text text word]]} {
    # Braced so that the expression is substituted only once.
    set iEnd [expr {$iOff + $nToken}]
    lappend ret $word $iOff $iEnd
    set iOff [expr {$iEnd + [gobble_whitespace text]}]
  }

  return $ret
}

Changes to ext/fts5/test/fts5fault6.test.

143
144
145
146
147
148
149
150


































































151
152
} -body {
  db eval { 
    CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc");
  }
} -test {
  faultsim_test_result {0 {}}
}



































































finish_test









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
} -body {
  db eval { 
    CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc");
  }
} -test {
  faultsim_test_result {0 {}}
}

#-------------------------------------------------------------------------
# OOM while running a query that includes synonyms and matchinfo().
#
# Decode $blob as a sequence of 32-bit integers in the byte order of the
# host platform and return them as a list.
proc mit {blob} {
  set fmt [dict get {littleEndian i* bigEndian I*} $::tcl_platform(byteOrder)]
  binary scan $blob $fmt ints
  return $ints
}
# Tokenizer callback. Reports each token returned by [fts5_tokenize_split]
# through [sqlite3_fts5_token]. When $tflags is "query", every
# single-character token is also reported with -colo as the same character
# repeated 2, 3, 4, 5 and 6 times, using the same offsets.
proc tcl_tokenize {tflags text} {
  foreach {word iFrom iTo} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $word $iFrom $iTo
    if {$tflags ne "query" || [string length $word]!=1} continue
    foreach nRepeat {2 3 4 5 6} {
      sqlite3_fts5_token -colo [string repeat $word $nRepeat] $iFrom $iTo
    }
  }
}
# Constructor command passed to sqlite3_fts5_create_tokenizer. Ignores its
# arguments and returns the name of the tokenize command.
proc tcl_create {args} {
  set tokenizeCmd "tcl_tokenize"
  return $tokenizeCmd
}
# Register everything the 5.* tests need on a (presumably fresh) database
# handle: the "tcl" tokenizer, the mit SQL function and matchinfo().
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
db func mit mit
sqlite3_fts5_register_matchinfo db

# 5.0: create fts5 table t1 using the tcl tokenizer and insert nine rows.
do_test 5.0 {
  execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl) }
  foreach {rowid text} {
    1 {aaaa cc b aaaaa cc aa} 
    2 {aa aa bb a bbb}
    3 {bb aaaaa aaaaa b aaaa aaaaa}
    4 {aa a b aaaa aa}
    5 {aa b ccc aaaaa cc}
    6 {aa aaaaa bbbb cc aaa}
    7 {aaaaa aa aa ccccc bb}
    8 {ccc bbbbb ccccc bbb c}
    9 {cccccc bbbb a aaa cccc c}
  } {
    execsql { INSERT INTO t1(rowid, a) VALUES($rowid, $text) }
  }
} {}

# Expected output of the query used by 5.1 and 5.2: alternating rowid and
# the matchinfo blob decoded by mit.
set res [list {*}{
  1 {3 24 8 2 12 6}
  5 {2 24 8 2 12 6}
  6 {3 24 8 1 12 6}
  7 {3 24 8 1 12 6}
  9 {2 24 8 3 12 6}
}]
# 5.1: check the query result without any fault injection.
do_execsql_test 5.1 {
  SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
} $res

# 5.2: run the same query under the "oom*" faults. The -prep script
# re-registers the tokenizer, matchinfo() and mit on the restored
# connection; the outcome is checked by faultsim_test_result against
# [list 0 $::res].
faultsim_save_and_close
do_faultsim_test 5.2 -faults oom* -prep {
  faultsim_restore_and_reopen
  sqlite3_fts5_create_tokenizer db tcl tcl_create
  sqlite3_fts5_register_matchinfo db
  db func mit mit
} -body {
  db eval { 
    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
  }
} -test {
  faultsim_test_result [list 0 $::res]
}

finish_test

Changes to ext/fts5/test/fts5synonym.test.

36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
..
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
...
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
...
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
...
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
...
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
  foreach s $S {
    set o [list]
    foreach x $S {if {$x!=$s} {lappend o $x}}
    set ::syn($s) $o
  }
}

# Strip the leading run of space characters from the variable named by
# $textvar (in the caller's scope) and return how many were removed.
proc gobble_whitespace {textvar} {
  upvar $textvar remaining
  regexp {([ ]*)(.*)} $remaining -> blanks remaining
  set nBlank [string length $blanks]
  return $nBlank
}

# Strip the leading run of non-space characters from the variable named by
# $textvar, store that run in the variable named by $wordvar (both in the
# caller's scope) and return its length.
proc gobble_text {textvar wordvar} {
  upvar $textvar remaining
  upvar $wordvar token
  regexp {([^ ]*)(.*)} $remaining -> token remaining
  set nChar [string length $token]
  return $nChar
}

# Split $text into tokens separated by runs of space characters. Returns a
# flat list with three elements per token: the token text, its start offset
# and its end offset (start offset plus token length) within $text.
#
#   [do_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
#
# Relies on [gobble_whitespace] and [gobble_text] to consume the input.
proc do_tokenize_split {text} {
  set ret [list]
  set iOff [gobble_whitespace text]
  # gobble_text returns 0 once no token remains, which ends the loop.
  while {[set nToken [gobble_text text word]]} {
    # Braced so that the expression is substituted only once.
    set iEnd [expr {$iOff + $nToken}]
    lappend ret $word $iOff $iEnd
    set iOff [expr {$iEnd + [gobble_whitespace text]}]
  }

  return $ret
}

# Tokenizer callback. Reports each token returned by [do_tokenize_split],
# together with its start and end offsets, through [sqlite3_fts5_token].
# $tflags is not used.
proc tcl_tokenize {tflags text} {
  set tokens [do_tokenize_split $text]
  foreach {word iFrom iTo} $tokens {
    sqlite3_fts5_token $word $iFrom $iTo
  }
}

# Constructor command passed to sqlite3_fts5_create_tokenizer. Ignores its
# arguments and returns the name of the tokenize command.
proc tcl_create {args} {
  set tokenizeCmd "tcl_tokenize"
  return $tokenizeCmd
}
................................................................................

#-------------------------------------------------------------------------
# Test a tokenizer that supports synonyms by adding extra entries to the
# FTS index.
#

proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [do_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="document" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd
      }
    }
  }
................................................................................
#   3.2.*: A tokenizer that reports two identical tokens at the same position.
#          This is allowed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  set bColo 1
  foreach {w iStart iEnd} [do_tokenize_split $text] {
    if {$bColo} {
      sqlite3_fts5_token -colo $w $iStart $iEnd
      set bColo 0
    } {
      sqlite3_fts5_token $w $iStart $iEnd
    }
  }
................................................................................
}

do_execsql_test 3.1.1 {
  INSERT INTO ft(ft) VALUES('integrity-check');
} {}

# Tokenizer callback. Reports each token returned by [do_tokenize_split],
# together with its start and end offsets, through [sqlite3_fts5_token].
# $tflags is not used.
proc tcl_tokenize {tflags text} {
  set tokens [do_tokenize_split $text]
  foreach {word iFrom iTo} $tokens {
    sqlite3_fts5_token $word $iFrom $iTo
  }
}

do_execsql_test 3.1.2 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two three'
} {1}

reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
# Tokenizer callback. Reports each token returned by [do_tokenize_split]
# twice with the same offsets: first as an ordinary token, then again with
# the -colo flag. $tflags is not used.
proc tcl_tokenize {tflags text} {
  foreach {word iFrom iTo} [do_tokenize_split $text] {
    foreach flags {{} -colo} {
      sqlite3_fts5_token {*}$flags $word $iFrom $iTo
    }
  }
}
do_execsql_test 3.2.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('one one two three');
................................................................................

#-------------------------------------------------------------------------
# Check that expressions with synonyms can be parsed and executed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [do_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="query" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd
      }
    }
  }
................................................................................

#-------------------------------------------------------------------------
# Test terms with more than 4 synonyms.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [do_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="query" && [string length $w]==1} {
      for {set i 2} {$i<=10} {incr i} {
        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
      }
    }
  }







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

|







 







|







 







|







 







|











|







 







|







 







|







36
37
38
39
40
41
42


























43
44
45
46
47
48
49
50
51
..
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
...
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
...
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
...
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
...
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
  foreach s $S {
    set o [list]
    foreach x $S {if {$x!=$s} {lappend o $x}}
    set ::syn($s) $o
  }
}



























# Tokenizer callback. Reports each token returned by [fts5_tokenize_split],
# together with its start and end offsets, through [sqlite3_fts5_token].
# $tflags is not used.
proc tcl_tokenize {tflags text} {
  set tokens [fts5_tokenize_split $text]
  foreach {word iFrom iTo} $tokens {
    sqlite3_fts5_token $word $iFrom $iTo
  }
}

# Constructor command passed to sqlite3_fts5_create_tokenizer. Ignores its
# arguments and returns the name of the tokenize command.
proc tcl_create {args} {
  set tokenizeCmd "tcl_tokenize"
  return $tokenizeCmd
}
................................................................................

#-------------------------------------------------------------------------
# Test a tokenizer that supports synonyms by adding extra entries to the
# FTS index.
#

proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="document" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd
      }
    }
  }
................................................................................
#   3.2.*: A tokenizer that reports two identical tokens at the same position.
#          This is allowed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  set bColo 1
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    if {$bColo} {
      sqlite3_fts5_token -colo $w $iStart $iEnd
      set bColo 0
    } {
      sqlite3_fts5_token $w $iStart $iEnd
    }
  }
................................................................................
}

do_execsql_test 3.1.1 {
  INSERT INTO ft(ft) VALUES('integrity-check');
} {}

# Tokenizer callback. Reports each token returned by [fts5_tokenize_split],
# together with its start and end offsets, through [sqlite3_fts5_token].
# $tflags is not used.
proc tcl_tokenize {tflags text} {
  set tokens [fts5_tokenize_split $text]
  foreach {word iFrom iTo} $tokens {
    sqlite3_fts5_token $word $iFrom $iTo
  }
}

do_execsql_test 3.1.2 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two three'
} {1}

reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
# Tokenizer callback. Reports each token returned by [fts5_tokenize_split]
# twice with the same offsets: first as an ordinary token, then again with
# the -colo flag. $tflags is not used.
proc tcl_tokenize {tflags text} {
  foreach {word iFrom iTo} [fts5_tokenize_split $text] {
    foreach flags {{} -colo} {
      sqlite3_fts5_token {*}$flags $word $iFrom $iTo
    }
  }
}
do_execsql_test 3.2.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('one one two three');
................................................................................

#-------------------------------------------------------------------------
# Check that expressions with synonyms can be parsed and executed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="query" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd
      }
    }
  }
................................................................................

#-------------------------------------------------------------------------
# Test terms with more than 4 synonyms.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="query" && [string length $w]==1} {
      for {set i 2} {$i<=10} {incr i} {
        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
      }
    }
  }