SQLite

Check-in [11fa980897]

Overview
Comment: Fix a problem handling OOM conditions within fts5 queries that feature synonyms.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5-incompatible
Files: files | file ages | folders
SHA1: 11fa980897c6c7be218bbd9c4cd8253272d2c300
User & Date: dan 2015-09-02 14:17:38.670
Context
2015-09-02
17:34
Fix a problem with fts5 synonyms and phrase queries. Also fix an OOM handling bug in fts5. (check-in: a4c35fa2c9 user: dan tags: fts5-incompatible)
14:17
Fix a problem handling OOM conditions within fts5 queries that feature synonyms. (check-in: 11fa980897 user: dan tags: fts5-incompatible)
08:22
Fix a problem with fts5 synonyms and the xQueryPhrase() auxiliary function API. (check-in: cf3e45e76d user: dan tags: fts5-incompatible)
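
The fix replaces early "return SQLITE_NOMEM" statements inside the expression parser's tokenize callback with an error code carried in the callback context (the new rc field of TokenCtx in the first diff below); once an allocation fails, later callbacks become no-ops and the caller collects the error after tokenization finishes. The following is a minimal, self-contained sketch of that pattern. It is illustrative only: ExampleCtx, example_token_cb(), example_tokenize() and the EX_* codes are invented names, not part of the SQLite/fts5 sources.

/*
** Sketch only -- not SQLite source. A tokenizer-style callback records an
** OOM error in its context instead of aborting mid-stream; subsequent calls
** become no-ops and the caller drains the code afterwards.
*/
#include <stdlib.h>
#include <string.h>

#define EX_OK    0
#define EX_NOMEM 7                     /* stands in for SQLITE_NOMEM */

typedef struct ExampleCtx ExampleCtx;
struct ExampleCtx {
  char **azTok;                        /* Accumulated copies of tokens */
  int nTok;                            /* Number of entries in azTok[] */
  int rc;                              /* First error seen, or EX_OK */
};

/* Invoked once per token by the tokenizer driver. */
static int example_token_cb(void *pCtx, const char *pTok, int nTok){
  ExampleCtx *p = (ExampleCtx*)pCtx;

  /* If an error has already occurred, this is a no-op. */
  if( p->rc!=EX_OK ) return p->rc;

  char **azNew = (char**)realloc(p->azTok, sizeof(char*)*(p->nTok+1));
  if( azNew==0 ){
    p->rc = EX_NOMEM;                  /* record the error for the caller */
  }else{
    p->azTok = azNew;
    char *zCopy = (char*)malloc(nTok+1);
    if( zCopy==0 ){
      p->rc = EX_NOMEM;
    }else{
      memcpy(zCopy, pTok, nTok);
      zCopy[nTok] = '\0';
      p->azTok[p->nTok++] = zCopy;
    }
  }
  return p->rc;
}

/* Driver: split zText on spaces, invoking the callback for each token. */
static int example_tokenize(const char *zText, ExampleCtx *p){
  int rc = EX_OK;
  const char *z = zText;
  const char *zStart = zText;
  while( rc==EX_OK && *z ){
    if( *z==' ' ){
      if( z>zStart ) rc = example_token_cb(p, zStart, (int)(z-zStart));
      zStart = z+1;
    }
    z++;
  }
  if( rc==EX_OK && z>zStart ) rc = example_token_cb(p, zStart, (int)(z-zStart));
  if( rc==EX_OK ) rc = p->rc;          /* drain any error left in the context */
  return rc;
}

The final "drain" check mirrors the "if( rc || (rc = sCtx.rc) )" test that the fts5_expr.c diff adds after sqlite3Fts5Tokenize() returns.
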
Changes
Changes to ext/fts5/fts5_expr.c.
@@ -1463,62 +1463,76 @@
   }
   return pRet;
 }
 
 typedef struct TokenCtx TokenCtx;
 struct TokenCtx {
   Fts5ExprPhrase *pPhrase;
+  int rc;
 };
 
 /*
 ** Callback for tokenizing terms used by ParseTerm().
 */
 static int fts5ParseTokenize(
   void *pContext,                 /* Pointer to Fts5InsertCtx object */
   int tflags,                     /* Mask of FTS5_TOKEN_* flags */
   const char *pToken,             /* Buffer containing token */
   int nToken,                     /* Size of token in bytes */
   int iUnused1,                   /* Start offset of token */
   int iUnused2                    /* End offset of token */
 ){
   int rc = SQLITE_OK;
   const int SZALLOC = 8;
   TokenCtx *pCtx = (TokenCtx*)pContext;
   Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
 
+  /* If an error has already occurred, this is a no-op */
+  if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
+
   assert( pPhrase==0 || pPhrase->nTerm>0 );
   if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
     Fts5ExprTerm *pSyn;
     int nByte = sizeof(Fts5ExprTerm) + nToken+1;
     pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
-    if( pSyn==0 ) return SQLITE_NOMEM;
-    memset(pSyn, 0, nByte);
-    pSyn->zTerm = (char*)&pSyn[1];
-    memcpy(pSyn->zTerm, pToken, nToken);
-    pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
-    pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
+    if( pSyn==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      memset(pSyn, 0, nByte);
+      pSyn->zTerm = (char*)&pSyn[1];
+      memcpy(pSyn->zTerm, pToken, nToken);
+      pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
+      pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
+    }
   }else{
     Fts5ExprTerm *pTerm;
     if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
       Fts5ExprPhrase *pNew;
       int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
 
       pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
           sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
       );
-      if( pNew==0 ) return SQLITE_NOMEM;
-      if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
-      pCtx->pPhrase = pPhrase = pNew;
-      pNew->nTerm = nNew - SZALLOC;
-    }
-
-    pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
-    memset(pTerm, 0, sizeof(Fts5ExprTerm));
-    pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
-  }
+      if( pNew==0 ){
+        rc = SQLITE_NOMEM;
+      }else{
+        if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
+        pCtx->pPhrase = pPhrase = pNew;
+        pNew->nTerm = nNew - SZALLOC;
+      }
+    }
+
+    if( rc==SQLITE_OK ){
+      pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
+      memset(pTerm, 0, sizeof(Fts5ExprTerm));
+      pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
+    }
+  }
+
+  pCtx->rc = rc;
   return rc;
 }
 
 
 /*
 ** Free the phrase object passed as the only argument.
 */
@@ -1569,15 +1583,15 @@
     int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0);
     int n;
     sqlite3Fts5Dequote(z);
     n = strlen(z);
     rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
   }
   sqlite3_free(z);
-  if( rc ){
+  if( rc || (rc = sCtx.rc) ){
     pParse->rc = rc;
     fts5ExprPhraseFree(sCtx.pPhrase);
     sCtx.pPhrase = 0;
   }else if( sCtx.pPhrase ){
 
     if( pAppend==0 ){
       if( (pParse->nPhrase % 8)==0 ){
@@ -1618,15 +1632,15 @@
   int i;                          /* Used to iterate through phrase terms */
 
   Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */
   Fts5ExprPhrase **apPhrase;      /* pNew->apPhrase */
   Fts5ExprNode *pNode;            /* pNew->pRoot */
   Fts5ExprNearset *pNear;         /* pNew->pRoot->pNear */
 
-  TokenCtx sCtx = {0};            /* Context object for fts5ParseTokenize */
+  TokenCtx sCtx = {0,0};          /* Context object for fts5ParseTokenize */
 
 
   pOrig = pExpr->apExprPhrase[iPhrase];
 
   pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
   if( rc==SQLITE_OK ){
     pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 
Changes to ext/fts5/fts5_test_mi.c.
@@ -348,30 +348,35 @@
   Fts5Context *pFts,              /* First arg to pass to pApi functions */
   sqlite3_context *pCtx,          /* Context for returning result/error */
   int nVal,                       /* Number of values in apVal[] array */
   sqlite3_value **apVal           /* Array of trailing arguments */
 ){
   const char *zArg;
   Fts5MatchinfoCtx *p;
-  int rc;
+  int rc = SQLITE_OK;
 
   if( nVal>0 ){
     zArg = (const char*)sqlite3_value_text(apVal[0]);
   }else{
     zArg = "pcx";
   }
 
   p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
   if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
     p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
-    pApi->xSetAuxdata(pFts, p, sqlite3_free);
-    if( p==0 ) return;
-  }
-
-  rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
+    if( p==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
+  }
   if( rc!=SQLITE_OK ){
     sqlite3_result_error_code(pCtx, rc);
   }else{
     /* No errors has occured, so return a copy of the array of integers. */
     int nByte = p->nRet * sizeof(u32);
     sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
   }
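
The diff above changes the test-only matchinfo() implementation so that the return values of fts5MatchinfoNew() and xSetAuxdata() are checked rather than ignored. For context, the sketch below shows the usual auxdata-caching shape of an fts5 auxiliary function using the documented xGetAuxdata()/xSetAuxdata() API. It is a sketch under assumptions, not the fts5_test_mi.c code: MyCache and my_aux_func() are invented names, and the cached object is deliberately trivial.

/*
** Sketch only: cache a per-query object with xSetAuxdata()/xGetAuxdata()
** inside an fts5 auxiliary function, checking every allocation and API
** return code.
*/
#include <string.h>
#include "sqlite3.h"
#include "fts5.h"

typedef struct MyCache MyCache;
struct MyCache {
  char *zArg;                     /* Copy of the first SQL argument */
};

static void my_aux_func(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  int rc = SQLITE_OK;
  const char *zArg = "";
  MyCache *p;

  if( nVal>0 ){
    zArg = (const char*)sqlite3_value_text(apVal[0]);
    if( zArg==0 ) zArg = "";
  }

  p = (MyCache*)pApi->xGetAuxdata(pFts, 0);
  if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
    /* No cached object yet (or the argument changed): build a new one. */
    p = (MyCache*)sqlite3_malloc((int)(sizeof(MyCache) + strlen(zArg) + 1));
    if( p==0 ){
      rc = SQLITE_NOMEM;
    }else{
      p->zArg = (char*)&p[1];
      memcpy(p->zArg, zArg, strlen(zArg)+1);
      /* xSetAuxdata() may itself fail (e.g. OOM); check its return code,
      ** as the change above now does. */
      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
    }
  }

  if( rc==SQLITE_OK ){
    sqlite3_result_text(pCtx, p->zArg, -1, SQLITE_TRANSIENT);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
}
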
Changes to ext/fts5/test/fts5_common.tcl.
@@ -290,8 +290,41 @@
 proc OR {args} {
   sort_poslist [concat {*}$args]
 }
 proc NOT {a b} {
   if {[llength $b]>0} { return [list] }
   return $a
 }
+
+#-------------------------------------------------------------------------
+# This command is similar to [split], except that it also provides the
+# start and end offsets of each token. For example:
+#
+#   [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
+#
+
+proc gobble_whitespace {textvar} {
+  upvar $textvar t
+  regexp {([ ]*)(.*)} $t -> space t
+  return [string length $space]
+}
+
+proc gobble_text {textvar wordvar} {
+  upvar $textvar t
+  upvar $wordvar w
+  regexp {([^ ]*)(.*)} $t -> w t
+  return [string length $w]
+}
+
+proc fts5_tokenize_split {text} {
+  set token ""
+  set ret [list]
+  set iOff [gobble_whitespace text]
+  while {[set nToken [gobble_text text word]]} {
+    lappend ret $word $iOff [expr $iOff+$nToken]
+    incr iOff $nToken
+    incr iOff [gobble_whitespace text]
+  }
+
+  set ret
+}
 
Changes to ext/fts5/test/fts5fault6.test.
@@ -143,10 +143,76 @@
 } -body {
   db eval { 
     CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc");
   }
 } -test {
   faultsim_test_result {0 {}}
 }
+
+#-------------------------------------------------------------------------
+# OOM while running a query that includes synonyms and matchinfo().
+#
+proc mit {blob} {
+  set scan(littleEndian) i*
+  set scan(bigEndian) I*
+  binary scan $blob $scan($::tcl_platform(byteOrder)) r
+  return $r
+}
+proc tcl_tokenize {tflags text} {
+  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
+    sqlite3_fts5_token $w $iStart $iEnd
+    if {$tflags=="query" && [string length $w]==1} {
+      for {set i 2} {$i < 7} {incr i} {
+        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
+      }
+    }
+  }
+}
+proc tcl_create {args} { return "tcl_tokenize" }
+reset_db
+sqlite3_fts5_create_tokenizer db tcl tcl_create
+db func mit mit
+sqlite3_fts5_register_matchinfo db
+do_test 5.0 {
+  execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl) }
+  foreach {rowid text} {
+    1 {aaaa cc b aaaaa cc aa} 
+    2 {aa aa bb a bbb}
+    3 {bb aaaaa aaaaa b aaaa aaaaa}
+    4 {aa a b aaaa aa}
+    5 {aa b ccc aaaaa cc}
+    6 {aa aaaaa bbbb cc aaa}
+    7 {aaaaa aa aa ccccc bb}
+    8 {ccc bbbbb ccccc bbb c}
+    9 {cccccc bbbb a aaa cccc c}
+  } {
+    execsql { INSERT INTO t1(rowid, a) VALUES($rowid, $text) }
+  }
+} {}
+
+set res [list {*}{
+  1 {3 24 8 2 12 6}
+  5 {2 24 8 2 12 6}
+  6 {3 24 8 1 12 6}
+  7 {3 24 8 1 12 6}
+  9 {2 24 8 3 12 6}
+}]
+do_execsql_test 5.1 {
+  SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
+} $res
+
+faultsim_save_and_close
+do_faultsim_test 5.2 -faults oom* -prep {
+  faultsim_restore_and_reopen
+  sqlite3_fts5_create_tokenizer db tcl tcl_create
+  sqlite3_fts5_register_matchinfo db
+  db func mit mit
+} -body {
+  db eval { 
+    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
+  }
+} -test {
+  faultsim_test_result [list 0 $::res]
+}
 
 finish_test
 
Changes to ext/fts5/test/fts5synonym.test.
@@ -36,42 +36,16 @@
   foreach s $S {
     set o [list]
     foreach x $S {if {$x!=$s} {lappend o $x}}
     set ::syn($s) $o
   }
 }
 
-proc gobble_whitespace {textvar} {
-  upvar $textvar t
-  regexp {([ ]*)(.*)} $t -> space t
-  return [string length $space]
-}
-
-proc gobble_text {textvar wordvar} {
-  upvar $textvar t
-  upvar $wordvar w
-  regexp {([^ ]*)(.*)} $t -> w t
-  return [string length $w]
-}
-
-proc do_tokenize_split {text} {
-  set token ""
-  set ret [list]
-  set iOff [gobble_whitespace text]
-  while {[set nToken [gobble_text text word]]} {
-    lappend ret $word $iOff [expr $iOff+$nToken]
-    incr iOff $nToken
-    incr iOff [gobble_whitespace text]
-  }
-
-  set ret
-}
-
 proc tcl_tokenize {tflags text} {
-  foreach {w iStart iEnd} [do_tokenize_split $text] {
+  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
     sqlite3_fts5_token $w $iStart $iEnd
   }
 }
 
 proc tcl_create {args} {
   return "tcl_tokenize"
 }
@@ -91,15 +65,15 @@
 
 #-------------------------------------------------------------------------
 # Test a tokenizer that supports synonyms by adding extra entries to the
 # FTS index.
 #
 
 proc tcl_tokenize {tflags text} {
-  foreach {w iStart iEnd} [do_tokenize_split $text] {
+  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
     sqlite3_fts5_token $w $iStart $iEnd
     if {$tflags=="document" && [info exists ::syn($w)]} {
       foreach s $::syn($w) {
         sqlite3_fts5_token -colo $s $iStart $iEnd
       }
     }
   }
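
The tcl_tokenize proc above simulates a synonym-aware tokenizer by emitting extra tokens at the same position with [sqlite3_fts5_token -colo ...]. A C tokenizer reports such colocated tokens by invoking the xToken callback with the FTS5_TOKEN_COLOCATED flag. The sketch below shows that mechanism; it is illustrative only -- synTokenize() and the "1st"/"first" synonym pair are invented and are not part of this check-in or of the fts5 sources.

/*
** Sketch only: a whitespace tokenizer whose xTokenize implementation reports
** a synonym as a colocated token via FTS5_TOKEN_COLOCATED.
*/
#include <string.h>
#include "sqlite3.h"
#include "fts5.h"

static int synTokenize(
  Fts5Tokenizer *pTok,            /* Tokenizer instance (unused here) */
  void *pCtx,                     /* First argument to pass to xToken() */
  int flags,                      /* Mask of FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  int (*xToken)(void*, int, const char*, int, int, int)
){
  int rc = SQLITE_OK;
  int i = 0;
  (void)pTok;
  while( i<nText && rc==SQLITE_OK ){
    int iStart;
    while( i<nText && pText[i]==' ' ) i++;        /* skip whitespace */
    iStart = i;
    while( i<nText && pText[i]!=' ' ) i++;        /* find end of token */
    if( i>iStart ){
      /* The ordinary token, covering byte range [iStart..i). */
      rc = xToken(pCtx, 0, &pText[iStart], i-iStart, iStart, i);

      /* Report "first" as a synonym of "1st" at the same position. Doing
      ** this only when FTS5_TOKENIZE_QUERY is set expands queries; doing it
      ** for documents instead would add the synonyms to the index, which is
      ** what the test above exercises. */
      if( rc==SQLITE_OK && (flags & FTS5_TOKENIZE_QUERY)
       && i-iStart==3 && 0==memcmp(&pText[iStart], "1st", 3)
      ){
        rc = xToken(pCtx, FTS5_TOKEN_COLOCATED, "first", 5, iStart, i);
      }
    }
  }
  return rc;
}
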
@@ -133,15 +107,15 @@
 #   3.2.*: A tokenizer that reports two identical tokens at the same position.
 #          This is allowed.
 #
 reset_db
 sqlite3_fts5_create_tokenizer db tcl tcl_create
 proc tcl_tokenize {tflags text} {
   set bColo 1
-  foreach {w iStart iEnd} [do_tokenize_split $text] {
+  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
     if {$bColo} {
       sqlite3_fts5_token -colo $w $iStart $iEnd
       set bColo 0
     } {
       sqlite3_fts5_token $w $iStart $iEnd
     }
   }
@@ -156,27 +130,27 @@
 }
 
 do_execsql_test 3.1.1 {
   INSERT INTO ft(ft) VALUES('integrity-check');
 } {}
 
 proc tcl_tokenize {tflags text} {
-  foreach {w iStart iEnd} [do_tokenize_split $text] {
+  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
     sqlite3_fts5_token $w $iStart $iEnd
   }
 }
 
 do_execsql_test 3.1.2 {
   SELECT rowid FROM ft WHERE ft MATCH 'one two three'
 } {1}
 
 reset_db
 sqlite3_fts5_create_tokenizer db tcl tcl_create
 proc tcl_tokenize {tflags text} {
-  foreach {w iStart iEnd} [do_tokenize_split $text] {
+  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
     sqlite3_fts5_token $w $iStart $iEnd
     sqlite3_fts5_token -colo $w $iStart $iEnd
   }
 }
 do_execsql_test 3.2.0 {
   CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
   INSERT INTO ft VALUES('one one two three');
@@ -203,15 +177,15 @@
 
 #-------------------------------------------------------------------------
 # Check that expressions with synonyms can be parsed and executed.
 #
 reset_db
 sqlite3_fts5_create_tokenizer db tcl tcl_create
 proc tcl_tokenize {tflags text} {
-  foreach {w iStart iEnd} [do_tokenize_split $text] {
+  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
     sqlite3_fts5_token $w $iStart $iEnd
     if {$tflags=="query" && [info exists ::syn($w)]} {
       foreach s $::syn($w) {
         sqlite3_fts5_token -colo $s $iStart $iEnd
       }
     }
   }
@@ -326,15 +300,15 @@
 
 #-------------------------------------------------------------------------
 # Test terms with more than 4 synonyms.
 #
 reset_db
 sqlite3_fts5_create_tokenizer db tcl tcl_create
 proc tcl_tokenize {tflags text} {
-  foreach {w iStart iEnd} [do_tokenize_split $text] {
+  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
     sqlite3_fts5_token $w $iStart $iEnd
     if {$tflags=="query" && [string length $w]==1} {
       for {set i 2} {$i<=10} {incr i} {
         sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
       }
     }
   }