/ Check-in [cf3e45e7]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a problem with fts5 synonyms and the xQueryPhrase() auxiliary function API.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5-incompatible
Files: files | file ages | folders
SHA1: cf3e45e76d23e10ee06296c3561a341591597a04
User & Date: dan 2015-09-02 08:22:41
Context
2015-09-02
14:17
Fix a problem handling OOM conditions within fts5 queries that feature synonyms. check-in: 11fa9808 user: dan tags: fts5-incompatible
08:22
Fix a problem with fts5 synonyms and the xQueryPhrase() auxiliary function API. check-in: cf3e45e7 user: dan tags: fts5-incompatible
2015-09-01
18:44
Fix a problem that occurs when more than 4 synonyms for a term appear within a single row. check-in: cd359550 user: dan tags: fts5-incompatible
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
/* Called during startup to register a UDF with SQLite */
int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);

int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);

int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**);

/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by
** the parser code in fts5parse.y.  */

void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);







|







595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
/* Called during startup to register a UDF with SQLite */
int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);

int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);

int sqlite3Fts5ExprClonePhrase(Fts5Config*, Fts5Expr*, int, Fts5Expr**);

/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by
** the parser code in fts5parse.y.  */

void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);

Changes to ext/fts5/fts5_expr.c.

247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
....
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
....
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
....
1670
1671
1672
1673
1674
1675
1676













































































1677
1678
1679
1680
1681
1682
1683
  }

  sqlite3_free(sParse.apPhrase);
  *pzErr = sParse.zErr;
  return sParse.rc;
}

/*
** Create a new FTS5 expression by cloning phrase iPhrase of the
** expression passed as the second argument.
*/
int sqlite3Fts5ExprPhraseExpr(
  Fts5Config *pConfig,
  Fts5Expr *pExpr, 
  int iPhrase, 
  Fts5Expr **ppNew
){
  int rc = SQLITE_OK;             /* Return code */
  Fts5ExprPhrase *pOrig;          /* The phrase extracted from pExpr */
  Fts5ExprPhrase *pCopy;          /* Copy of pOrig */
  Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */

  pOrig = pExpr->apExprPhrase[iPhrase];
  pCopy = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(&rc, 
      sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm
  );
  if( pCopy ){
    int i;                          /* Used to iterate through phrase terms */
    Fts5ExprPhrase **apPhrase;
    Fts5ExprNode *pNode;
    Fts5ExprNearset *pNear;

    pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
    apPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprPhrase*)
    );
    pNode = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode));
    pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)
    );

    for(i=0; i<pOrig->nTerm; i++){
      pCopy->aTerm[i].zTerm = sqlite3Fts5Strndup(&rc, pOrig->aTerm[i].zTerm,-1);
      pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
    }

    if( rc==SQLITE_OK ){
      /* All the allocations succeeded. Put the expression object together. */
      pNew->pIndex = pExpr->pIndex;
      pNew->pRoot = pNode;
      pNew->nPhrase = 1;
      pNew->apExprPhrase = apPhrase;
      pNew->apExprPhrase[0] = pCopy;

      pNode->eType = (pOrig->nTerm==1 ? FTS5_TERM : FTS5_STRING);
      pNode->pNear = pNear;

      pNear->nPhrase = 1;
      pNear->apPhrase[0] = pCopy;

      pCopy->nTerm = pOrig->nTerm;
      pCopy->pNode = pNode;
    }else{
      /* At least one allocation failed. Free them all. */
      for(i=0; i<pOrig->nTerm; i++){
        sqlite3_free(pCopy->aTerm[i].zTerm);
      }
      sqlite3_free(pCopy);
      sqlite3_free(pNear);
      sqlite3_free(pNode);
      sqlite3_free(apPhrase);
      sqlite3_free(pNew);
      pNew = 0;
    }
  }

  *ppNew = pNew;
  return rc;
}

/*
** Free the expression node object passed as the only argument.
*/
void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
  if( p ){
    int i;
    for(i=0; i<p->nChild; i++){
................................................................................
** Callback for tokenizing terms used by ParseTerm().
*/
static int fts5ParseTokenize(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */
  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;

  assert( pPhrase==0 || pPhrase->nTerm>0 );
................................................................................
    Fts5ExprTerm *pTerm;
    if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
      Fts5ExprPhrase *pNew;
      int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

      pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
          sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
          );
      if( pNew==0 ) return SQLITE_NOMEM;
      if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
      pCtx->pPhrase = pPhrase = pNew;
      pNew->nTerm = nNew - SZALLOC;
    }

    pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
................................................................................
    pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
    assert( sCtx.pPhrase->nTerm>0 );
    sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
  }

  return sCtx.pPhrase;
}














































































/*
** Token pTok has appeared in a MATCH expression where the NEAR operator
** is expected. If token pTok does not contain "NEAR", store an error
** in the pParse object.
*/
void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|
|







 







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







247
248
249
250
251
252
253









































































254
255
256
257
258
259
260
....
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
....
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
....
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
  }

  sqlite3_free(sParse.apPhrase);
  *pzErr = sParse.zErr;
  return sParse.rc;
}










































































/*
** Free the expression node object passed as the only argument.
*/
void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
  if( p ){
    int i;
    for(i=0; i<p->nChild; i++){
................................................................................
** Callback for tokenizing terms used by ParseTerm().
*/
static int fts5ParseTokenize(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */
  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iUnused1,                   /* Start offset of token */
  int iUnused2                    /* End offset of token */
){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;

  assert( pPhrase==0 || pPhrase->nTerm>0 );
................................................................................
    Fts5ExprTerm *pTerm;
    if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
      Fts5ExprPhrase *pNew;
      int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

      pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
          sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
      );
      if( pNew==0 ) return SQLITE_NOMEM;
      if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
      pCtx->pPhrase = pPhrase = pNew;
      pNew->nTerm = nNew - SZALLOC;
    }

    pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
................................................................................
    pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
    assert( sCtx.pPhrase->nTerm>0 );
    sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
  }

  return sCtx.pPhrase;
}

/*
** Create a new FTS5 expression by cloning phrase iPhrase of the
** expression passed as the second argument.
*/
int sqlite3Fts5ExprClonePhrase(
  Fts5Config *pConfig,
  Fts5Expr *pExpr, 
  int iPhrase, 
  Fts5Expr **ppNew
){
  int rc = SQLITE_OK;             /* Return code */
  Fts5ExprPhrase *pOrig;          /* The phrase extracted from pExpr */
  Fts5ExprPhrase *pCopy;          /* Copy of pOrig */
  int i;                          /* Used to iterate through phrase terms */

  Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */
  Fts5ExprPhrase **apPhrase;      /* pNew->apPhrase */
  Fts5ExprNode *pNode;            /* pNew->pRoot */
  Fts5ExprNearset *pNear;         /* pNew->pRoot->pNear */

  TokenCtx sCtx = {0};            /* Context object for fts5ParseTokenize */


  pOrig = pExpr->apExprPhrase[iPhrase];

  pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
  if( rc==SQLITE_OK ){
    pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprPhrase*));
  }
  if( rc==SQLITE_OK ){
    pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprNode));
  }
  if( rc==SQLITE_OK ){
    pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
  }

  for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
    int tflags = 0;
    Fts5ExprTerm *p;
    for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
      const char *zTerm = p->zTerm;
      rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, strlen(zTerm), 0, 0);
      tflags = FTS5_TOKEN_COLOCATED;
    }
    if( rc==SQLITE_OK ){
      sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
    }
  }

  if( rc==SQLITE_OK ){
    /* All the allocations succeeded. Put the expression object together. */
    pNew->pIndex = pExpr->pIndex;
    pNew->nPhrase = 1;
    pNew->apExprPhrase[0] = sCtx.pPhrase;
    pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
    pNew->pRoot->pNear->nPhrase = 1;
    sCtx.pPhrase->pNode = pNew->pRoot;

    if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){
      pNew->pRoot->eType = FTS5_TERM;
    }else{
      pNew->pRoot->eType = FTS5_STRING;
    }
  }else{
    sqlite3Fts5ExprFree(pNew);
    fts5ExprPhraseFree(sCtx.pPhrase);
    pNew = 0;
  }

  *ppNew = pNew;
  return rc;
}


/*
** Token pTok has appeared in a MATCH expression where the NEAR operator
** is expected. If token pTok does not contain "NEAR", store an error
** in the pParse object.
*/
void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){

Changes to ext/fts5/fts5_main.c.

1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
  rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
  if( rc==SQLITE_OK ){
    Fts5Config *pConf = pTab->pConfig;
    pNew->ePlan = FTS5_PLAN_MATCH;
    pNew->iFirstRowid = SMALLEST_INT64;
    pNew->iLastRowid = LARGEST_INT64;
    pNew->base.pVtab = (sqlite3_vtab*)pTab;
    rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
  }

  if( rc==SQLITE_OK ){
    for(rc = fts5CursorFirst(pTab, pNew, 0);
        rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0;
        rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew)
    ){







|







1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
  rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
  if( rc==SQLITE_OK ){
    Fts5Config *pConf = pTab->pConfig;
    pNew->ePlan = FTS5_PLAN_MATCH;
    pNew->iFirstRowid = SMALLEST_INT64;
    pNew->iLastRowid = LARGEST_INT64;
    pNew->base.pVtab = (sqlite3_vtab*)pTab;
    rc = sqlite3Fts5ExprClonePhrase(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
  }

  if( rc==SQLITE_OK ){
    for(rc = fts5CursorFirst(pTab, pNew, 0);
        rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0;
        rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew)
    ){

Changes to ext/fts5/test/fts5synonym.test.

293
294
295
296
297
298
299


























300
301
302
303
304
305
306
  }
} {
  do_execsql_test 5.1.$tn {
    SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
    FROM t1 WHERE t1 MATCH $q
  } $res
}



























#-------------------------------------------------------------------------
# Test terms with more than 4 synonyms.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
  }
} {
  do_execsql_test 5.1.$tn {
    SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
    FROM t1 WHERE t1 MATCH $q
  } $res
}

# Test that the xQueryPhrase() API works with synonyms.
#
proc mit {blob} {
  set scan(littleEndian) i*
  set scan(bigEndian) I*
  binary scan $blob $scan($::tcl_platform(byteOrder)) r
  return $r
}
db func mit mit
sqlite3_fts5_register_matchinfo db

foreach {tn q res} {
  1 {one} {
      1 {1 11 7 2 12 6}     2 {2 11 7 0 12 6} 
      3 {2 11 7 1 12 6}     4 {1 11 7 2 12 6} 
      5 {3 11 7 0 12 6}     6 {0 11 7 2 12 6} 
      7 {0 11 7 3 12 6}     8 {1 11 7 0 12 6} 
      9 {1 11 7 2 12 6}
  }
} {
  do_execsql_test 5.2.$tn {
    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
  } $res
}


#-------------------------------------------------------------------------
# Test terms with more than 4 synonyms.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {