/ Check-in [11fa9808]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Fix a problem handling OOM conditions within fts5 queries that feature synonyms.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5-incompatible
Files: files | file ages | folders
SHA1: 11fa980897c6c7be218bbd9c4cd8253272d2c300
User & Date: dan 2015-09-02 14:17:38
Context
2015-09-02
17:34
Fix a problem with fts5 synonyms and phrase queries. Also fix an OOM handling bug in fts5. check-in: a4c35fa2 user: dan tags: fts5-incompatible
14:17
Fix a problem handling OOM conditions within fts5 queries that feature synonyms. check-in: 11fa9808 user: dan tags: fts5-incompatible
08:22
Fix a problem with fts5 synonyms and the xQueryPhrase() auxiliary function API. check-in: cf3e45e7 user: dan tags: fts5-incompatible
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_expr.c.

  1463   1463     }
  1464   1464     return pRet;
  1465   1465   }
  1466   1466   
  1467   1467   typedef struct TokenCtx TokenCtx;
  1468   1468   struct TokenCtx {
  1469   1469     Fts5ExprPhrase *pPhrase;
         1470  +  int rc;
  1470   1471   };
  1471   1472   
  1472   1473   /*
  1473   1474   ** Callback for tokenizing terms used by ParseTerm().
  1474   1475   */
  1475   1476   static int fts5ParseTokenize(
  1476   1477     void *pContext,                 /* Pointer to Fts5InsertCtx object */
................................................................................
  1481   1482     int iUnused2                    /* End offset of token */
  1482   1483   ){
  1483   1484     int rc = SQLITE_OK;
  1484   1485     const int SZALLOC = 8;
  1485   1486     TokenCtx *pCtx = (TokenCtx*)pContext;
  1486   1487     Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
  1487   1488   
         1489  +  /* If an error has already occurred, this is a no-op */
         1490  +  if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
         1491  +
  1488   1492     assert( pPhrase==0 || pPhrase->nTerm>0 );
  1489   1493     if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
  1490   1494       Fts5ExprTerm *pSyn;
  1491   1495       int nByte = sizeof(Fts5ExprTerm) + nToken+1;
  1492   1496       pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
  1493         -    if( pSyn==0 ) return SQLITE_NOMEM;
  1494         -    memset(pSyn, 0, nByte);
  1495         -    pSyn->zTerm = (char*)&pSyn[1];
  1496         -    memcpy(pSyn->zTerm, pToken, nToken);
  1497         -    pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
  1498         -    pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
         1497  +    if( pSyn==0 ){
         1498  +      rc = SQLITE_NOMEM;
         1499  +    }else{
         1500  +      memset(pSyn, 0, nByte);
         1501  +      pSyn->zTerm = (char*)&pSyn[1];
         1502  +      memcpy(pSyn->zTerm, pToken, nToken);
         1503  +      pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
         1504  +      pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
         1505  +    }
  1499   1506     }else{
  1500   1507       Fts5ExprTerm *pTerm;
  1501   1508       if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
  1502   1509         Fts5ExprPhrase *pNew;
  1503   1510         int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
  1504   1511   
  1505   1512         pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
  1506   1513             sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
  1507   1514         );
  1508         -      if( pNew==0 ) return SQLITE_NOMEM;
  1509         -      if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
  1510         -      pCtx->pPhrase = pPhrase = pNew;
  1511         -      pNew->nTerm = nNew - SZALLOC;
         1515  +      if( pNew==0 ){
         1516  +        rc = SQLITE_NOMEM;
         1517  +      }else{
         1518  +        if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
         1519  +        pCtx->pPhrase = pPhrase = pNew;
         1520  +        pNew->nTerm = nNew - SZALLOC;
         1521  +      }
  1512   1522       }
  1513   1523   
  1514         -    pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
  1515         -    memset(pTerm, 0, sizeof(Fts5ExprTerm));
  1516         -    pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
         1524  +    if( rc==SQLITE_OK ){
         1525  +      pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
         1526  +      memset(pTerm, 0, sizeof(Fts5ExprTerm));
         1527  +      pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
         1528  +    }
  1517   1529     }
         1530  +
         1531  +  pCtx->rc = rc;
  1518   1532     return rc;
  1519   1533   }
  1520   1534   
  1521   1535   
  1522   1536   /*
  1523   1537   ** Free the phrase object passed as the only argument.
  1524   1538   */
................................................................................
  1569   1583       int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0);
  1570   1584       int n;
  1571   1585       sqlite3Fts5Dequote(z);
  1572   1586       n = strlen(z);
  1573   1587       rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
  1574   1588     }
  1575   1589     sqlite3_free(z);
  1576         -  if( rc ){
         1590  +  if( rc || (rc = sCtx.rc) ){
  1577   1591       pParse->rc = rc;
  1578   1592       fts5ExprPhraseFree(sCtx.pPhrase);
  1579   1593       sCtx.pPhrase = 0;
  1580   1594     }else if( sCtx.pPhrase ){
  1581   1595   
  1582   1596       if( pAppend==0 ){
  1583   1597         if( (pParse->nPhrase % 8)==0 ){
................................................................................
  1618   1632     int i;                          /* Used to iterate through phrase terms */
  1619   1633   
  1620   1634     Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */
  1621   1635     Fts5ExprPhrase **apPhrase;      /* pNew->apPhrase */
  1622   1636     Fts5ExprNode *pNode;            /* pNew->pRoot */
  1623   1637     Fts5ExprNearset *pNear;         /* pNew->pRoot->pNear */
  1624   1638   
  1625         -  TokenCtx sCtx = {0};            /* Context object for fts5ParseTokenize */
         1639  +  TokenCtx sCtx = {0,0};          /* Context object for fts5ParseTokenize */
  1626   1640   
  1627   1641   
  1628   1642     pOrig = pExpr->apExprPhrase[iPhrase];
  1629   1643   
  1630   1644     pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
  1631   1645     if( rc==SQLITE_OK ){
  1632   1646       pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 

Changes to ext/fts5/fts5_test_mi.c.

   348    348     Fts5Context *pFts,              /* First arg to pass to pApi functions */
   349    349     sqlite3_context *pCtx,          /* Context for returning result/error */
   350    350     int nVal,                       /* Number of values in apVal[] array */
   351    351     sqlite3_value **apVal           /* Array of trailing arguments */
   352    352   ){
   353    353     const char *zArg;
   354    354     Fts5MatchinfoCtx *p;
   355         -  int rc;
          355  +  int rc = SQLITE_OK;
   356    356   
   357    357     if( nVal>0 ){
   358    358       zArg = (const char*)sqlite3_value_text(apVal[0]);
   359    359     }else{
   360    360       zArg = "pcx";
   361    361     }
   362    362   
   363    363     p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
   364    364     if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
   365    365       p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
   366         -    pApi->xSetAuxdata(pFts, p, sqlite3_free);
   367         -    if( p==0 ) return;
          366  +    if( p==0 ){
          367  +      rc = SQLITE_NOMEM;
          368  +    }else{
          369  +      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
          370  +    }
   368    371     }
   369    372   
   370         -  rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
          373  +  if( rc==SQLITE_OK ){
          374  +    rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
          375  +  }
   371    376     if( rc!=SQLITE_OK ){
   372    377       sqlite3_result_error_code(pCtx, rc);
   373    378     }else{
   374    379       /* No error has occurred, so return a copy of the array of integers. */
   375    380       int nByte = p->nRet * sizeof(u32);
   376    381       sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
   377    382     }

Changes to ext/fts5/test/fts5_common.tcl.

   290    290   proc OR {args} {
   291    291     sort_poslist [concat {*}$args]
   292    292   }
   293    293   proc NOT {a b} {
   294    294     if {[llength $b]>0} { return [list] }
   295    295     return $a
   296    296   }
          297  +
          298  +#-------------------------------------------------------------------------
          299  +# This command is similar to [split], except that it also provides the
          300  +# start and end offsets of each token. For example:
          301  +#
          302  +#   [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
          303  +#
          304  +
          305  +proc gobble_whitespace {textvar} {
          306  +  upvar $textvar t
          307  +  regexp {([ ]*)(.*)} $t -> space t
          308  +  return [string length $space]
          309  +}
          310  +
          311  +proc gobble_text {textvar wordvar} {
          312  +  upvar $textvar t
          313  +  upvar $wordvar w
          314  +  regexp {([^ ]*)(.*)} $t -> w t
          315  +  return [string length $w]
          316  +}
          317  +
          318  +proc fts5_tokenize_split {text} {
          319  +  set token ""
          320  +  set ret [list]
          321  +  set iOff [gobble_whitespace text]
          322  +  while {[set nToken [gobble_text text word]]} {
          323  +    lappend ret $word $iOff [expr $iOff+$nToken]
          324  +    incr iOff $nToken
          325  +    incr iOff [gobble_whitespace text]
          326  +  }
          327  +
          328  +  set ret
          329  +}
   297    330   

Changes to ext/fts5/test/fts5fault6.test.

   143    143   } -body {
   144    144     db eval { 
   145    145       CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc");
   146    146     }
   147    147   } -test {
   148    148     faultsim_test_result {0 {}}
   149    149   }
          150  +
          151  +#-------------------------------------------------------------------------
          152  +# OOM while running a query that includes synonyms and matchinfo().
          153  +#
          154  +proc mit {blob} {
          155  +  set scan(littleEndian) i*
          156  +  set scan(bigEndian) I*
          157  +  binary scan $blob $scan($::tcl_platform(byteOrder)) r
          158  +  return $r
          159  +}
          160  +proc tcl_tokenize {tflags text} {
          161  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
          162  +    sqlite3_fts5_token $w $iStart $iEnd
          163  +    if {$tflags=="query" && [string length $w]==1} {
          164  +      for {set i 2} {$i < 7} {incr i} {
          165  +        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
          166  +      }
          167  +    }
          168  +  }
          169  +}
          170  +proc tcl_create {args} { return "tcl_tokenize" }
          171  +reset_db
          172  +sqlite3_fts5_create_tokenizer db tcl tcl_create
          173  +db func mit mit
          174  +sqlite3_fts5_register_matchinfo db
          175  +do_test 5.0 {
          176  +  execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl) }
          177  +  foreach {rowid text} {
          178  +    1 {aaaa cc b aaaaa cc aa} 
          179  +    2 {aa aa bb a bbb}
          180  +    3 {bb aaaaa aaaaa b aaaa aaaaa}
          181  +    4 {aa a b aaaa aa}
          182  +    5 {aa b ccc aaaaa cc}
          183  +    6 {aa aaaaa bbbb cc aaa}
          184  +    7 {aaaaa aa aa ccccc bb}
          185  +    8 {ccc bbbbb ccccc bbb c}
          186  +    9 {cccccc bbbb a aaa cccc c}
          187  +  } {
          188  +    execsql { INSERT INTO t1(rowid, a) VALUES($rowid, $text) }
          189  +  }
          190  +} {}
          191  +
          192  +set res [list {*}{
          193  +  1 {3 24 8 2 12 6}
          194  +  5 {2 24 8 2 12 6}
          195  +  6 {3 24 8 1 12 6}
          196  +  7 {3 24 8 1 12 6}
          197  +  9 {2 24 8 3 12 6}
          198  +}]
          199  +do_execsql_test 5.1 {
          200  +  SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
          201  +} $res
          202  +
          203  +faultsim_save_and_close
          204  +do_faultsim_test 5.2 -faults oom* -prep {
          205  +  faultsim_restore_and_reopen
          206  +  sqlite3_fts5_create_tokenizer db tcl tcl_create
          207  +  sqlite3_fts5_register_matchinfo db
          208  +  db func mit mit
          209  +} -body {
          210  +  db eval { 
          211  +    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
          212  +  }
          213  +} -test {
          214  +  faultsim_test_result [list 0 $::res]
          215  +}
   150    216   
   151    217   finish_test
   152    218   

Changes to ext/fts5/test/fts5synonym.test.

    36     36     foreach s $S {
    37     37       set o [list]
    38     38       foreach x $S {if {$x!=$s} {lappend o $x}}
    39     39       set ::syn($s) $o
    40     40     }
    41     41   }
    42     42   
    43         -proc gobble_whitespace {textvar} {
    44         -  upvar $textvar t
    45         -  regexp {([ ]*)(.*)} $t -> space t
    46         -  return [string length $space]
    47         -}
    48         -
    49         -proc gobble_text {textvar wordvar} {
    50         -  upvar $textvar t
    51         -  upvar $wordvar w
    52         -  regexp {([^ ]*)(.*)} $t -> w t
    53         -  return [string length $w]
    54         -}
    55         -
    56         -proc do_tokenize_split {text} {
    57         -  set token ""
    58         -  set ret [list]
    59         -  set iOff [gobble_whitespace text]
    60         -  while {[set nToken [gobble_text text word]]} {
    61         -    lappend ret $word $iOff [expr $iOff+$nToken]
    62         -    incr iOff $nToken
    63         -    incr iOff [gobble_whitespace text]
    64         -  }
    65         -
    66         -  set ret
    67         -}
    68         -
    69     43   proc tcl_tokenize {tflags text} {
    70         -  foreach {w iStart iEnd} [do_tokenize_split $text] {
           44  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    71     45       sqlite3_fts5_token $w $iStart $iEnd
    72     46     }
    73     47   }
    74     48   
    75     49   proc tcl_create {args} {
    76     50     return "tcl_tokenize"
    77     51   }
................................................................................
    91     65   
    92     66   #-------------------------------------------------------------------------
    93     67   # Test a tokenizer that supports synonyms by adding extra entries to the
    94     68   # FTS index.
    95     69   #
    96     70   
    97     71   proc tcl_tokenize {tflags text} {
    98         -  foreach {w iStart iEnd} [do_tokenize_split $text] {
           72  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    99     73       sqlite3_fts5_token $w $iStart $iEnd
   100     74       if {$tflags=="document" && [info exists ::syn($w)]} {
   101     75         foreach s $::syn($w) {
   102     76           sqlite3_fts5_token -colo $s $iStart $iEnd
   103     77         }
   104     78       }
   105     79     }
................................................................................
   133    107   #   3.2.*: A tokenizer that reports two identical tokens at the same position.
   134    108   #          This is allowed.
   135    109   #
   136    110   reset_db
   137    111   sqlite3_fts5_create_tokenizer db tcl tcl_create
   138    112   proc tcl_tokenize {tflags text} {
   139    113     set bColo 1
   140         -  foreach {w iStart iEnd} [do_tokenize_split $text] {
          114  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
   141    115       if {$bColo} {
   142    116         sqlite3_fts5_token -colo $w $iStart $iEnd
   143    117         set bColo 0
   144    118       } {
   145    119         sqlite3_fts5_token $w $iStart $iEnd
   146    120       }
   147    121     }
................................................................................
   156    130   }
   157    131   
   158    132   do_execsql_test 3.1.1 {
   159    133     INSERT INTO ft(ft) VALUES('integrity-check');
   160    134   } {}
   161    135   
   162    136   proc tcl_tokenize {tflags text} {
   163         -  foreach {w iStart iEnd} [do_tokenize_split $text] {
          137  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
   164    138       sqlite3_fts5_token $w $iStart $iEnd
   165    139     }
   166    140   }
   167    141   
   168    142   do_execsql_test 3.1.2 {
   169    143     SELECT rowid FROM ft WHERE ft MATCH 'one two three'
   170    144   } {1}
   171    145   
   172    146   reset_db
   173    147   sqlite3_fts5_create_tokenizer db tcl tcl_create
   174    148   proc tcl_tokenize {tflags text} {
   175         -  foreach {w iStart iEnd} [do_tokenize_split $text] {
          149  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
   176    150       sqlite3_fts5_token $w $iStart $iEnd
   177    151       sqlite3_fts5_token -colo $w $iStart $iEnd
   178    152     }
   179    153   }
   180    154   do_execsql_test 3.2.0 {
   181    155     CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
   182    156     INSERT INTO ft VALUES('one one two three');
................................................................................
   203    177   
   204    178   #-------------------------------------------------------------------------
   205    179   # Check that expressions with synonyms can be parsed and executed.
   206    180   #
   207    181   reset_db
   208    182   sqlite3_fts5_create_tokenizer db tcl tcl_create
   209    183   proc tcl_tokenize {tflags text} {
   210         -  foreach {w iStart iEnd} [do_tokenize_split $text] {
          184  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
   211    185       sqlite3_fts5_token $w $iStart $iEnd
   212    186       if {$tflags=="query" && [info exists ::syn($w)]} {
   213    187         foreach s $::syn($w) {
   214    188           sqlite3_fts5_token -colo $s $iStart $iEnd
   215    189         }
   216    190       }
   217    191     }
................................................................................
   326    300   
   327    301   #-------------------------------------------------------------------------
   328    302   # Test terms with more than 4 synonyms.
   329    303   #
   330    304   reset_db
   331    305   sqlite3_fts5_create_tokenizer db tcl tcl_create
   332    306   proc tcl_tokenize {tflags text} {
   333         -  foreach {w iStart iEnd} [do_tokenize_split $text] {
          307  +  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
   334    308       sqlite3_fts5_token $w $iStart $iEnd
   335    309       if {$tflags=="query" && [string length $w]==1} {
   336    310         for {set i 2} {$i<=10} {incr i} {
   337    311           sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
   338    312         }
   339    313       }
   340    314     }