/ Check-in [72b3ff0f]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix handling of strings that contain zero tokens in fts5. And other problems found by fuzzing.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 72b3ff0f0df83e62adda6584b4281cf086d45e45
User & Date: dan 2016-03-12 16:32:16
Context
2016-03-12
19:33
Fix a problem handling 'NEAR("" token)' in fts5 found by fuzzing. check-in: 10a827ae user: dan tags: trunk
16:32
Fix handling of strings that contain zero tokens in fts5. And other problems found by fuzzing. check-in: 72b3ff0f user: dan tags: trunk
2016-03-11
23:07
Do not allow the use of WAL mode with nolock=1 because it does not work. check-in: 74f5d3b0 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

   696    696   Fts5ExprNode *sqlite3Fts5ParseNode(
   697    697     Fts5Parse *pParse,
   698    698     int eType,
   699    699     Fts5ExprNode *pLeft,
   700    700     Fts5ExprNode *pRight,
   701    701     Fts5ExprNearset *pNear
   702    702   );
          703  +
          704  +Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
          705  +  Fts5Parse *pParse,
          706  +  Fts5ExprNode *pLeft,
          707  +  Fts5ExprNode *pRight
          708  +);
   703    709   
   704    710   Fts5ExprPhrase *sqlite3Fts5ParseTerm(
   705    711     Fts5Parse *pParse, 
   706    712     Fts5ExprPhrase *pPhrase, 
   707    713     Fts5Token *pToken,
   708    714     int bPrefix
   709    715   );

Changes to ext/fts5/fts5_config.c.

   438    438       memcpy(zOut, zIn, nIn+1);
   439    439       if( fts5_isopenquote(zOut[0]) ){
   440    440         int ii = fts5Dequote(zOut);
   441    441         zRet = &zIn[ii];
   442    442         *pbQuoted = 1;
   443    443       }else{
   444    444         zRet = fts5ConfigSkipBareword(zIn);
   445         -      zOut[zRet-zIn] = '\0';
          445  +      if( zRet ){
          446  +        zOut[zRet-zIn] = '\0';
          447  +      }
   446    448       }
   447    449     }
   448    450   
   449    451     if( zRet==0 ){
   450    452       sqlite3_free(zOut);
   451    453     }else{
   452    454       *pzOut = zOut;

Changes to ext/fts5/fts5_expr.c.

  1264   1264     int rc = SQLITE_OK;
  1265   1265     pNode->bEof = 0;
  1266   1266     pNode->bNomatch = 0;
  1267   1267   
  1268   1268     if( Fts5NodeIsString(pNode) ){
  1269   1269       /* Initialize all term iterators in the NEAR object. */
  1270   1270       rc = fts5ExprNearInitAll(pExpr, pNode);
         1271  +  }else if( pNode->xNext==0 ){
         1272  +    pNode->bEof = 1;
  1271   1273     }else{
  1272   1274       int i;
  1273   1275       int nEof = 0;
  1274   1276       for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){
  1275   1277         Fts5ExprNode *pChild = pNode->apChild[i];
  1276   1278         rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]);
  1277   1279         assert( pChild->bEof==0 || pChild->bEof==1 );
................................................................................
  1315   1317   ** equal to iFirst.
  1316   1318   **
  1317   1319   ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
  1318   1320   ** is not considered an error if the query does not match any documents.
  1319   1321   */
  1320   1322   int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){
  1321   1323     Fts5ExprNode *pRoot = p->pRoot;
  1322         -  int rc = SQLITE_OK;
  1323         -  if( pRoot->xNext ){
  1324         -    p->pIndex = pIdx;
  1325         -    p->bDesc = bDesc;
  1326         -    rc = fts5ExprNodeFirst(p, pRoot);
         1324  +  int rc;                         /* Return code */
  1327   1325   
  1328         -    /* If not at EOF but the current rowid occurs earlier than iFirst in
  1329         -    ** the iteration order, move to document iFirst or later. */
  1330         -    if( pRoot->bEof==0 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 ){
  1331         -      rc = fts5ExprNodeNext(p, pRoot, 1, iFirst);
  1332         -    }
         1326  +  p->pIndex = pIdx;
         1327  +  p->bDesc = bDesc;
         1328  +  rc = fts5ExprNodeFirst(p, pRoot);
  1333   1329   
  1334         -    /* If the iterator is not at a real match, skip forward until it is. */
  1335         -    while( pRoot->bNomatch ){
  1336         -      assert( pRoot->bEof==0 && rc==SQLITE_OK );
  1337         -      rc = fts5ExprNodeNext(p, pRoot, 0, 0);
  1338         -    }
         1330  +  /* If not at EOF but the current rowid occurs earlier than iFirst in
         1331  +  ** the iteration order, move to document iFirst or later. */
         1332  +  if( pRoot->bEof==0 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 ){
         1333  +    rc = fts5ExprNodeNext(p, pRoot, 1, iFirst);
         1334  +  }
         1335  +
         1336  +  /* If the iterator is not at a real match, skip forward until it is. */
         1337  +  while( pRoot->bNomatch ){
         1338  +    assert( pRoot->bEof==0 && rc==SQLITE_OK );
         1339  +    rc = fts5ExprNodeNext(p, pRoot, 0, 0);
  1339   1340     }
  1340   1341     return rc;
  1341   1342   }
  1342   1343   
  1343   1344   /*
  1344   1345   ** Move to the next document 
  1345   1346   **
................................................................................
  1472   1473     Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
  1473   1474   
  1474   1475     UNUSED_PARAM2(iUnused1, iUnused2);
  1475   1476   
  1476   1477     /* If an error has already occurred, this is a no-op */
  1477   1478     if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
  1478   1479   
  1479         -  assert( pPhrase==0 || pPhrase->nTerm>0 );
  1480         -  if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
         1480  +  if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){
  1481   1481       Fts5ExprTerm *pSyn;
  1482   1482       int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1;
  1483   1483       pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
  1484   1484       if( pSyn==0 ){
  1485   1485         rc = SQLITE_NOMEM;
  1486   1486       }else{
  1487   1487         memset(pSyn, 0, nByte);
................................................................................
  1574   1574       rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
  1575   1575     }
  1576   1576     sqlite3_free(z);
  1577   1577     if( rc || (rc = sCtx.rc) ){
  1578   1578       pParse->rc = rc;
  1579   1579       fts5ExprPhraseFree(sCtx.pPhrase);
  1580   1580       sCtx.pPhrase = 0;
  1581         -  }else if( sCtx.pPhrase ){
         1581  +  }else{
  1582   1582   
  1583   1583       if( pAppend==0 ){
  1584   1584         if( (pParse->nPhrase % 8)==0 ){
  1585   1585           int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
  1586   1586           Fts5ExprPhrase **apNew;
  1587   1587           apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte);
  1588   1588           if( apNew==0 ){
................................................................................
  1591   1591             return 0;
  1592   1592           }
  1593   1593           pParse->apPhrase = apNew;
  1594   1594         }
  1595   1595         pParse->nPhrase++;
  1596   1596       }
  1597   1597   
         1598  +    if( sCtx.pPhrase==0 ){
         1599  +      /* This happens when parsing a token or quoted phrase that contains
         1600  +      ** no token characters at all. (e.g. ... MATCH '""'). */
         1601  +      sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase));
         1602  +    }else if( sCtx.pPhrase->nTerm ){
         1603  +      sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
         1604  +    }
  1598   1605       pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
  1599         -    assert( sCtx.pPhrase->nTerm>0 );
  1600         -    sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
  1601   1606     }
  1602   1607   
  1603   1608     return sCtx.pPhrase;
  1604   1609   }
  1605   1610   
  1606   1611   /*
  1607   1612   ** Create a new FTS5 expression by cloning phrase iPhrase of the
................................................................................
  1689   1694   }
  1690   1695   
  1691   1696   void sqlite3Fts5ParseSetDistance(
  1692   1697     Fts5Parse *pParse, 
  1693   1698     Fts5ExprNearset *pNear,
  1694   1699     Fts5Token *p
  1695   1700   ){
  1696         -  int nNear = 0;
  1697         -  int i;
  1698         -  if( p->n ){
  1699         -    for(i=0; i<p->n; i++){
  1700         -      char c = (char)p->p[i];
  1701         -      if( c<'0' || c>'9' ){
  1702         -        sqlite3Fts5ParseError(
  1703         -            pParse, "expected integer, got \"%.*s\"", p->n, p->p
  1704         -        );
  1705         -        return;
         1701  +  if( pNear ){
         1702  +    int nNear = 0;
         1703  +    int i;
         1704  +    if( p->n ){
         1705  +      for(i=0; i<p->n; i++){
         1706  +        char c = (char)p->p[i];
         1707  +        if( c<'0' || c>'9' ){
         1708  +          sqlite3Fts5ParseError(
         1709  +              pParse, "expected integer, got \"%.*s\"", p->n, p->p
         1710  +              );
         1711  +          return;
         1712  +        }
         1713  +        nNear = nNear * 10 + (p->p[i] - '0');
  1706   1714         }
  1707         -      nNear = nNear * 10 + (p->p[i] - '0');
         1715  +    }else{
         1716  +      nNear = FTS5_DEFAULT_NEARDIST;
  1708   1717       }
  1709         -  }else{
  1710         -    nNear = FTS5_DEFAULT_NEARDIST;
         1718  +    pNear->nNear = nNear;
  1711   1719     }
  1712         -  pNear->nNear = nNear;
  1713   1720   }
  1714   1721   
  1715   1722   /*
  1716   1723   ** The second argument passed to this function may be NULL, or it may be
  1717   1724   ** an existing Fts5Colset object. This function returns a pointer to
  1718   1725   ** a new colset object containing the contents of (p) with new value column
  1719   1726   ** number iCol appended. 
................................................................................
  1892   1899         pRet->eType = eType;
  1893   1900         pRet->pNear = pNear;
  1894   1901         fts5ExprAssignXNext(pRet);
  1895   1902         if( eType==FTS5_STRING ){
  1896   1903           int iPhrase;
  1897   1904           for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
  1898   1905             pNear->apPhrase[iPhrase]->pNode = pRet;
         1906  +          if( pNear->apPhrase[iPhrase]->nTerm==0 ){
         1907  +            pRet->xNext = 0;
         1908  +            pRet->eType = FTS5_EOF;
         1909  +          }
  1899   1910           }
  1900   1911   
  1901   1912           if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL 
  1902         -         && (pNear->nPhrase!=1 || pNear->apPhrase[0]->nTerm!=1)
         1913  +         && (pNear->nPhrase!=1 || pNear->apPhrase[0]->nTerm>1)
  1903   1914           ){
  1904   1915             assert( pParse->rc==SQLITE_OK );
  1905   1916             pParse->rc = SQLITE_ERROR;
  1906   1917             assert( pParse->zErr==0 );
  1907   1918             pParse->zErr = sqlite3_mprintf(
  1908   1919                 "fts5: %s queries are not supported (detail!=full)", 
  1909   1920                 pNear->nPhrase==1 ? "phrase": "NEAR"
................................................................................
  1921   1932   
  1922   1933     if( pRet==0 ){
  1923   1934       assert( pParse->rc!=SQLITE_OK );
  1924   1935       sqlite3Fts5ParseNodeFree(pLeft);
  1925   1936       sqlite3Fts5ParseNodeFree(pRight);
  1926   1937       sqlite3Fts5ParseNearsetFree(pNear);
  1927   1938     }
         1939  +  return pRet;
         1940  +}
         1941  +
         1942  +Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
         1943  +  Fts5Parse *pParse,              /* Parse context */
         1944  +  Fts5ExprNode *pLeft,            /* Left hand child expression */
         1945  +  Fts5ExprNode *pRight            /* Right hand child expression */
         1946  +){
         1947  +  Fts5ExprNode *pRet = 0;
         1948  +  Fts5ExprNode *pPrev;
         1949  +
         1950  +  if( pParse->rc ){
         1951  +    sqlite3Fts5ParseNodeFree(pLeft);
         1952  +    sqlite3Fts5ParseNodeFree(pRight);
         1953  +  }else{
         1954  +
         1955  +    assert( pLeft->eType==FTS5_STRING 
         1956  +        || pLeft->eType==FTS5_TERM
         1957  +        || pLeft->eType==FTS5_EOF
         1958  +        || pLeft->eType==FTS5_AND
         1959  +    );
         1960  +    assert( pRight->eType==FTS5_STRING 
         1961  +        || pRight->eType==FTS5_TERM 
         1962  +        || pRight->eType==FTS5_EOF 
         1963  +    );
         1964  +
         1965  +    if( pLeft->eType==FTS5_AND ){
         1966  +      pPrev = pLeft->apChild[pLeft->nChild-1];
         1967  +    }else{
         1968  +      pPrev = pLeft;
         1969  +    }
         1970  +    assert( pPrev->eType==FTS5_STRING 
         1971  +        || pPrev->eType==FTS5_TERM 
         1972  +        || pPrev->eType==FTS5_EOF 
         1973  +        );
         1974  +
         1975  +    if( pRight->eType==FTS5_EOF ){
         1976  +      assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] );
         1977  +      sqlite3Fts5ParseNodeFree(pRight);
         1978  +      pRet = pLeft;
         1979  +      pParse->nPhrase--;
         1980  +    }
         1981  +    else if( pPrev->eType==FTS5_EOF ){
         1982  +      Fts5ExprPhrase **ap;
         1983  +
         1984  +      if( pPrev==pLeft ){
         1985  +        pRet = pRight;
         1986  +      }else{
         1987  +        pLeft->apChild[pLeft->nChild-1] = pRight;
         1988  +        pRet = pLeft;
         1989  +      }
         1990  +
         1991  +      ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase];
         1992  +      assert( ap[0]==pPrev->pNear->apPhrase[0] );
         1993  +      memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase);
         1994  +      pParse->nPhrase--;
         1995  +
         1996  +      sqlite3Fts5ParseNodeFree(pPrev);
         1997  +    }
         1998  +    else{
         1999  +      pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND, pLeft, pRight, 0);
         2000  +    }
         2001  +  }
         2002  +
  1928   2003     return pRet;
  1929   2004   }
  1930   2005   
  1931   2006   static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
  1932   2007     int nByte = 0;
  1933   2008     Fts5ExprTerm *p;
  1934   2009     char *zQuoted;
................................................................................
  2058   2133     }
  2059   2134   
  2060   2135     return zRet;
  2061   2136   }
  2062   2137   
  2063   2138   static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){
  2064   2139     char *zRet = 0;
         2140  +  if( pExpr->eType==0 ){
         2141  +    return sqlite3_mprintf("\"\"");
         2142  +  }else
  2065   2143     if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
  2066   2144       Fts5ExprNearset *pNear = pExpr->pNear;
  2067   2145       int i; 
  2068   2146       int iTerm;
  2069   2147   
  2070   2148       if( pNear->pColset ){
  2071   2149         int iCol = pNear->pColset->aiCol[0];
................................................................................
  2118   2196       for(i=0; i<pExpr->nChild; i++){
  2119   2197         char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]);
  2120   2198         if( z==0 ){
  2121   2199           sqlite3_free(zRet);
  2122   2200           zRet = 0;
  2123   2201         }else{
  2124   2202           int e = pExpr->apChild[i]->eType;
  2125         -        int b = (e!=FTS5_STRING && e!=FTS5_TERM);
         2203  +        int b = (e!=FTS5_STRING && e!=FTS5_TERM && e!=FTS5_EOF);
  2126   2204           zRet = fts5PrintfAppend(zRet, "%s%s%z%s", 
  2127   2205               (i==0 ? "" : zOp),
  2128   2206               (b?"(":""), z, (b?")":"")
  2129   2207           );
  2130   2208         }
  2131   2209         if( zRet==0 ) break;
  2132   2210       }

Changes to ext/fts5/fts5parse.y.

   100    100   }
   101    101   
   102    102   expr(A) ::= LP expr(X) RP. {A = X;}
   103    103   expr(A) ::= exprlist(X).   {A = X;}
   104    104   
   105    105   exprlist(A) ::= cnearset(X). {A = X;}
   106    106   exprlist(A) ::= exprlist(X) cnearset(Y). {
   107         -  A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0);
          107  +  A = sqlite3Fts5ParseImplicitAnd(pParse, X, Y);
   108    108   }
   109    109   
   110    110   cnearset(A) ::= nearset(X). { 
   111    111     A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, X); 
   112    112   }
   113    113   cnearset(A) ::= colset(X) COLON nearset(Y). { 
   114    114     sqlite3Fts5ParseSetColset(pParse, Y, X);

Changes to ext/fts5/test/fts5eb.test.

    29     29     do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res]
    30     30   }
    31     31   
    32     32   foreach {tn expr res} {
    33     33     1  {abc}                            {"abc"}
    34     34     2  {abc ""}                         {"abc"}
    35     35     3  {""}                             {}
    36         -  4  {abc OR ""}                      {"abc"}
    37         -  5  {abc NOT ""}                     {"abc"}
    38         -  6  {abc AND ""}                     {"abc"}
    39         -  7  {"" OR abc}                      {"abc"}
    40         -  8  {"" NOT abc}                     {"abc"}
    41         -  9  {"" AND abc}                     {"abc"}
           36  +  4  {abc OR ""}                      {"abc" OR ""}
           37  +  5  {abc NOT ""}                     {"abc" NOT ""}
           38  +  6  {abc AND ""}                     {"abc" AND ""}
           39  +  7  {"" OR abc}                      {"" OR "abc"}
           40  +  8  {"" NOT abc}                     {"" NOT "abc"}
           41  +  9  {"" AND abc}                     {"" AND "abc"}
    42     42     10 {abc + "" + def}                 {"abc" + "def"}
    43     43     11 {abc "" def}                     {"abc" AND "def"}
    44     44     12 {r+e OR w}                       {"r" + "e" OR "w"}
    45     45   
    46     46     13 {a AND b NOT c}                  {"a" AND ("b" NOT "c")}
    47     47     14 {a OR b NOT c}                   {"a" OR ("b" NOT "c")}
    48     48     15 {a NOT b AND c}                  {("a" NOT "b") AND "c"}

Added ext/fts5/test/fts5fuzz1.test.

            1  +# 2014 June 17
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +# This file implements regression tests for SQLite library.  The
           12  +# focus of this script is testing the FTS5 module.
           13  +#
           14  +
           15  +source [file join [file dirname [info script]] fts5_common.tcl]
           16  +return_if_no_fts5
           17  +set testprefix fts5fuzz1
           18  +
           19  +
           20  +#-------------------------------------------------------------------------
           21  +reset_db
           22  +do_catchsql_test 1.1 {
           23  +  CREATE VIRTUAL TABLE f1 USING fts5(a b);
           24  +} {/1 {parse error in.*}/}
           25  +
           26  +
           27  +#-------------------------------------------------------------------------
           28  +reset_db
           29  +do_execsql_test 2.1 {
           30  +  CREATE VIRTUAL TABLE f1 USING fts5(a, b);
           31  +  INSERT INTO f1 VALUES('a b', 'c d');
           32  +  INSERT INTO f1 VALUES('e f', 'a b');
           33  +}
           34  +
           35  +do_execsql_test 2.2.1 {
           36  +  SELECT rowid FROM f1('""');
           37  +} {}
           38  +
           39  +do_execsql_test 2.2.2 {
           40  +  SELECT rowid FROM f1('"" AND a');
           41  +} {}
           42  +
           43  +
           44  +do_execsql_test 2.2.3 {
           45  +  SELECT rowid FROM f1('"" a');
           46  +} {1 2}
           47  +
           48  +do_execsql_test 2.2.4 {
           49  +  SELECT rowid FROM f1('"" OR a');
           50  +} {1 2}
           51  +
           52  +do_execsql_test 2.3 {
           53  +  SELECT a, b FROM f1('NEAR("")');
           54  +} {}
           55  +
           56  +do_execsql_test 2.4 {
           57  +  SELECT a, b FROM f1('NEAR("", 5)');
           58  +} {}
           59  +
           60  +do_execsql_test 2.5 {
           61  +  SELECT a, b FROM f1('NEAR("" c, 5)');
           62  +} {}
           63  +
           64  +do_execsql_test 2.6 {
           65  +  SELECT a, b FROM f1('NEAR("" c d, 5)');
           66  +} {}
           67  +
           68  +do_execsql_test 2.7 {
           69  +  SELECT a, b FROM f1('NEAR(c d, 5)');
           70  +} {{a b} {c d}}
           71  +
           72  +do_execsql_test 2.8 {
           73  +  SELECT rowid FROM f1('NEAR("a" "b", 5)');
           74  +} {1 2}
           75  +
           76  +#-------------------------------------------------------------------------
           77  +reset_db
           78  +do_execsql_test 3.2 {
           79  +  CREATE VIRTUAL TABLE f2 USING fts5(o, t, tokenize="ascii separators abc");
           80  +  SELECT * FROM f2('a+4');
           81  +} {}
           82  +
           83  +
           84  +
           85  +
           86  +
           87  +finish_test
           88  +
           89  +