/ Check-in [f5d330f4]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhance the like optimization so that it works with an ESCAPE clause.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256:f5d330f495d07a704e115595bbdf5422ddb68fd8191114c5a12c9c873d983f7c
User & Date: drh 2017-07-27 20:24:29
Context
2017-07-28
22:13
Fix harmless compiler warning. check-in: 3286e1a0 user: mistachkin tags: trunk
2017-07-27
22:16
Enhance the like optimization so that it works with an ESCAPE clause. check-in: 2495acf7 user: drh tags: branch-3.8.9
20:24
Enhance the like optimization so that it works with an ESCAPE clause. check-in: f5d330f4 user: drh tags: trunk
19:59
Increase the version number to 3.21.0 in anticipation for changes to go into the next release. check-in: 0645f25c user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/func.c.

  1702   1702     setLikeOptFlag(db, "like", 
  1703   1703         caseSensitive ? (SQLITE_FUNC_LIKE | SQLITE_FUNC_CASE) : SQLITE_FUNC_LIKE);
  1704   1704   }
  1705   1705   
  1706   1706   /*
  1707   1707   ** pExpr points to an expression which implements a function.  If
  1708   1708   ** it is appropriate to apply the LIKE optimization to that function
  1709         -** then set aWc[0] through aWc[2] to the wildcard characters and
  1710         -** return TRUE.  If the function is not a LIKE-style function then
  1711         -** return FALSE.
         1709  +** then set aWc[0] through aWc[2] to the wildcard characters and the
         1710  +** escape character and then return TRUE.  If the function is not a 
         1711  +** LIKE-style function then return FALSE.
         1712  +**
         1713  +** The expression "a LIKE b ESCAPE c" is only considered a valid LIKE
         1714  +** operator if c is a string literal that is exactly one byte in length.
         1715  +** That one byte is stored in aWc[3].  aWc[3] is set to zero if there is
         1716  +** no ESCAPE clause.
  1712   1717   **
  1713   1718   ** *pIsNocase is set to true if uppercase and lowercase are equivalent for
  1714   1719   ** the function (default for LIKE).  If the function makes the distinction
  1715   1720   ** between uppercase and lowercase (as does GLOB) then *pIsNocase is set to
  1716   1721   ** false.
  1717   1722   */
  1718   1723   int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, int *pIsNocase, char *aWc){
  1719   1724     FuncDef *pDef;
  1720         -  if( pExpr->op!=TK_FUNCTION 
  1721         -   || !pExpr->x.pList 
  1722         -   || pExpr->x.pList->nExpr!=2
  1723         -  ){
         1725  +  int nExpr;
         1726  +  if( pExpr->op!=TK_FUNCTION || !pExpr->x.pList ){
  1724   1727       return 0;
  1725   1728     }
  1726   1729     assert( !ExprHasProperty(pExpr, EP_xIsSelect) );
  1727         -  pDef = sqlite3FindFunction(db, pExpr->u.zToken, 2, SQLITE_UTF8, 0);
         1730  +  nExpr = pExpr->x.pList->nExpr;
         1731  +  pDef = sqlite3FindFunction(db, pExpr->u.zToken, nExpr, SQLITE_UTF8, 0);
  1728   1732     if( NEVER(pDef==0) || (pDef->funcFlags & SQLITE_FUNC_LIKE)==0 ){
  1729   1733       return 0;
  1730   1734     }
         1735  +  if( nExpr<3 ){
         1736  +    aWc[3] = 0;
         1737  +  }else{
         1738  +    Expr *pEscape = pExpr->x.pList->a[2].pExpr;
         1739  +    char *zEscape;
         1740  +    if( pEscape->op!=TK_STRING ) return 0;
         1741  +    zEscape = pEscape->u.zToken;
         1742  +    if( zEscape[0]==0 || zEscape[1]!=0 ) return 0;
         1743  +    aWc[3] = zEscape[0];
         1744  +  }
  1731   1745   
  1732   1746     /* The memcpy() statement assumes that the wildcard characters are
  1733   1747     ** the first three statements in the compareInfo structure.  The
  1734   1748     ** asserts() that follow verify that assumption
  1735   1749     */
  1736   1750     memcpy(aWc, pDef->pUserData, 3);
  1737   1751     assert( (char*)&likeInfoAlt == (char*)&likeInfoAlt.matchAll );

Changes to src/wherecode.c.

   790    790     if( pExpr->op==TK_IS 
   791    791      || pExpr->op==TK_ISNULL || pExpr->op==TK_ISNOT 
   792    792      || pExpr->op==TK_NOTNULL || pExpr->op==TK_CASE 
   793    793     ){
   794    794       pWalker->eCode = 1;
   795    795     }else if( pExpr->op==TK_FUNCTION ){
   796    796       int d1;
   797         -    char d2[3];
          797  +    char d2[4];
   798    798       if( 0==sqlite3IsLikeFunction(pWalker->pParse->db, pExpr, &d1, d2) ){
   799    799         pWalker->eCode = 1;
   800    800       }
   801    801     }
   802    802   
   803    803     return WRC_Continue;
   804    804   }

Changes to src/whereexpr.c.

   195    195     int *pnoCase      /* True if uppercase is equivalent to lowercase */
   196    196   ){
   197    197     const char *z = 0;         /* String on RHS of LIKE operator */
   198    198     Expr *pRight, *pLeft;      /* Right and left size of LIKE operator */
   199    199     ExprList *pList;           /* List of operands to the LIKE operator */
   200    200     int c;                     /* One character in z[] */
   201    201     int cnt;                   /* Number of non-wildcard prefix characters */
   202         -  char wc[3];                /* Wildcard characters */
          202  +  char wc[4];                /* Wildcard characters */
   203    203     sqlite3 *db = pParse->db;  /* Database connection */
   204    204     sqlite3_value *pVal = 0;
   205    205     int op;                    /* Opcode of pRight */
   206    206     int rc;                    /* Result code to return */
   207    207   
   208    208     if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){
   209    209       return 0;
................................................................................
   242    242          || sqlite3ExprAffinity(pLeft)!=SQLITE_AFF_TEXT 
   243    243          || IsVirtual(pLeft->pTab)  /* Value might be numeric */
   244    244         ){
   245    245           sqlite3ValueFree(pVal);
   246    246           return 0;
   247    247         }
   248    248       }
          249  +
          250  +    /* Count the number of prefix characters prior to the first wildcard */
   249    251       cnt = 0;
   250    252       while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
   251    253         cnt++;
          254  +      if( c==wc[3] && z[cnt]!=0 ){
          255  +        if( z[cnt++]>0xc0 ) while( (z[cnt]&0xc0)==0x80 ){ cnt++; }
          256  +      }
   252    257       }
          258  +
          259  +    /* The optimization is possible only if (1) the pattern does not begin
          260  +    ** with a wildcard and if (2) the non-wildcard prefix does not end with
          261  +    ** an (illegal 0xff) character.  The second condition is necessary so
          262  +    ** that we can increment the prefix key to find an upper bound for the
          263  +    ** range search. 
          264  +    */
   253    265       if( cnt!=0 && 255!=(u8)z[cnt-1] ){
   254    266         Expr *pPrefix;
          267  +
          268  +      /* A "complete" match if the pattern ends with "*" or "%" */
   255    269         *pisComplete = c==wc[0] && z[cnt+1]==0;
          270  +
          271  +      /* Get the pattern prefix.  Remove all escapes from the prefix. */
   256    272         pPrefix = sqlite3Expr(db, TK_STRING, z);
   257         -      if( pPrefix ) pPrefix->u.zToken[cnt] = 0;
          273  +      if( pPrefix ){
          274  +        int iFrom, iTo;
          275  +        char *zNew = pPrefix->u.zToken;
          276  +        zNew[cnt] = 0;
          277  +        for(iFrom=iTo=0; iFrom<cnt; iFrom++){
          278  +          if( zNew[iFrom]==wc[3] ) iFrom++;
          279  +          zNew[iTo++] = zNew[iFrom];
          280  +        }
          281  +        zNew[iTo] = 0;
          282  +      }
   258    283         *ppPrefix = pPrefix;
          284  +
          285  +      /* If the RHS pattern is a bound parameter, make arrangements to
          286  +      ** reprepare the statement when that parameter is rebound */
   259    287         if( op==TK_VARIABLE ){
   260    288           Vdbe *v = pParse->pVdbe;
   261    289           sqlite3VdbeSetVarmask(v, pRight->iColumn);
   262    290           if( *pisComplete && pRight->u.zToken[1] ){
   263    291             /* If the rhs of the LIKE expression is a variable, and the current
   264    292             ** value of the variable means there is no need to invoke the LIKE
   265    293             ** function, then no OP_Variable will be added to the program.

Changes to test/like.test.

   203    203       PRAGMA case_sensitive_like=on;
   204    204       CREATE INDEX i1 ON t1(x);
   205    205     }
   206    206     queryplan {
   207    207       SELECT x FROM t1 WHERE x LIKE 'abc%' ORDER BY 1;
   208    208     }
   209    209   } {abc abcd nosort {} i1}
   210         -do_test like-3.3.101 {
          210  +do_test like-3.3.100.cnt {
   211    211     set sqlite_like_count
   212    212   } 0
   213    213   
   214    214   # The like optimization works even when the pattern is a bound parameter
   215    215   #
   216    216   # Exception: It does not work if sqlite3_prepare() is used instead of
   217    217   # sqlite3_prepare_v2(), as in that case the statement cannot be reprepared
................................................................................
  1043   1043       set x [lindex [time {
  1044   1044         db one {SELECT 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaz'LIKE'%a%a%a%a%a%a%a%a%y'}
  1045   1045       }] 0]
  1046   1046       puts -nonewline " ($x ms - want less than 1000) "
  1047   1047       expr {$x<1000}
  1048   1048     } {1}
  1049   1049   }
         1050  +
         1051  +# As of 2017-07-27 (3.21.0) the LIKE optimization works with ESCAPE as
         1052  +# long as the ESCAPE is a single-byte literal.
         1053  +#
         1054  +db close
         1055  +sqlite3 db :memory:
         1056  +do_execsql_test like-15.100 {
         1057  +  CREATE TABLE t15(x TEXT COLLATE nocase, y, PRIMARY KEY(x));
         1058  +  INSERT INTO t15(x,y) VALUES
         1059  +    ('abcde',1), ('ab%de',2), ('a_cde',3),
         1060  +    ('uvwxy',11),('uvwx%',12),('uvwx_',13),
         1061  +    ('_bcde',21),('%bcde',22),
         1062  +    ('abcd_',31),('abcd%',32),
         1063  +    ('ab%xy',41);
         1064  +  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '/';
         1065  +} {2}
         1066  +do_execsql_test like-15.101 {
         1067  +  EXPLAIN QUERY PLAN
         1068  +  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '/';
         1069  +} {/SEARCH/}
         1070  +do_execsql_test like-15.102 {
         1071  +  EXPLAIN QUERY PLAN
         1072  +  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '//';
         1073  +} {/SCAN/}
         1074  +do_execsql_test like-15.103 {
         1075  +  EXPLAIN QUERY PLAN
         1076  +  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '';
         1077  +} {/SCAN/}
         1078  +do_execsql_test like-15.110 {
         1079  +  SELECT y FROM t15 WHERE x LIKE 'abcdx%%' ESCAPE 'x';
         1080  +} {32}
         1081  +do_execsql_test like-15.111 {
         1082  +  SELECT y FROM t15 WHERE x LIKE 'abx%%' ESCAPE 'x' ORDER BY +y
         1083  +} {2 41}
         1084  +do_execsql_test like-15.112 {
         1085  +  EXPLAIN QUERY PLAN
         1086  +  SELECT y FROM t15 WHERE x LIKE 'abx%%' ESCAPE 'x' ORDER BY +y
         1087  +} {/SEARCH/}
         1088  +do_execsql_test like-15.120 {
         1089  +  SELECT y FROM t15 WHERE x LIKE '/%bc%' ESCAPE '/';
         1090  +} {22}
         1091  +do_execsql_test like-15.121 {
         1092  +  EXPLAIN QUERY PLAN
         1093  +  SELECT y FROM t15 WHERE x LIKE '/%bc%' ESCAPE '/';
         1094  +} {/SEARCH/}
         1095  +
         1096  +
         1097  +
  1050   1098   
  1051   1099   finish_test