/ Check-in [f5d330f4]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhance the like optimization so that it works with an ESCAPE clause.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256:f5d330f495d07a704e115595bbdf5422ddb68fd8191114c5a12c9c873d983f7c
User & Date: drh 2017-07-27 20:24:29
Context
2017-07-28
22:13
Fix harmless compiler warning. check-in: 3286e1a0 user: mistachkin tags: trunk
2017-07-27
20:24
Enhance the like optimization so that it works with an ESCAPE clause. check-in: f5d330f4 user: drh tags: trunk
19:59
Increase the version number to 3.21.0 in anticipation for changes to go into the next release. check-in: 0645f25c user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/func.c.

1702
1703
1704
1705
1706
1707
1708
1709
1710
1711





1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726

1727
1728
1729
1730










1731
1732
1733
1734
1735
1736
1737
  setLikeOptFlag(db, "like", 
      caseSensitive ? (SQLITE_FUNC_LIKE | SQLITE_FUNC_CASE) : SQLITE_FUNC_LIKE);
}

/*
** pExpr points to an expression which implements a function.  If
** it is appropriate to apply the LIKE optimization to that function
** then set aWc[0] through aWc[2] to the wildcard characters and
** return TRUE.  If the function is not a LIKE-style function then
** return FALSE.





**
** *pIsNocase is set to true if uppercase and lowercase are equivalent for
** the function (default for LIKE).  If the function makes the distinction
** between uppercase and lowercase (as does GLOB) then *pIsNocase is set to
** false.
*/
int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, int *pIsNocase, char *aWc){
  FuncDef *pDef;
  if( pExpr->op!=TK_FUNCTION 
   || !pExpr->x.pList 
   || pExpr->x.pList->nExpr!=2
  ){
    return 0;
  }
  assert( !ExprHasProperty(pExpr, EP_xIsSelect) );

  pDef = sqlite3FindFunction(db, pExpr->u.zToken, 2, SQLITE_UTF8, 0);
  if( NEVER(pDef==0) || (pDef->funcFlags & SQLITE_FUNC_LIKE)==0 ){
    return 0;
  }











  /* The memcpy() statement assumes that the wildcard characters are
  ** the first three statements in the compareInfo structure.  The
  ** asserts() that follow verify that assumption
  */
  memcpy(aWc, pDef->pUserData, 3);
  assert( (char*)&likeInfoAlt == (char*)&likeInfoAlt.matchAll );







|
|
|
>
>
>
>
>








|
|
<
<



>
|



>
>
>
>
>
>
>
>
>
>







1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726


1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
  setLikeOptFlag(db, "like", 
      caseSensitive ? (SQLITE_FUNC_LIKE | SQLITE_FUNC_CASE) : SQLITE_FUNC_LIKE);
}

/*
** pExpr points to an expression which implements a function.  If
** it is appropriate to apply the LIKE optimization to that function
** then set aWc[0] through aWc[2] to the wildcard characters and the
** escape character and then return TRUE.  If the function is not a 
** LIKE-style function then return FALSE.
**
** The expression "a LIKE b ESCAPE c" is only considered a valid LIKE
** operator if c is a string literal that is exactly one byte in length.
** That one byte is stored in aWc[3].  aWc[3] is set to zero if there is
** no ESCAPE clause.
**
** *pIsNocase is set to true if uppercase and lowercase are equivalent for
** the function (default for LIKE).  If the function makes the distinction
** between uppercase and lowercase (as does GLOB) then *pIsNocase is set to
** false.
*/
int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, int *pIsNocase, char *aWc){
  FuncDef *pDef;
  int nExpr;
  if( pExpr->op!=TK_FUNCTION || !pExpr->x.pList ){


    return 0;
  }
  assert( !ExprHasProperty(pExpr, EP_xIsSelect) );
  nExpr = pExpr->x.pList->nExpr;
  pDef = sqlite3FindFunction(db, pExpr->u.zToken, nExpr, SQLITE_UTF8, 0);
  if( NEVER(pDef==0) || (pDef->funcFlags & SQLITE_FUNC_LIKE)==0 ){
    return 0;
  }
  if( nExpr<3 ){
    aWc[3] = 0;
  }else{
    Expr *pEscape = pExpr->x.pList->a[2].pExpr;
    char *zEscape;
    if( pEscape->op!=TK_STRING ) return 0;
    zEscape = pEscape->u.zToken;
    if( zEscape[0]==0 || zEscape[1]!=0 ) return 0;
    aWc[3] = zEscape[0];
  }

  /* The memcpy() statement assumes that the wildcard characters are
  ** the first three statements in the compareInfo structure.  The
  ** asserts() that follow verify that assumption
  */
  memcpy(aWc, pDef->pUserData, 3);
  assert( (char*)&likeInfoAlt == (char*)&likeInfoAlt.matchAll );

Changes to src/wherecode.c.

790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
  if( pExpr->op==TK_IS 
   || pExpr->op==TK_ISNULL || pExpr->op==TK_ISNOT 
   || pExpr->op==TK_NOTNULL || pExpr->op==TK_CASE 
  ){
    pWalker->eCode = 1;
  }else if( pExpr->op==TK_FUNCTION ){
    int d1;
    char d2[3];
    if( 0==sqlite3IsLikeFunction(pWalker->pParse->db, pExpr, &d1, d2) ){
      pWalker->eCode = 1;
    }
  }

  return WRC_Continue;
}







|







790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
  if( pExpr->op==TK_IS 
   || pExpr->op==TK_ISNULL || pExpr->op==TK_ISNOT 
   || pExpr->op==TK_NOTNULL || pExpr->op==TK_CASE 
  ){
    pWalker->eCode = 1;
  }else if( pExpr->op==TK_FUNCTION ){
    int d1;
    char d2[4];
    if( 0==sqlite3IsLikeFunction(pWalker->pParse->db, pExpr, &d1, d2) ){
      pWalker->eCode = 1;
    }
  }

  return WRC_Continue;
}

Changes to src/whereexpr.c.

195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
...
242
243
244
245
246
247
248


249
250
251


252








253
254


255


256
257









258



259
260
261
262
263
264
265
  int *pnoCase      /* True if uppercase is equivalent to lowercase */
){
  const char *z = 0;         /* String on RHS of LIKE operator */
  Expr *pRight, *pLeft;      /* Right and left size of LIKE operator */
  ExprList *pList;           /* List of operands to the LIKE operator */
  int c;                     /* One character in z[] */
  int cnt;                   /* Number of non-wildcard prefix characters */
  char wc[3];                /* Wildcard characters */
  sqlite3 *db = pParse->db;  /* Database connection */
  sqlite3_value *pVal = 0;
  int op;                    /* Opcode of pRight */
  int rc;                    /* Result code to return */

  if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){
    return 0;
................................................................................
       || sqlite3ExprAffinity(pLeft)!=SQLITE_AFF_TEXT 
       || IsVirtual(pLeft->pTab)  /* Value might be numeric */
      ){
        sqlite3ValueFree(pVal);
        return 0;
      }
    }


    cnt = 0;
    while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
      cnt++;


    }








    if( cnt!=0 && 255!=(u8)z[cnt-1] ){
      Expr *pPrefix;


      *pisComplete = c==wc[0] && z[cnt+1]==0;


      pPrefix = sqlite3Expr(db, TK_STRING, z);
      if( pPrefix ) pPrefix->u.zToken[cnt] = 0;









      *ppPrefix = pPrefix;



      if( op==TK_VARIABLE ){
        Vdbe *v = pParse->pVdbe;
        sqlite3VdbeSetVarmask(v, pRight->iColumn);
        if( *pisComplete && pRight->u.zToken[1] ){
          /* If the rhs of the LIKE expression is a variable, and the current
          ** value of the variable means there is no need to invoke the LIKE
          ** function, then no OP_Variable will be added to the program.







|







 







>
>



>
>
|
>
>
>
>
>
>
>
>


>
>

>
>

|
>
>
>
>
>
>
>
>
>

>
>
>







195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
...
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
  int *pnoCase      /* True if uppercase is equivalent to lowercase */
){
  const char *z = 0;         /* String on RHS of LIKE operator */
  Expr *pRight, *pLeft;      /* Right and left size of LIKE operator */
  ExprList *pList;           /* List of operands to the LIKE operator */
  int c;                     /* One character in z[] */
  int cnt;                   /* Number of non-wildcard prefix characters */
  char wc[4];                /* Wildcard characters */
  sqlite3 *db = pParse->db;  /* Database connection */
  sqlite3_value *pVal = 0;
  int op;                    /* Opcode of pRight */
  int rc;                    /* Result code to return */

  if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){
    return 0;
................................................................................
       || sqlite3ExprAffinity(pLeft)!=SQLITE_AFF_TEXT 
       || IsVirtual(pLeft->pTab)  /* Value might be numeric */
      ){
        sqlite3ValueFree(pVal);
        return 0;
      }
    }

    /* Count the number of prefix characters prior to the first wildcard */
    cnt = 0;
    while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
      cnt++;
      if( c==wc[3] && z[cnt]!=0 ){
        if( z[cnt++]>0xc0 ) while( (z[cnt]&0xc0)==0x80 ){ cnt++; }
      }
    }

    /* The optimization is possible only if (1) the pattern does not begin
    ** with a wildcard and if (2) the non-wildcard prefix does not end with
    ** an (illegal 0xff) character.  The second condition is necessary so
    ** that we can increment the prefix key to find an upper bound for the
    ** range search. 
    */
    if( cnt!=0 && 255!=(u8)z[cnt-1] ){
      Expr *pPrefix;

      /* A "complete" match if the pattern ends with "*" or "%" */
      *pisComplete = c==wc[0] && z[cnt+1]==0;

      /* Get the pattern prefix.  Remove all escapes from the prefix. */
      pPrefix = sqlite3Expr(db, TK_STRING, z);
      if( pPrefix ){
        int iFrom, iTo;
        char *zNew = pPrefix->u.zToken;
        zNew[cnt] = 0;
        for(iFrom=iTo=0; iFrom<cnt; iFrom++){
          if( zNew[iFrom]==wc[3] ) iFrom++;
          zNew[iTo++] = zNew[iFrom];
        }
        zNew[iTo] = 0;
      }
      *ppPrefix = pPrefix;

      /* If the RHS pattern is a bound parameter, make arrangements to
      ** reprepare the statement when that parameter is rebound */
      if( op==TK_VARIABLE ){
        Vdbe *v = pParse->pVdbe;
        sqlite3VdbeSetVarmask(v, pRight->iColumn);
        if( *pisComplete && pRight->u.zToken[1] ){
          /* If the rhs of the LIKE expression is a variable, and the current
          ** value of the variable means there is no need to invoke the LIKE
          ** function, then no OP_Variable will be added to the program.

Changes to test/like.test.

203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
....
1043
1044
1045
1046
1047
1048
1049
1050
















































1051
    PRAGMA case_sensitive_like=on;
    CREATE INDEX i1 ON t1(x);
  }
  queryplan {
    SELECT x FROM t1 WHERE x LIKE 'abc%' ORDER BY 1;
  }
} {abc abcd nosort {} i1}
do_test like-3.3.101 {
  set sqlite_like_count
} 0

# The like optimization works even when the pattern is a bound parameter
#
# Exception: It does not work if sqlite3_prepare() is used instead of
# sqlite3_prepare_v2(), as in that case the statement cannot be reprepared
................................................................................
    set x [lindex [time {
      db one {SELECT 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaz'LIKE'%a%a%a%a%a%a%a%a%y'}
    }] 0]
    puts -nonewline " ($x ms - want less than 1000) "
    expr {$x<1000}
  } {1}
}

















































finish_test







|







 








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
....
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
    PRAGMA case_sensitive_like=on;
    CREATE INDEX i1 ON t1(x);
  }
  queryplan {
    SELECT x FROM t1 WHERE x LIKE 'abc%' ORDER BY 1;
  }
} {abc abcd nosort {} i1}
do_test like-3.3.100.cnt {
  set sqlite_like_count
} 0

# The like optimization works even when the pattern is a bound parameter
#
# Exception: It does not work if sqlite3_prepare() is used instead of
# sqlite3_prepare_v2(), as in that case the statement cannot be reprepared
................................................................................
    set x [lindex [time {
      db one {SELECT 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaz'LIKE'%a%a%a%a%a%a%a%a%y'}
    }] 0]
    puts -nonewline " ($x ms - want less than 1000) "
    expr {$x<1000}
  } {1}
}

# As of 2017-07-27 (3.21.0) the LIKE optimization works with ESCAPE as
# long as the ESCAPE is a single-byte literal.
#
db close
sqlite3 db :memory:
do_execsql_test like-15.100 {
  CREATE TABLE t15(x TEXT COLLATE nocase, y, PRIMARY KEY(x));
  INSERT INTO t15(x,y) VALUES
    ('abcde',1), ('ab%de',2), ('a_cde',3),
    ('uvwxy',11),('uvwx%',12),('uvwx_',13),
    ('_bcde',21),('%bcde',22),
    ('abcd_',31),('abcd%',32),
    ('ab%xy',41);
  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '/';
} {2}
do_execsql_test like-15.101 {
  EXPLAIN QUERY PLAN
  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '/';
} {/SEARCH/}
do_execsql_test like-15.102 {
  EXPLAIN QUERY PLAN
  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '//';
} {/SCAN/}
do_execsql_test like-15.103 {
  EXPLAIN QUERY PLAN
  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '';
} {/SCAN/}
do_execsql_test like-15.110 {
  SELECT y FROM t15 WHERE x LIKE 'abcdx%%' ESCAPE 'x';
} {32}
do_execsql_test like-15.111 {
  SELECT y FROM t15 WHERE x LIKE 'abx%%' ESCAPE 'x' ORDER BY +y
} {2 41}
do_execsql_test like-15.112 {
  EXPLAIN QUERY PLAN
  SELECT y FROM t15 WHERE x LIKE 'abx%%' ESCAPE 'x' ORDER BY +y
} {/SEARCH/}
do_execsql_test like-15.120 {
  SELECT y FROM t15 WHERE x LIKE '/%bc%' ESCAPE '/';
} {22}
do_execsql_test like-15.121 {
  EXPLAIN QUERY PLAN
  SELECT y FROM t15 WHERE x LIKE '/%bc%' ESCAPE '/';
} {/SEARCH/}




finish_test