/ Check-in [2495acf7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhance the like optimization so that it works with an ESCAPE clause.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | branch-3.8.9
Files: files | file ages | folders
SHA3-256: 2495acf71017fa2ffada18824590ead593c47dabe2312701a25adc517cbf72eb
User & Date: drh 2017-07-27 22:16:26
Context
2017-11-09
04:13
Disallow ORDER BY and LIMIT on UPDATE and DELETE of views and WITHOUT ROWID tables. This is a temporary fix for ticket [d4beea1633f1b88f] until a better solution can be found. check-in: 30aa941f user: drh tags: branch-3.8.9
2017-07-27
22:16
Enhance the like optimization so that it works with an ESCAPE clause. check-in: 2495acf7 user: drh tags: branch-3.8.9
20:24
Enhance the like optimization so that it works with an ESCAPE clause. check-in: f5d330f4 user: drh tags: trunk
2017-07-17
19:37
Try to push relevant WHERE clause terms of the outer query down into the subquery in order to help the subquery run faster and/or use less memory. This is a cherry-pick of [6df18e949d36] with bug fixes. check-in: cd6ac078 user: drh tags: branch-3.8.9
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/func.c.

1645
1646
1647
1648
1649
1650
1651
1652
1653
1654





1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669

1670
1671
1672
1673
1674
1675










1676
1677
1678
1679
1680
1681
1682
  setLikeOptFlag(db, "like", 
      caseSensitive ? (SQLITE_FUNC_LIKE | SQLITE_FUNC_CASE) : SQLITE_FUNC_LIKE);
}

/*
** pExpr points to an expression which implements a function.  If
** it is appropriate to apply the LIKE optimization to that function
** then set aWc[0] through aWc[2] to the wildcard characters and
** return TRUE.  If the function is not a LIKE-style function then
** return FALSE.





**
** *pIsNocase is set to true if uppercase and lowercase are equivalent for
** the function (default for LIKE).  If the function makes the distinction
** between uppercase and lowercase (as does GLOB) then *pIsNocase is set to
** false.
*/
int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, int *pIsNocase, char *aWc){
  FuncDef *pDef;
  if( pExpr->op!=TK_FUNCTION 
   || !pExpr->x.pList 
   || pExpr->x.pList->nExpr!=2
  ){
    return 0;
  }
  assert( !ExprHasProperty(pExpr, EP_xIsSelect) );

  pDef = sqlite3FindFunction(db, pExpr->u.zToken, 
                             sqlite3Strlen30(pExpr->u.zToken),
                             2, SQLITE_UTF8, 0);
  if( NEVER(pDef==0) || (pDef->funcFlags & SQLITE_FUNC_LIKE)==0 ){
    return 0;
  }











  /* The memcpy() statement assumes that the wildcard characters are
  ** the first three statements in the compareInfo structure.  The
  ** asserts() that follow verify that assumption
  */
  memcpy(aWc, pDef->pUserData, 3);
  assert( (char*)&likeInfoAlt == (char*)&likeInfoAlt.matchAll );







|
|
|
>
>
>
>
>








|
|
<
<



>


|



>
>
>
>
>
>
>
>
>
>







1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669


1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
  setLikeOptFlag(db, "like", 
      caseSensitive ? (SQLITE_FUNC_LIKE | SQLITE_FUNC_CASE) : SQLITE_FUNC_LIKE);
}

/*
** pExpr points to an expression which implements a function.  If
** it is appropriate to apply the LIKE optimization to that function
** then set aWc[0] through aWc[2] to the wildcard characters and the
** escape character and then return TRUE.  If the function is not a 
** LIKE-style function then return FALSE.
**
** The expression "a LIKE b ESCAPE c" is only considered a valid LIKE
** operator if c is a string literal that is exactly one byte in length.
** That one byte is stored in aWc[3].  aWc[3] is set to zero if there is
** no ESCAPE clause.
**
** *pIsNocase is set to true if uppercase and lowercase are equivalent for
** the function (default for LIKE).  If the function makes the distinction
** between uppercase and lowercase (as does GLOB) then *pIsNocase is set to
** false.
*/
int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, int *pIsNocase, char *aWc){
  FuncDef *pDef;
  int nExpr;
  if( pExpr->op!=TK_FUNCTION || !pExpr->x.pList ){


    return 0;
  }
  assert( !ExprHasProperty(pExpr, EP_xIsSelect) );
  nExpr = pExpr->x.pList->nExpr;
  pDef = sqlite3FindFunction(db, pExpr->u.zToken, 
                             sqlite3Strlen30(pExpr->u.zToken),
                             nExpr, SQLITE_UTF8, 0);
  if( NEVER(pDef==0) || (pDef->funcFlags & SQLITE_FUNC_LIKE)==0 ){
    return 0;
  }
  if( nExpr<3 ){
    aWc[3] = 0;
  }else{
    Expr *pEscape = pExpr->x.pList->a[2].pExpr;
    char *zEscape;
    if( pEscape->op!=TK_STRING ) return 0;
    zEscape = pEscape->u.zToken;
    if( zEscape[0]==0 || zEscape[1]!=0 ) return 0;
    aWc[3] = zEscape[0];
  }

  /* The memcpy() statement assumes that the wildcard characters are
  ** the first three statements in the compareInfo structure.  The
  ** asserts() that follow verify that assumption
  */
  memcpy(aWc, pDef->pUserData, 3);
  assert( (char*)&likeInfoAlt == (char*)&likeInfoAlt.matchAll );

Changes to src/where.c.

653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
...
691
692
693
694
695
696
697

698
699
700


701








702
703


704


705
706









707



708
709
710
711
712
713
714
  int *pnoCase      /* True if uppercase is equivalent to lowercase */
){
  const char *z = 0;         /* String on RHS of LIKE operator */
  Expr *pRight, *pLeft;      /* Right and left size of LIKE operator */
  ExprList *pList;           /* List of operands to the LIKE operator */
  int c;                     /* One character in z[] */
  int cnt;                   /* Number of non-wildcard prefix characters */
  char wc[3];                /* Wildcard characters */
  sqlite3 *db = pParse->db;  /* Database connection */
  sqlite3_value *pVal = 0;
  int op;                    /* Opcode of pRight */

  if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){
    return 0;
  }
................................................................................
    }
    sqlite3VdbeSetVarmask(pParse->pVdbe, iCol);
    assert( pRight->op==TK_VARIABLE || pRight->op==TK_REGISTER );
  }else if( op==TK_STRING ){
    z = pRight->u.zToken;
  }
  if( z ){

    cnt = 0;
    while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
      cnt++;


    }








    if( cnt!=0 && 255!=(u8)z[cnt-1] ){
      Expr *pPrefix;


      *pisComplete = c==wc[0] && z[cnt+1]==0;


      pPrefix = sqlite3Expr(db, TK_STRING, z);
      if( pPrefix ) pPrefix->u.zToken[cnt] = 0;









      *ppPrefix = pPrefix;



      if( op==TK_VARIABLE ){
        Vdbe *v = pParse->pVdbe;
        sqlite3VdbeSetVarmask(v, pRight->iColumn);
        if( *pisComplete && pRight->u.zToken[1] ){
          /* If the rhs of the LIKE expression is a variable, and the current
          ** value of the variable means there is no need to invoke the LIKE
          ** function, then no OP_Variable will be added to the program.







|







 







>



>
>
|
>
>
>
>
>
>
>
>


>
>

>
>

|
>
>
>
>
>
>
>
>
>

>
>
>







653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
...
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
  int *pnoCase      /* True if uppercase is equivalent to lowercase */
){
  const char *z = 0;         /* String on RHS of LIKE operator */
  Expr *pRight, *pLeft;      /* Right and left size of LIKE operator */
  ExprList *pList;           /* List of operands to the LIKE operator */
  int c;                     /* One character in z[] */
  int cnt;                   /* Number of non-wildcard prefix characters */
  char wc[4];                /* Wildcard characters */
  sqlite3 *db = pParse->db;  /* Database connection */
  sqlite3_value *pVal = 0;
  int op;                    /* Opcode of pRight */

  if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){
    return 0;
  }
................................................................................
    }
    sqlite3VdbeSetVarmask(pParse->pVdbe, iCol);
    assert( pRight->op==TK_VARIABLE || pRight->op==TK_REGISTER );
  }else if( op==TK_STRING ){
    z = pRight->u.zToken;
  }
  if( z ){
    /* Count the number of prefix characters prior to the first wildcard */
    cnt = 0;
    while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
      cnt++;
      if( c==wc[3] && z[cnt]!=0 ){
        if( z[cnt++]>0xc0 ) while( (z[cnt]&0xc0)==0x80 ){ cnt++; }
      }
    }

    /* The optimization is possible only if (1) the pattern does not begin
    ** with a wildcard and if (2) the non-wildcard prefix does not end with
    ** an (illegal 0xff) character.  The second condition is necessary so
    ** that we can increment the prefix key to find an upper bound for the
    ** range search. 
    */
    if( cnt!=0 && 255!=(u8)z[cnt-1] ){
      Expr *pPrefix;

      /* A "complete" match if the pattern ends with "*" or "%" */
      *pisComplete = c==wc[0] && z[cnt+1]==0;

      /* Get the pattern prefix.  Remove all escapes from the prefix. */
      pPrefix = sqlite3Expr(db, TK_STRING, z);
      if( pPrefix ){
        int iFrom, iTo;
        char *zNew = pPrefix->u.zToken;
        zNew[cnt] = 0;
        for(iFrom=iTo=0; iFrom<cnt; iFrom++){
          if( zNew[iFrom]==wc[3] ) iFrom++;
          zNew[iTo++] = zNew[iFrom];
        }
        zNew[iTo] = 0;
      }
      *ppPrefix = pPrefix;

      /* If the RHS pattern is a bound parameter, make arrangements to
      ** reprepare the statement when that parameter is rebound */
      if( op==TK_VARIABLE ){
        Vdbe *v = pParse->pVdbe;
        sqlite3VdbeSetVarmask(v, pRight->iColumn);
        if( *pisComplete && pRight->u.zToken[1] ){
          /* If the rhs of the LIKE expression is a variable, and the current
          ** value of the variable means there is no need to invoke the LIKE
          ** function, then no OP_Variable will be added to the program.

Changes to test/like.test.

944
945
946
947
948
949
950
951













































952
  SELECT id FROM t12nc WHERE x LIKE 'abc%' COLLATE binary ORDER BY +id;
} {/SEARCH/}
do_execsql_test like-12.16 {
  EXPLAIN QUERY PLAN
  SELECT id FROM t12b WHERE x LIKE 'abc%' COLLATE binary ORDER BY +id;
} {/SCAN/}















































finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
  SELECT id FROM t12nc WHERE x LIKE 'abc%' COLLATE binary ORDER BY +id;
} {/SEARCH/}
do_execsql_test like-12.16 {
  EXPLAIN QUERY PLAN
  SELECT id FROM t12b WHERE x LIKE 'abc%' COLLATE binary ORDER BY +id;
} {/SCAN/}


# As of 2017-07-27 (3.21.0) the LIKE optimization works with ESCAPE as
# long as the ESCAPE is a single-byte literal.
#
db close
sqlite3 db :memory:
do_execsql_test like-15.100 {
  CREATE TABLE t15(x TEXT COLLATE nocase, y, PRIMARY KEY(x));
  INSERT INTO t15(x,y) VALUES
    ('abcde',1), ('ab%de',2), ('a_cde',3),
    ('uvwxy',11),('uvwx%',12),('uvwx_',13),
    ('_bcde',21),('%bcde',22),
    ('abcd_',31),('abcd%',32),
    ('ab%xy',41);
  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '/';
} {2}
do_execsql_test like-15.101 {
  EXPLAIN QUERY PLAN
  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '/';
} {/SEARCH/}
do_execsql_test like-15.102 {
  EXPLAIN QUERY PLAN
  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '//';
} {/SCAN/}
do_execsql_test like-15.103 {
  EXPLAIN QUERY PLAN
  SELECT y FROM t15 WHERE x LIKE 'ab/%d%' ESCAPE '';
} {/SCAN/}
do_execsql_test like-15.110 {
  SELECT y FROM t15 WHERE x LIKE 'abcdx%%' ESCAPE 'x';
} {32}
do_execsql_test like-15.111 {
  SELECT y FROM t15 WHERE x LIKE 'abx%%' ESCAPE 'x' ORDER BY +y
} {2 41}
do_execsql_test like-15.112 {
  EXPLAIN QUERY PLAN
  SELECT y FROM t15 WHERE x LIKE 'abx%%' ESCAPE 'x' ORDER BY +y
} {/SEARCH/}
do_execsql_test like-15.120 {
  SELECT y FROM t15 WHERE x LIKE '/%bc%' ESCAPE '/';
} {22}
do_execsql_test like-15.121 {
  EXPLAIN QUERY PLAN
  SELECT y FROM t15 WHERE x LIKE '/%bc%' ESCAPE '/';
} {/SEARCH/}

finish_test