/ Check-in [11e0844f]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the ESCAPE clause on the LIKE operator to override wildcard characters, in order to match the behavior of PosgreSQL.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 11e0844f71e8f2d27ce9363fb505e02fd7795c61dae0b3886cf0d8df4484dd97
User & Date: drh 2020-03-19 18:13:29
Original Comment: Chagnes the ESCAPE clause on the LIKE operator to overwrite wildcard characters, in order ot match the behavior of PosgreSQL.
References
2021-02-15
16:51 New ticket [c0aeea67] Incorrect LIKE result. (artifact: 1fd8c4af user: drh)
Context
2020-03-20
20:18
Avoid an undefined integer overflow in fts3 by detecting data structure corruption earlier. (check-in: 86e98ddc user: dan tags: trunk)
2020-03-19
21:17
Initial code for a proposed new sqlite3_stmt_mode() API. This is an incomplete snapshot of a work-in-progress. (check-in: 3cf7537b user: drh tags: sqlite3_stmt_mode)
18:13
Change the ESCAPE clause on the LIKE operator to override wildcard characters, in order to match the behavior of PosgreSQL. (check-in: 11e0844f user: drh tags: trunk)
17:27
Fix an integer overflow problem with the dbstat virtual table that comes up when trying to analyze a corrupt database. (check-in: 1d64f4a8 user: drh tags: trunk)
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/icu/icu.c.

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
    /* There are now 4 possibilities:
    **
    **     1. uPattern is an unescaped match-all character "%",
    **     2. uPattern is an unescaped match-one character "_",
    **     3. uPattern is an unescaped escape character, or
    **     4. uPattern is to be handled as an ordinary character
    */
    if( !prevEscape && uPattern==MATCH_ALL ){
      /* Case 1. */
      uint8_t c;

      /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
      ** MATCH_ALL. For each MATCH_ONE, skip one character in the 
      ** test string.
      */







|







139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
    /* There are now 4 possibilities:
    **
    **     1. uPattern is an unescaped match-all character "%",
    **     2. uPattern is an unescaped match-one character "_",
    **     3. uPattern is an unescaped escape character, or
    **     4. uPattern is to be handled as an ordinary character
    */
    if( uPattern==MATCH_ALL && !prevEscape && uPattern!=(uint32_t)uEsc ){
      /* Case 1. */
      uint8_t c;

      /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
      ** MATCH_ALL. For each MATCH_ONE, skip one character in the 
      ** test string.
      */
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
        if( icuLikeCompare(zPattern, zString, uEsc) ){
          return 1;
        }
        SQLITE_ICU_SKIP_UTF8(zString);
      }
      return 0;

    }else if( !prevEscape && uPattern==MATCH_ONE ){
      /* Case 2. */
      if( *zString==0 ) return 0;
      SQLITE_ICU_SKIP_UTF8(zString);

    }else if( !prevEscape && uPattern==(uint32_t)uEsc){
      /* Case 3. */
      prevEscape = 1;

    }else{
      /* Case 4. */
      uint32_t uString;
      SQLITE_ICU_READ_UTF8(zString, uString);







|




|







165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
        if( icuLikeCompare(zPattern, zString, uEsc) ){
          return 1;
        }
        SQLITE_ICU_SKIP_UTF8(zString);
      }
      return 0;

    }else if( uPattern==MATCH_ONE && !prevEscape && uPattern!=(uint32_t)uEsc ){
      /* Case 2. */
      if( *zString==0 ) return 0;
      SQLITE_ICU_SKIP_UTF8(zString);

    }else if( uPattern==(uint32_t)uEsc && !prevEscape ){
      /* Case 3. */
      prevEscape = 1;

    }else{
      /* Case 4. */
      uint32_t uString;
      SQLITE_ICU_READ_UTF8(zString, uString);

Changes to src/func.c.

849
850
851
852
853
854
855

856
857
858
859
860
861
862
  sqlite3_value **argv
){
  const unsigned char *zA, *zB;
  u32 escape;
  int nPat;
  sqlite3 *db = sqlite3_context_db_handle(context);
  struct compareInfo *pInfo = sqlite3_user_data(context);


#ifdef SQLITE_LIKE_DOESNT_MATCH_BLOBS
  if( sqlite3_value_type(argv[0])==SQLITE_BLOB
   || sqlite3_value_type(argv[1])==SQLITE_BLOB
  ){
#ifdef SQLITE_TEST
    sqlite3_like_count++;







>







849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
  sqlite3_value **argv
){
  const unsigned char *zA, *zB;
  u32 escape;
  int nPat;
  sqlite3 *db = sqlite3_context_db_handle(context);
  struct compareInfo *pInfo = sqlite3_user_data(context);
  struct compareInfo backupInfo;

#ifdef SQLITE_LIKE_DOESNT_MATCH_BLOBS
  if( sqlite3_value_type(argv[0])==SQLITE_BLOB
   || sqlite3_value_type(argv[1])==SQLITE_BLOB
  ){
#ifdef SQLITE_TEST
    sqlite3_like_count++;
884
885
886
887
888
889
890






891
892
893
894
895
896
897
    if( zEsc==0 ) return;
    if( sqlite3Utf8CharLen((char*)zEsc, -1)!=1 ){
      sqlite3_result_error(context, 
          "ESCAPE expression must be a single character", -1);
      return;
    }
    escape = sqlite3Utf8Read(&zEsc);






  }else{
    escape = pInfo->matchSet;
  }
  zB = sqlite3_value_text(argv[0]);
  zA = sqlite3_value_text(argv[1]);
  if( zA && zB ){
#ifdef SQLITE_TEST







>
>
>
>
>
>







885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
    if( zEsc==0 ) return;
    if( sqlite3Utf8CharLen((char*)zEsc, -1)!=1 ){
      sqlite3_result_error(context, 
          "ESCAPE expression must be a single character", -1);
      return;
    }
    escape = sqlite3Utf8Read(&zEsc);
    if( escape==pInfo->matchAll || escape==pInfo->matchOne ){
      memcpy(&backupInfo, pInfo, sizeof(backupInfo));
      pInfo = &backupInfo;
      if( escape==pInfo->matchAll ) pInfo->matchAll = 0;
      if( escape==pInfo->matchOne ) pInfo->matchOne = 0;
    }
  }else{
    escape = pInfo->matchSet;
  }
  zB = sqlite3_value_text(argv[0]);
  zA = sqlite3_value_text(argv[1]);
  if( zA && zB ){
#ifdef SQLITE_TEST
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891














1892
1893
1894
1895
1896
1897
1898
  pDef = sqlite3FindFunction(db, pExpr->u.zToken, nExpr, SQLITE_UTF8, 0);
#ifdef SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION
  if( pDef==0 ) return 0;
#endif
  if( NEVER(pDef==0) || (pDef->funcFlags & SQLITE_FUNC_LIKE)==0 ){
    return 0;
  }
  if( nExpr<3 ){
    aWc[3] = 0;
  }else{
    Expr *pEscape = pExpr->x.pList->a[2].pExpr;
    char *zEscape;
    if( pEscape->op!=TK_STRING ) return 0;
    zEscape = pEscape->u.zToken;
    if( zEscape[0]==0 || zEscape[1]!=0 ) return 0;
    aWc[3] = zEscape[0];
  }

  /* The memcpy() statement assumes that the wildcard characters are
  ** the first three statements in the compareInfo structure.  The
  ** asserts() that follow verify that assumption
  */
  memcpy(aWc, pDef->pUserData, 3);
  assert( (char*)&likeInfoAlt == (char*)&likeInfoAlt.matchAll );
  assert( &((char*)&likeInfoAlt)[1] == (char*)&likeInfoAlt.matchOne );
  assert( &((char*)&likeInfoAlt)[2] == (char*)&likeInfoAlt.matchSet );














  *pIsNocase = (pDef->funcFlags & SQLITE_FUNC_CASE)==0;
  return 1;
}

/*
** All of the FuncDef structures in the aBuiltinFunc[] array above
** to the global function hash table.  This occurs at start-time (as







<
<
<
<
<
<
<
<
<
<









>
>
>
>
>
>
>
>
>
>
>
>
>
>







1873
1874
1875
1876
1877
1878
1879










1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
  pDef = sqlite3FindFunction(db, pExpr->u.zToken, nExpr, SQLITE_UTF8, 0);
#ifdef SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION
  if( pDef==0 ) return 0;
#endif
  if( NEVER(pDef==0) || (pDef->funcFlags & SQLITE_FUNC_LIKE)==0 ){
    return 0;
  }











  /* The memcpy() statement assumes that the wildcard characters are
  ** the first three statements in the compareInfo structure.  The
  ** asserts() that follow verify that assumption
  */
  memcpy(aWc, pDef->pUserData, 3);
  assert( (char*)&likeInfoAlt == (char*)&likeInfoAlt.matchAll );
  assert( &((char*)&likeInfoAlt)[1] == (char*)&likeInfoAlt.matchOne );
  assert( &((char*)&likeInfoAlt)[2] == (char*)&likeInfoAlt.matchSet );

  if( nExpr<3 ){
    aWc[3] = 0;
  }else{
    Expr *pEscape = pExpr->x.pList->a[2].pExpr;
    char *zEscape;
    if( pEscape->op!=TK_STRING ) return 0;
    zEscape = pEscape->u.zToken;
    if( zEscape[0]==0 || zEscape[1]!=0 ) return 0;
    if( zEscape[0]==aWc[0] ) return 0;
    if( zEscape[0]==aWc[1] ) return 0;
    aWc[3] = zEscape[0];
  }

  *pIsNocase = (pDef->funcFlags & SQLITE_FUNC_CASE)==0;
  return 1;
}

/*
** All of the FuncDef structures in the aBuiltinFunc[] array above
** to the global function hash table.  This occurs at start-time (as

Changes to test/icu.test.

141
142
143
144
145
146
147
148


















149
  do_catchsql_test icu-5.5 {SELECT 'abc' REGEXP }   {1 {incomplete input}}
  do_catchsql_test icu-5.6 {SELECT 'abc' REGEXP, 1} {1 {near ",": syntax error}}
 
  do_malloc_test icu-6.10 -sqlbody {
    SELECT upper(char(0xfb04,0xdf,0xfb04,0xe8,0xfb04));
  }
}



















finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
  do_catchsql_test icu-5.5 {SELECT 'abc' REGEXP }   {1 {incomplete input}}
  do_catchsql_test icu-5.6 {SELECT 'abc' REGEXP, 1} {1 {near ",": syntax error}}
 
  do_malloc_test icu-6.10 -sqlbody {
    SELECT upper(char(0xfb04,0xdf,0xfb04,0xe8,0xfb04));
  }
}

# 2020-03-19
# The ESCAPE clause on LIKE takes precedence over wildcards
#
do_execsql_test idu-6.0 {
  DROP TABLE IF EXISTS t1;
  CREATE TABLE t1(id INTEGER PRIMARY KEY, x TEXT);
  INSERT INTO t1 VALUES
    (1,'abcde'),
    (2,'abc_'),
    (3,'abc__'),
    (4,'abc%'),
    (5,'abc%%');
  SELECT id FROM t1 WHERE x LIKE 'abc%%' ESCAPE '%';
} {4}
do_execsql_test icu-6.1 {
  SELECT id FROM t1 WHERE x LIKE 'abc__' ESCAPE '_';
} {2}

finish_test

Changes to test/like.test.

1108
1109
1110
1111
1112
1113
1114
1115


















1116
}
do_execsql_test 16.1 {
  SELECT * FROM t1 WHERE a LIKE ' 1%';
} {{ 1x} { 1-}}
do_execsql_test 16.2 {
  SELECT * FROM t1 WHERE a LIKE ' 1-';
} {{ 1-}}



















finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
}
do_execsql_test 16.1 {
  SELECT * FROM t1 WHERE a LIKE ' 1%';
} {{ 1x} { 1-}}
do_execsql_test 16.2 {
  SELECT * FROM t1 WHERE a LIKE ' 1-';
} {{ 1-}}

# 2020-03-19
# The ESCAPE clause on LIKE takes precedence over wildcards
#
do_execsql_test 17.0 {
  DROP TABLE IF EXISTS t1;
  CREATE TABLE t1(id INTEGER PRIMARY KEY, x TEXT);
  INSERT INTO t1 VALUES
    (1,'abcde'),
    (2,'abc_'),
    (3,'abc__'),
    (4,'abc%'),
    (5,'abc%%');
  SELECT id FROM t1 WHERE x LIKE 'abc%%' ESCAPE '%';
} {4}
do_execsql_test 17.1 {
  SELECT id FROM t1 WHERE x LIKE 'abc__' ESCAPE '_';
} {2}

finish_test

Changes to test/like3.test.

233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
do_eqp_test like3-6.110 {
  SELECT * FROM t1 WHERE path LIKE 'a%';
} {
  QUERY PLAN
  `--SEARCH TABLE t1 USING PRIMARY KEY (path>? AND path<?)
}
do_eqp_test like3-6.120 {
  SELECT * FROM t1 WHERE path LIKE 'a%' ESCAPE '_';
} {
  QUERY PLAN
  `--SEARCH TABLE t1 USING PRIMARY KEY (path>? AND path<?)
}
do_execsql_test like3-6.200 {
  DROP TABLE IF EXISTS t2;
  CREATE TABLE t2(path TEXT,x,y,z);
  CREATE INDEX t2path ON t2(path COLLATE nocase);
  CREATE INDEX t2path2 ON t2(path);
}
do_eqp_test like3-6.210 {
  SELECT * FROM t2 WHERE path LIKE 'a%';
} {
  QUERY PLAN
  `--SEARCH TABLE t2 USING INDEX t2path (path>? AND path<?)
}
do_eqp_test like3-6.220 {
  SELECT * FROM t2 WHERE path LIKE 'a%' ESCAPE '_';
} {
  QUERY PLAN
  `--SEARCH TABLE t2 USING INDEX t2path (path>? AND path<?)
}
db eval {PRAGMA case_sensitive_like=ON}
do_eqp_test like3-6.230 {
  SELECT * FROM t2 WHERE path LIKE 'a%';
} {
  QUERY PLAN
  `--SEARCH TABLE t2 USING INDEX t2path2 (path>? AND path<?)
}
do_eqp_test like3-6.240 {
  SELECT * FROM t2 WHERE path LIKE 'a%' ESCAPE '_';
} {
  QUERY PLAN
  `--SEARCH TABLE t2 USING INDEX t2path2 (path>? AND path<?)
}
}

finish_test







|

















|












|







233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
do_eqp_test like3-6.110 {
  SELECT * FROM t1 WHERE path LIKE 'a%';
} {
  QUERY PLAN
  `--SEARCH TABLE t1 USING PRIMARY KEY (path>? AND path<?)
}
do_eqp_test like3-6.120 {
  SELECT * FROM t1 WHERE path LIKE 'a%' ESCAPE 'x';
} {
  QUERY PLAN
  `--SEARCH TABLE t1 USING PRIMARY KEY (path>? AND path<?)
}
do_execsql_test like3-6.200 {
  DROP TABLE IF EXISTS t2;
  CREATE TABLE t2(path TEXT,x,y,z);
  CREATE INDEX t2path ON t2(path COLLATE nocase);
  CREATE INDEX t2path2 ON t2(path);
}
do_eqp_test like3-6.210 {
  SELECT * FROM t2 WHERE path LIKE 'a%';
} {
  QUERY PLAN
  `--SEARCH TABLE t2 USING INDEX t2path (path>? AND path<?)
}
do_eqp_test like3-6.220 {
  SELECT * FROM t2 WHERE path LIKE 'a%' ESCAPE '\';
} {
  QUERY PLAN
  `--SEARCH TABLE t2 USING INDEX t2path (path>? AND path<?)
}
db eval {PRAGMA case_sensitive_like=ON}
do_eqp_test like3-6.230 {
  SELECT * FROM t2 WHERE path LIKE 'a%';
} {
  QUERY PLAN
  `--SEARCH TABLE t2 USING INDEX t2path2 (path>? AND path<?)
}
do_eqp_test like3-6.240 {
  SELECT * FROM t2 WHERE path LIKE 'a%' ESCAPE '\';
} {
  QUERY PLAN
  `--SEARCH TABLE t2 USING INDEX t2path2 (path>? AND path<?)
}
}

finish_test