/ Check-in [610574b2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a bug in the LIKE optimizer that occurs when the last character before the wildcard is an upper-case 'Z'. Ticket #2959. (CVS 4807)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:610574b23b5e73b71be71df66e084c5bf37f6ccd
User & Date: drh 2008-02-23 21:55:40
Context
2008-02-26
03:46
Add bitvec to build (CVS 4808) check-in: c690dd68 user: mlcreech tags: trunk
2008-02-23
21:55
Fix a bug in the LIKE optimizer that occurs when the last character before the wildcard is an upper-case 'Z'. Ticket #2959. (CVS 4807) check-in: 610574b2 user: drh tags: trunk
2008-02-21
21:30
Additional test cases. (CVS 4806) check-in: 74126bf4 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/where.c.

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
...
513
514
515
516
517
518
519
520

521
522
523
524
525
526
527
528
529
530
531
532



533
534
535
536
537
538
539
...
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
...
712
713
714
715
716
717
718

719
720
721
722
723
724
725
...
882
883
884
885
886
887
888








889
890
891
892
893
894
895
896
897
...
899
900
901
902
903
904
905

906
907



908
909
910
911
912
913
914
** This module contains C code that generates VDBE code used to process
** the WHERE clause of SQL statements.  This module is reponsible for
** generating the code that loops through a table looking for applicable
** rows.  Indices are selected and used to speed the search when doing
** so is applicable.  Because this module is responsible for selecting
** indices, you might also think of this module as the "query optimizer".
**
** $Id: where.c,v 1.286 2008/01/23 12:52:41 drh Exp $
*/
#include "sqliteInt.h"

/*
** The number of bits in a Bitmask.  "BMS" means "BitMask Size".
*/
#define BMS  (sizeof(Bitmask)*8)
................................................................................
** In order for the operator to be optimizible, the RHS must be a string
** literal that does not begin with a wildcard.  
*/
static int isLikeOrGlob(
  sqlite3 *db,      /* The database */
  Expr *pExpr,      /* Test this expression */
  int *pnPattern,   /* Number of non-wildcard prefix characters */
  int *pisComplete  /* True if the only wildcard is % in the last character */

){
  const char *z;
  Expr *pRight, *pLeft;
  ExprList *pList;
  int c, cnt;
  int noCase;
  char wc[3];
  CollSeq *pColl;

  if( !sqlite3IsLikeFunction(db, pExpr, &noCase, wc) ){
    return 0;
  }



  pList = pExpr->pList;
  pRight = pList->a[0].pExpr;
  if( pRight->op!=TK_STRING ){
    return 0;
  }
  pLeft = pList->a[1].pExpr;
  if( pLeft->op!=TK_COLUMN ){
................................................................................
  }
  pColl = pLeft->pColl;
  assert( pColl!=0 || pLeft->iColumn==-1 );
  if( pColl==0 ){
    /* No collation is defined for the ROWID.  Use the default. */
    pColl = db->pDfltColl;
  }
  if( (pColl->type!=SQLITE_COLL_BINARY || noCase) &&
      (pColl->type!=SQLITE_COLL_NOCASE || !noCase) ){
    return 0;
  }
  sqlite3DequoteExpr(db, pRight);
  z = (char *)pRight->token.z;
  cnt = 0;
  if( z ){
    while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; }
................................................................................
  WhereTerm *pTerm;
  ExprMaskSet *pMaskSet;
  Expr *pExpr;
  Bitmask prereqLeft;
  Bitmask prereqAll;
  int nPattern;
  int isComplete;

  int op;
  Parse *pParse = pWC->pParse;
  sqlite3 *db = pParse->db;

  if( db->mallocFailed ){
    return;
  }
................................................................................
    whereClauseClear(&sOr);
  }
#endif /* SQLITE_OMIT_OR_OPTIMIZATION */

#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION
  /* Add constraints to reduce the search space on a LIKE or GLOB
  ** operator.








  */
  if( isLikeOrGlob(db, pExpr, &nPattern, &isComplete) ){
    Expr *pLeft, *pRight;
    Expr *pStr1, *pStr2;
    Expr *pNewExpr1, *pNewExpr2;
    int idxNew1, idxNew2;

    pLeft = pExpr->pList->a[1].pExpr;
    pRight = pExpr->pList->a[0].pExpr;
................................................................................
    if( pStr1 ){
      sqlite3TokenCopy(db, &pStr1->token, &pRight->token);
      pStr1->token.n = nPattern;
      pStr1->flags = EP_Dequoted;
    }
    pStr2 = sqlite3ExprDup(db, pStr1);
    if( !db->mallocFailed ){

      assert( pStr2->token.dyn );
      ++*(u8*)&pStr2->token.z[nPattern-1];



    }
    pNewExpr1 = sqlite3PExpr(pParse, TK_GE, sqlite3ExprDup(db,pLeft), pStr1, 0);
    idxNew1 = whereClauseInsert(pWC, pNewExpr1, TERM_VIRTUAL|TERM_DYNAMIC);
    exprAnalyze(pSrc, pWC, idxNew1);
    pNewExpr2 = sqlite3PExpr(pParse, TK_LT, sqlite3ExprDup(db,pLeft), pStr2, 0);
    idxNew2 = whereClauseInsert(pWC, pNewExpr2, TERM_VIRTUAL|TERM_DYNAMIC);
    exprAnalyze(pSrc, pWC, idxNew2);







|







 







|
>





<



|


>
>
>







 







|
|







 







>







 







>
>
>
>
>
>
>
>

|







 







>

|
>
>
>







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
...
513
514
515
516
517
518
519
520
521
522
523
524
525
526

527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
...
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
...
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
...
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
...
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
** This module contains C code that generates VDBE code used to process
** the WHERE clause of SQL statements.  This module is reponsible for
** generating the code that loops through a table looking for applicable
** rows.  Indices are selected and used to speed the search when doing
** so is applicable.  Because this module is responsible for selecting
** indices, you might also think of this module as the "query optimizer".
**
** $Id: where.c,v 1.287 2008/02/23 21:55:40 drh Exp $
*/
#include "sqliteInt.h"

/*
** The number of bits in a Bitmask.  "BMS" means "BitMask Size".
*/
#define BMS  (sizeof(Bitmask)*8)
................................................................................
** In order for the operator to be optimizible, the RHS must be a string
** literal that does not begin with a wildcard.  
*/
static int isLikeOrGlob(
  sqlite3 *db,      /* The database */
  Expr *pExpr,      /* Test this expression */
  int *pnPattern,   /* Number of non-wildcard prefix characters */
  int *pisComplete, /* True if the only wildcard is % in the last character */
  int *pnoCase      /* True if uppercase is equivalent to lowercase */
){
  const char *z;
  Expr *pRight, *pLeft;
  ExprList *pList;
  int c, cnt;

  char wc[3];
  CollSeq *pColl;

  if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){
    return 0;
  }
#ifdef SQLITE_EBCDIC
  if( *pnoCase ) return 0;
#endif
  pList = pExpr->pList;
  pRight = pList->a[0].pExpr;
  if( pRight->op!=TK_STRING ){
    return 0;
  }
  pLeft = pList->a[1].pExpr;
  if( pLeft->op!=TK_COLUMN ){
................................................................................
  }
  pColl = pLeft->pColl;
  assert( pColl!=0 || pLeft->iColumn==-1 );
  if( pColl==0 ){
    /* No collation is defined for the ROWID.  Use the default. */
    pColl = db->pDfltColl;
  }
  if( (pColl->type!=SQLITE_COLL_BINARY || *pnoCase) &&
      (pColl->type!=SQLITE_COLL_NOCASE || !*pnoCase) ){
    return 0;
  }
  sqlite3DequoteExpr(db, pRight);
  z = (char *)pRight->token.z;
  cnt = 0;
  if( z ){
    while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; }
................................................................................
  WhereTerm *pTerm;
  ExprMaskSet *pMaskSet;
  Expr *pExpr;
  Bitmask prereqLeft;
  Bitmask prereqAll;
  int nPattern;
  int isComplete;
  int noCase;
  int op;
  Parse *pParse = pWC->pParse;
  sqlite3 *db = pParse->db;

  if( db->mallocFailed ){
    return;
  }
................................................................................
    whereClauseClear(&sOr);
  }
#endif /* SQLITE_OMIT_OR_OPTIMIZATION */

#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION
  /* Add constraints to reduce the search space on a LIKE or GLOB
  ** operator.
  **
  ** A like pattern of the form "x LIKE 'abc%'" is changed into constraints
  **
  **          x>='abc' AND x<'abd' AND x LIKE 'abc%'
  **
  ** The last character of the prefix "abc" is incremented to form the
  ** termination condidtion "abd".  This trick of incrementing the last
  ** is not 255 and if the character set is not EBCDIC.  
  */
  if( isLikeOrGlob(db, pExpr, &nPattern, &isComplete, &noCase) ){
    Expr *pLeft, *pRight;
    Expr *pStr1, *pStr2;
    Expr *pNewExpr1, *pNewExpr2;
    int idxNew1, idxNew2;

    pLeft = pExpr->pList->a[1].pExpr;
    pRight = pExpr->pList->a[0].pExpr;
................................................................................
    if( pStr1 ){
      sqlite3TokenCopy(db, &pStr1->token, &pRight->token);
      pStr1->token.n = nPattern;
      pStr1->flags = EP_Dequoted;
    }
    pStr2 = sqlite3ExprDup(db, pStr1);
    if( !db->mallocFailed ){
      u8 c, *pC;
      assert( pStr2->token.dyn );
      pC = (u8*)&pStr2->token.z[nPattern-1];
      c = *pC;
      if( noCase ) c = sqlite3UpperToLower[c];
      *pC = c + 1;
    }
    pNewExpr1 = sqlite3PExpr(pParse, TK_GE, sqlite3ExprDup(db,pLeft), pStr1, 0);
    idxNew1 = whereClauseInsert(pWC, pNewExpr1, TERM_VIRTUAL|TERM_DYNAMIC);
    exprAnalyze(pSrc, pWC, idxNew1);
    pNewExpr2 = sqlite3PExpr(pParse, TK_LT, sqlite3ExprDup(db,pLeft), pStr2, 0);
    idxNew2 = whereClauseInsert(pWC, pNewExpr2, TERM_VIRTUAL|TERM_DYNAMIC);
    exprAnalyze(pSrc, pWC, idxNew2);

Changes to test/like.test.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
377
378
379
380
381
382
383












































































































384
385
386
387
388
389
390
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the LIKE and GLOB operators and
# in particular the optimizations that occur to help those operators
# run faster.
#
# $Id: like.test,v 1.8 2008/01/23 12:52:41 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Create some sample data to work with.
#
do_test like-1.0 {
................................................................................
  queryplan {
    SELECT x FROM t2 WHERE x GLOB 'abc*' ORDER BY 1
  }
} {abc abcd nosort {} i2}
do_test like-5.8 {
  set sqlite_like_count
} 12













































































































# ticket #2407
#
# Make sure the LIKE prefix optimization does not strip off leading
# characters of the like pattern that happen to be quote characters.
#
do_test like-6.1 {







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the LIKE and GLOB operators and
# in particular the optimizations that occur to help those operators
# run faster.
#
# $Id: like.test,v 1.9 2008/02/23 21:55:40 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Create some sample data to work with.
#
do_test like-1.0 {
................................................................................
  queryplan {
    SELECT x FROM t2 WHERE x GLOB 'abc*' ORDER BY 1
  }
} {abc abcd nosort {} i2}
do_test like-5.8 {
  set sqlite_like_count
} 12
do_test like-5.11 {
  execsql {PRAGMA case_sensitive_like=off}
  set sqlite_like_count 0
  queryplan {
    SELECT x FROM t1 WHERE x LIKE 'ABC%' ORDER BY 1
  }
} {ABC {ABC abc xyz} abc abcd nosort {} i1}
do_test like-5.12 {
  set sqlite_like_count
} 12
do_test like-5.13 {
  set sqlite_like_count 0
  queryplan {
    SELECT x FROM t2 WHERE x LIKE 'ABC%' ORDER BY 1
  }
} {abc ABC {ABC abc xyz} abcd nosort {} i2}
do_test like-5.14 {
  set sqlite_like_count
} 0
do_test like-5.15 {
  execsql {
    PRAGMA case_sensitive_like=on;
  }
  set sqlite_like_count 0
  queryplan {
    SELECT x FROM t2 WHERE x LIKE 'ABC%' ORDER BY 1
  }
} {ABC {ABC abc xyz} nosort {} i2}
do_test like-5.16 {
  set sqlite_like_count
} 12
do_test like-5.17 {
  execsql {
    PRAGMA case_sensitive_like=off;
  }
  set sqlite_like_count 0
  queryplan {
    SELECT x FROM t2 WHERE x GLOB 'ABC*' ORDER BY 1
  }
} {ABC {ABC abc xyz} nosort {} i2}
do_test like-5.18 {
  set sqlite_like_count
} 12

# Boundary case.  The prefix for a LIKE comparison is rounded up
# when constructing the comparison.  Example:  "ab" becomes "ac".
# In other words, the last character is increased by one.
#
# Make sure this happens correctly when the last character is a 
# "z" and we are doing case-insensitive comparisons.
#
# Ticket #2959
#
do_test like-5.21 {
  execsql {
    PRAGMA case_sensitive_like=off;
    INSERT INTO t2 VALUES('ZZ-upper-upper');
    INSERT INTO t2 VALUES('zZ-lower-upper');
    INSERT INTO t2 VALUES('Zz-upper-lower');
    INSERT INTO t2 VALUES('zz-lower-lower');
  }
  queryplan {
    SELECT x FROM t2 WHERE x LIKE 'zz%';
  }
} {zz-lower-lower zZ-lower-upper Zz-upper-lower ZZ-upper-upper nosort {} i2}
do_test like-5.22 {
  queryplan {
    SELECT x FROM t2 WHERE x LIKE 'zZ%';
  }
} {zz-lower-lower zZ-lower-upper Zz-upper-lower ZZ-upper-upper nosort {} i2}
do_test like-5.23 {
  queryplan {
    SELECT x FROM t2 WHERE x LIKE 'Zz%';
  }
} {zz-lower-lower zZ-lower-upper Zz-upper-lower ZZ-upper-upper nosort {} i2}
do_test like-5.24 {
  queryplan {
    SELECT x FROM t2 WHERE x LIKE 'ZZ%';
  }
} {zz-lower-lower zZ-lower-upper Zz-upper-lower ZZ-upper-upper nosort {} i2}
do_test like-5.25 {
  queryplan {
    PRAGMA case_sensitive_like=on;
    CREATE TABLE t3(x);
    CREATE INDEX i3 ON t3(x);
    INSERT INTO t3 VALUES('ZZ-upper-upper');
    INSERT INTO t3 VALUES('zZ-lower-upper');
    INSERT INTO t3 VALUES('Zz-upper-lower');
    INSERT INTO t3 VALUES('zz-lower-lower');
    SELECT x FROM t3 WHERE x LIKE 'zz%';
  }
} {zz-lower-lower nosort {} i3}
do_test like-5.26 {
  queryplan {
    SELECT x FROM t3 WHERE x LIKE 'zZ%';
  }
} {zZ-lower-upper nosort {} i3}
do_test like-5.27 {
  queryplan {
    SELECT x FROM t3 WHERE x LIKE 'Zz%';
  }
} {Zz-upper-lower nosort {} i3}
do_test like-5.28 {
  queryplan {
    SELECT x FROM t3 WHERE x LIKE 'ZZ%';
  }
} {ZZ-upper-upper nosort {} i3}


# ticket #2407
#
# Make sure the LIKE prefix optimization does not strip off leading
# characters of the like pattern that happen to be quote characters.
#
do_test like-6.1 {