/ Check-in [6c8924ca]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Still more performance enhancements to the LIKE and GLOB operators.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 6c8924cacc2b875270770fed2cc3b1884f57a655
User & Date: drh 2014-09-25 11:08:57
Context
2014-09-25
12:31
Simplification to the random rowid picking logic that begins running when the maximum possible rowid has already been used. check-in: 1330c72e user: drh tags: trunk
11:08
Still more performance enhancements to the LIKE and GLOB operators. check-in: 6c8924ca user: drh tags: trunk
03:51
More performance optimization for the LIKE and GLOB operators. check-in: 5ab1023d user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/func.c.

581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
...
601
602
603
604
605
606
607
608
609
610
611
612







613
614
615
616
617
618
619
620
621
622
623
624
625
626

627
628
629
630
631
632
633
634
635
636



637
638
639
640
641
642
643
644
645
646
647
648
649


650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665














666
667
668

669



670
671



672
673
674
675
676
677
678
679
680
681
682
683
684
685
686

687
688
689
690
691
692
693
694
695
696
697
...
716
717
718
719
720
721
722
723
724
725


726
727
728
729
730
731
732
733
static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
** is case sensitive causing 'a' LIKE 'A' to be false */
static const struct compareInfo likeInfoAlt = { '%', '_',   0, 0 };

/*
** Compare two UTF-8 strings for equality where the first string can
** potentially be a "glob" expression.  Return true (1) if they
** are the same and false (0) if they are different.
**
** Globbing rules:
**
**      '*'       Matches any sequence of zero or more characters.
**
**      '?'       Matches exactly one character.
................................................................................
**
** With the [...] and [^...] matching, a ']' character can be included
** in the list by making it the first character after '[' or '^'.  A
** range of characters can be specified using '-'.  Example:
** "[a-z]" matches any single lower-case letter.  To match a '-', make
** it the last character in the list.
**
** This routine is usually quick, but can be N**2 in the worst case.
**
** Hints: to match '*' or '?', put them in "[]".  Like this:
**
**         abc[*]xyz        Matches "abc*xyz" only







*/
static int patternCompare(
  const u8 *zPattern,              /* The glob pattern */
  const u8 *zString,               /* The string to compare against the glob */
  const struct compareInfo *pInfo, /* Information about how to do the compare */
  u32 esc                          /* The escape character */
){
  u32 c, c2;
  int invert;
  int seen;
  u32 matchOne = pInfo->matchOne;
  u32 matchAll = pInfo->matchAll;
  u32 matchOther;
  u8 noCase = pInfo->noCase;

  
  /* The GLOB operator does not have an ESCAPE clause.  And LIKE does not
  ** have the matchSet operator.  So we either have to look for one or
  ** the other, never both.  Hence the single variable matchOther is used
  ** to store the one we have to look for.
  */
  matchOther = esc ? esc : pInfo->matchSet;

  while( (c = sqlite3Utf8Read(&zPattern))!=0 ){
    if( c==matchAll ){



      while( (c=sqlite3Utf8Read(&zPattern)) == matchAll
               || c == matchOne ){
        if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){
          return 0;
        }
      }
      if( c==0 ){
        return 1;
      }else if( c==matchOther ){
        if( esc ){
          c = sqlite3Utf8Read(&zPattern);
          if( c==0 ) return 0;
        }else{


          assert( matchOther<0x80 );  /* '[' is a single-byte character */
          while( *zString
                 && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){
            SQLITE_SKIP_UTF8(zString);
          }
          return *zString!=0;
        }
      }
      while( (c2 = sqlite3Utf8Read(&zString))!=0 ){
        if( noCase && c<0x80 ){
          GlobUpperToLower(c2);
          GlobUpperToLowerAscii(c);
          while( c2 != 0 && c2 != c ){
            do{ c2 = *(zString++); }while( c2>0x7f );
            GlobUpperToLowerAscii(c2);
          }














        }else{
          while( c2 != 0 && c2 != c ){
            c2 = sqlite3Utf8Read(&zString);

          }



        }
        if( c2==0 ) return 0;



        if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
      }
      return 0;
    }
    if( c==matchOne ){
      if( sqlite3Utf8Read(&zString)==0 ){
        return 0;
      }else{
        continue;
      }
    }
    if( c==matchOther ){
      if( esc ){
        c = sqlite3Utf8Read(&zPattern);
        if( c==0 ) return 0;

      }else{
        u32 prior_c = 0;
        seen = 0;
        invert = 0;
        c = sqlite3Utf8Read(&zString);
        if( c==0 ) return 0;
        c2 = sqlite3Utf8Read(&zPattern);
        if( c2=='^' ){
          invert = 1;
          c2 = sqlite3Utf8Read(&zPattern);
        }
................................................................................
          return 0;
        }
        continue;
      }
    }
    c2 = sqlite3Utf8Read(&zString);
    if( c==c2 ) continue;
    if( !noCase ) return 0;
    GlobUpperToLower(c);
    GlobUpperToLower(c2);


    if( c!=c2 ) return 0;
  }
  return *zString==0;
}

/*
** The sqlite3_strglob() interface.
*/







|







 







|
|
|

|
>
>
>
>
>
>
>







|
<
<
|
|
|
|
>









|
>
>
>







|





>
>








<
<
<
<
<
<
<
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>

<
<
>
|
>
>
>

<
>
>
>
|
|
<
|
<
<
|
<
<
<





>


|
|







 







|
|
<
>
>
|







581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
...
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627


628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668







669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684


685
686
687
688
689
690

691
692
693
694
695

696


697



698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
...
733
734
735
736
737
738
739
740
741

742
743
744
745
746
747
748
749
750
751
static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
** is case sensitive causing 'a' LIKE 'A' to be false */
static const struct compareInfo likeInfoAlt = { '%', '_',   0, 0 };

/*
** Compare two UTF-8 strings for equality where the first string can
** potentially be a "glob" or "like" expression.  Return true (1) if they
** are the same and false (0) if they are different.
**
** Globbing rules:
**
**      '*'       Matches any sequence of zero or more characters.
**
**      '?'       Matches exactly one character.
................................................................................
**
** With the [...] and [^...] matching, a ']' character can be included
** in the list by making it the first character after '[' or '^'.  A
** range of characters can be specified using '-'.  Example:
** "[a-z]" matches any single lower-case letter.  To match a '-', make
** it the last character in the list.
**
** Like matching rules:
** 
**      '%'       Matches any sequence of zero or more characters
**
***     '_'       Matches any one character
**
**      Ec        Where E is the "esc" character and c is any other
**                character, including '%', '_', and esc, match exactly c.
**
** The comments through this routine usually assume glob matching.
**
** This routine is usually quick, but can be N**2 in the worst case.
*/
static int patternCompare(
  const u8 *zPattern,              /* The glob pattern */
  const u8 *zString,               /* The string to compare against the glob */
  const struct compareInfo *pInfo, /* Information about how to do the compare */
  u32 esc                          /* The escape character */
){
  u32 c, c2;                       /* Next pattern and input string chars */


  u32 matchOne = pInfo->matchOne;  /* "?" or "_" */
  u32 matchAll = pInfo->matchAll;  /* "*" or "%" */
  u32 matchOther;                  /* "[" or the escape character */
  u8 noCase = pInfo->noCase;       /* True if uppercase==lowercase */
  const u8 *zEscaped = 0;          /* One past the last escaped input char */
  
  /* The GLOB operator does not have an ESCAPE clause.  And LIKE does not
  ** have the matchSet operator.  So we either have to look for one or
  ** the other, never both.  Hence the single variable matchOther is used
  ** to store the one we have to look for.
  */
  matchOther = esc ? esc : pInfo->matchSet;

  while( (c = sqlite3Utf8Read(&zPattern))!=0 ){
    if( c==matchAll ){  /* Match "*" */
      /* Skip over multiple "*" characters in the pattern.  If there
      ** are also "?" characters, skip those as well, but consume a
      ** single character of the input string for each "?" skipped */
      while( (c=sqlite3Utf8Read(&zPattern)) == matchAll
               || c == matchOne ){
        if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){
          return 0;
        }
      }
      if( c==0 ){
        return 1;   /* "*" at the end of the pattern matches */
      }else if( c==matchOther ){
        if( esc ){
          c = sqlite3Utf8Read(&zPattern);
          if( c==0 ) return 0;
        }else{
          /* "[...]" immediately follows the "*".  We have to do a slow
          ** recursive search in this case, but it is an unusual case. */
          assert( matchOther<0x80 );  /* '[' is a single-byte character */
          while( *zString
                 && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){
            SQLITE_SKIP_UTF8(zString);
          }
          return *zString!=0;
        }
      }








      /* At this point variable c contains the first character of the
      ** pattern string past the "*".  Search in the input string for the
      ** first matching character and recursively contine the match from
      ** that point.
      **
      ** For a case-insensitive search, set variable cx to be the same as
      ** c but in the other case and search the input string for either
      ** c or cx.
      */
      if( c<=0x80 ){
        u32 cx;
        if( noCase ){
          cx = sqlite3Toupper(c);
          c = sqlite3Tolower(c);
        }else{


          cx = c;
        }
        while( (c2 = *(zString++))!=0 ){
          if( c2!=c && c2!=cx ) continue;
          if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
        }

      }else{
        while( (c2 = sqlite3Utf8Read(&zString))!=0 ){
          if( c2!=c ) continue;
          if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
        }

      }


      return 0;



    }
    if( c==matchOther ){
      if( esc ){
        c = sqlite3Utf8Read(&zPattern);
        if( c==0 ) return 0;
        zEscaped = zPattern;
      }else{
        u32 prior_c = 0;
        int seen = 0;
        int invert = 0;
        c = sqlite3Utf8Read(&zString);
        if( c==0 ) return 0;
        c2 = sqlite3Utf8Read(&zPattern);
        if( c2=='^' ){
          invert = 1;
          c2 = sqlite3Utf8Read(&zPattern);
        }
................................................................................
          return 0;
        }
        continue;
      }
    }
    c2 = sqlite3Utf8Read(&zString);
    if( c==c2 ) continue;
    if( noCase && c<0x80 && c2<0x80 && sqlite3Tolower(c)==sqlite3Tolower(c2) ){
      continue;

    }
    if( c==matchOne && zPattern!=zEscaped && c2!=0 ) continue;
    return 0;
  }
  return *zString==0;
}

/*
** The sqlite3_strglob() interface.
*/