SQLite

Check-in [c5e5614d98]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Modify the patternCompare() function (used for GLOB, LIKE) to better handle patterns containing multiple wildcard characters ("*", "%").
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | pattern-compare-optimization
Files: files | file ages | folders
SHA1: c5e5614d98a752738c081fecdd1e349a1a92b0e5
User & Date: dan 2016-12-01 17:34:59.799
Context
2016-12-01
18:49
Faster version of patternCompare() that uses new return values rather than an extra parameter to communicate wildcard information back up to parent searches. (Closed-Leaf check-in: a1e2b6ce3a user: drh tags: pattern-compare-optimization)
17:34
Modify the patternCompare() function (used for GLOB, LIKE) to better handle patterns containing multiple wildcard characters ("*", "%"). (check-in: c5e5614d98 user: dan tags: pattern-compare-optimization)
2016-11-30
16:54
Add the remember(V,PTR) extension function which copies an SQL value into an application variable. (check-in: d2d30914d8 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/func.c.
632
633
634
635
636
637
638
639

640
641
642
643
644
645
646
647
648
649
650
651

652
653
654
655
656
657
658
659
660
661
662
663

664
665
666
667
668


669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685

686
687
688
689
690
691
692
693
694

695


696

697
698
699



700
701
702
703
704
705
706
**
** This routine is usually quick, but can be N**2 in the worst case.
*/
static int patternCompare(
  const u8 *zPattern,              /* The glob pattern */
  const u8 *zString,               /* The string to compare against the glob */
  const struct compareInfo *pInfo, /* Information about how to do the compare */
  u32 matchOther                   /* The escape char (LIKE) or '[' (GLOB) */

){
  u32 c, c2;                       /* Next pattern and input string chars */
  u32 matchOne = pInfo->matchOne;  /* "?" or "_" */
  u32 matchAll = pInfo->matchAll;  /* "*" or "%" */
  u8 noCase = pInfo->noCase;       /* True if uppercase==lowercase */
  const u8 *zEscaped = 0;          /* One past the last escaped input char */
  
  while( (c = Utf8Read(zPattern))!=0 ){
    if( c==matchAll ){  /* Match "*" */
      /* Skip over multiple "*" characters in the pattern.  If there
      ** are also "?" characters, skip those as well, but consume a
      ** single character of the input string for each "?" skipped */

      while( (c=Utf8Read(zPattern)) == matchAll || c == matchOne ){
        if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){
          return 0;
        }
      }
      if( c==0 ){
        return 1;   /* "*" at the end of the pattern matches */
      }else if( c==matchOther ){
        if( pInfo->matchSet==0 ){
          c = sqlite3Utf8Read(&zPattern);
          if( c==0 ) return 0;
        }else{

          /* "[...]" immediately follows the "*".  We have to do a slow
          ** recursive search in this case, but it is an unusual case. */
          assert( matchOther<0x80 );  /* '[' is a single-byte character */
          while( *zString
                 && patternCompare(&zPattern[-1],zString,pInfo,matchOther)==0 ){


            SQLITE_SKIP_UTF8(zString);
          }
          return *zString!=0;
        }
      }

      /* At this point variable c contains the first character of the
      ** pattern string past the "*".  Search in the input string for the
      ** first matching character and recursively contine the match from
      ** that point.
      **
      ** For a case-insensitive search, set variable cx to be the same as
      ** c but in the other case and search the input string for either
      ** c or cx.
      */
      if( c<=0x80 ){
        u32 cx;

        if( noCase ){
          cx = sqlite3Toupper(c);
          c = sqlite3Tolower(c);
        }else{
          cx = c;
        }
        while( (c2 = *(zString++))!=0 ){
          if( c2!=c && c2!=cx ) continue;
          if( patternCompare(zPattern,zString,pInfo,matchOther) ) return 1;

        }


      }else{

        while( (c2 = Utf8Read(zString))!=0 ){
          if( c2!=c ) continue;
          if( patternCompare(zPattern,zString,pInfo,matchOther) ) return 1;



        }
      }
      return 0;
    }
    if( c==matchOther ){
      if( pInfo->matchSet==0 ){
        c = sqlite3Utf8Read(&zPattern);







|
>












>












>




|
>
>








|








>








|
>
|
>
>

>


|
>
>
>







632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
**
** This routine is usually quick, but can be N**2 in the worst case.
*/
static int patternCompare(
  const u8 *zPattern,              /* The glob pattern */
  const u8 *zString,               /* The string to compare against the glob */
  const struct compareInfo *pInfo, /* Information about how to do the compare */
  u32 matchOther,                  /* The escape char (LIKE) or '[' (GLOB) */
  int *pbSeenMatchAll              /* OUT: True if have seen matchAll */
){
  u32 c, c2;                       /* Next pattern and input string chars */
  u32 matchOne = pInfo->matchOne;  /* "?" or "_" */
  u32 matchAll = pInfo->matchAll;  /* "*" or "%" */
  u8 noCase = pInfo->noCase;       /* True if uppercase==lowercase */
  const u8 *zEscaped = 0;          /* One past the last escaped input char */
  
  while( (c = Utf8Read(zPattern))!=0 ){
    if( c==matchAll ){  /* Match "*" */
      /* Skip over multiple "*" characters in the pattern.  If there
      ** are also "?" characters, skip those as well, but consume a
      ** single character of the input string for each "?" skipped */
      *pbSeenMatchAll = 1;
      while( (c=Utf8Read(zPattern)) == matchAll || c == matchOne ){
        if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){
          return 0;
        }
      }
      if( c==0 ){
        return 1;   /* "*" at the end of the pattern matches */
      }else if( c==matchOther ){
        if( pInfo->matchSet==0 ){
          c = sqlite3Utf8Read(&zPattern);
          if( c==0 ) return 0;
        }else{
          int bMA = 0;            /* True if patternCompare sees matchAll */
          /* "[...]" immediately follows the "*".  We have to do a slow
          ** recursive search in this case, but it is an unusual case. */
          assert( matchOther<0x80 );  /* '[' is a single-byte character */
          while( *zString
            && patternCompare(&zPattern[-1],zString,pInfo,matchOther,&bMA)==0
          ){
            if( bMA ) return 0;
            SQLITE_SKIP_UTF8(zString);
          }
          return *zString!=0;
        }
      }

      /* At this point variable c contains the first character of the
      ** pattern string past the "*".  Search in the input string for the
      ** first matching character and recursively continue the match from
      ** that point.
      **
      ** For a case-insensitive search, set variable cx to be the same as
      ** c but in the other case and search the input string for either
      ** c or cx.
      */
      if( c<=0x80 ){
        u32 cx;
        int bMatchAll = 0;
        if( noCase ){
          cx = sqlite3Toupper(c);
          c = sqlite3Tolower(c);
        }else{
          cx = c;
        }
        while( (c2 = *(zString++))!=0 ){
          if( c2!=c && c2!=cx ) continue;
          if( patternCompare(zPattern,zString,pInfo,matchOther, &bMatchAll) ){
            return 1;
          }
          if( bMatchAll ) break;
        }
      }else{
        int bMatchAll = 0;
        while( (c2 = Utf8Read(zString))!=0 ){
          if( c2!=c ) continue;
          if( patternCompare(zPattern,zString,pInfo,matchOther, &bMatchAll) ){
            return 1;
          }
          if( bMatchAll ) break;
        }
      }
      return 0;
    }
    if( c==matchOther ){
      if( pInfo->matchSet==0 ){
        c = sqlite3Utf8Read(&zPattern);
751
752
753
754
755
756
757

758
759
760
761
762
763
764

765
766
767
768
769
770
771
772
  return *zString==0;
}

/*
** The sqlite3_strglob() interface.
*/
int sqlite3_strglob(const char *zGlobPattern, const char *zString){

  return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, '[')==0;
}

/*
** The sqlite3_strlike() interface.
*/
int sqlite3_strlike(const char *zPattern, const char *zStr, unsigned int esc){

  return patternCompare((u8*)zPattern, (u8*)zStr, &likeInfoNorm, esc)==0;
}

/*
** Count the number of times that the LIKE operator (or GLOB which is
** just a variation of LIKE) gets called.  This is used for testing
** only.
*/







>
|






>
|







764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
  return *zString==0;
}

/*
** The sqlite3_strglob() interface.
*/
int sqlite3_strglob(const char *zGlobPattern, const char *zString){
  int dummy = 0;
  return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, '[', &dummy)==0;
}

/*
** The sqlite3_strlike() interface.
*/
int sqlite3_strlike(const char *zPattern, const char *zStr, unsigned int esc){
  int dummy = 0;
  return patternCompare((u8*)zPattern, (u8*)zStr, &likeInfoNorm, esc, &dummy)==0;
}

/*
** Count the number of times that the LIKE operator (or GLOB which is
** just a variation of LIKE) gets called.  This is used for testing
** only.
*/
836
837
838
839
840
841
842

843
844
845
846
847
848
849
850
851
852
853
      return;
    }
    escape = sqlite3Utf8Read(&zEsc);
  }else{
    escape = pInfo->matchSet;
  }
  if( zA && zB ){

#ifdef SQLITE_TEST
    sqlite3_like_count++;
#endif
    sqlite3_result_int(context, patternCompare(zB, zA, pInfo, escape));
  }
}

/*
** Implementation of the NULLIF(x,y) function.  The result is the first
** argument if the arguments are different.  The result is NULL if the
** arguments are equal to each other.







>



|







851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
      return;
    }
    escape = sqlite3Utf8Read(&zEsc);
  }else{
    escape = pInfo->matchSet;
  }
  if( zA && zB ){
    int dummy = 0;
#ifdef SQLITE_TEST
    sqlite3_like_count++;
#endif
    sqlite3_result_int(context, patternCompare(zB, zA, pInfo, escape, &dummy));
  }
}

/*
** Implementation of the NULLIF(x,y) function.  The result is the first
** argument if the arguments are different.  The result is NULL if the
** arguments are equal to each other.