SQLite

Check-in [5ab1023d6c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:More performance optimization for the LIKE and GLOB operators.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 5ab1023d6cfe31fa8a194804b8216058977ac973
User & Date: drh 2014-09-25 03:51:37.139
Context
2014-09-25
11:08
Still more performance enhancements to the LIKE and GLOB operators. (check-in: 6c8924cacc user: drh tags: trunk)
03:51
More performance optimization for the LIKE and GLOB operators. (check-in: 5ab1023d6c user: drh tags: trunk)
02:44
Change that might allow SQLite to build and work using the EBCDIC character set. (check-in: ef30e0352b user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/func.c.
563
564
565
566
567
568
569
570
571

572
573

574
575
576
577
578
579
580
/*
** For LIKE and GLOB matching on EBCDIC machines, assume that every
** character is exactly one byte in size.  Also, all characters are
** able to participate in upper-case-to-lower-case mappings in EBCDIC
** whereas only characters less than 0x80 do in ASCII.
*/
#if defined(SQLITE_EBCDIC)
# define sqlite3Utf8Read(A)    (*((*A)++))
# define GlobUpperToLower(A)   A = sqlite3UpperToLower[A]

#else
# define GlobUpperToLower(A)   if( !((A)&~0x7f) ){ A = sqlite3UpperToLower[A]; }

#endif

static const struct compareInfo globInfo = { '*', '?', '[', 0 };
/* The correct SQL-92 behavior is for the LIKE operator to ignore
** case.  Thus  'a' LIKE 'A' would be true. */
static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator







|
|
>

|
>







563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
/*
** For LIKE and GLOB matching on EBCDIC machines, assume that every
** character is exactly one byte in size.  Also, all characters are
** able to participate in upper-case-to-lower-case mappings in EBCDIC
** whereas only characters less than 0x80 do in ASCII.
*/
#if defined(SQLITE_EBCDIC)
# define sqlite3Utf8Read(A)        (*((*A)++))
# define GlobUpperToLower(A)       A = sqlite3UpperToLower[A]
# define GlobUpperToLowerAscii(A)  A = sqlite3UpperToLower[A]
#else
# define GlobUpperToLower(A)       if( A<=0x7f ){ A = sqlite3UpperToLower[A]; }
# define GlobUpperToLowerAscii(A)  A = sqlite3UpperToLower[A]
#endif

static const struct compareInfo globInfo = { '*', '?', '[', 0 };
/* The correct SQL-92 behavior is for the LIKE operator to ignore
** case.  Thus  'a' LIKE 'A' would be true. */
static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
614
615
616
617
618
619
620
621
622
623
624







625
626
627
628
629
630
631
632
633
634
635
636

637
638
639
640
641
642
643

644
645
646
647
648

649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665

666
667
668


669

670




671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702


703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
  const u8 *zString,               /* The string to compare against the glob */
  const struct compareInfo *pInfo, /* Information about how to do the compare */
  u32 esc                          /* The escape character */
){
  u32 c, c2;
  int invert;
  int seen;
  u8 matchOne = pInfo->matchOne;
  u8 matchAll = pInfo->matchAll;
  u8 matchSet = pInfo->matchSet;
  u8 noCase = pInfo->noCase; 








  while( (c = sqlite3Utf8Read(&zPattern))!=0 ){
    if( c==matchAll ){
      while( (c=sqlite3Utf8Read(&zPattern)) == matchAll
               || c == matchOne ){
        if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){
          return 0;
        }
      }
      if( c==0 ){
        return 1;
      }else if( c==esc ){

        c = sqlite3Utf8Read(&zPattern);
        if( c==0 ){
          return 0;
        }
      }else if( c==matchSet ){
        assert( esc==0 );         /* This is GLOB, not LIKE */
        assert( matchSet<0x80 );  /* '[' is a single-byte character */

        while( *zString && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){
          SQLITE_SKIP_UTF8(zString);
        }
        return *zString!=0;
      }

      while( (c2 = sqlite3Utf8Read(&zString))!=0 ){
        if( noCase ){
          GlobUpperToLower(c2);
          GlobUpperToLower(c);
          while( c2 != 0 && c2 != c ){
            c2 = sqlite3Utf8Read(&zString);
            GlobUpperToLower(c2);
          }
        }else{
          while( c2 != 0 && c2 != c ){
            c2 = sqlite3Utf8Read(&zString);
          }
        }
        if( c2==0 ) return 0;
        if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
      }
      return 0;

    }else if( c==matchOne ){
      if( sqlite3Utf8Read(&zString)==0 ){
        return 0;


      }

    }else if( c==matchSet ){




      u32 prior_c = 0;
      assert( esc==0 );    /* This only occurs for GLOB, not LIKE */
      seen = 0;
      invert = 0;
      c = sqlite3Utf8Read(&zString);
      if( c==0 ) return 0;
      c2 = sqlite3Utf8Read(&zPattern);
      if( c2=='^' ){
        invert = 1;
        c2 = sqlite3Utf8Read(&zPattern);
      }
      if( c2==']' ){
        if( c==']' ) seen = 1;
        c2 = sqlite3Utf8Read(&zPattern);
      }
      while( c2 && c2!=']' ){
        if( c2=='-' && zPattern[0]!=']' && zPattern[0]!=0 && prior_c>0 ){
          c2 = sqlite3Utf8Read(&zPattern);
          if( c>=prior_c && c<=c2 ) seen = 1;
          prior_c = 0;
        }else{
          if( c==c2 ){
            seen = 1;
          }
          prior_c = c2;
        }
        c2 = sqlite3Utf8Read(&zPattern);
      }
      if( c2==0 || (seen ^ invert)==0 ){
        return 0;
      }
    }else{


      c2 = sqlite3Utf8Read(&zString);
      if( c==esc ){
        c = sqlite3Utf8Read(&zPattern);
      }
      if( noCase ){
        GlobUpperToLower(c);
        GlobUpperToLower(c2);
      }
      if( c!=c2 ){
        return 0;
      }
    }
  }
  return *zString==0;
}

/*
** The sqlite3_strglob() interface.
*/







|
|
|
|
>
>
>
>
>
>
>











|
>
|
|
<
|
<
<
|
>
|
|
|
|
|
>

|

|

|
|










>
|


>
>

>
|
>
>
>
>
|
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
|
|
<
<
|
|
|
<
|
<
<
<







616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648

649


650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688

689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722


723
724
725

726



727
728
729
730
731
732
733
  const u8 *zString,               /* The string to compare against the glob */
  const struct compareInfo *pInfo, /* Information about how to do the compare */
  u32 esc                          /* The escape character */
){
  u32 c, c2;
  int invert;
  int seen;
  u32 matchOne = pInfo->matchOne;
  u32 matchAll = pInfo->matchAll;
  u32 matchOther;
  u8 noCase = pInfo->noCase;
  
  /* The GLOB operator does not have an ESCAPE clause.  And LIKE does not
  ** have the matchSet operator.  So we either have to look for one or
  ** the other, never both.  Hence the single variable matchOther is used
  ** to store the one we have to look for.
  */
  matchOther = esc ? esc : pInfo->matchSet;

  while( (c = sqlite3Utf8Read(&zPattern))!=0 ){
    if( c==matchAll ){
      while( (c=sqlite3Utf8Read(&zPattern)) == matchAll
               || c == matchOne ){
        if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){
          return 0;
        }
      }
      if( c==0 ){
        return 1;
      }else if( c==matchOther ){
        if( esc ){
          c = sqlite3Utf8Read(&zPattern);
          if( c==0 ) return 0;

        }else{


          assert( matchOther<0x80 );  /* '[' is a single-byte character */
          while( *zString
                 && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){
            SQLITE_SKIP_UTF8(zString);
          }
          return *zString!=0;
        }
      }
      while( (c2 = sqlite3Utf8Read(&zString))!=0 ){
        if( noCase && c<0x80 ){
          GlobUpperToLower(c2);
          GlobUpperToLowerAscii(c);
          while( c2 != 0 && c2 != c ){
            do{ c2 = *(zString++); }while( c2>0x7f );
            GlobUpperToLowerAscii(c2);
          }
        }else{
          while( c2 != 0 && c2 != c ){
            c2 = sqlite3Utf8Read(&zString);
          }
        }
        if( c2==0 ) return 0;
        if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
      }
      return 0;
    }
    if( c==matchOne ){
      if( sqlite3Utf8Read(&zString)==0 ){
        return 0;
      }else{
        continue;
      }
    }
    if( c==matchOther ){
      if( esc ){
        c = sqlite3Utf8Read(&zPattern);
        if( c==0 ) return 0;
      }else{
        u32 prior_c = 0;

        seen = 0;
        invert = 0;
        c = sqlite3Utf8Read(&zString);
        if( c==0 ) return 0;
        c2 = sqlite3Utf8Read(&zPattern);
        if( c2=='^' ){
          invert = 1;
          c2 = sqlite3Utf8Read(&zPattern);
        }
        if( c2==']' ){
          if( c==']' ) seen = 1;
          c2 = sqlite3Utf8Read(&zPattern);
        }
        while( c2 && c2!=']' ){
          if( c2=='-' && zPattern[0]!=']' && zPattern[0]!=0 && prior_c>0 ){
            c2 = sqlite3Utf8Read(&zPattern);
            if( c>=prior_c && c<=c2 ) seen = 1;
            prior_c = 0;
          }else{
            if( c==c2 ){
              seen = 1;
            }
            prior_c = c2;
          }
          c2 = sqlite3Utf8Read(&zPattern);
        }
        if( c2==0 || (seen ^ invert)==0 ){
          return 0;
        }
        continue;
      }
    }
    c2 = sqlite3Utf8Read(&zString);
    if( c==c2 ) continue;


    if( !noCase ) return 0;
    GlobUpperToLower(c);
    GlobUpperToLower(c2);

    if( c!=c2 ) return 0;



  }
  return *zString==0;
}

/*
** The sqlite3_strglob() interface.
*/