SQLite

Check-in [c89d772628]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Performance optimization and comment fixes for the LIKE and GLOB operators.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: c89d772628564a808173f6f73bc1798ec714276b
User & Date: drh 2015-06-17 13:20:54.580
Context
2015-06-17
17:08
Fix an uninitialized variable use in the command-line shell when the ".open" command is invoked without any arguments. (check-in: fc4f4d1ecc user: drh tags: trunk)
13:20
Performance optimization and comment fixes for the LIKE and GLOB operators. (check-in: c89d772628 user: drh tags: trunk)
02:11
Make getCellInfo() a real function instead of a macro, for a size reduction and a 0.2% performance gain. (check-in: 55c393ea14 user: drh tags: trunk)
Changes
Side-by-Side Diff Ignore Whitespace Patch
Changes to src/func.c.
571
572
573
574
575
576
577
578
579
580



581
582
583
584

585
586
587

588
589
590
591
592
593
594
595
571
572
573
574
575
576
577



578
579
580
581
582
583

584

585

586

587
588
589
590
591
592
593







-
-
-
+
+
+



-
+
-

-
+
-







  u8 matchOne;
  u8 matchSet;
  u8 noCase;
};

/*
** For LIKE and GLOB matching on EBCDIC machines, assume that every
** character is exactly one byte in size.  Also, all characters are
** able to participate in upper-case-to-lower-case mappings in EBCDIC
** whereas only characters less than 0x80 do in ASCII.
** character is exactly one byte in size.  Also, provide the Utf8Read()
** macro for fast reading of the next character in the common case where
** the next character is ASCII.
*/
#if defined(SQLITE_EBCDIC)
# define sqlite3Utf8Read(A)        (*((*A)++))
# define GlobUpperToLower(A)       A = sqlite3UpperToLower[A]
# define Utf8Read(A)               (*(A++))
# define GlobUpperToLowerAscii(A)  A = sqlite3UpperToLower[A]
#else
# define GlobUpperToLower(A)       if( A<=0x7f ){ A = sqlite3UpperToLower[A]; }
# define Utf8Read(A)               (A[0]<0x80?*(A++):sqlite3Utf8Read(&A))
# define GlobUpperToLowerAscii(A)  A = sqlite3UpperToLower[A]
#endif

/* Pattern characters for the GLOB operator: '?' is matchOne, '[' is
** matchSet, and noCase is 0.  The leading '*' is presumably the
** match-all wildcard (that struct field is not visible in this hunk). */
static const struct compareInfo globInfo = { '*', '?', '[', 0 };
/* The correct SQL-92 behavior is for the LIKE operator to ignore
** case.  Thus  'a' LIKE 'A' would be true.  Here '_' is matchOne,
** matchSet is 0 because LIKE has no character-set operator, and
** noCase is 1. */
static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
623
624
625
626
627
628
629
630

631
632
633
634
635
636
637
621
622
623
624
625
626
627

628
629
630
631
632
633
634
635







-
+







**      '%'       Matches any sequence of zero or more characters
**
**      '_'       Matches any one character
**
**      Ec        Where E is the "esc" character and c is any other
**                character, including '%', '_', and esc, match exactly c.
**
** The comments through this routine usually assume glob matching.
** The comments within this routine usually assume glob matching.
**
** This routine is usually quick, but can be N**2 in the worst case.
*/
static int patternCompare(
  const u8 *zPattern,              /* The glob pattern */
  const u8 *zString,               /* The string to compare against the glob */
  const struct compareInfo *pInfo, /* Information about how to do the compare */
647
648
649
650
651
652
653
654

655
656
657
658
659
660

661
662
663
664
665
666
667
645
646
647
648
649
650
651

652
653
654
655
656


657
658
659
660
661
662
663
664







-
+




-
-
+







  /* The GLOB operator does not have an ESCAPE clause.  And LIKE does not
  ** have the matchSet operator.  So we either have to look for one or
  ** the other, never both.  Hence the single variable matchOther is used
  ** to store the one we have to look for.
  */
  matchOther = esc ? esc : pInfo->matchSet;

  while( (c = sqlite3Utf8Read(&zPattern))!=0 ){
  while( (c = Utf8Read(zPattern))!=0 ){
    if( c==matchAll ){  /* Match "*" */
      /* Skip over multiple "*" characters in the pattern.  If there
      ** are also "?" characters, skip those as well, but consume a
      ** single character of the input string for each "?" skipped */
      while( (c=sqlite3Utf8Read(&zPattern)) == matchAll
               || c == matchOne ){
      while( (c=Utf8Read(zPattern)) == matchAll || c == matchOne ){
        if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){
          return 0;
        }
      }
      if( c==0 ){
        return 1;   /* "*" at the end of the pattern matches */
      }else if( c==matchOther ){
698
699
700
701
702
703
704
705

706
707
708
709
710
711
712
695
696
697
698
699
700
701

702
703
704
705
706
707
708
709







-
+







          cx = c;
        }
        while( (c2 = *(zString++))!=0 ){
          if( c2!=c && c2!=cx ) continue;
          if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
        }
      }else{
        while( (c2 = sqlite3Utf8Read(&zString))!=0 ){
        while( (c2 = Utf8Read(zString))!=0 ){
          if( c2!=c ) continue;
          if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
        }
      }
      return 0;
    }
    if( c==matchOther ){
744
745
746
747
748
749
750
751

752
753
754
755
756
757
758
741
742
743
744
745
746
747

748
749
750
751
752
753
754
755







-
+







        }
        if( c2==0 || (seen ^ invert)==0 ){
          return 0;
        }
        continue;
      }
    }
    c2 = sqlite3Utf8Read(&zString);
    c2 = Utf8Read(zString);
    if( c==c2 ) continue;
    if( noCase && c<0x80 && c2<0x80 && sqlite3Tolower(c)==sqlite3Tolower(c2) ){
      continue;
    }
    if( c==matchOne && zPattern!=zEscaped && c2!=0 ) continue;
    return 0;
  }