/ Check-in [424b7aee]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix the ICU extension LIKE function so that it does not read past the end of a buffer if it is passed malformed utf-8.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 424b7aee3310b9782bd312589dc3d9f287aa04b8
User & Date: dan 2016-02-26 16:03:29
Context
2016-02-26
21:03
Further refinements to the MSVC batch build process. check-in: cf4e4fbd user: mistachkin tags: trunk
16:14
Merge all recent enhancements from trunk, and in particular the SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER change. check-in: 4fb4aee8 user: drh tags: apple-osx
16:03
Fix the ICU extension LIKE function so that it does not read past the end of a buffer if it is passed malformed utf-8. check-in: 424b7aee user: dan tags: trunk
15:38
Provide the new SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER option to sqlite3_db_config() that can be used to activate the two-argument version of fts3_tokenizer() for a specific database connection at run-time. check-in: 374b5108 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/icu/icu.c.

    55     55   
    56     56   /*
    57     57   ** Version of sqlite3_free() that is always a function, never a macro.
    58     58   */
    59     59   static void xFree(void *p){
    60     60     sqlite3_free(p);
    61     61   }
           62  +
           63  +/*
           64  +** This lookup table is used to help decode the first byte of
           65  +** a multi-byte UTF8 character. It is copied here from SQLite source
           66  +** code file utf8.c.
           67  +*/
           68  +static const unsigned char icuUtf8Trans1[] = {
           69  +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
           70  +  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
           71  +  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
           72  +  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
           73  +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
           74  +  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
           75  +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
           76  +  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
           77  +};
           78  +
           79  +#define SQLITE_ICU_READ_UTF8(zIn, c)                       \
           80  +  c = *(zIn++);                                            \
           81  +  if( c>=0xc0 ){                                           \
           82  +    c = icuUtf8Trans1[c-0xc0];                             \
           83  +    while( (*zIn & 0xc0)==0x80 ){                          \
           84  +      c = (c<<6) + (0x3f & *(zIn++));                      \
           85  +    }                                                      \
           86  +  }
           87  +
           88  +#define SQLITE_ICU_SKIP_UTF8(zIn)                          \
           89  +  assert( *zIn );                                          \
           90  +  if( *(zIn++)>=0xc0 ){                                    \
           91  +    while( (*zIn & 0xc0)==0x80 ){zIn++;}                   \
           92  +  }
           93  +
    62     94   
    63     95   /*
    64     96   ** Compare two UTF-8 strings for equality where the first string is
    65     97   ** a "LIKE" expression. Return true (1) if they are the same and 
    66     98   ** false (0) if they are different.
    67     99   */
    68    100   static int icuLikeCompare(
................................................................................
    69    101     const uint8_t *zPattern,   /* LIKE pattern */
    70    102     const uint8_t *zString,    /* The UTF-8 string to compare against */
    71    103     const UChar32 uEsc         /* The escape character */
    72    104   ){
    73    105     static const int MATCH_ONE = (UChar32)'_';
    74    106     static const int MATCH_ALL = (UChar32)'%';
    75    107   
    76         -  int iPattern = 0;       /* Current byte index in zPattern */
    77         -  int iString = 0;        /* Current byte index in zString */
    78         -
    79    108     int prevEscape = 0;     /* True if the previous character was uEsc */
    80    109   
    81         -  while( zPattern[iPattern]!=0 ){
          110  +  while( 1 ){
    82    111   
    83    112       /* Read (and consume) the next character from the input pattern. */
    84    113       UChar32 uPattern;
    85         -    U8_NEXT_UNSAFE(zPattern, iPattern, uPattern);
          114  +    SQLITE_ICU_READ_UTF8(zPattern, uPattern);
          115  +    if( uPattern==0 ) break;
    86    116   
    87    117       /* There are now 4 possibilities:
    88    118       **
    89    119       **     1. uPattern is an unescaped match-all character "%",
    90    120       **     2. uPattern is an unescaped match-one character "_",
    91    121       **     3. uPattern is an unescaped escape character, or
    92    122       **     4. uPattern is to be handled as an ordinary character
................................................................................
    95    125         /* Case 1. */
    96    126         uint8_t c;
    97    127   
    98    128         /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
    99    129         ** MATCH_ALL. For each MATCH_ONE, skip one character in the 
   100    130         ** test string.
   101    131         */
   102         -      while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){
          132  +      while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){
   103    133           if( c==MATCH_ONE ){
   104         -          if( zString[iString]==0 ) return 0;
   105         -          U8_FWD_1_UNSAFE(zString, iString);
          134  +          if( *zString==0 ) return 0;
          135  +          SQLITE_ICU_SKIP_UTF8(zString);
   106    136           }
   107         -        iPattern++;
          137  +        zPattern++;
   108    138         }
   109    139   
   110         -      if( zPattern[iPattern]==0 ) return 1;
          140  +      if( *zPattern==0 ) return 1;
   111    141   
   112         -      while( zString[iString] ){
   113         -        if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){
          142  +      while( *zString ){
          143  +        if( icuLikeCompare(zPattern, zString, uEsc) ){
   114    144             return 1;
   115    145           }
   116         -        U8_FWD_1_UNSAFE(zString, iString);
          146  +        SQLITE_ICU_SKIP_UTF8(zString);
   117    147         }
   118    148         return 0;
   119    149   
   120    150       }else if( !prevEscape && uPattern==MATCH_ONE ){
   121    151         /* Case 2. */
   122         -      if( zString[iString]==0 ) return 0;
   123         -      U8_FWD_1_UNSAFE(zString, iString);
          152  +      if( *zString==0 ) return 0;
          153  +      SQLITE_ICU_SKIP_UTF8(zString);
   124    154   
   125    155       }else if( !prevEscape && uPattern==uEsc){
   126    156         /* Case 3. */
   127    157         prevEscape = 1;
   128    158   
   129    159       }else{
   130    160         /* Case 4. */
   131    161         UChar32 uString;
   132         -      U8_NEXT_UNSAFE(zString, iString, uString);
          162  +      SQLITE_ICU_READ_UTF8(zString, uString);
   133    163         uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT);
   134    164         uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT);
   135    165         if( uString!=uPattern ){
   136    166           return 0;
   137    167         }
   138    168         prevEscape = 0;
   139    169       }
   140    170     }
   141    171   
   142         -  return zString[iString]==0;
          172  +  return *zString==0;
   143    173   }
   144    174   
   145    175   /*
   146    176   ** Implementation of the like() SQL function.  This function implements
   147    177   ** the built-in LIKE operator.  The first argument to the function is the
   148    178   ** pattern and the second argument is the string.  So, the SQL statements:
   149    179   **