/ Check-in [0a47c8f8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Combine the implementation of LIKE and GLOB into a single parameterized function. (CVS 1923)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 0a47c8f86d1649e9ae7edd4c49a6fe5f5272351e
User & Date: drh 2004-08-31 00:52:37
Context
2004-08-31
13:45
Simplifications and optimizations. Also: disable the corrupt.test for now. (CVS 1924) check-in: 8fd65e70 user: drh tags: trunk
00:52
Combine the implementation of LIKE and GLOB into a single parameterized function. (CVS 1923) check-in: 0a47c8f8 user: drh tags: trunk
2004-08-30
16:52
Better detection and handling of corrupt database files. (CVS 1922) check-in: 8f5b199e user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/func.c.

    12     12   ** This file contains the C functions that implement various SQL
    13     13   ** functions of SQLite.  
    14     14   **
    15     15   ** There is only one exported symbol in this file - the function
    16     16   ** sqlite3RegisterBuiltinFunctions() found at the bottom of the file.
    17     17   ** All other code has file scope.
    18     18   **
    19         -** $Id: func.c,v 1.80 2004/08/08 20:22:18 drh Exp $
           19  +** $Id: func.c,v 1.81 2004/08/31 00:52:37 drh Exp $
    20     20   */
    21     21   #include <ctype.h>
    22     22   #include <math.h>
    23     23   #include <stdlib.h>
    24     24   #include <assert.h>
    25     25   #include "sqliteInt.h"
    26     26   #include "vdbeInt.h"
................................................................................
   294    294     int arg,
   295    295     sqlite3_value **argv
   296    296   ){
   297    297     sqlite *db = sqlite3_user_data(context);
   298    298     sqlite3_result_int(context, sqlite3_total_changes(db));
   299    299   }
   300    300   
   301         -#if 0
   302         -
   303    301   /*
   304         -** A LIKE pattern compiles to an instance of the following structure. Refer
   305         -** to the comment for compileLike() function for details.
          302  +** A structure defining how to do GLOB-style comparisons.
   306    303   */
   307         -struct LikePattern {
   308         -  int nState;
   309         -  struct LikeState {
   310         -    int val;           /* Unicode codepoint or -1 for any char i.e. '_' */
   311         -    int failstate;     /* State to jump to if next char is not val */
   312         -  } aState[1];
          304  +struct compareInfo {
          305  +  u8 matchAll;
          306  +  u8 matchOne;
          307  +  u8 matchSet;
          308  +  u8 noCase;
   313    309   };
   314         -typedef struct LikePattern LikePattern;
   315         -
   316         -void deleteLike(void *pLike){
   317         -  sqliteFree(pLike);
   318         -}
   319         -/* #define TRACE_LIKE */
   320         -#if defined(TRACE_LIKE) && !defined(NDEBUG)
   321         -char *dumpLike(LikePattern *pLike){
   322         -  int i;
   323         -  int k = 0;
   324         -  char *zBuf = (char *)sqliteMalloc(pLike->nState*40);
   325         -  
   326         -  k += sprintf(&zBuf[k], "%d states - ", pLike->nState);
   327         -  for(i=0; i<pLike->nState; i++){
   328         -    k += sprintf(&zBuf[k], " %d:(%d, %d)", i, pLike->aState[i].val,
   329         -        pLike->aState[i].failstate);
   330         -  }
   331         -  return zBuf;
   332         -}
   333         -#endif
   334         -
   335         -/*
   336         -** This function compiles an SQL 'LIKE' pattern into a state machine, 
   337         -** represented by a LikePattern structure.
   338         -**
   339         -** Each state of the state-machine has two attributes, 'val' and
   340         -** 'failstate'. The val attribute is either the value of a unicode 
   341         -** codepoint, or -1, indicating a '_' wildcard (match any single
   342         -** character). The failstate is either the number of another state
   343         -** or -1, indicating jump to 'no match'.
   344         -**
   345         -** To see if a string matches a pattern the pattern is
   346         -** compiled to a state machine that is executed according to the algorithm
   347         -** below. The string is assumed to be terminated by a 'NUL' character
   348         -** (unicode codepoint 0).
          310  +static const struct compareInfo globInfo = { '*', '?', '[', 0 };
          311  +static const struct compareInfo likeInfo = { '%', '_',   0, 1 };
          312  +
          313  +/*
          314  +** X is a pointer to the first byte of a UTF-8 character.  Increment
          315  +** X so that it points to the next character.  This only works right
          316  +** if X points to a well-formed UTF-8 string.
          317  +*/
          318  +#define sqliteNextChar(X)  while( (0xc0&*++(X))==0x80 ){}
          319  +#define sqliteCharVal(X)   sqlite3ReadUtf8(X)
          320  +
          321  +
          322  +/*
          323  +** Compare two UTF-8 strings for equality where the first string can
          324  +** potentially be a "glob" expression.  Return true (1) if they
          325  +** are the same and false (0) if they are different.
          326  +**
          327  +** Globbing rules:
          328  +**
          329  +**      '*'       Matches any sequence of zero or more characters.
          330  +**
          331  +**      '?'       Matches exactly one character.
          332  +**
          333  +**     [...]      Matches one character from the enclosed list of
          334  +**                characters.
          335  +**
          336  +**     [^...]     Matches one character not in the enclosed list.
          337  +**
          338  +** With the [...] and [^...] matching, a ']' character can be included
          339  +** in the list by making it the first character after '[' or '^'.  A
          340  +** range of characters can be specified using '-'.  Example:
          341  +** "[a-z]" matches any single lower-case letter.  To match a '-', make
          342  +** it the last character in the list.
          343  +**
          344  +** This routine is usually quick, but can be N**2 in the worst case.
          345  +**
          346  +** Hints: to match '*' or '?', put them in "[]".  Like this:
   349    347   **
   350         -** 1   S = 0
   351         -** 2   DO 
   352         -** 3       C = <Next character from input string>
   353         -** 4       IF( C matches <State S val> )
   354         -** 5           S = S+1
   355         -** 6       ELSE IF( S != <State S failstate> )
   356         -** 7           S = <State S failstate>
   357         -** 8           <Rewind Input string 1 character>
   358         -** 9   WHILE( (C != NUL) AND (S != FAILED) )
   359         -** 10
   360         -** 11  IF( S == <number of states> )
   361         -** 12      RETURN MATCH
   362         -** 13  ELSE
   363         -** 14      RETURN NO-MATCH
   364         -**       
   365         -** In practice there is a small optimization to avoid the <Rewind>
   366         -** operation in line 8 of the description above.
   367         -**
   368         -** For example, the following pattern, 'X%ABabc%_Y' is compiled to
   369         -** the state machine below.
   370         -**
   371         -** State    Val          FailState
   372         -** -------------------------------
   373         -** 0        120 (x)      -1 (NO MATCH)
   374         -** 1        97  (a)      1
   375         -** 2        98  (b)      1
   376         -** 3        97  (a)      1
   377         -** 4        98  (b)      2
   378         -** 5        99  (c)      3
   379         -** 6        -1  (_)      6
   380         -** 7        121 (y)      7
   381         -** 8        0   (NUL)    7
   382         -**
   383         -** The algorithms implemented to compile and execute the state machine were
   384         -** first presented in "Fast pattern matching in strings", Knuth, Morris and
   385         -** Pratt, 1977.
   386         -**       
   387         -*/
   388         -LikePattern *compileLike(sqlite3_value *pPattern, u8 enc){
   389         -  LikePattern *pLike;
   390         -  struct LikeState *aState;
   391         -  int pc_state = -1;    /* State number of previous '%' wild card */
   392         -  int n = 0;
   393         -  int c;
   394         -
   395         -  int offset = 0;
   396         -  const char *zLike;
   397         - 
   398         -  if( enc==SQLITE_UTF8 ){
   399         -    zLike = sqlite3_value_text(pPattern);
   400         -    n = sqlite3_value_bytes(pPattern) + 1;
   401         -  }else{
   402         -    zLike = sqlite3_value_text16(pPattern);
   403         -    n = sqlite3_value_bytes16(pPattern)/2 + 1;
   404         -  }
   405         -
   406         -  pLike = (LikePattern *)
   407         -      sqliteMalloc(sizeof(LikePattern)+n*sizeof(struct LikeState));
   408         -  aState = pLike->aState;
   409         -
   410         -  n = 0;
   411         -  do {
   412         -    c = sqlite3ReadUniChar(zLike, &offset, &enc, 1);
   413         -    if( c==95 ){        /* A '_' wildcard */
   414         -      aState[n].val = -1;
   415         -      n++;
   416         -    }else if( c==37 ){  /* A '%' wildcard */
   417         -      aState[n].failstate = n;
   418         -      pc_state = n;
   419         -    }else{              /* A regular character */
   420         -      aState[n].val = c;
   421         -
   422         -      assert( pc_state<=n );
   423         -      if( pc_state<0 ){
   424         -        aState[n].failstate = -1;
   425         -      }else if( pc_state==n ){
   426         -        if( c ){
   427         -          aState[n].failstate = pc_state;
   428         -        }else{
   429         -          aState[n].failstate = -2;
   430         -        }
   431         -      }else{
   432         -        int k = pLike->aState[n-1].failstate;
   433         -        while( k>pc_state && aState[k+1].val!=-1 && aState[k+1].val!=c ){
   434         -          k = aState[k].failstate;
   435         -        }
   436         -        if( k!=pc_state && aState[k+1].val==c ){
   437         -          assert( k==pc_state );
   438         -          k++;
   439         -        }
   440         -        aState[n].failstate = k;
   441         -      }
   442         -      n++;
   443         -    }
   444         -  }while( c );
   445         -  pLike->nState = n;
   446         -#if defined(TRACE_LIKE) && !defined(NDEBUG)
   447         -  {
   448         -    char *zCompiled = dumpLike(pLike);
   449         -    printf("Pattern=\"%s\" Compiled=\"%s\"\n", zPattern, zCompiled);
   450         -    sqliteFree(zCompiled);
   451         -  }
   452         -#endif
   453         -  return pLike;
   454         -}
   455         -
   456         -/*
   457         -** Implementation of the like() SQL function.  This function implements
   458         -** the build-in LIKE operator.  The first argument to the function is the
   459         -** pattern and the second argument is the string.  So, the SQL statements:
   460         -**
   461         -**       A LIKE B
   462         -**
   463         -** is implemented as like(B,A).
   464         -**
   465         -** If the pointer retrieved by via a call to sqlite3_user_data() is
   466         -** not NULL, then this function uses UTF-16. Otherwise UTF-8.
   467         -*/
   468         -static void likeFunc(
   469         -  sqlite3_context *context, 
   470         -  int argc, 
   471         -  sqlite3_value **argv
          348  +**         abc[*]xyz        Matches "abc*xyz" only
          349  +*/
          350  +int patternCompare(
          351  +  const u8 *zPattern,              /* The glob pattern */
          352  +  const u8 *zString,               /* The string to compare against the glob */
          353  +  const struct compareInfo *pInfo  /* Information about how to do the compare */
   472    354   ){
   473    355     register int c;
   474         -  u8 enc;
   475         -  int offset = 0;
   476         -  const unsigned char *zString;
   477         -  LikePattern *pLike = sqlite3_get_auxdata(context, 0); 
   478         -  struct LikeState *aState;
   479         -  register struct LikeState *pState;
   480         -
   481         -  /* If either argument is NULL, the result is NULL */
   482         -  if( sqlite3_value_type(argv[1])==SQLITE_NULL || 
   483         -      sqlite3_value_type(argv[0])==SQLITE_NULL ){
   484         -    return;
   485         -  }
   486         -
   487         -  /* If the user-data pointer is NULL, use UTF-8. Otherwise UTF-16. */
   488         -  if( sqlite3_user_data(context) ){
   489         -    enc = SQLITE_UTF16NATIVE;
   490         -    zString = (const unsigned char *)sqlite3_value_text16(argv[1]);
   491         -    assert(0);
   492         -  }else{
   493         -    enc = SQLITE_UTF8;
   494         -    zString = sqlite3_value_text(argv[1]);
   495         -  }
   496         -
   497         -  /* If the LIKE pattern has not been compiled, compile it now. */
   498         -  if( !pLike ){
   499         -    pLike = compileLike(argv[0], enc);
   500         -    if( !pLike ){
   501         -      sqlite3_result_error(context, "out of memory", -1);
   502         -      return;
   503         -    }
   504         -    sqlite3_set_auxdata(context, 0, pLike, deleteLike);
   505         -  }
   506         -  aState = pLike->aState;
   507         -  pState = aState;
   508         -
   509         -  do {
   510         -    if( enc==SQLITE_UTF8 ){
   511         -      c = zString[offset++];
   512         -      if( c&0x80 ){
   513         -        offset--;
   514         -        c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
          356  +  int invert;
          357  +  int seen;
          358  +  int c2;
          359  +  u8 matchOne = pInfo->matchOne;
          360  +  u8 matchAll = pInfo->matchAll;
          361  +  u8 matchSet = pInfo->matchSet;
          362  +  u8 noCase = pInfo->noCase; 
          363  +
          364  +  while( (c = *zPattern)!=0 ){
          365  +    if( c==matchAll ){
          366  +      while( (c=zPattern[1]) == matchAll || c == matchOne ){
          367  +        if( c==matchOne ){
          368  +          if( *zString==0 ) return 0;
          369  +          sqliteNextChar(zString);
          370  +        }
          371  +        zPattern++;
          372  +      }
          373  +      if( c==0 ) return 1;
          374  +      if( c==matchSet ){
          375  +        while( *zString && patternCompare(&zPattern[1],zString,pInfo)==0 ){
          376  +          sqliteNextChar(zString);
          377  +        }
          378  +        return *zString!=0;
          379  +      }else{
          380  +        while( (c2 = *zString)!=0 ){
          381  +          if( noCase ){
          382  +            c2 = sqlite3UpperToLower[c2];
          383  +            c = sqlite3UpperToLower[c];
          384  +            while( c2 != 0 && c2 != c ){ c2 = sqlite3UpperToLower[*++zString]; }
          385  +          }else{
          386  +            while( c2 != 0 && c2 != c ){ c2 = *++zString; }
          387  +          }
          388  +          if( c2==0 ) return 0;
          389  +          if( patternCompare(&zPattern[1],zString,pInfo) ) return 1;
          390  +          sqliteNextChar(zString);
          391  +        }
          392  +        return 0;
          393  +      }
          394  +    }else if( c==matchOne ){
          395  +      if( *zString==0 ) return 0;
          396  +      sqliteNextChar(zString);
          397  +      zPattern++;
          398  +    }else if( c==matchSet ){
          399  +      int prior_c = 0;
          400  +      seen = 0;
          401  +      invert = 0;
          402  +      c = sqliteCharVal(zString);
          403  +      if( c==0 ) return 0;
          404  +      c2 = *++zPattern;
          405  +      if( c2=='^' ){ invert = 1; c2 = *++zPattern; }
          406  +      if( c2==']' ){
          407  +        if( c==']' ) seen = 1;
          408  +        c2 = *++zPattern;
          409  +      }
          410  +      while( (c2 = sqliteCharVal(zPattern))!=0 && c2!=']' ){
          411  +        if( c2=='-' && zPattern[1]!=']' && zPattern[1]!=0 && prior_c>0 ){
          412  +          zPattern++;
          413  +          c2 = sqliteCharVal(zPattern);
          414  +          if( c>=prior_c && c<=c2 ) seen = 1;
          415  +          prior_c = 0;
          416  +        }else if( c==c2 ){
          417  +          seen = 1;
          418  +          prior_c = c2;
          419  +        }else{
          420  +          prior_c = c2;
          421  +        }
          422  +        sqliteNextChar(zPattern);
   515    423         }
          424  +      if( c2==0 || (seen ^ invert)==0 ) return 0;
          425  +      sqliteNextChar(zString);
          426  +      zPattern++;
   516    427       }else{
   517         -      c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
   518         -    }
   519         -
   520         -skip_read:
   521         -
   522         -#if defined(TRACE_LIKE) && !defined(NDEBUG)
   523         -    printf("State=%d:(%d, %d) Input=%d\n", 
   524         -        (aState - pState), pState->val, pState->failstate, c);
   525         -#endif
   526         -
   527         -    if( pState->val==-1 || pState->val==c ){
   528         -      pState++;
   529         -    }else{
   530         -      struct LikeState *pFailState = &aState[pState->failstate];
   531         -      if( pState!=pFailState ){
   532         -        pState = pFailState;
   533         -        if( c && pState>=aState ) goto skip_read;
          428  +      if( noCase ){
          429  +        if( sqlite3UpperToLower[c] != sqlite3UpperToLower[*zString] ) return 0;
          430  +      }else{
          431  +        if( c != *zString ) return 0;
   534    432         }
          433  +      zPattern++;
          434  +      zString++;
   535    435       }
   536         -  }while( c && pState>=aState );
          436  +  }
          437  +  return *zString==0;
          438  +}
   537    439   
   538         -  if( (pState-aState)==pLike->nState || (pState-aState)<-1 ){
   539         -    sqlite3_result_int(context, 1);
   540         -  }else{
   541         -    sqlite3_result_int(context, 0);
   542         -  }
   543         -}
   544         -#endif
   545    440   
   546    441   /*
   547    442   ** Implementation of the like() SQL function.  This function implements
   548    443   ** the built-in LIKE operator.  The first argument to the function is the
   549    444   ** pattern and the second argument is the string.  So, the SQL statements:
   550    445   **
   551    446   **       A LIKE B
................................................................................
   559    454     sqlite3_context *context, 
   560    455     int argc, 
   561    456     sqlite3_value **argv
   562    457   ){
   563    458     const unsigned char *zA = sqlite3_value_text(argv[0]);
   564    459     const unsigned char *zB = sqlite3_value_text(argv[1]);
   565    460     if( zA && zB ){
   566         -    sqlite3_result_int(context, sqlite3utf8LikeCompare(zA, zB));
          461  +    sqlite3_result_int(context, patternCompare(zA, zB, &likeInfo));
   567    462     }
   568    463   }
   569    464   
   570    465   /*
   571    466   ** Implementation of the glob() SQL function.  This function implements
   572    467   ** the built-in GLOB operator.  The first argument to the function is the
   573    468   ** string and the second argument is the pattern.  So, the SQL statements:
................................................................................
   576    471   **
   577    472   ** is implemented as glob(A,B).
   578    473   */
   579    474   static void globFunc(sqlite3_context *context, int arg, sqlite3_value **argv){
   580    475     const unsigned char *zA = sqlite3_value_text(argv[0]);
   581    476     const unsigned char *zB = sqlite3_value_text(argv[1]);
   582    477     if( zA && zB ){
   583         -    sqlite3_result_int(context, sqlite3GlobCompare(zA, zB));
          478  +    sqlite3_result_int(context, patternCompare(zA, zB, &globInfo));
   584    479     }
   585    480   }
   586    481   
   587    482   /*
   588    483   ** Implementation of the NULLIF(x,y) function.  The result is the first
   589    484   ** argument if the arguments are different.  The result is NULL if the
   590    485   ** arguments are equal to each other.
................................................................................
  1011    906     sqlite3_value *pRes;
  1012    907     pRes = (sqlite3_value *)sqlite3_aggregate_context(context, sizeof(Mem));
  1013    908     if( pRes->flags ){
  1014    909       sqlite3_result_value(context, pRes);
  1015    910     }
  1016    911     sqlite3VdbeMemRelease(pRes);
  1017    912   }
          913  +
  1018    914   
  1019    915   /*
  1020    916   ** This function registers all of the above C functions as SQL
  1021    917   ** functions.  This should be the only routine in this file with
  1022    918   ** external linkage.
  1023    919   */
  1024    920   void sqlite3RegisterBuiltinFunctions(sqlite *db){

Changes to src/sqliteInt.h.

     7      7   **    May you do good and not evil.
     8      8   **    May you find forgiveness for yourself and forgive others.
     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   *************************************************************************
    12     12   ** Internal interface definitions for SQLite.
    13     13   **
    14         -** @(#) $Id: sqliteInt.h,v 1.316 2004/08/21 17:54:45 drh Exp $
           14  +** @(#) $Id: sqliteInt.h,v 1.317 2004/08/31 00:52:37 drh Exp $
    15     15   */
    16     16   #ifndef _SQLITEINT_H_
    17     17   #define _SQLITEINT_H_
    18     18   
    19     19   #include "config.h"
    20     20   #include "sqlite3.h"
    21     21   #include "hash.h"
................................................................................
  1266   1266   Table *sqlite3LocateTable(Parse*,const char*, const char*);
  1267   1267   Index *sqlite3FindIndex(sqlite*,const char*, const char*);
  1268   1268   void sqlite3UnlinkAndDeleteTable(sqlite*,int,const char*);
  1269   1269   void sqlite3UnlinkAndDeleteIndex(sqlite*,int,const char*);
  1270   1270   void sqlite3UnlinkAndDeleteTrigger(sqlite*,int,const char*);
  1271   1271   void sqlite3Vacuum(Parse*, Token*);
  1272   1272   int sqlite3RunVacuum(char**, sqlite*);
  1273         -int sqlite3GlobCompare(const unsigned char*,const unsigned char*);
  1274   1273   char *sqlite3NameFromToken(Token*);
  1275   1274   int sqlite3ExprCheck(Parse*, Expr*, int, int*);
  1276   1275   int sqlite3ExprCompare(Expr*, Expr*);
  1277   1276   int sqliteFuncId(Token*);
  1278   1277   int sqlite3ExprResolveIds(Parse*, SrcList*, ExprList*, Expr*);
  1279   1278   int sqlite3ExprResolveAndCheck(Parse*,SrcList*,ExprList*,Expr*,int,int*);
  1280   1279   int sqlite3ExprAnalyzeAggregates(Parse*, Expr*);
................................................................................
  1350   1349   int sqlite3FixTriggerStep(DbFixer*, TriggerStep*);
  1351   1350   double sqlite3AtoF(const char *z, const char **);
  1352   1351   char *sqlite3_snprintf(int,char*,const char*,...);
  1353   1352   int sqlite3GetInt32(const char *, int*);
  1354   1353   int sqlite3FitsIn64Bits(const char *);
  1355   1354   int sqlite3utf16ByteLen(const void *pData, int nChar);
  1356   1355   int sqlite3utf8CharLen(const char *pData, int nByte);
  1357         -int sqlite3utf8LikeCompare(const unsigned char*, const unsigned char*);
         1356  +int sqlite3ReadUtf8(const unsigned char *);
  1358   1357   int sqlite3PutVarint(unsigned char *, u64);
  1359   1358   int sqlite3GetVarint(const unsigned char *, u64 *);
  1360   1359   int sqlite3GetVarint32(const unsigned char *, u32 *);
  1361   1360   int sqlite3VarintLen(u64 v);
  1362   1361   char sqlite3AffinityType(const char *, int);
  1363   1362   void sqlite3IndexAffinityStr(Vdbe *, Index *);
  1364   1363   void sqlite3TableAffinityStr(Vdbe *, Table *);
................................................................................
  1384   1383   
  1385   1384   const void *sqlite3ValueText(sqlite3_value*, u8);
  1386   1385   int sqlite3ValueBytes(sqlite3_value*, u8);
  1387   1386   void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8, void(*)(void*));
  1388   1387   void sqlite3ValueFree(sqlite3_value*);
  1389   1388   sqlite3_value *sqlite3ValueNew();
  1390   1389   sqlite3_value *sqlite3GetTransientValue(sqlite *db);
         1390  +extern const unsigned char sqlite3UpperToLower[];
  1391   1391   
  1392   1392   #endif

Changes to src/utf.c.

     8      8   **    May you find forgiveness for yourself and forgive others.
     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   *************************************************************************
    12     12   ** This file contains routines used to translate between UTF-8, 
    13     13   ** UTF-16, UTF-16BE, and UTF-16LE.
    14     14   **
    15         -** $Id: utf.c,v 1.27 2004/08/08 23:39:19 drh Exp $
           15  +** $Id: utf.c,v 1.28 2004/08/31 00:52:37 drh Exp $
    16     16   **
    17     17   ** Notes on UTF-8:
    18     18   **
    19     19   **   Byte-0    Byte-1    Byte-2    Byte-3    Value
    20     20   **  0xxxxxxx                                 00000000 00000000 0xxxxxxx
    21     21   **  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
    22     22   **  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
    58     58   ** sqlite3utf8LikeCompare()  - Do a LIKE match given two UTF8 char* strings.
    59     59   **
    60     60   */
    61     61   #include <assert.h>
    62     62   #include "sqliteInt.h"
    63     63   #include "vdbeInt.h"
    64     64   
    65         -/*
    66         -** The following macro, LOWERCASE(x), takes an integer representing a
    67         -** unicode code point. The value returned is the same code point folded to
    68         -** lower case, if applicable. SQLite currently understands the upper/lower
    69         -** case relationship between the 26 characters used in the English
    70         -** language only.
    71         -**
    72         -** This means that characters with umlauts etc. will not be folded
    73         -** correctly (unless they are encoded as composite characters, which would
    74         -** doubtless cause much trouble).
    75         -*/
    76         -#define LOWERCASE(x) (x<91?(int)(UpperToLower[x]):x)
    77         -static unsigned char UpperToLower[91] = {
    78         -      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
    79         -     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
    80         -     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
    81         -     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
    82         -    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
    83         -    122,
    84         -};
    85         -
    86     65   /*
    87     66   ** This table maps from the first byte of a UTF-8 character to the number
    88     67   ** of trailing bytes expected. A value '255' indicates that the table key
    89     68   ** is not a legal first byte for a UTF-8 character.
    90     69   */
    91     70   static const u8 xtra_utf8_bytes[256]  = {
    92     71   /* 0xxxxxxx */
................................................................................
   136    115     switch( xtra ){                                      \
   137    116       case 255: c = (int)0xFFFD; break;                  \
   138    117       case 3: c = (c<<6) + *(zIn)++;                     \
   139    118       case 2: c = (c<<6) + *(zIn)++;                     \
   140    119       case 1: c = (c<<6) + *(zIn)++;                     \
   141    120       c -= xtra_utf8_bits[xtra];                         \
   142    121     }                                                    \
          122  +}
          123  +int sqlite3ReadUtf8(const unsigned char *z){
          124  +  int c;
          125  +  READ_UTF8(z, c);
          126  +  return c;
   143    127   }
   144    128   
   145    129   #define SKIP_UTF8(zIn) {                               \
   146    130     zIn += (xtra_utf8_bytes[*(u8 *)zIn] + 1);            \
   147    131   }
   148    132   
   149    133   #define WRITE_UTF8(zOut, c) {                          \
................................................................................
   487    471         READ_UTF16LE(z, c);
   488    472         n++;
   489    473       }
   490    474     }
   491    475     return (z-(char const *)zIn)-((c==0)?2:0);
   492    476   }
   493    477   
   494         -/*
   495         -** Compare two UTF-8 strings for equality using the "LIKE" operator of
   496         -** SQL.  The '%' character matches any sequence of 0 or more
   497         -** characters and '_' matches any single character.  Case is
   498         -** not significant.
   499         -*/
   500         -int sqlite3utf8LikeCompare(
   501         -  const unsigned char *zPattern, 
   502         -  const unsigned char *zString
   503         -){
   504         -  register int c;
   505         -  int c2;
   506         -
   507         -  while( (c = LOWERCASE(*zPattern))!=0 ){
   508         -    switch( c ){
   509         -      case '%': {
   510         -        while( (c=zPattern[1]) == '%' || c == '_' ){
   511         -          if( c=='_' ){
   512         -            if( *zString==0 ) return 0;
   513         -            SKIP_UTF8(zString);
   514         -          }
   515         -          zPattern++;
   516         -        }
   517         -        if( c==0 ) return 1;
   518         -        c = LOWERCASE(c);
   519         -        while( (c2=LOWERCASE(*zString))!=0 ){
   520         -          while( c2 != 0 && c2 != c ){ 
   521         -            zString++;
   522         -            c2 = LOWERCASE(*zString); 
   523         -          }
   524         -          if( c2==0 ) return 0;
   525         -          if( sqlite3utf8LikeCompare(&zPattern[1],zString) ) return 1;
   526         -          SKIP_UTF8(zString);
   527         -        }
   528         -        return 0;
   529         -      }
   530         -      case '_': {
   531         -        if( *zString==0 ) return 0;
   532         -        SKIP_UTF8(zString);
   533         -        zPattern++;
   534         -        break;
   535         -      }
   536         -      default: {
   537         -        if( c != LOWERCASE(*zString) ) return 0;
   538         -        zPattern++;
   539         -        zString++;
   540         -        break;
   541         -      }
   542         -    }
   543         -  }
   544         -  return *zString==0;
   545         -}
   546         -
   547    478   /*
   548    479   ** UTF-16 implementation of the substr()
   549    480   */
   550    481   void sqlite3utf16Substr(
   551    482     sqlite3_context *context,
   552    483     int argc,
   553    484     sqlite3_value **argv

Changes to src/util.c.

    10     10   **
    11     11   *************************************************************************
    12     12   ** Utility functions used throughout sqlite.
    13     13   **
    14     14   ** This file contains functions for allocating memory, comparing
    15     15   ** strings, and stuff like that.
    16     16   **
    17         -** $Id: util.c,v 1.113 2004/08/18 02:10:15 drh Exp $
           17  +** $Id: util.c,v 1.114 2004/08/31 00:52:37 drh Exp $
    18     18   */
    19     19   #include "sqliteInt.h"
    20     20   #include <stdarg.h>
    21     21   #include <ctype.h>
    22     22   
    23     23   #if SQLITE_DEBUG>2 && defined(__GLIBC__)
    24     24   #include <execinfo.h>
................................................................................
   529    529       }
   530    530     }
   531    531   }
   532    532   
   533    533   /* An array to map all upper-case characters into their corresponding
   534    534   ** lower-case character. 
   535    535   */
   536         -static unsigned char UpperToLower[] = {
          536  +const unsigned char sqlite3UpperToLower[] = {
   537    537         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
   538    538        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
   539    539        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
   540    540        54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
   541    541       104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
   542    542       122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107,
   543    543       108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
................................................................................
   546    546       162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,
   547    547       180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,
   548    548       198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,
   549    549       216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,
   550    550       234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,
   551    551       252,253,254,255
   552    552   };
          553  +#define UpperToLower sqlite3UpperToLower
   553    554   
   554    555   /*
   555    556   ** This function computes a hash on the name of a keyword.
   556    557   ** Case is not significant.
   557    558   */
   558    559   int sqlite3HashNoCase(const char *z, int n){
   559    560     int h = 0;
................................................................................
   758    759   int sqlite3FitsIn64Bits(const char *zNum){
   759    760     int i, c;
   760    761     if( *zNum=='-' || *zNum=='+' ) zNum++;
   761    762     for(i=0; (c=zNum[i])>='0' && c<='9'; i++){}
   762    763     return i<19 || (i==19 && memcmp(zNum,"9223372036854775807",19)<=0);
   763    764   }
   764    765   
   765         -#if 1  /* We are now always UTF-8 */
   766         -/*
   767         -** X is a pointer to the first byte of a UTF-8 character.  Increment
   768         -** X so that it points to the next character.  This only works right
   769         -** if X points to a well-formed UTF-8 string.
   770         -*/
   771         -#define sqliteNextChar(X)  while( (0xc0&*++(X))==0x80 ){}
   772         -#define sqliteCharVal(X)   sqlite3ReadUtf8(X)
   773         -
   774         -#else /* !defined(SQLITE_UTF8) */
   775         -/*
   776         -** For iso8859 encoding, the next character is just the next byte.
   777         -*/
   778         -#define sqliteNextChar(X)  (++(X));
   779         -#define sqliteCharVal(X)   ((int)*(X))
   780         -
   781         -#endif /* defined(SQLITE_UTF8) */
   782         -
   783         -
   784         -#if 1  /* We are now always UTF-8 */
   785         -/*
   786         -** Convert the UTF-8 character to which z points into a 31-bit
   787         -** UCS character.  This only works right if z points to a well-formed
   788         -** UTF-8 string.
   789         -*/
   790         -int sqlite3ReadUtf8(const unsigned char *z){
   791         -  int c;
   792         -  static const char initVal[] = {
   793         -      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
   794         -     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
   795         -     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
   796         -     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
   797         -     60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
   798         -     75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
   799         -     90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
   800         -    105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
   801         -    120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
   802         -    135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
   803         -    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
   804         -    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
   805         -    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,   0,   1,   2,
   806         -      3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,
   807         -     18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,   0,
   808         -      1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
   809         -      0,   1,   2,   3,   4,   5,   6,   7,   0,   1,   2,   3,   0,   1, 254,
   810         -    255,
   811         -  };
   812         -  c = initVal[*(z++)];
   813         -  while( (0xc0&*z)==0x80 ){
   814         -    c = (c<<6) | (0x3f&*(z++));
   815         -  }
   816         -  return c;
   817         -}
   818         -#endif
   819         -
   820         -/*
   821         -** Compare two UTF-8 strings for equality where the first string can
   822         -** potentially be a "glob" expression.  Return true (1) if they
   823         -** are the same and false (0) if they are different.
   824         -**
   825         -** Globbing rules:
   826         -**
   827         -**      '*'       Matches any sequence of zero or more characters.
   828         -**
   829         -**      '?'       Matches exactly one character.
   830         -**
   831         -**     [...]      Matches one character from the enclosed list of
   832         -**                characters.
   833         -**
   834         -**     [^...]     Matches one character not in the enclosed list.
   835         -**
   836         -** With the [...] and [^...] matching, a ']' character can be included
   837         -** in the list by making it the first character after '[' or '^'.  A
   838         -** range of characters can be specified using '-'.  Example:
   839         -** "[a-z]" matches any single lower-case letter.  To match a '-', make
   840         -** it the last character in the list.
   841         -**
   842         -** This routine is usually quick, but can be N**2 in the worst case.
   843         -**
   844         -** Hints: to match '*' or '?', put them in "[]".  Like this:
   845         -**
   846         -**         abc[*]xyz        Matches "abc*xyz" only
   847         -*/
   848         -int 
   849         -sqlite3GlobCompare(const unsigned char *zPattern, const unsigned char *zString){
   850         -  register int c;
   851         -  int invert;
   852         -  int seen;
   853         -  int c2;
   854         -
   855         -  while( (c = *zPattern)!=0 ){
   856         -    switch( c ){
   857         -      case '*':
   858         -        while( (c=zPattern[1]) == '*' || c == '?' ){
   859         -          if( c=='?' ){
   860         -            if( *zString==0 ) return 0;
   861         -            sqliteNextChar(zString);
   862         -          }
   863         -          zPattern++;
   864         -        }
   865         -        if( c==0 ) return 1;
   866         -        if( c=='[' ){
   867         -          while( *zString && sqlite3GlobCompare(&zPattern[1],zString)==0 ){
   868         -            sqliteNextChar(zString);
   869         -          }
   870         -          return *zString!=0;
   871         -        }else{
   872         -          while( (c2 = *zString)!=0 ){
   873         -            while( c2 != 0 && c2 != c ){ c2 = *++zString; }
   874         -            if( c2==0 ) return 0;
   875         -            if( sqlite3GlobCompare(&zPattern[1],zString) ) return 1;
   876         -            sqliteNextChar(zString);
   877         -          }
   878         -          return 0;
   879         -        }
   880         -      case '?': {
   881         -        if( *zString==0 ) return 0;
   882         -        sqliteNextChar(zString);
   883         -        zPattern++;
   884         -        break;
   885         -      }
   886         -      case '[': {
   887         -        int prior_c = 0;
   888         -        seen = 0;
   889         -        invert = 0;
   890         -        c = sqliteCharVal(zString);
   891         -        if( c==0 ) return 0;
   892         -        c2 = *++zPattern;
   893         -        if( c2=='^' ){ invert = 1; c2 = *++zPattern; }
   894         -        if( c2==']' ){
   895         -          if( c==']' ) seen = 1;
   896         -          c2 = *++zPattern;
   897         -        }
   898         -        while( (c2 = sqliteCharVal(zPattern))!=0 && c2!=']' ){
   899         -          if( c2=='-' && zPattern[1]!=']' && zPattern[1]!=0 && prior_c>0 ){
   900         -            zPattern++;
   901         -            c2 = sqliteCharVal(zPattern);
   902         -            if( c>=prior_c && c<=c2 ) seen = 1;
   903         -            prior_c = 0;
   904         -          }else if( c==c2 ){
   905         -            seen = 1;
   906         -            prior_c = c2;
   907         -          }else{
   908         -            prior_c = c2;
   909         -          }
   910         -          sqliteNextChar(zPattern);
   911         -        }
   912         -        if( c2==0 || (seen ^ invert)==0 ) return 0;
   913         -        sqliteNextChar(zString);
   914         -        zPattern++;
   915         -        break;
   916         -      }
   917         -      default: {
   918         -        if( c != *zString ) return 0;
   919         -        zPattern++;
   920         -        zString++;
   921         -        break;
   922         -      }
   923         -    }
   924         -  }
   925         -  return *zString==0;
   926         -}
   927    766   
   928    767   /*
   929    768   ** Change the sqlite.magic from SQLITE_MAGIC_OPEN to SQLITE_MAGIC_BUSY.
   930    769   ** Return an error (non-zero) if the magic was not SQLITE_MAGIC_OPEN
   931    770   ** when this routine is called.
   932    771   **
   933    772   ** This routine is a attempt to detect if two threads use the