SQLite

Check-in [39a415eaa6]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Optimisation for unicode encoding conversion routines. (CVS 1614)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 39a415eaa65964742e40b7ea4d471fa04007c6c9
User & Date: danielk1977 2004-06-18 04:24:54.000
Context
2004-06-18
06:02
Fix a couple of gcc warnings. (CVS 1615) (check-in: 960f55f3ec user: danielk1977 tags: trunk)
04:24
Optimisation for unicode encoding conversion routines. (CVS 1614) (check-in: 39a415eaa6 user: danielk1977 tags: trunk)
2004-06-17
19:04
Documentation updates in preparation for the release of version 3.0.0. (CVS 1613) (check-in: 9fb29f7331 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/build.c.
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
**     DROP INDEX
**     creating ID lists
**     BEGIN TRANSACTION
**     COMMIT
**     ROLLBACK
**     PRAGMA
**
** $Id: build.c,v 1.220 2004/06/17 06:13:34 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** This routine is called when a new SQL statement is beginning to
** be parsed.  Check to see if the schema for the database needs







|







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
**     DROP INDEX
**     creating ID lists
**     BEGIN TRANSACTION
**     COMMIT
**     ROLLBACK
**     PRAGMA
**
** $Id: build.c,v 1.221 2004/06/18 04:24:54 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** This routine is called when a new SQL statement is beginning to
** be parsed.  Check to see if the schema for the database needs
955
956
957
958
959
960
961
962
963
964

965
966
967
968
969
970
971
972
973
974
975
976
977
  if( nName<0 ) nName = strlen(zName);
  if( db->xCollNeeded ){
    zExternal = sqliteStrNDup(zName, nName);
    if( !zExternal ) return;
      db->xCollNeeded(db->pCollNeededArg, db, (int)db->enc, zExternal);
  }
  if( db->xCollNeeded16 ){
    if( SQLITE_BIGENDIAN ){
      zExternal = sqlite3utf8to16be(zName, nName);
    }else{

      zExternal = sqlite3utf8to16le(zName, nName);
    }
    if( !zExternal ) return;
    db->xCollNeeded16(db->pCollNeededArg, db, (int)db->enc, zExternal);
  }
  if( zExternal ) sqliteFree(zExternal);
}

static int synthCollSeq(Parse *pParse, CollSeq *pColl){
  /* The collation factory failed to deliver a function but there may be
  ** other versions of this collation function (for other text encodings)
  ** available. Use one of these instead. Avoid a UTF-8 <-> UTF-16
  ** conversion if possible.







<
|
<
>
|
<



<







955
956
957
958
959
960
961

962

963
964

965
966
967

968
969
970
971
972
973
974
  if( nName<0 ) nName = strlen(zName);
  if( db->xCollNeeded ){
    zExternal = sqliteStrNDup(zName, nName);
    if( !zExternal ) return;
      db->xCollNeeded(db->pCollNeededArg, db, (int)db->enc, zExternal);
  }
  if( db->xCollNeeded16 ){

    sqlite3_value *pTmp = sqlite3GetTransientValue(db);

    sqlite3ValueSetStr(pTmp, -1, zName, SQLITE_UTF8, SQLITE_STATIC);
    zExternal = sqlite3ValueText(pTmp, SQLITE_UTF16NATIVE);

    if( !zExternal ) return;
    db->xCollNeeded16(db->pCollNeededArg, db, (int)db->enc, zExternal);
  }

}

static int synthCollSeq(Parse *pParse, CollSeq *pColl){
  /* The collation factory failed to deliver a function but there may be
  ** other versions of this collation function (for other text encodings)
  ** available. Use one of these instead. Avoid a UTF-8 <-> UTF-16
  ** conversion if possible.
2623
2624
2625
2626
2627
2628
2629












** sqlite3BeginWriteOperation() but there should only be a single
** call to sqlite3EndWriteOperation() at the conclusion of the statement.
*/
void sqlite3EndWriteOperation(Parse *pParse){
  /* Delete me! */
  return;
}



















>
>
>
>
>
>
>
>
>
>
>
>
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
** sqlite3BeginWriteOperation() but there should only be a single
** call to sqlite3EndWriteOperation() at the conclusion of the statement.
*/
void sqlite3EndWriteOperation(Parse *pParse){
  /* Delete me! */
  return;
}

/* 
** Return the transient sqlite3_value object used for encoding conversions
** during SQL compilation.
*/
sqlite3_value *sqlite3GetTransientValue(sqlite *db){
  if( !db->pValue ){
    db->pValue = sqlite3ValueNew();
  }
  return db->pValue;
}

Changes to src/main.c.
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
**
*************************************************************************
** Main file for the SQLite library.  The routines in this file
** implement the programmer interface to the library.  Routines in
** other files are for internal use by SQLite and should not be
** accessed by users of the library.
**
** $Id: main.c,v 1.224 2004/06/16 12:00:56 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>

/*
** A pointer to this structure is used to communicate information







|







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
**
*************************************************************************
** Main file for the SQLite library.  The routines in this file
** implement the programmer interface to the library.  Routines in
** other files are for internal use by SQLite and should not be
** accessed by users of the library.
**
** $Id: main.c,v 1.225 2004/06/18 04:24:54 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>

/*
** A pointer to this structure is used to communicate information
511
512
513
514
515
516
517






518
519
520
521
522
523
524
    CollSeq *pColl = (CollSeq *)sqliteHashData(i);
    sqliteFree(pColl);
  }
  sqlite3HashClear(&db->aCollSeq);

  sqlite3HashClear(&db->aFunc);
  sqlite3Error(db, SQLITE_OK, 0); /* Deallocates any cached error strings. */






  sqliteFree(db);
}

/*
** Rollback all database files.
*/
void sqlite3RollbackAll(sqlite *db){







>
>
>
>
>
>







511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
    CollSeq *pColl = (CollSeq *)sqliteHashData(i);
    sqliteFree(pColl);
  }
  sqlite3HashClear(&db->aCollSeq);

  sqlite3HashClear(&db->aFunc);
  sqlite3Error(db, SQLITE_OK, 0); /* Deallocates any cached error strings. */
  if( db->pValue ){
    sqlite3ValueFree(db->pValue);
  }
  if( db->pErr ){
    sqlite3ValueFree(db->pErr);
  }
  sqliteFree(db);
}

/*
** Rollback all database files.
*/
void sqlite3RollbackAll(sqlite *db){
745
746
747
748
749
750
751

752



753
754
755
756
757
758
759
760
761
762
763
764
765
766
  int iCollateArg,
  void *pUserData,
  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
  void (*xFinal)(sqlite3_context*)
){
  int rc;

  char *zFunctionName8;



  zFunctionName8 = sqlite3utf16to8(zFunctionName, -1, SQLITE_BIGENDIAN);
  if( !zFunctionName8 ){
    return SQLITE_NOMEM;
  }
  rc = sqlite3_create_function(db, zFunctionName8, nArg, eTextRep, 
      iCollateArg, pUserData, xFunc, xStep, xFinal);
  sqliteFree(zFunctionName8);
  return rc;
}

/*
** Register a trace function.  The pArg from the previously registered trace
** is returned.  
**







>
|
>
>
>
|
|


|

<







751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768

769
770
771
772
773
774
775
  int iCollateArg,
  void *pUserData,
  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
  void (*xFinal)(sqlite3_context*)
){
  int rc;
  char const *zFunc8;

  sqlite3_value *pTmp = sqlite3GetTransientValue(db);
  sqlite3ValueSetStr(pTmp, -1, zFunctionName, SQLITE_UTF16NATIVE,SQLITE_STATIC);
  zFunc8 = sqlite3ValueText(pTmp, SQLITE_UTF8);

  if( !zFunc8 ){
    return SQLITE_NOMEM;
  }
  rc = sqlite3_create_function(db, zFunc8, nArg, eTextRep, 
      iCollateArg, pUserData, xFunc, xStep, xFinal);

  return rc;
}

/*
** Register a trace function.  The pArg from the previously registered trace
** is returned.  
**
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880



881
882
883
884
885


886
887
888
889
890
891
892
893
894




895
896
897
898
899
900
901
902
}

/*
** Return UTF-8 encoded English language explanation of the most recent
** error.
*/
const char *sqlite3_errmsg(sqlite3 *db){
  if( !db ){
    /* If db is NULL, then assume that a malloc() failed during an
    ** sqlite3_open() call.
    */
    return sqlite3ErrStr(SQLITE_NOMEM);
  }
  if( db->zErrMsg ){
    return db->zErrMsg;
  }
  return sqlite3ErrStr(db->errCode);
}

/*
** Return UTF-16 encoded English language explanation of the most recent
** error.
*/
const void *sqlite3_errmsg16(sqlite3 *db){
  if( !db ){
    /* If db is NULL, then assume that a malloc() failed during an
    ** sqlite3_open() call. We have a static version of the string 
    ** "out of memory" encoded using UTF-16 just for this purpose.
    **
    ** Because all the characters in the string are in the unicode
    ** range 0x00-0xFF, if we pad the big-endian string with a 
    ** zero byte, we can obtain the little-endian string with
    ** &big_endian[1].
    */
    static char outOfMemBe[] = {
      0, 'o', 0, 'u', 0, 't', 0, ' ', 
      0, 'o', 0, 'f', 0, ' ', 
      0, 'm', 0, 'e', 0, 'm', 0, 'o', 0, 'r', 0, 'y', 0, 0, 0
    };
    static char *outOfMemLe = &outOfMemBe[1];




    if( SQLITE_BIGENDIAN ){
      return (void *)outOfMemBe;
    }else{
      return (void *)outOfMemLe;
    }


  }
  if( !db->zErrMsg16 ){
    char const *zErr8 = sqlite3_errmsg(db);
    if( SQLITE_BIGENDIAN ){
      db->zErrMsg16 = sqlite3utf8to16be(zErr8, -1);
    }else{
      db->zErrMsg16 = sqlite3utf8to16le(zErr8, -1);
    }
  }




  return db->zErrMsg16;
}

int sqlite3_errcode(sqlite3 *db){
  if( !db ) return SQLITE_NOMEM;
  return db->errCode;
}








|





|
|

|







<
<
<
<
<
|
|
|
|
|
|
|
|
|
|
<

>
>
>
|
<
<
<

>
>
|
<
<
<
<
<
<
|
|
>
>
>
>
|







849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872





873
874
875
876
877
878
879
880
881
882

883
884
885
886
887



888
889
890
891






892
893
894
895
896
897
898
899
900
901
902
903
904
905
}

/*
** Return UTF-8 encoded English language explanation of the most recent
** error.
*/
const char *sqlite3_errmsg(sqlite3 *db){
  if( !db || !db->pErr ){
    /* If db is NULL, then assume that a malloc() failed during an
    ** sqlite3_open() call.
    */
    return sqlite3ErrStr(SQLITE_NOMEM);
  }
  if( !sqlite3_value_text(db->pErr) ){
    return sqlite3ErrStr(db->errCode);
  }
  return sqlite3_value_text(db->pErr);
}

/*
** Return UTF-16 encoded English language explanation of the most recent
** error.
*/
const void *sqlite3_errmsg16(sqlite3 *db){





  /* Because all the characters in the string are in the unicode
  ** range 0x00-0xFF, if we pad the big-endian string with a 
  ** zero byte, we can obtain the little-endian string with
  ** &big_endian[1].
  */
  static char outOfMemBe[] = {
    0, 'o', 0, 'u', 0, 't', 0, ' ', 
    0, 'o', 0, 'f', 0, ' ', 
    0, 'm', 0, 'e', 0, 'm', 0, 'o', 0, 'r', 0, 'y', 0, 0, 0
  };


  if( db && db->pErr ){
    if( !sqlite3_value_text16(db->pErr) ){
      sqlite3ValueSetStr(db->pErr, -1, sqlite3ErrStr(db->errCode),
          SQLITE_UTF8, SQLITE_STATIC);



    }
    if( sqlite3_value_text16(db->pErr) ){
      return sqlite3_value_text16(db->pErr);
    }






  }  

  /* If db is NULL, then assume that a malloc() failed during an
  ** sqlite3_open() call. We have a static version of the string 
  ** "out of memory" encoded using UTF-16 just for this purpose.
  */
  return (void *)(&outOfMemBe[SQLITE_UTF16NATIVE==SQLITE_UTF16LE?1:0]);
}

int sqlite3_errcode(sqlite3 *db){
  if( !db ) return SQLITE_NOMEM;
  return db->errCode;
}

1043
1044
1045
1046
1047
1048
1049
1050
1051
1052

1053


1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
  sqlite3_stmt **ppStmt,    /* OUT: A pointer to the prepared statement */
  const void **pzTail       /* OUT: End of parsed string */
){
  /* This function currently works by first transforming the UTF-16
  ** encoded string to UTF-8, then invoking sqlite3_prepare(). The
  ** tricky bit is figuring out the pointer to return in *pzTail.
  */
  char *zSql8 = 0;
  char const *zTail8 = 0;
  int rc;




  zSql8 = sqlite3utf16to8(zSql, nBytes, SQLITE_BIGENDIAN);
  if( !zSql8 ){
    sqlite3Error(db, SQLITE_NOMEM, 0);
    return SQLITE_NOMEM;
  }
  rc = sqlite3_prepare(db, zSql8, -1, ppStmt, &zTail8);

  if( zTail8 && pzTail ){
    /* If sqlite3_prepare returns a tail pointer, we calculate the
    ** equivalent pointer into the UTF-16 string by counting the unicode
    ** characters between zSql8 and zTail8, and then returning a pointer
    ** the same number of characters into the UTF-16 string.
    */
    int chars_parsed = sqlite3utf8CharLen(zSql8, zTail8-zSql8);
    *pzTail = (u8 *)zSql + sqlite3utf16ByteLen(zSql, chars_parsed);
  }
  sqliteFree(zSql8);
 
  return rc;
}

/*
** This routine does the work of opening a database on behalf of
** sqlite3_open() and sqlite3_open16(). The database filename "zFilename"  







|


>

>
>
|















<







1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075

1076
1077
1078
1079
1080
1081
1082
  sqlite3_stmt **ppStmt,    /* OUT: A pointer to the prepared statement */
  const void **pzTail       /* OUT: End of parsed string */
){
  /* This function currently works by first transforming the UTF-16
  ** encoded string to UTF-8, then invoking sqlite3_prepare(). The
  ** tricky bit is figuring out the pointer to return in *pzTail.
  */
  char const *zSql8 = 0;
  char const *zTail8 = 0;
  int rc;
  sqlite3_value *pTmp;

  pTmp = sqlite3GetTransientValue(db);
  sqlite3ValueSetStr(pTmp, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC);
  zSql8 = sqlite3ValueText(pTmp, SQLITE_UTF8);
  if( !zSql8 ){
    sqlite3Error(db, SQLITE_NOMEM, 0);
    return SQLITE_NOMEM;
  }
  rc = sqlite3_prepare(db, zSql8, -1, ppStmt, &zTail8);

  if( zTail8 && pzTail ){
    /* If sqlite3_prepare returns a tail pointer, we calculate the
    ** equivalent pointer into the UTF-16 string by counting the unicode
    ** characters between zSql8 and zTail8, and then returning a pointer
    ** the same number of characters into the UTF-16 string.
    */
    int chars_parsed = sqlite3utf8CharLen(zSql8, zTail8-zSql8);
    *pzTail = (u8 *)zSql + sqlite3utf16ByteLen(zSql, chars_parsed);
  }

 
  return rc;
}

/*
** This routine does the work of opening a database on behalf of
** sqlite3_open() and sqlite3_open16(). The database filename "zFilename"  
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150

1151
1152
1153
1154
1155
1156
1157
    db->temp_store = 2;
    db->nMaster = 0;    /* Disable atomic multi-file commit for :memory: */
  }else{
    db->nMaster = -1;   /* Size of master journal filename initially unknown */
  }
  rc = sqlite3BtreeFactory(db, zFilename, 0, MAX_PAGES, &db->aDb[0].pBt);
  if( rc!=SQLITE_OK ){
    /* FIX ME: sqlite3BtreeFactory() should call sqlite3Error(). */
    sqlite3Error(db, rc, 0);
    db->magic = SQLITE_MAGIC_CLOSED;
    goto opendb_out;
  }
  db->aDb[0].zName = "main";
  db->aDb[1].zName = "temp";

  /* Register all built-in functions, but do not attempt to read the
  ** database schema yet. This is delayed until the first time the database
  ** is accessed.
  */
  sqlite3RegisterBuiltinFunctions(db);
  if( rc==SQLITE_OK ){

    db->magic = SQLITE_MAGIC_OPEN;
  }else{
    sqlite3Error(db, rc, "%s", zErrMsg, 0);
    if( zErrMsg ) sqliteFree(zErrMsg);
    db->magic = SQLITE_MAGIC_CLOSED;
  }








<













>







1135
1136
1137
1138
1139
1140
1141

1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
    db->temp_store = 2;
    db->nMaster = 0;    /* Disable atomic multi-file commit for :memory: */
  }else{
    db->nMaster = -1;   /* Size of master journal filename initially unknown */
  }
  rc = sqlite3BtreeFactory(db, zFilename, 0, MAX_PAGES, &db->aDb[0].pBt);
  if( rc!=SQLITE_OK ){

    sqlite3Error(db, rc, 0);
    db->magic = SQLITE_MAGIC_CLOSED;
    goto opendb_out;
  }
  db->aDb[0].zName = "main";
  db->aDb[1].zName = "temp";

  /* Register all built-in functions, but do not attempt to read the
  ** database schema yet. This is delayed until the first time the database
  ** is accessed.
  */
  sqlite3RegisterBuiltinFunctions(db);
  if( rc==SQLITE_OK ){
    sqlite3Error(db, SQLITE_OK, 0);
    db->magic = SQLITE_MAGIC_OPEN;
  }else{
    sqlite3Error(db, rc, "%s", zErrMsg, 0);
    if( zErrMsg ) sqliteFree(zErrMsg);
    db->magic = SQLITE_MAGIC_CLOSED;
  }

1173
1174
1175
1176
1177
1178
1179
1180
1181

1182
1183
1184


1185
1186
1187
1188
1189
1190
1191
1192
1193


1194

1195
1196
1197
1198
1199
1200
1201
/*
** Open a new database handle.
*/
int sqlite3_open16(
  const void *zFilename, 
  sqlite3 **ppDb
){
  char *zFilename8;   /* zFilename encoded in UTF-8 instead of UTF-16 */
  int rc;


  assert( ppDb );



  zFilename8 = sqlite3utf16to8(zFilename, -1, SQLITE_BIGENDIAN);
  if( !zFilename8 ){
    *ppDb = 0;
    return SQLITE_NOMEM;
  }
  rc = openDatabase(zFilename8, ppDb);
  if( rc==SQLITE_OK && *ppDb ){
    sqlite3_exec(*ppDb, "PRAGMA encoding = 'UTF-16'", 0, 0, 0);
  }


  sqliteFree(zFilename8);


  return rc;
}

/*
** The following routine destroys a virtual machine that is created by
** the sqlite3_compile() routine. The integer returned is an SQLITE_







|
|
>


|
>
>
|
|
<
<
<
|
|
|
|
>
>
|
>







1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194



1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
/*
** Open a new database handle.
*/
int sqlite3_open16(
  const void *zFilename, 
  sqlite3 **ppDb
){
  char const *zFilename8;   /* zFilename encoded in UTF-8 instead of UTF-16 */
  int rc = SQLITE_NOMEM;
  sqlite3_value *pVal;

  assert( ppDb );
  *ppDb = 0;
  pVal = sqlite3ValueNew();
  sqlite3ValueSetStr(pVal, -1, zFilename, SQLITE_UTF16NATIVE, SQLITE_STATIC);
  zFilename8 = sqlite3ValueText(pVal, SQLITE_UTF8);
  if( zFilename8 ){



    rc = openDatabase(zFilename8, ppDb);
    if( rc==SQLITE_OK && *ppDb ){
      sqlite3_exec(*ppDb, "PRAGMA encoding = 'UTF-16'", 0, 0, 0);
    }
  }
  if( pVal ){
    sqlite3ValueFree(pVal);
  }

  return rc;
}

/*
** The following routine destroys a virtual machine that is created by
** the sqlite3_compile() routine. The integer returned is an SQLITE_
1269
1270
1271
1272
1273
1274
1275



1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
  sqlite3* db, 
  const char *zName, 
  int enc, 
  void* pCtx,
  int(*xCompare)(void*,int,const void*,int,const void*)
){
  int rc;



  char *zName8 = sqlite3utf16to8(zName, -1, SQLITE_BIGENDIAN);
  rc = sqlite3_create_collation(db, zName8, enc, pCtx, xCompare);
  sqliteFree(zName8);
  return rc;
}

/*
** Register a collation sequence factory callback with the database handle
** db. Replace any previously installed collation sequence factory.
*/
int sqlite3_collation_needed(







>
>
>
|
|
<
<







1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288


1289
1290
1291
1292
1293
1294
1295
  sqlite3* db, 
  const char *zName, 
  int enc, 
  void* pCtx,
  int(*xCompare)(void*,int,const void*,int,const void*)
){
  int rc;
  char const *zName8;
  sqlite3_value *pTmp = sqlite3GetTransientValue(db);
  sqlite3ValueSetStr(pTmp, -1, zName, SQLITE_UTF16NATIVE, SQLITE_STATIC);
  zName8 = sqlite3ValueText(pTmp, SQLITE_UTF8);
  return sqlite3_create_collation(db, zName8, enc, pCtx, xCompare);


}

/*
** Register a collation sequence factory callback with the database handle
** db. Replace any previously installed collation sequence factory.
*/
int sqlite3_collation_needed(
Changes to src/sqliteInt.h.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.286 2004/06/17 05:36:44 danielk1977 Exp $
*/
#include "config.h"
#include "sqlite3.h"
#include "hash.h"
#include "parse.h"
#include <stdio.h>
#include <stdlib.h>













|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.287 2004/06/18 04:24:54 danielk1977 Exp $
*/
#include "config.h"
#include "sqlite3.h"
#include "hash.h"
#include "parse.h"
#include <stdio.h>
#include <stdlib.h>
190
191
192
193
194
195
196

197
198
199
200
201
202
203
# define sqliteMallocRaw(X) sqlite3Malloc_(X,0,__FILE__,__LINE__)
# define sqliteFree(X)      sqlite3Free_(X,__FILE__,__LINE__)
# define sqliteRealloc(X,Y) sqlite3Realloc_(X,Y,__FILE__,__LINE__)
# define sqliteStrDup(X)    sqlite3StrDup_(X,__FILE__,__LINE__)
# define sqliteStrNDup(X,Y) sqlite3StrNDup_(X,Y,__FILE__,__LINE__)
  void sqlite3StrRealloc(char**);
#else

# define sqlite3Realloc_(X,Y) sqliteRealloc(X,Y)
# define sqlite3StrRealloc(X)
#endif

/*
** This variable gets set if malloc() ever fails.  After it gets set,
** the SQLite library shuts down permanently.







>







190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# define sqliteMallocRaw(X) sqlite3Malloc_(X,0,__FILE__,__LINE__)
# define sqliteFree(X)      sqlite3Free_(X,__FILE__,__LINE__)
# define sqliteRealloc(X,Y) sqlite3Realloc_(X,Y,__FILE__,__LINE__)
# define sqliteStrDup(X)    sqlite3StrDup_(X,__FILE__,__LINE__)
# define sqliteStrNDup(X,Y) sqlite3StrNDup_(X,Y,__FILE__,__LINE__)
  void sqlite3StrRealloc(char**);
#else
# define sqlite3FreeX sqliteFree
# define sqlite3Realloc_(X,Y) sqliteRealloc(X,Y)
# define sqlite3StrRealloc(X)
#endif

/*
** This variable gets set if malloc() ever fails.  After it gets set,
** the SQLite library shuts down permanently.
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432





433
434
435
436
437
438
439
#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
  int (*xProgress)(void *);     /* The progress callback */
  void *pProgressArg;           /* Argument to the progress callback */
  int nProgressOps;             /* Number of opcodes for progress callback */
#endif

  int errCode;                  /* Most recent error code (SQLITE_*) */
  char *zErrMsg;                /* Most recent error message (UTF-8 encoded) */
  void *zErrMsg16;              /* Most recent error message (UTF-16 encoded) */
  u8 enc;                       /* Text encoding for this database. */
  u8 autoCommit;                /* The auto-commit flag. */
  int nMaster;                  /* Length of master journal name. -1=unknown */
  void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*);
  void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*);
  void *pCollNeededArg;





};

/*
** Possible values for the sqlite.flags and or Db.flags fields.
**
** On sqlite.flags, the SQLITE_InTrans value means that we have
** executed a BEGIN.  On Db.flags, SQLITE_InTrans means a statement







<
<






>
>
>
>
>







419
420
421
422
423
424
425


426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
  int (*xProgress)(void *);     /* The progress callback */
  void *pProgressArg;           /* Argument to the progress callback */
  int nProgressOps;             /* Number of opcodes for progress callback */
#endif

  int errCode;                  /* Most recent error code (SQLITE_*) */


  u8 enc;                       /* Text encoding for this database. */
  u8 autoCommit;                /* The auto-commit flag. */
  int nMaster;                  /* Length of master journal name. -1=unknown */
  void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*);
  void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*);
  void *pCollNeededArg;
  sqlite3_value *pValue;        /* Value used for transient conversions */
  sqlite3_value *pErr;          /* Most recent error message */

  char *zErrMsg;                /* Most recent error message (UTF-8 encoded) */
  char *zErrMsg16;              /* Most recent error message (UTF-8 encoded) */
};

/*
** Possible values for the sqlite.flags and or Db.flags fields.
**
** On sqlite.flags, the SQLITE_InTrans value means that we have
** executed a BEGIN.  On Db.flags, SQLITE_InTrans means a statement
1209
1210
1211
1212
1213
1214
1215

1216
1217
1218
1219
1220
1221
1222
#ifdef SQLITE_DEBUG
  void *sqlite3Malloc_(int,int,char*,int);
  void sqlite3Free_(void*,char*,int);
  void *sqlite3Realloc_(void*,int,char*,int);
  char *sqlite3StrDup_(const char*,char*,int);
  char *sqlite3StrNDup_(const char*, int,char*,int);
  void sqlite3CheckMemory(void*,int);

#else
  void *sqliteMalloc(int);
  void *sqliteMallocRaw(int);
  void sqliteFree(void*);
  void *sqliteRealloc(void*,int);
  char *sqliteStrDup(const char*);
  char *sqliteStrNDup(const char*, int);







>







1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
#ifdef SQLITE_DEBUG
  void *sqlite3Malloc_(int,int,char*,int);
  void sqlite3Free_(void*,char*,int);
  void *sqlite3Realloc_(void*,int,char*,int);
  char *sqlite3StrDup_(const char*,char*,int);
  char *sqlite3StrNDup_(const char*, int,char*,int);
  void sqlite3CheckMemory(void*,int);
  void sqlite3FreeX(void *p);
#else
  void *sqliteMalloc(int);
  void *sqliteMallocRaw(int);
  void sqliteFree(void*);
  void *sqliteRealloc(void*,int);
  char *sqliteStrDup(const char*);
  char *sqliteStrNDup(const char*, int);
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417

int sqlite3FixExprList(DbFixer*, ExprList*);
int sqlite3FixTriggerStep(DbFixer*, TriggerStep*);
double sqlite3AtoF(const char *z, const char **);
char *sqlite3_snprintf(int,char*,const char*,...);
int sqlite3GetInt32(const char *, int*);
int sqlite3GetInt64(const char *, i64*);
int sqlite3FitsIn64Bits(const char *);
unsigned char *sqlite3utf16to8(const void *pData, int N, int big_endian);
void *sqlite3utf8to16be(const unsigned char *pIn, int N);
void *sqlite3utf8to16le(const unsigned char *pIn, int N);
void sqlite3utf16to16le(void *pData, int N);
void sqlite3utf16to16be(void *pData, int N);
int sqlite3utf16ByteLen(const void *pData, int nChar);
int sqlite3utf8CharLen(const char *pData, int nByte);
int sqlite3utf8LikeCompare(const unsigned char*, const unsigned char*);
int sqlite3PutVarint(unsigned char *, u64);
int sqlite3GetVarint(const unsigned char *, u64 *);
int sqlite3GetVarint32(const unsigned char *, u32 *);
int sqlite3VarintLen(u64 v);
char sqlite3AffinityType(const char *, int);
void sqlite3IndexAffinityStr(Vdbe *, Index *);
void sqlite3TableAffinityStr(Vdbe *, Table *);
char sqlite3CompareAffinity(Expr *pExpr, char aff2);
char const *sqlite3AffinityString(char affinity);
int sqlite3IndexAffinityOk(Expr *pExpr, char idx_affinity);
char sqlite3ExprAffinity(Expr *pExpr);
int sqlite3atoi64(const char*, i64*);
void sqlite3Error(sqlite *, int, const char*,...);
int sqlite3utfTranslate(const void *, int , u8 , void **, int *, u8);
u8 sqlite3UtfReadBom(const void *zData, int nData);
void *sqlite3HexToBlob(const char *z);
int sqlite3TwoPartName(Parse *, Token *, Token *, Token **);
const char *sqlite3ErrStr(int);
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold);
int sqlite3ReadSchema(sqlite *db, char **);
CollSeq *sqlite3FindCollSeq(sqlite *,u8 enc, const char *,int,int);
CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char *zName, int nName);
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
int sqlite3CheckCollSeq(Parse *, CollSeq *);
int sqlite3CheckIndexCollSeq(Parse *, Index *);
int sqlite3CheckObjectName(Parse *, const char *);

const void *sqlite3ValueText(sqlite3_value*, u8);
int sqlite3ValueBytes(sqlite3_value*, u8);
void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8);
void sqlite3ValueFree(sqlite3_value*);
sqlite3_value *sqlite3ValueNew();








<
<
<
<
<
















<
<














|


>
1376
1377
1378
1379
1380
1381
1382





1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398


1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
int sqlite3FixExprList(DbFixer*, ExprList*);
int sqlite3FixTriggerStep(DbFixer*, TriggerStep*);
double sqlite3AtoF(const char *z, const char **);
char *sqlite3_snprintf(int,char*,const char*,...);
int sqlite3GetInt32(const char *, int*);
int sqlite3GetInt64(const char *, i64*);
int sqlite3FitsIn64Bits(const char *);





int sqlite3utf16ByteLen(const void *pData, int nChar);
int sqlite3utf8CharLen(const char *pData, int nByte);
int sqlite3utf8LikeCompare(const unsigned char*, const unsigned char*);
int sqlite3PutVarint(unsigned char *, u64);
int sqlite3GetVarint(const unsigned char *, u64 *);
int sqlite3GetVarint32(const unsigned char *, u32 *);
int sqlite3VarintLen(u64 v);
char sqlite3AffinityType(const char *, int);
void sqlite3IndexAffinityStr(Vdbe *, Index *);
void sqlite3TableAffinityStr(Vdbe *, Table *);
char sqlite3CompareAffinity(Expr *pExpr, char aff2);
char const *sqlite3AffinityString(char affinity);
int sqlite3IndexAffinityOk(Expr *pExpr, char idx_affinity);
char sqlite3ExprAffinity(Expr *pExpr);
int sqlite3atoi64(const char*, i64*);
void sqlite3Error(sqlite *, int, const char*,...);


void *sqlite3HexToBlob(const char *z);
int sqlite3TwoPartName(Parse *, Token *, Token *, Token **);
const char *sqlite3ErrStr(int);
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold);
int sqlite3ReadSchema(sqlite *db, char **);
CollSeq *sqlite3FindCollSeq(sqlite *,u8 enc, const char *,int,int);
CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char *zName, int nName);
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
int sqlite3CheckCollSeq(Parse *, CollSeq *);
int sqlite3CheckIndexCollSeq(Parse *, Index *);
int sqlite3CheckObjectName(Parse *, const char *);

const void *sqlite3ValueText(sqlite3_value*, u8);
int sqlite3ValueBytes(sqlite3_value*, u8);
void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8, void(*)(void*));
void sqlite3ValueFree(sqlite3_value*);
sqlite3_value *sqlite3ValueNew();
sqlite3_value *sqlite3GetTransientValue(sqlite *db);
Changes to src/test1.c.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the printf() interface to SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.77 2004/06/15 02:44:19 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include "os.h"
#include <stdlib.h>
#include <string.h>








|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the printf() interface to SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.78 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include "os.h"
#include <stdlib.h>
#include <string.h>

936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
      Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16BE",-1));
      break;
    default:
      assert(0);
  }

  pVal = sqlite3ValueNew();
  sqlite3ValueSetStr(pVal, nA, zA, encin);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
  sqlite3ValueSetStr(pVal, nB, zB, encin);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
  sqlite3ValueFree(pVal);

  Tcl_EvalObjEx(i, pX, 0);
  Tcl_DecrRefCount(pX);
  Tcl_GetIntFromObj(i, Tcl_GetObjResult(i), &res);
  return res;







|

|







936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
      Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16BE",-1));
      break;
    default:
      assert(0);
  }

  pVal = sqlite3ValueNew();
  sqlite3ValueSetStr(pVal, nA, zA, encin, SQLITE_STATIC);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
  sqlite3ValueSetStr(pVal, nB, zB, encin, SQLITE_STATIC);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
  sqlite3ValueFree(pVal);

  Tcl_EvalObjEx(i, pX, 0);
  Tcl_DecrRefCount(pX);
  Tcl_GetIntFromObj(i, Tcl_GetObjResult(i), &res);
  return res;
Changes to src/test5.c.
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
*************************************************************************
** Code for testing the utf.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library. Specifically, the code in this file
** is used for testing the SQLite routines for converting between
** the various supported unicode encodings.
**
** $Id: test5.c,v 1.10 2004/06/12 00:42:35 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "vdbeInt.h"
#include "os.h"         /* to get SQLITE_BIGENDIAN */
#include "tcl.h"
#include <stdlib.h>
#include <string.h>

/*
** Return the number of bytes up to and including the first pair of
** 0x00 bytes in *pStr.
*/
static int utf16_length(const unsigned char *pZ){
  const unsigned char *pC1 = pZ;
  const unsigned char *pC2 = pZ+1;
  while( *pC1 || *pC2 ){
    pC1 += 2;
    pC2 += 2;
  }
  return (pC1-pZ)+2;
}

/*
** tclcmd:   sqlite_utf8to16le  STRING
** title:    Convert STRING from utf-8 to utf-16le
**
** Return the utf-16le encoded string
*/
static int sqlite_utf8to16le(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  unsigned char *out;
  unsigned char *in;
  Tcl_Obj *res;

  if( objc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), "<utf-8 encoded-string>", 0);
    return TCL_ERROR;
  }

  in = Tcl_GetString(objv[1]);
  out = (unsigned char *)sqlite3utf8to16le(in, -1);
  res = Tcl_NewByteArrayObj(out, utf16_length(out));
  sqliteFree(out); 

  Tcl_SetObjResult(interp, res);

  return TCL_OK;
}

/*
** tclcmd:   sqlite_utf8to16be  STRING
** title:    Convert STRING from utf-8 to utf-16be
**
** Return the utf-16be encoded string
*/
static int sqlite_utf8to16be(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  unsigned char *out;
  unsigned char *in;
  Tcl_Obj *res;

  if( objc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), "<utf-8 encoded-string>", 0);
    return TCL_ERROR;
  }

  in = Tcl_GetByteArrayFromObj(objv[1], 0);
  in = Tcl_GetString(objv[1]);
  out = (unsigned char *)sqlite3utf8to16be(in, -1);
  res = Tcl_NewByteArrayObj(out, utf16_length(out));
  sqliteFree(out);

  Tcl_SetObjResult(interp, res);

  return TCL_OK;
}

/*
** tclcmd:   sqlite_utf16to16le  STRING
** title:    Convert STRING from utf-16 in native byte order to utf-16le
**
** Return the utf-16le encoded string.  If the input string contains
** a byte-order mark, then the byte order mark should override the
** native byte order.
*/
static int sqlite_utf16to16le(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  unsigned char *out;
  unsigned char *in;
  int in_len;
  Tcl_Obj *res;

  if( objc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), "<utf-16 encoded-string>", 0);
    return TCL_ERROR;
  }

  in = Tcl_GetByteArrayFromObj(objv[1], &in_len);
  out = (unsigned char *)sqliteMalloc(in_len);
  memcpy(out, in, in_len);
  
  sqlite3utf16to16le(out, -1);
  res = Tcl_NewByteArrayObj(out, utf16_length(out));
  sqliteFree(out);

  Tcl_SetObjResult(interp, res);

  return TCL_OK;
}

/*
** tclcmd:   sqlite_utf16to16be  STRING
** title:    Convert STRING from utf-16 in native byte order to utf-16be
**
** Return the utf-16be encoded string.  If the input string contains
** a byte-order mark, then the byte order mark should override the
** native byte order.
*/
static int sqlite_utf16to16be(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  unsigned char *out;
  unsigned char *in;
  int in_len;
  Tcl_Obj *res;

  if( objc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), "<utf-16 encoded-string>", 0);
    return TCL_ERROR;
  }

  in = Tcl_GetByteArrayFromObj(objv[1], &in_len);
  out = (unsigned char *)sqliteMalloc(in_len);
  memcpy(out, in, in_len);
  
  sqlite3utf16to16be(out, -1);
  res = Tcl_NewByteArrayObj(out, utf16_length(out));
  sqliteFree(out);

  Tcl_SetObjResult(interp, res);

  return TCL_OK;
}

/*
** tclcmd:   sqlite_utf16to8  STRING
** title:    Convert STRING from utf-16 in native byte order to utf-8
**
** Return the utf-8 encoded string.  If the input string contains
** a byte-order mark, then the byte order mark should override the
** native byte order.
*/
static int sqlite_utf16to8(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  unsigned char *out;
  unsigned char *in;
  Tcl_Obj *res;

  if( objc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), " <utf-16 encoded-string>", 0);
    return TCL_ERROR;
  }

  in = Tcl_GetByteArrayFromObj(objv[1], 0);
  out = sqlite3utf16to8(in, -1, SQLITE_BIGENDIAN);
  res = Tcl_NewByteArrayObj(out, strlen(out)+1);
  sqliteFree(out);

  Tcl_SetObjResult(interp, res);

  return TCL_OK;
}

/*
** The first argument is a TCL UTF-8 string. Return the byte array
** object with the encoded representation of the string, including
** the NULL terminator.
*/
static int binarize(
  void * clientData,







|








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26





























































































































































































27
28
29
30
31
32
33
*************************************************************************
** Code for testing the utf.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library. Specifically, the code in this file
** is used for testing the SQLite routines for converting between
** the various supported unicode encodings.
**
** $Id: test5.c,v 1.11 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "vdbeInt.h"
#include "os.h"         /* to get SQLITE_BIGENDIAN */
#include "tcl.h"
#include <stdlib.h>
#include <string.h>






























































































































































































/*
** The first argument is a TCL UTF-8 string. Return the byte array
** object with the encoded representation of the string, including
** the NULL terminator.
*/
static int binarize(
  void * clientData,
277
278
279
280
281
282
283






















































































284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299


300
301
302
303
304
305
306
307
      zVal = sqlite3_value_text(&val);
    }
  }

  return TCL_OK;
}
























































































/*
** Register commands with the TCL interpreter.
*/
int Sqlitetest5_Init(Tcl_Interp *interp){
  static struct {
    char *zName;
    Tcl_ObjCmdProc *xProc;
  } aCmd[] = {
    { "sqlite_utf16to8",         (Tcl_ObjCmdProc*)sqlite_utf16to8    },
    { "sqlite_utf8to16le",       (Tcl_ObjCmdProc*)sqlite_utf8to16le  },
    { "sqlite_utf8to16be",       (Tcl_ObjCmdProc*)sqlite_utf8to16be  },
    { "sqlite_utf16to16le",      (Tcl_ObjCmdProc*)sqlite_utf16to16le },
    { "sqlite_utf16to16be",      (Tcl_ObjCmdProc*)sqlite_utf16to16be },
    { "binarize",                (Tcl_ObjCmdProc*)binarize },
    { "test_value_overhead",     (Tcl_ObjCmdProc*)test_value_overhead },


  };
  int i;
  for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
    Tcl_CreateObjCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
  }
  return SQLITE_OK;
}








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>









<
<
<
<
<


>
>








88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189





190
191
192
193
194
195
196
197
198
199
200
201
      zVal = sqlite3_value_text(&val);
    }
  }

  return TCL_OK;
}

static u8 name_to_enc(Tcl_Interp *interp, Tcl_Obj *pObj){
  struct EncName {
    char *zName;
    u8 enc;
  } encnames[] = {
    { "UTF8", SQLITE_UTF8 },
    { "UTF16LE", SQLITE_UTF16LE },
    { "UTF16BE", SQLITE_UTF16BE },
    { "UTF16", SQLITE_UTF16NATIVE },
    { 0, 0 }
  };
  struct EncName *pEnc;
  char *z = Tcl_GetString(pObj);
  for(pEnc=&encnames[0]; pEnc->zName; pEnc++){
    if( 0==sqlite3StrICmp(z, pEnc->zName) ){
      break;
    }
  }
  if( !pEnc->enc ){
    Tcl_AppendResult(interp, "No such encoding: ", z, 0);
  }
  return pEnc->enc;
}

static int test_translate(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  u8 enc_from;
  u8 enc_to;
  sqlite3_value *pVal;

  const char *z;
  int len;

  if( objc!=4 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), 
        " <string/blob> <from enc> <to enc>", 0
    );
    return TCL_ERROR;
  }

  enc_from = name_to_enc(interp, objv[2]);
  if( !enc_from ) return TCL_ERROR;
  enc_to = name_to_enc(interp, objv[3]);
  if( !enc_to ) return TCL_ERROR;

  pVal = sqlite3ValueNew();

  if( enc_from==SQLITE_UTF8 ){
    z = Tcl_GetString(objv[1]);
    sqlite3ValueSetStr(pVal, -1, z, enc_from, SQLITE_STATIC);
  }else{
    z = Tcl_GetByteArrayFromObj(objv[1], &len);
    sqlite3ValueSetStr(pVal, -1, z, enc_from, SQLITE_STATIC);
  }

  z = sqlite3ValueText(pVal, enc_to);
  len = sqlite3ValueBytes(pVal, enc_to) + (enc_to==SQLITE_UTF8?1:2);
  Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(z, len));

  sqlite3ValueFree(pVal);

  return TCL_OK;
}

/*
** Usage: translate_selftest
**
** Call sqlite3utfSelfTest() to run the internal tests for unicode
** translation. If there is a problem an assert() will fail.
**/
void sqlite3utfSelfTest();
static int test_translate_selftest(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  sqlite3utfSelfTest();
  return SQLITE_OK;
}


/*
** Register commands with the TCL interpreter.
*/
int Sqlitetest5_Init(Tcl_Interp *interp){
  static struct {
    char *zName;
    Tcl_ObjCmdProc *xProc;
  } aCmd[] = {





    { "binarize",                (Tcl_ObjCmdProc*)binarize },
    { "test_value_overhead",     (Tcl_ObjCmdProc*)test_value_overhead },
    { "test_translate",          (Tcl_ObjCmdProc*)test_translate     },
    { "translate_selftest",      (Tcl_ObjCmdProc*)test_translate_selftest},
  };
  int i;
  for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
    Tcl_CreateObjCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
  }
  return SQLITE_OK;
}

Changes to src/tokenize.c.
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
*************************************************************************
** An tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.76 2004/05/31 23:56:43 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*







|







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
*************************************************************************
** An tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.77 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*
697
698
699
700
701
702
703


704
705



706
707
708


709
710


/*
** This routine is the same as the sqlite3_complete() routine described
** above, except that the parameter is required to be UTF-16 encoded, not
** UTF-8.
*/
int sqlite3_complete16(const void *zSql){


  int rc;
  char *zSql8 = sqlite3utf16to8(zSql, -1, SQLITE_BIGENDIAN);



  if( !zSql8 ) return 0;
  rc = sqlite3_complete(zSql8);
  sqliteFree(zSql8);


  return rc;
}








>
>
|
|
>
>
>
|
|
|
>
>


>
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718

/*
** This routine is the same as the sqlite3_complete() routine described
** above, except that the parameter is required to be UTF-16 encoded, not
** UTF-8.
*/
int sqlite3_complete16(const void *zSql){
  sqlite3_value *pVal;
  char *zSql8;
  int rc = 0;

  pVal = sqlite3ValueNew();
  sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC);
  zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8);
  if( zSql8 ){
    rc = sqlite3_complete(zSql8);
    sqliteFree(zSql8);
  }
  sqlite3ValueFree(pVal);
  return rc;
}

Changes to src/utf.c.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.20 2004/06/17 05:36:44 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx







|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.21 2004/06/18 04:24:55 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
44
45
46
47
48
49
50









51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
** When converting malformed UTF-8 strings to UTF-16, one instance of the
** replacement character U+FFFD for each byte that cannot be interpeted as
** part of a valid unicode character.
**
** When converting malformed UTF-16 strings to UTF-8, one instance of the
** replacement character U+FFFD for each pair of bytes that cannot be
** interpeted as part of a valid unicode character.









*/
#include <assert.h>
#include "sqliteInt.h"

typedef struct UtfString UtfString;
struct UtfString {
  unsigned char *pZ;    /* Raw string data */
  int n;                /* Allocated length of pZ in bytes */
  int c;                /* Number of pZ bytes already read or written */
};

/*
** These two macros are used to interpret the first two bytes of the 
** unsigned char array pZ as a 16-bit unsigned int. BE16() for a big-endian
** interpretation, LE16() for little-endian.
*/
#define BE16(pZ) (((u16)((pZ)[0])<<8) + (u16)((pZ)[1]))
#define LE16(pZ) (((u16)((pZ)[1])<<8) + (u16)((pZ)[0]))

/*
** READ_16 interprets the first two bytes of the unsigned char array pZ 
** as a 16-bit unsigned int. If big_endian is non-zero the intepretation
** is big-endian, otherwise little-endian.
*/
#define READ_16(pZ,big_endian) (big_endian?BE16(pZ):LE16(pZ))

/*
** The following macro, LOWERCASE(x), takes an integer representing a
** unicode code point. The value returned is the same code point folded to
** lower case, if applicable. SQLite currently understands the upper/lower
** case relationship between the 26 characters used in the English
** language only.







>
>
>
>
>
>
>
>
>



|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63





















64
65
66
67
68
69
70
** When converting malformed UTF-8 strings to UTF-16, one instance of the
** replacement character U+FFFD for each byte that cannot be interpeted as
** part of a valid unicode character.
**
** When converting malformed UTF-16 strings to UTF-8, one instance of the
** replacement character U+FFFD for each pair of bytes that cannot be
** interpeted as part of a valid unicode character.
**
** This file contains the following public routines:
**
** sqlite3VdbeMemTranslate() - Translate the encoding used by a Mem* string.
** sqlite3VdbeMemHandleBom() - Handle byte-order-marks in UTF16 Mem* strings.
** sqlite3utf16ByteLen()     - Calculate byte-length of a void* UTF16 string.
** sqlite3utf8CharLen()      - Calculate char-length of a char* UTF8 string.
** sqlite3utf8LikeCompare()  - Do a LIKE match given two UTF8 char* strings.
**
*/
#include <assert.h>
#include "sqliteInt.h"
#include "vdbeInt.h"






















/*
** The following macro, LOWERCASE(x), takes an integer representing a
** unicode code point. The value returned is the same code point folded to
** lower case, if applicable. SQLite currently understands the upper/lower
** case relationship between the 26 characters used in the English
** language only.
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115

116








117
118




119



120
121
122
123
124
125
126
127

128
129
130

131
132
133
134
135

136
137
138
139
140
141
142
143





144




145
146
147
148
149
150
151

152
153
154
155
156
157
158
159
160
161
162
163
164
165


166
167
168
169
170
171
172
173
174
175

176
177
178
179
180
181
182
183
184

185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209


210
211
212
213
214
215
216
217
218
219

220
221



222
223
224
225
226
227
228
229


230
231
232
233



234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251

252
253
254
255
256
257

258




259
260
261




262

263

264

265
266
267
268
269
270
271
272



273
274
275
276

277
278
279
280
281


282


283


284
285
286
287
288
289
290
291
292


293
294

295

296






297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318



319
320
321
322
323
324

325
326
327


328
329
330
331
332
333
334
335
336

337
338

339
340
341
342
343


344

345





346
347

348
349
350
351
352
353
354
355
356
357
358
359
360

361

362
363

364

365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384


385
386
387
388
389



390
391
392
393
394
395
396
397






398
399
400
401


402
403


404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425

426
427


428
429

430
431




432
433
434
435
436
437
438
439
440
441



442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462


463

464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766















































     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
    122,
};

/*
** The first parameter, zStr, points at a unicode string. This routine
** reads a single character from the string and returns the codepoint value
** of the character read.
**
** The value of *pEnc is the string encoding. If *pEnc is SQLITE_UTF16LE or
** SQLITE_UTF16BE, and the first character read is a byte-order-mark, then
** the value of *pEnc is modified if necessary. In this case the next
** character is read and it's code-point value returned.
**
** The value of *pOffset is the byte-offset in zStr from which to begin
** reading. It is incremented by the number of bytes read by this function.
**
** If the fourth parameter, fold, is non-zero, then codepoint values are
** folded to lower-case before being returned. See comments for macro
** LOWERCASE(x) for details.
*/
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold){

  int ret = 0;









  switch( *pEnc ){




    case SQLITE_UTF8: {




#if 0
  static const int initVal[] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
     60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,

     75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
     90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
    105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,

    120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
    135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,   0,   1,   2,

      3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,
     18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,   0,
      1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
      0,   1,   2,   3,   4,   5,   6,   7,   0,   1,   2,   3,   0,   1, 254,
    255,
  };
  ret = initVal[(unsigned char)zStr[(*pOffset)++]];
  while( (0xc0&zStr[*pOffset])==0x80 ){





    ret = (ret<<6) | (0x3f&(zStr[(*pOffset)++]));




  }
#endif

      struct Utf8TblRow {
        u8 b1_mask;
        u8 b1_masked_val;
        u8 b1_value_mask;

        int trailing_bytes;
      };
      static const struct Utf8TblRow utf8tbl[] = {
        { 0x80, 0x00, 0x7F, 0 },
        { 0xE0, 0xC0, 0x1F, 1 },
        { 0xF0, 0xE0, 0x0F, 2 },
        { 0xF8, 0xF0, 0x0E, 3 },
        { 0, 0, 0, 0}
      };
    
      u8 b1;   /* First byte of the potentially multi-byte utf-8 character */
      int ii;
      struct Utf8TblRow const *pRow;
    


      pRow = &(utf8tbl[0]);
    
      b1 = zStr[(*pOffset)++];
      while( pRow->b1_mask && (b1&pRow->b1_mask)!=pRow->b1_masked_val ){
        pRow++;
      }
      if( !pRow->b1_mask ){
        return (int)0xFFFD;
      }
      

      ret = (u32)(b1&pRow->b1_value_mask);
      for( ii=0; ii<pRow->trailing_bytes; ii++ ){
        u8 b = zStr[(*pOffset)++];
        if( (b&0xC0)!=0x80 ){
          return (int)0xFFFD;
        }
        ret = (ret<<6) + (u32)(b&0x3F);
      }
      break;

    }

    case SQLITE_UTF16LE:
    case SQLITE_UTF16BE: {
      u32 code_point;   /* the first code-point in the character */
      u32 code_point2;  /* the second code-point in the character, if any */
    
      code_point = READ_16(&zStr[*pOffset], (*pEnc==SQLITE_UTF16BE));
      *pOffset += 2;
    
      /* If this is a non-surrogate code-point, just cast it to an int and
      ** this is the code-point value.
      */
      if( code_point<0xD800 || code_point>0xE000 ){
        ret = code_point;
        break;
      }

      /* If this is a trailing surrogate code-point, then the string is
      ** malformed; return the replacement character.
      */
      if( code_point>0xDBFF ){
        return (int)0xFFFD;
      }
    


      /* The code-point just read is a leading surrogate code-point. If their
      ** is not enough data left or the next code-point is not a trailing
      ** surrogate, return the replacement character.
      */
      code_point2 = READ_16(&zStr[*pOffset], (*pEnc==SQLITE_UTF16BE));
      *pOffset += 2;
      if( code_point2<0xDC00 || code_point>0xDFFF ){
        return (int)0xFFFD;
      }
   

      ret = ( 
          (((code_point&0x03C0)+0x0040)<<16) +   /* uuuuu */



          ((code_point&0x003F)<<10) +            /* xxxxxx */
          (code_point2&0x03FF)                   /* yy yyyyyyyy */
      );
    }
    default:
      assert(0);
  }



  if( fold ){
    return LOWERCASE(ret);
  }
  return ret;



}

/*
** Read the BOM from the start of *pStr, if one is present. Return zero
** for little-endian, non-zero for big-endian. If no BOM is present, return
** the value of the parameter "big_endian".
**
** Return values:
**     1 -> big-endian string
**     0 -> little-endian string
*/
static int readUtf16Bom(UtfString *pStr, int big_endian){
  /* The BOM must be the first thing read from the string */
  assert( pStr->c==0 );

  /* If the string data consists of 1 byte or less, the BOM will make no
  ** difference anyway. In this case just fall through to the default case
  ** and return the native byte-order for this machine.

  **
  ** Otherwise, check the first 2 bytes of the string to see if a BOM is
  ** present.
  */
  if( pStr->n>1 ){
    u8 bom = sqlite3UtfReadBom(pStr->pZ, 2);

    if( bom ){




      pStr->c += 2;
      return (bom==SQLITE_UTF16LE)?0:1;
    }




  }



  return big_endian;

}

/*
** zData is a UTF-16 encoded string, nData bytes in length. This routine
** checks if there is a byte-order mark at the start of zData. If no
** byte order mark is found 0 is returned. Otherwise SQLITE_UTF16BE or
** SQLITE_UTF16LE is returned, depending on whether The BOM indicates that
** the text is big-endian or little-endian.



*/
u8 sqlite3UtfReadBom(const void *zData, int nData){
  if( nData<0 || nData>1 ){
    u8 b1 = *(u8 *)zData;

    u8 b2 = *(((u8 *)zData) + 1);
    if( b1==0xFE && b2==0xFF ){
      return SQLITE_UTF16BE;
    }
    if( b1==0xFF && b2==0xFE ){


      return SQLITE_UTF16LE;


    }


  }
  return 0;
}


/*
** Read a single unicode character from the UTF-8 encoded string *pStr. The
** value returned is a unicode scalar value. In the case of malformed
** strings, the unicode replacement character U+FFFD may be returned.


*/
static u32 readUtf8(UtfString *pStr){

  u8 enc = SQLITE_UTF8;

  return sqlite3ReadUniChar(pStr->pZ, &pStr->c, &enc, 0);






}

/*
** Write the unicode character 'code' to the string pStr using UTF-8
** encoding. SQLITE_NOMEM may be returned if sqlite3Malloc() fails.
*/
static int writeUtf8(UtfString *pStr, u32 code){
  struct Utf8WriteTblRow {
    u32 max_code;
    int trailing_bytes;
    u8 b1_and_mask;
    u8 b1_or_mask;
  };
  static const struct Utf8WriteTblRow utf8tbl[] = {
    {0x0000007F, 0, 0x7F, 0x00},
    {0x000007FF, 1, 0xDF, 0xC0},
    {0x0000FFFF, 2, 0xEF, 0xE0},
    {0x0010FFFF, 3, 0xF7, 0xF0},
    {0x00000000, 0, 0x00, 0x00}
  };
  const struct Utf8WriteTblRow *pRow = &utf8tbl[0];




  while( code>pRow->max_code ){
    assert( pRow->max_code );
    pRow++;
  }

  /* Ensure there is enough room left in the output buffer to write

  ** this UTF-8 character. 
  */
  assert( (pStr->n-pStr->c)>=(pRow->trailing_bytes+1) );



  /* Write the UTF-8 encoded character to pStr. All cases below are
  ** intentionally fall-through.
  */
  switch( pRow->trailing_bytes ){
    case 3:
      pStr->pZ[pStr->c+3] = (((u8)code)&0x3F)|0x80;
      code = code>>6;
    case 2:

      pStr->pZ[pStr->c+2] = (((u8)code)&0x3F)|0x80;
      code = code>>6;

    case 1:
      pStr->pZ[pStr->c+1] = (((u8)code)&0x3F)|0x80;
      code = code>>6;
    case 0:
      pStr->pZ[pStr->c] = (((u8)code)&(pRow->b1_and_mask))|(pRow->b1_or_mask);


  }

  pStr->c += (pRow->trailing_bytes + 1);






  return 0;

}

/*
** Read a single unicode character from the UTF-16 encoded string *pStr. The
** value returned is a unicode scalar value. In the case of malformed
** strings, the unicode replacement character U+FFFD may be returned.
**
** If big_endian is true, the string is assumed to be UTF-16BE encoded.
** Otherwise, it is UTF-16LE encoded.
*/
static u32 readUtf16(UtfString *pStr, int big_endian){
  u32 code_point;   /* the first code-point in the character */


  /* If there is only one byte of data left in the string, return the 

  ** replacement character.
  */

  if( (pStr->n-pStr->c)==1 ){

    pStr->c++;
    return (int)0xFFFD;
  }

  code_point = READ_16(&(pStr->pZ[pStr->c]), big_endian);
  pStr->c += 2;

  /* If this is a non-surrogate code-point, just cast it to an int and
  ** return the code-point value.
  */
  if( code_point<0xD800 || code_point>0xE000 ){
    return code_point;
  }

  /* If this is a trailing surrogate code-point, then the string is
  ** malformed; return the replacement character.
  */
  if( code_point>0xDBFF ){
    return 0xFFFD;
  }



  /* The code-point just read is a leading surrogate code-point. If their
  ** is not enough data left or the next code-point is not a trailing
  ** surrogate, return the replacement character.
  */



  if( (pStr->n-pStr->c)>1 ){
    u32 code_point2 = READ_16(&pStr->pZ[pStr->c], big_endian);
    if( code_point2<0xDC00 || code_point>0xDFFF ){
      return 0xFFFD;
    }
    pStr->c += 2;

    return ( 






        (((code_point&0x03C0)+0x0040)<<16) +   /* uuuuu */
        ((code_point&0x003F)<<10) +            /* xxxxxx */
        (code_point2&0x03FF)                   /* yy yyyyyyyy */
    );



  }else{


    return (int)0xFFFD;
  }
  
  /* not reached */
}

static int writeUtf16(UtfString *pStr, int code, int big_endian){
  int bytes;
  unsigned char *hi_byte;
  unsigned char *lo_byte;

  bytes = (code>0x0000FFFF?4:2);

  /* Ensure there is enough room left in the output buffer to write
  ** this UTF-8 character.
  */
  assert( (pStr->n-pStr->c)>=bytes );
  
  /* Initialise hi_byte and lo_byte to point at the locations into which
  ** the MSB and LSB of the (first) 16-bit unicode code-point written for
  ** this character.
  */

  hi_byte = (big_endian?&pStr->pZ[pStr->c]:&pStr->pZ[pStr->c+1]);
  lo_byte = (big_endian?&pStr->pZ[pStr->c+1]:&pStr->pZ[pStr->c]);



  if( bytes==2 ){

    *hi_byte = (u8)((code&0x0000FF00)>>8);
    *lo_byte = (u8)(code&0x000000FF);




  }else{
    u32 wrd;
    wrd = ((((code&0x001F0000)-0x00010000)+(code&0x0000FC00))>>10)|0x0000D800;
    *hi_byte = (u8)((wrd&0x0000FF00)>>8);
    *lo_byte = (u8)(wrd&0x000000FF);

    wrd = (code&0x000003FF)|0x0000DC00;
    *(hi_byte+2) = (u8)((wrd&0x0000FF00)>>8);
    *(lo_byte+2) = (u8)(wrd&0x000000FF);
  }




  pStr->c += bytes;
  
  return 0;
}

/*
** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,
** return the number of unicode characters in pZ up to (but not including)
** the first 0x00 byte. If nByte is not less than zero, return the
** number of unicode characters in the first nByte of pZ (or up to 
** the first 0x00, whichever comes first).
*/
int sqlite3utf8CharLen(const char *pZ, int nByte){
  UtfString str;
  int ret = 0;
  u32 code = 1;

  str.pZ = (char *)pZ;
  str.n = nByte;
  str.c = 0;




  while( (nByte<0 || str.c<str.n) && code!=0 ){
    code = readUtf8(&str);
    ret++;
  }
  if( code==0 ) ret--;

  return ret;
}

/*
** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
** return the number of bytes up to (but not including), the first pair
** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
** then return the number of bytes in the first nChar unicode characters
** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
*/
int sqlite3utf16ByteLen(const void *pZ, int nChar){
  if( nChar<0 ){
    const unsigned char *pC1 = (unsigned char *)pZ;
    const unsigned char *pC2 = (unsigned char *)pZ+1;
    while( *pC1 || *pC2 ){
      pC1 += 2;
      pC2 += 2;
    }
    return pC1-(unsigned char *)pZ;
  }else{
    UtfString str;
    u32 code = 1;
    int big_endian;
    int nRead = 0;
    int ret;

    str.pZ = (char *)pZ;
    str.c = 0;
    str.n = -1;

    /* Check for a BOM. We just ignore it if there is one, it's only read
    ** so that it is not counted as a character. 
    */
    big_endian = readUtf16Bom(&str, 0);
    ret = 0-str.c;

    while( code!=0 && nRead<nChar ){
      code = readUtf16(&str, big_endian);
      nRead++;
    }
    if( code==0 ){
      ret -= 2;
    }
    return str.c + ret;
  }
}

/*
** Convert a string in UTF-16 native byte (or with a Byte-order-mark or
** "BOM") into a UTF-8 string.  The UTF-8 string is written into space 
** obtained from sqlite3Malloc() and must be released by the calling function.
**
** The parameter N is the number of bytes in the UTF-16 string.  If N is
** negative, the entire string up to the first \u0000 character is translated.
**
** The returned UTF-8 string is always \000 terminated.
*/
unsigned char *sqlite3utf16to8(const void *pData, int N, int big_endian){
  UtfString in;
  UtfString out;

  out.pZ = 0;

  in.pZ = (unsigned char *)pData;
  in.n = N;
  in.c = 0;

  if( in.n<0 ){
    in.n = sqlite3utf16ByteLen(in.pZ, -1);
  }

  /* A UTF-8 encoding of a unicode string can require at most 1.5 times as
  ** much space to store as the same string encoded using UTF-16. Allocate
  ** this now.
  */
  out.n = (in.n*1.5) + 1;
  out.pZ = sqliteMalloc(out.n);
  if( !out.pZ ){
    return 0;
  }
  out.c = 0;

  big_endian = readUtf16Bom(&in, big_endian);
  while( in.c<in.n ){
    writeUtf8(&out, readUtf16(&in, big_endian));
  }

  /* Add the NULL-terminator character */
  assert( out.c<out.n );
  out.pZ[out.c] = 0x00;

  return out.pZ;
}

static void *utf8toUtf16(const unsigned char *pIn, int N, int big_endian){
  UtfString in;
  UtfString out;

  in.pZ = (unsigned char *)pIn;
  in.n = N;
  in.c = 0;

  if( in.n<0 ){
    in.n = strlen(in.pZ);
  }

  /* A UTF-16 encoding of a unicode string can require at most twice as
  ** much space to store as the same string encoded using UTF-8. Allocate
  ** this now.
  */
  out.n = (in.n*2) + 2;
  out.pZ = sqliteMalloc(out.n);
  if( !out.pZ ){
    return 0;
  }
  out.c = 0;

  while( in.c<in.n ){
    writeUtf16(&out, readUtf8(&in), big_endian);
  }

  /* Add the NULL-terminator character */
  assert( (out.c+1)<out.n );
  out.pZ[out.c] = 0x00;
  out.pZ[out.c+1] = 0x00;

  return out.pZ;
}

/*
** Translate UTF-8 to UTF-16BE or UTF-16LE
*/
void *sqlite3utf8to16be(const unsigned char *pIn, int N){
  return utf8toUtf16(pIn, N, 1);
}

void *sqlite3utf8to16le(const unsigned char *pIn, int N){
  return utf8toUtf16(pIn, N, 0);
}

/* 
** This routine does the work for sqlite3utf16to16le() and
** sqlite3utf16to16be(). If big_endian is 1 the input string is
** transformed in place to UTF-16BE encoding. If big_endian is 0 then
** the input is transformed to UTF-16LE.
**
** Unless the first two bytes of the input string is a BOM, the input is
** assumed to be UTF-16 encoded using the machines native byte ordering.
*/
static void utf16to16(void *pData, int N, int big_endian){
  UtfString inout;
  inout.pZ = (unsigned char *)pData;
  inout.c = 0;
  inout.n = N;

  if( inout.n<0 ){
    inout.n = sqlite3utf16ByteLen(inout.pZ, -1);
  }

  if( readUtf16Bom(&inout, SQLITE_BIGENDIAN)!=big_endian ){
    /* swab(&inout.pZ[inout.c], inout.pZ, inout.n-inout.c); */
    int i;
    for(i=0; i<(inout.n-inout.c); i += 2){
      char c1 = inout.pZ[i+inout.c];
      char c2 = inout.pZ[i+inout.c+1];
      inout.pZ[i] = c2;
      inout.pZ[i+1] = c1;
    }
  }else if( inout.c ){
    memmove(inout.pZ, &inout.pZ[inout.c], inout.n-inout.c);
  }

  inout.pZ[inout.n-inout.c] = 0x00;
  inout.pZ[inout.n-inout.c+1] = 0x00;
}

/*
** Convert a string in UTF-16 native byte or with a BOM into a UTF-16LE
** string.  The conversion occurs in-place.  The output overwrites the
** input.  N bytes are converted.  If N is negative everything is converted
** up to the first \u0000 character.
**
** If the native byte order is little-endian and there is no BOM, then
** this routine is a no-op.  If there is a BOM at the start of the string,
** it is removed.
**
** Translation from UTF-16LE to UTF-16BE and back again is accomplished
** using the library function swab().
*/
void sqlite3utf16to16le(void *pData, int N){
  utf16to16(pData, N, 0);
}

/*
** Convert a string in UTF-16 native byte or with a BOM into a UTF-16BE
** string.  The conversion occurs in-place.  The output overwrites the
** input.  N bytes are converted.  If N is negative everything is converted
** up to the first \u0000 character.
**
** If the native byte order is little-endian and there is no BOM, then
** this routine is a no-op.  If there is a BOM at the start of the string,
** it is removed.
**
** Translation from UTF-16LE to UTF-16BE and back again is accomplished
** using the library function swab().
*/
void sqlite3utf16to16be(void *pData, int N){
  utf16to16(pData, N, 1);
}

/*
** This function is used to translate between UTF-8 and UTF-16. The
** result is returned in dynamically allocated memory.
*/
int sqlite3utfTranslate(
  const void *zData, int nData,  /* Input string */
  u8 enc1,                       /* Encoding of zData */
  void **zOut, int *nOut,        /* Output string */
  u8 enc2                        /* Desired encoding of output */
){
  assert( enc1==SQLITE_UTF8 || enc1==SQLITE_UTF16LE || enc1==SQLITE_UTF16BE );
  assert( enc2==SQLITE_UTF8 || enc2==SQLITE_UTF16LE || enc2==SQLITE_UTF16BE );
  assert( 
    (enc1==SQLITE_UTF8 && (enc2==SQLITE_UTF16LE || enc2==SQLITE_UTF16BE)) ||
    (enc2==SQLITE_UTF8 && (enc1==SQLITE_UTF16LE || enc1==SQLITE_UTF16BE))
  );

  if( enc1==SQLITE_UTF8 ){
    if( enc2==SQLITE_UTF16LE ){
      *zOut = sqlite3utf8to16le(zData, nData);
    }else{
      *zOut = sqlite3utf8to16be(zData, nData);
    }
    if( !(*zOut) ) return SQLITE_NOMEM;
    *nOut = sqlite3utf16ByteLen(*zOut, -1);
  }else{
    *zOut = sqlite3utf16to8(zData, nData, enc1==SQLITE_UTF16BE);
    if( !(*zOut) ) return SQLITE_NOMEM;
    *nOut = strlen(*zOut);
  }
  return SQLITE_OK;
}

#define sqliteNextChar(X)  while( (0xc0&*++(X))==0x80 ){}

/*
** Compare two UTF-8 strings for equality using the "LIKE" operator of
** SQL.  The '%' character matches any sequence of 0 or more
** characters and '_' matches any single character.  Case is
** not significant.
*/
int sqlite3utf8LikeCompare(
  const unsigned char *zPattern, 
  const unsigned char *zString
){
  register int c;
  int c2;

  while( (c = LOWERCASE(*zPattern))!=0 ){
    switch( c ){
      case '%': {
        while( (c=zPattern[1]) == '%' || c == '_' ){
          if( c=='_' ){
            if( *zString==0 ) return 0;
            sqliteNextChar(zString);
          }
          zPattern++;
        }
        if( c==0 ) return 1;
        c = LOWERCASE(c);
        while( (c2=LOWERCASE(*zString))!=0 ){
          while( c2 != 0 && c2 != c ){ 
            zString++;
            c2 = LOWERCASE(*zString); 
          }
          if( c2==0 ) return 0;
          if( sqlite3utf8LikeCompare(&zPattern[1],zString) ) return 1;
          sqliteNextChar(zString);
        }
        return 0;
      }
      case '_': {
        if( *zString==0 ) return 0;
        sqliteNextChar(zString);
        zPattern++;
        break;
      }
      default: {
        if( c != LOWERCASE(*zString) ) return 0;
        zPattern++;
        zString++;
        break;
      }
    }
  }
  return *zString==0;
}






















































<
|
|
<
<
<
<
|
<
<
<
<
<
<
<

<
>
|
>
>
>
>
>
>
>
>

|
>
>
>
>
|
>
>
>

|
<
|
|
<
|
|
>
|
<
<
>
|
|
|
|
<
>
|
|
|
|
|
|
|
|
>
>
>
>
>
|
>
>
>
>
|
<

<
<
<
<
>
|
<
<
<
<
<
<
<
<
|
<
<
<
|
>
>
|
|
|
|
|
|
|
|
<
|
>
|
|
|
|
<
<
|
<
<
>
|

|
|
|
|
|
|
|
|
|
|
<
<
<
<
|
<
<
<
<
<
<
|
|
>
>
|
|
|
|
|
|
|
<
|
|
>
|
|
>
>
>
|
|
<
|
<
<
|
|
>
>
|
<
<
|
>
>
>



<
<
|
<
|
<
<
|
<
<
<
|
<
|
<
>
|
|
|
|
<
|
>
|
>
>
>
>
|
<
|
>
>
>
>
|
>
|
>
|
>
|
|
<
<
<
<
<
|
>
>
>
|
<
|
|
>
|
<
<
<
|
>
>
|
>
>

>
>

<
|
|
|
|
<
<
|
>
>
|
|
>
|
>
|
>
>
>
>
>
>
|
|
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<

>
>
>
|
|
|
|
|
|
>
|
<
|
>
>
|
<
<
<
<
<
<
<
<
>
<
|
>
|
|
|
|
|
>
>
|
>
|
>
>
>
>
>
|
|
>
|
|
<
<
<
<
<
<
<
<
<
<

>
|
>
|
<
>
|
>
|
<
<
|
|
<
|
<
<
<
<
<

|
<
<
<
<
<
|
>
>
|
<
<
|
<
>
>
>
|
<
<
|
|
<

<
>
>
>
>
>
>
|
|
|
|
>
>

|
>
>
|
<
|
<
|
|
<
<
<
<
|
<
|
<
<
<
<
|
<
<
|
<
>
|
|
>
>
|
|
>
|
|
>
>
>
>
|
<
<
<
<
|
<
<
<
|
>
>
>
|
<
|
|









|
<
|
<
|
<
|
|
>
>
|
>
|
|
|

<
<
|









|
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
<
|
<
|
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
|
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<




















|












|





|













>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
80
81
82
83
84
85
86

87
88




89







90

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112

113
114

115
116
117
118


119
120
121
122
123

124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

144




145
146








147



148
149
150
151
152
153
154
155
156
157
158

159
160
161
162
163
164


165


166
167
168
169
170
171
172
173
174
175
176
177
178




179






180
181
182
183
184
185
186
187
188
189
190

191
192
193
194
195
196
197
198
199
200

201


202
203
204
205
206


207
208
209
210
211
212
213


214

215


216



217

218

219
220
221
222
223

224
225
226
227
228
229
230
231

232
233
234
235
236
237
238
239
240
241
242
243
244





245
246
247
248
249

250
251
252
253



254
255
256
257
258
259
260
261
262
263

264
265
266
267


268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284











285







286
287
288
289
290
291
292
293
294
295
296
297

298
299
300
301








302

303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324










325
326
327
328
329

330
331
332
333


334
335

336





337
338





339
340
341
342


343

344
345
346
347


348
349

350

351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367

368

369
370




371

372




373


374

375
376
377
378
379
380
381
382
383
384
385
386
387
388
389




390



391
392
393
394
395

396
397
398
399
400
401
402
403
404
405
406
407

408

409

410
411
412
413
414
415
416
417
418
419


420
421
422
423
424
425
426
427
428
429
430













431
432

433

434





435










436













437


































































438








































439


440



441
















442















443
444
















445





446











447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
    122,
};

/*

** This table maps from the first byte of a UTF-8 character to the number
** of trailing bytes expected. A value '255' indicates that the table key




** is not a legal first byte for a UTF-8 character.







*/

static const u8 xtra_utf8_bytes[256]  = {
/* 0xxxxxxx */
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,

/* 10wwwwww */
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,

/* 110yyyyy */
1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,

/* 1110zzzz */

2, 2, 2, 2, 2, 2, 2, 2,     2, 2, 2, 2, 2, 2, 2, 2,


/* 11110yyy */
3, 3, 3, 3, 3, 3, 3, 3,     255, 255, 255, 255, 255, 255, 255, 255,
};



/*
** This table maps from the number of trailing bytes in a UTF-8 character
** to an integer constant that is effectively calculated for each character
** read by a naive implementation of a UTF-8 character reader. The code
** in the READ_UTF8 macro explains things best.

*/
static const int xtra_utf8_bits[4] =  {
0,
12416,          /* (0xC0 << 6) + (0x80) */
925824,         /* (0xE0 << 12) + (0x80 << 6) + (0x80) */
63447168        /* (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
};

#define READ_UTF8(zIn, c) { \
  int xtra;                                            \
  c = *(zIn)++;                                        \
  xtra = xtra_utf8_bytes[c];                           \
  switch( xtra ){                                      \
    case 255: c = (int)0xFFFD; break;                  \
    case 3: c = (c<<6) + *(zIn)++;                     \
    case 2: c = (c<<6) + *(zIn)++;                     \
    case 1: c = (c<<6) + *(zIn)++;                     \
    c -= xtra_utf8_bits[xtra];                         \
  }                                                    \
}






#define SKIP_UTF8(zIn) {                               \
  zIn += (xtra_utf8_bytes[*(u8 *)zIn] + 1);            \








}




#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (c&0xFF);                                \
  }                                                    \
  else if( c<0x00800 ){                                \
    *zOut++ = 0xC0 + ((c>>6)&0x1F);                    \
    *zOut++ = 0x80 + (c & 0x3F);                       \
  }                                                    \
  else if( c<0x10000 ){                                \
    *zOut++ = 0xE0 + ((c>>12)&0x0F);                   \

    *zOut++ = 0x80 + ((c>>6) & 0x3F);                  \
    *zOut++ = 0x80 + (c & 0x3F);                       \
  }else{                                               \
    *zOut++ = 0xF0 + ((c>>18) & 0x07);                 \
    *zOut++ = 0x80 + ((c>>12) & 0x3F);                 \
    *zOut++ = 0x80 + ((c>>6) & 0x3F);                  \


    *zOut++ = 0x80 + (c & 0x3F);                       \


  }                                                    \
}

#define WRITE_UTF16LE(zOut, c) {                                \
  if( c<=0xFFFF ){                                              \
    *zOut++ = (c&0x00FF);                                       \
    *zOut++ = ((c>>8)&0x00FF);                                  \
  }else{                                                        \
    *zOut++ = (((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0));  \
    *zOut++ = (0x00D8 + (((c-0x10000)>>18)&0x03));              \
    *zOut++ = (c&0x00FF);                                       \
    *zOut++ = (0x00DC + ((c>>8)&0x03));                         \
  }                                                             \




}







#define WRITE_UTF16BE(zOut, c) {                                \
  if( c<=0xFFFF ){                                              \
    *zOut++ = ((c>>8)&0x00FF);                                  \
    *zOut++ = (c&0x00FF);                                       \
  }else{                                                        \
    *zOut++ = (0x00D8 + (((c-0x10000)>>18)&0x03));              \
    *zOut++ = (((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0));  \
    *zOut++ = (0x00DC + ((c>>8)&0x03));                         \
    *zOut++ = (c&0x00FF);                                       \
  }                                                             \

}

#define READ_UTF16LE(zIn, c){                                         \
  c = (*zIn++);                                                       \
  c += ((*zIn++)<<8);                                                 \
  if( c>=0xD800 && c<=0xE000 ){                                       \
    int c2 = (*zIn++);                                                \
    c2 += ((*zIn++)<<8);                                              \
    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
  }                                                                   \

}



#define READ_UTF16BE(zIn, c){                                         \
  c = ((*zIn++)<<8);                                                  \
  c += (*zIn++);                                                      \
  if( c>=0xD800 && c<=0xE000 ){                                       \


    int c2 = ((*zIn++)<<8);                                           \
    c2 += (*zIn++);                                                   \
    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
  }                                                                   \
}

/*


** If the TRANSLATE_TRACE macro is defined, the value of each Mem is

** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().


*/ 



/* #define TRANSLATE_TRACE 1 */



/*
** This routine transforms the internal text encoding used by pMem to
** desiredEnc. It is an error if the string is already of the desired
** encoding, or if *pMem does not contain a string value.
*/

int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
  unsigned char zShort[NBFS]; /* Temporary short output buffer */
  int len;                    /* Maximum length of output string in bytes */
  unsigned char *zOut;                  /* Output buffer */
  unsigned char *zIn;                   /* Input iterator */
  unsigned char *zTerm;                 /* End of input */
  unsigned char *z;                     /* Output iterator */
  int c;


  assert( pMem->flags&MEM_Str );
  assert( pMem->enc!=desiredEnc );
  assert( pMem->enc!=0 );
  assert( pMem->n>=0 );

#ifdef TRANSLATE_TRACE
  {
    char zBuf[100];
    sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100);
    fprintf(stderr, "INPUT:  %s\n", zBuf);
  }
#endif






  /* If the translation is between UTF-16 little and big endian, then 
  ** all that is required is to swap the byte order. This case is handled
  ** differently from the others.
  */

  if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){
    u8 temp;
    sqlite3VdbeMemMakeWriteable(pMem);
    zIn = pMem->z;



    zTerm = &zIn[pMem->n];
    while( zIn<zTerm ){
      temp = *zIn;
      *zIn = *(zIn+1);
      zIn++;
      *zIn++ = temp;
    }
    pMem->enc = desiredEnc;
    goto translate_out;
  }


  /* Set zIn to point at the start of the input buffer and zTerm to point 1
  ** byte past the end.
  **


  ** Variable zOut is set to point at the output buffer. This may be space
  ** obtained from malloc(), or Mem.zShort, if it large enough and not in
  ** use, or the zShort array on the stack (see above).
  */
  zIn = pMem->z;
  zTerm = &zIn[pMem->n];
  len = pMem->n*2 + 2;
  if( len>NBFS ){
    zOut = sqliteMallocRaw(len);
    if( !zOut ) return SQLITE_NOMEM;
  }else{
    if( pMem->z==pMem->zShort ){
      zOut = zShort;
    }else{
      zOut = pMem->zShort;
    }
  }











  z = zOut;








  if( pMem->enc==SQLITE_UTF8 ){
    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){
        READ_UTF8(zIn, c); 
        WRITE_UTF16LE(z, c);
      }
      WRITE_UTF16LE(z, 0);
      pMem->n = (z-zOut)-2;
    }else if( desiredEnc==SQLITE_UTF16BE ){
      /* UTF-8 -> UTF-16 Big-endian */

      while( zIn<zTerm ){
        READ_UTF8(zIn, c); 
        WRITE_UTF16BE(z, c);
      }








      WRITE_UTF16BE(z, 0);

      pMem->n = (z-zOut)-2;
    }
  }else{
    assert( desiredEnc==SQLITE_UTF8 );
    if( pMem->enc==SQLITE_UTF16LE ){
      /* UTF-16 Little-endian -> UTF-8 */
      while( zIn<zTerm ){
        READ_UTF16LE(zIn, c); 
        WRITE_UTF8(z, c);
      }
      WRITE_UTF8(z, 0);
      pMem->n = (z-zOut)-1;
    }else{
      /* UTF-16 Little-endian -> UTF-8 */
      while( zIn<zTerm ){
        READ_UTF16BE(zIn, c); 
        WRITE_UTF8(z, c);
      }
      WRITE_UTF8(z, 0);
      pMem->n = (z-zOut)-1;
    }
  }











  sqlite3VdbeMemRelease(pMem);
  pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
  pMem->enc = desiredEnc;
  if( (char *)zOut==pMem->zShort ){

    pMem->flags |= (MEM_Term|MEM_Short);
  }else if( zOut==zShort ){
    memcpy(pMem->zShort, zOut, len);
    zOut = pMem->zShort;


    pMem->flags |= (MEM_Term|MEM_Short);
  }else{

    pMem->flags |= (MEM_Term|MEM_Dyn);





  }
  pMem->z = zOut;






translate_out:
#ifdef TRANSLATE_TRACE
  {


    char zBuf[100];

    sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100);
    fprintf(stderr, "OUTPUT: %s\n", zBuf);
  }
#endif


  return SQLITE_OK;
}



/*
** This routine checks for a byte-order mark at the beginning of the 
** UTF-16 string stored in *pMem. If one is present, it is removed and
** the encoding of the Mem adjusted. This routine does not do any
** byte-swapping, it just sets Mem.enc appropriately.
**
** The allocation (static, dynamic etc.) and encoding of the Mem may be
** changed by this function.
*/
int sqlite3VdbeMemHandleBom(Mem *pMem){
  int rc = SQLITE_OK;
  u8 bom = 0;

  if( pMem->n<0 || pMem->n>1 ){
    u8 b1 = *(u8 *)pMem->z;
    u8 b2 = *(((u8 *)pMem->z) + 1);
    if( b1==0xFE && b2==0xFF ){

      bom = SQLITE_UTF16BE;

    }
    if( b1==0xFF && b2==0xFE ){




      bom = SQLITE_UTF16LE;

    }




  }


  

  if( bom ){
    if( pMem->flags & MEM_Short ){
      memmove(pMem->zShort, &pMem->zShort[2], NBFS-2);
      pMem->n -= 2;
      pMem->enc = bom;
    }
    else if( pMem->flags & MEM_Dyn ){
      void (*xDel)(void*) = pMem->xDel;
      char *z = pMem->z;
      pMem->z = 0;
      pMem->xDel = 0;
      rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);
      if( xDel ){
        xDel(z);
      }else{




        sqliteFree(z);



      }
    }else{
      rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, 
          SQLITE_TRANSIENT);
    }

  }
  return rc;
}

/*
** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,
** return the number of unicode characters in pZ up to (but not including)
** the first 0x00 byte. If nByte is not less than zero, return the
** number of unicode characters in the first nByte of pZ (or up to 
** the first 0x00, whichever comes first).
*/
int sqlite3utf8CharLen(const char *z, int nByte){

  int r = 0;

  const char *zTerm;

  if( nByte>0 ){
    zTerm = &z[nByte];
  }else{
    zTerm = (const char *)(-1);
  }
  assert( z<=zTerm );
  while( *z!=0 && z<zTerm ){
    SKIP_UTF8(z);
    r++;
  }


  return r;
}

/*
** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
** return the number of bytes up to (but not including), the first pair
** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
** then return the number of bytes in the first nChar unicode characters
** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
*/
int sqlite3utf16ByteLen(const void *zIn, int nChar){













  int c = 1;
  char const *z = zIn;

  int n = 0;

  if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){





    while( c && ((nChar<0) || n<nChar) ){










      READ_UTF16BE(z, c);













      n++;


































































    }








































  }else{


    while( c && ((nChar<0) || n<nChar) ){



      READ_UTF16LE(z, c);
















      n++;















    }
  }
















  return (z-(char const *)zIn)-((c==0)?2:0);





}












/*
** Compare two UTF-8 strings for equality using the "LIKE" operator of
** SQL.  The '%' character matches any sequence of 0 or more
** characters and '_' matches any single character.  Case is
** not significant.
*/
int sqlite3utf8LikeCompare(
  const unsigned char *zPattern, 
  const unsigned char *zString
){
  register int c;
  int c2;

  while( (c = LOWERCASE(*zPattern))!=0 ){
    switch( c ){
      case '%': {
        while( (c=zPattern[1]) == '%' || c == '_' ){
          if( c=='_' ){
            if( *zString==0 ) return 0;
            SKIP_UTF8(zString);
          }
          zPattern++;
        }
        if( c==0 ) return 1;
        c = LOWERCASE(c);
        while( (c2=LOWERCASE(*zString))!=0 ){
          while( c2 != 0 && c2 != c ){ 
            zString++;
            c2 = LOWERCASE(*zString); 
          }
          if( c2==0 ) return 0;
          if( sqlite3utf8LikeCompare(&zPattern[1],zString) ) return 1;
          SKIP_UTF8(zString);
        }
        return 0;
      }
      case '_': {
        if( *zString==0 ) return 0;
        SKIP_UTF8(zString);
        zPattern++;
        break;
      }
      default: {
        if( c != LOWERCASE(*zString) ) return 0;
        zPattern++;
        zString++;
        break;
      }
    }
  }
  return *zString==0;
}

#ifndef NDEBUG
/*
** This routine is called from the TCL test function "translate_selftest".
** It checks that the primitives for serializing and deserializing
** characters in each encoding are inverses of each other.
*/
void sqlite3utfSelfTest(){
  int i;
  unsigned char zBuf[20];
  unsigned char *z;
  int n;
  int c;

  for(i=0; 0 && i<0x00110000; i++){
    z = zBuf;
    WRITE_UTF8(z, i);
    n = z-zBuf;
    z = zBuf;
    READ_UTF8(z, c);
    assert( c==i );
    assert( (z-zBuf)==n );
  }
  for(i=0; i<0x00110000; i++){
    if( i>=0xD800 && i<=0xE000 ) continue;
    z = zBuf;
    WRITE_UTF16LE(z, i);
    n = z-zBuf;
    z = zBuf;
    READ_UTF16LE(z, c);
    assert( c==i );
    assert( (z-zBuf)==n );
  }
  for(i=0; i<0x00110000; i++){
    if( i>=0xD800 && i<=0xE000 ) continue;
    z = zBuf;
    WRITE_UTF16BE(z, i);
    n = z-zBuf;
    z = zBuf;
    READ_UTF16BE(z, c);
    assert( c==i );
    assert( (z-zBuf)==n );
  }
}
#endif


Changes to src/util.c.
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
**
*************************************************************************
** Utility functions used throughout sqlite.
**
** This file contains functions for allocating memory, comparing
** strings, and stuff like that.
**
** $Id: util.c,v 1.102 2004/06/16 07:45:29 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <stdarg.h>
#include <ctype.h>

#if SQLITE_DEBUG>2 && defined(__GLIBC__)
#include <execinfo.h>







|







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
**
*************************************************************************
** Utility functions used throughout sqlite.
**
** This file contains functions for allocating memory, comparing
** strings, and stuff like that.
**
** $Id: util.c,v 1.103 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <stdarg.h>
#include <ctype.h>

#if SQLITE_DEBUG>2 && defined(__GLIBC__)
#include <execinfo.h>
252
253
254
255
256
257
258







259
260
261
262
263
264
265
  zNew = sqlite3Malloc_(n+1, 0, zFile, line);
  if( zNew ){
    memcpy(zNew, z, n);
    zNew[n] = 0;
  }
  return zNew;
}







#endif /* SQLITE_DEBUG */

/*
** The following versions of malloc() and free() are for use in a
** normal build.
*/
#if !defined(SQLITE_DEBUG)







>
>
>
>
>
>
>







252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
  zNew = sqlite3Malloc_(n+1, 0, zFile, line);
  if( zNew ){
    memcpy(zNew, z, n);
    zNew[n] = 0;
  }
  return zNew;
}

/*
** A version of sqliteFree that is always a function, not a macro.
*/
void sqlite3FreeX(void *p){
  sqliteFree(p);
}
#endif /* SQLITE_DEBUG */

/*
** The following versions of malloc() and free() are for use in a
** normal build.
*/
#if !defined(SQLITE_DEBUG)
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461

462
463
464
465




466
467
468
469
470
471
472
** encoded in UTF-8.
**
** To clear the most recent error for slqite handle "db", sqlite3Error
** should be called with err_code set to SQLITE_OK and zFormat set
** to NULL.
*/
void sqlite3Error(sqlite *db, int err_code, const char *zFormat, ...){
  /* Free any existing error message. */
  if( db->zErrMsg ){
    sqliteFree(db->zErrMsg);
    db->zErrMsg = 0;
  }
  if( db->zErrMsg16 ){
    sqliteFree(db->zErrMsg16);
    db->zErrMsg16 = 0;
  }

  /* Set the new error code and error message. */
  db->errCode = err_code;
  if( zFormat ){

    va_list ap;
    va_start(ap, zFormat);
    db->zErrMsg = sqlite3VMPrintf(zFormat, ap);
    va_end(ap);




  }
}

/*
** Add an error message to pParse->zErrMsg and increment pParse->nErr.
** The following formatting characters are allowed:
**







<
<
<
<
<
<
<
<
<
|
<
|
|
>
|
|
|
|
>
>
>
>







449
450
451
452
453
454
455









456

457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
** encoded in UTF-8.
**
** To clear the most recent error for slqite handle "db", sqlite3Error
** should be called with err_code set to SQLITE_OK and zFormat set
** to NULL.
*/
void sqlite3Error(sqlite *db, int err_code, const char *zFormat, ...){









  if( db && (db->pErr || (db->pErr = sqlite3ValueNew())) ){

    db->errCode = err_code;
    if( zFormat ){
      char *z;
      va_list ap;
      va_start(ap, zFormat);
      z = sqlite3VMPrintf(zFormat, ap);
      va_end(ap);
      sqlite3ValueSetStr(db->pErr, -1, z, SQLITE_UTF8, sqlite3FreeX);
    }else{
      sqlite3ValueSetStr(db->pErr, 0, 0, SQLITE_UTF8, SQLITE_STATIC);
    }
  }
}

/*
** Add an error message to pParse->zErrMsg and increment pParse->nErr.
** The following formatting characters are allowed:
**
Changes to src/vdbe.c.
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
**
** Various scripts scan this source file in order to generate HTML
** documentation, headers files, or other derived files.  The formatting
** of the code in this file is, therefore, important.  See other comments
** in this file for details.  If in doubt, do not deviate from existing
** commenting and indentation practices when changing or adding code.
**
** $Id: vdbe.c,v 1.378 2004/06/17 07:53:03 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include "vdbeInt.h"

/*







|







39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
**
** Various scripts scan this source file in order to generate HTML
** documentation, headers files, or other derived files.  The formatting
** of the code in this file is, therefore, important.  See other comments
** in this file for details.  If in doubt, do not deviate from existing
** commenting and indentation practices when changing or adding code.
**
** $Id: vdbe.c,v 1.379 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include "vdbeInt.h"

/*
357
358
359
360
361
362
363
364
365
366


367
368
369
370
371
372
373
}

#ifndef NDEBUG
/*
** Write a nice string representation of the contents of cell pMem
** into buffer zBuf, length nBuf.
*/
void prettyPrintMem(Mem *pMem, char *zBuf, int nBuf){
  char *zCsr = zBuf;
  int f = pMem->flags;



  if( f&MEM_Blob ){
    int i;
    char c;
    if( f & MEM_Dyn ){
      c = 'z';
      assert( (f & (MEM_Static|MEM_Ephem))==0 );







|


>
>







357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
}

#ifndef NDEBUG
/*
** Write a nice string representation of the contents of cell pMem
** into buffer zBuf, length nBuf.
*/
void sqlite3VdbeMemPrettyPrint(Mem *pMem, char *zBuf, int nBuf){
  char *zCsr = zBuf;
  int f = pMem->flags;

  static const char *encnames[] = {"(X)", "(8)", "(16LE)", "(16BE)"};

  if( f&MEM_Blob ){
    int i;
    char c;
    if( f & MEM_Dyn ){
      c = 'z';
      assert( (f & (MEM_Static|MEM_Ephem))==0 );
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428

429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
      zBuf[1] = 's';
    }
    k = 2;
    k += sprintf(&zBuf[k], "%d", pMem->n);
    zBuf[k++] = '[';
    for(j=0; j<15 && j<pMem->n; j++){
      u8 c = pMem->z[j];
/*
      if( c==0 && j==pMem->n-1 ) break;
            zBuf[k++] = "0123456789ABCDEF"[c>>4];
            zBuf[k++] = "0123456789ABCDEF"[c&0xf];
*/
      if( c>=0x20 && c<0x7f ){
        zBuf[k++] = c;
      }else{
        zBuf[k++] = '.';
      }
    }
    zBuf[k++] = ']';

    zBuf[k++] = 0;
  }
}

/* Temporary - this is useful in conjunction with prettyPrintMem whilst
** debugging. 
*/
char zGdbBuf[100];
#endif


#ifdef VDBE_PROFILE
/*
** The following routine only works on pentium-class processors.
** It uses the RDTSC opcode to read cycle count value out of the







<
<
<
<
<







>



<
<
<
<
<







412
413
414
415
416
417
418





419
420
421
422
423
424
425
426
427
428
429





430
431
432
433
434
435
436
      zBuf[1] = 's';
    }
    k = 2;
    k += sprintf(&zBuf[k], "%d", pMem->n);
    zBuf[k++] = '[';
    for(j=0; j<15 && j<pMem->n; j++){
      u8 c = pMem->z[j];





      if( c>=0x20 && c<0x7f ){
        zBuf[k++] = c;
      }else{
        zBuf[k++] = '.';
      }
    }
    zBuf[k++] = ']';
    k += sprintf(&zBuf[k], encnames[pMem->enc]);
    zBuf[k++] = 0;
  }
}





#endif


#ifdef VDBE_PROFILE
/*
** The following routine only works on pentium-class processors.
** It uses the RDTSC opcode to read cycle count value out of the
730
731
732
733
734
735
736
737
738
739
740
741
742


743
744
745
746
747

748
749
750
751
752
753
754
755
756
757
** P3 points to a nul terminated UTF-8 string. This opcode is transformed
** into an OP_String before it is executed for the first time.
*/
case OP_String8: {
  pOp->opcode = OP_String;

  if( db->enc!=SQLITE_UTF8 && pOp->p3 ){
    char *z = pOp->p3;
    if( db->enc==SQLITE_UTF16LE ){
      pOp->p3 = sqlite3utf8to16le(z, -1);
    }else{
      pOp->p3 = sqlite3utf8to16be(z, -1);
    }


    if( pOp->p3type==P3_DYNAMIC ){
      sqliteFree(z);
    }
    pOp->p3type = P3_DYNAMIC;
    if( !pOp->p3 ) goto no_mem;

  }

  /* Fall through to the next case, OP_String */
}
  
/* Opcode: String * * P3
**
** The string value P3 is pushed onto the stack.  If P3==0 then a
** NULL is pushed onto the stack. P3 is assumed to be a nul terminated
** string encoded with the database native encoding.







|
|
|
<
|
<
>
>

|


|
>

<
|







723
724
725
726
727
728
729
730
731
732

733

734
735
736
737
738
739
740
741
742

743
744
745
746
747
748
749
750
** P3 points to a nul terminated UTF-8 string. This opcode is transformed
** into an OP_String before it is executed for the first time.
*/
case OP_String8: {
  pOp->opcode = OP_String;

  if( db->enc!=SQLITE_UTF8 && pOp->p3 ){
    pTos++;
    sqlite3VdbeMemSetStr(pTos, pOp->p3, -1, SQLITE_UTF8, SQLITE_STATIC);
    if( SQLITE_OK!=sqlite3VdbeChangeEncoding(pTos, db->enc) ) goto no_mem;

    if( SQLITE_OK!=sqlite3VdbeMemDynamicify(pTos) ) goto no_mem;

    pTos->flags &= ~(MEM_Dyn);
    pTos->flags |= MEM_Static;
    if( pOp->p3type==P3_DYNAMIC ){
      sqliteFree(pOp->p3);
    }
    pOp->p3type = P3_DYNAMIC;
    pOp->p3 = pTos->z;
    break;
  }

  /* Otherwise fall through to the next case, OP_String */
}
  
/* Opcode: String * * P3
**
** The string value P3 is pushed onto the stack.  If P3==0 then a
** NULL is pushed onto the stack. P3 is assumed to be a nul terminated
** string encoded with the database native encoding.
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
          fprintf(p->trace, " si:%lld", pTos[i].i);
        }else if( pTos[i].flags & MEM_Int ){
          fprintf(p->trace, " i:%lld", pTos[i].i);
        }else if( pTos[i].flags & MEM_Real ){
          fprintf(p->trace, " r:%g", pTos[i].r);
        }else{
          char zBuf[100];
          prettyPrintMem(&pTos[i], zBuf, 100);
          fprintf(p->trace, " ");
          fprintf(p->trace, zBuf);
        }
      }
      if( rc!=0 ) fprintf(p->trace," rc=%d",rc);
      fprintf(p->trace,"\n");
    }







|







4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
          fprintf(p->trace, " si:%lld", pTos[i].i);
        }else if( pTos[i].flags & MEM_Int ){
          fprintf(p->trace, " i:%lld", pTos[i].i);
        }else if( pTos[i].flags & MEM_Real ){
          fprintf(p->trace, " r:%g", pTos[i].r);
        }else{
          char zBuf[100];
          sqlite3VdbeMemPrettyPrint(&pTos[i], zBuf, 100);
          fprintf(p->trace, " ");
          fprintf(p->trace, zBuf);
        }
      }
      if( rc!=0 ) fprintf(p->trace," rc=%d",rc);
      fprintf(p->trace,"\n");
    }
Changes to src/vdbeInt.h.
386
387
388
389
390
391
392


int sqlite3VdbeMemIntegerify(Mem*);
int sqlite3VdbeMemRealify(Mem*);
int sqlite3VdbeMemFromBtree(BtCursor*,int,int,int,Mem*);
void sqlite3VdbeMemRelease(Mem *p);
#ifndef NDEBUG
void sqlite3VdbeMemSanity(Mem*, u8);
#endif









>
>
386
387
388
389
390
391
392
393
394
int sqlite3VdbeMemIntegerify(Mem*);
int sqlite3VdbeMemRealify(Mem*);
int sqlite3VdbeMemFromBtree(BtCursor*,int,int,int,Mem*);
void sqlite3VdbeMemRelease(Mem *p);
#ifndef NDEBUG
void sqlite3VdbeMemSanity(Mem*, u8);
#endif
int sqlite3VdbeMemTranslate(Mem*, u8);
void sqlite3VdbeMemPrettyPrint(Mem *pMem, char *zBuf, int nBuf);
Changes to src/vdbeapi.c.
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540

  rc = vdbeUnbind(p, i);
  if( rc ){
    return rc;
  }
  pVar = &p->apVar[i-1];

  /* There may or may not be a byte order mark at the start of the UTF-16.
  ** Either way set 'txt_enc' to the SQLITE_UTF16* value indicating the 
  ** actual byte order used by this string. If the string does happen
  ** to contain a BOM, then move zData so that it points to the first
  ** byte after the BOM.
  */
  txt_enc = sqlite3UtfReadBom(zData, nData);
  if( txt_enc ){
    zData = (void *)(((u8 *)zData) + 2);
    nData -= 2;
  }else{
    txt_enc = SQLITE_BIGENDIAN?SQLITE_UTF16BE:SQLITE_UTF16LE;
  }
  rc = sqlite3VdbeMemSetStr(pVar, zData, nData, txt_enc, xDel);
  if( rc ){
    return rc;
  }
  rc = sqlite3VdbeChangeEncoding(pVar, p->db->enc);
  return rc;
}







<
<
<
<
<
<
<
<
<
<
<
<
<
|






514
515
516
517
518
519
520













521
522
523
524
525
526
527

  rc = vdbeUnbind(p, i);
  if( rc ){
    return rc;
  }
  pVar = &p->apVar[i-1];














  rc = sqlite3VdbeMemSetStr(pVar, zData, nData, SQLITE_UTF16NATIVE, xDel);
  if( rc ){
    return rc;
  }
  rc = sqlite3VdbeChangeEncoding(pVar, p->db->enc);
  return rc;
}
Changes to src/vdbemem.c.
17
18
19
20
21
22
23
24
25
26
27
28
29
30

31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include "vdbeInt.h"

/*
** If pMem is a string object, this routine sets the encoding of the string
** (to one of UTF-8 or UTF16) and whether or not the string is
** nul-terminated. If pMem is not a string object, then this routine is
** a no-op.
**
** The second argument, "desiredEnc" is one of TEXT_Utf8, TEXT_Utf16le
** or TEXT_Utf16be.  This routine changes the encoding of pMem to match

** desiredEnc.
**
** SQLITE_OK is returned if the conversion is successful (or not required).
** SQLITE_NOMEM may be returned if a malloc() fails during conversion
** between formats.
*/
int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){
  /* If this is not a string, or if it is a string but the encoding is
  ** already correct, do nothing. */
  if( !(pMem->flags&MEM_Str) || pMem->enc==desiredEnc ){
    return SQLITE_OK;
  }

  if( pMem->enc==SQLITE_UTF8 || desiredEnc==SQLITE_UTF8 ){
    /* If the current encoding does not match the desired encoding, then
    ** we will need to do some translation between encodings.
    */
    char *z;
    int n;
    int rc;

    rc = sqlite3utfTranslate(pMem->z, pMem->n, pMem->enc, (void **)&z, 
        &n, desiredEnc);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    sqlite3VdbeMemRelease(pMem);

    /* Result of sqlite3utfTranslate is currently always dynamically
    ** allocated and nul terminated. This might be altered as a performance
    ** enhancement later.
    */
    pMem->z = z;
    pMem->n = n;
    pMem->flags &= ~(MEM_Ephem | MEM_Short | MEM_Static);
    pMem->flags |= MEM_Str | MEM_Dyn | MEM_Term;
    pMem->xDel = 0;
  }else{
    /* Must be translating between UTF-16le and UTF-16be. */
    int i;
    u8 *pFrom, *pTo;
    sqlite3VdbeMemMakeWriteable(pMem);
    for(i=0, pFrom=pMem->z, pTo=&pMem->z[1]; i<pMem->n; i+=2, pFrom+=2,pTo+=2){
      u8 temp = *pFrom;
      *pFrom = *pTo;
      *pTo = temp;
    }
  }
  pMem->enc = desiredEnc;
  return SQLITE_OK;
}

/*
** Make the given Mem object MEM_Dyn.
**
** Return SQLITE_OK on success or SQLITE_NOMEM if malloc fails.
*/







|
|
|
<

<
|
>
|






<
<



<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







17
18
19
20
21
22
23
24
25
26

27

28
29
30
31
32
33
34
35
36


37
38
39










40



























41
42
43
44
45
46
47
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include "vdbeInt.h"

/*
** If pMem is an object with a valid string representation, this routine
** ensures the internal encoding for the string representation is
** 'desiredEnc', one of SQLITE_UTF8, SQLITE_UTF16LE or SQLITE_UTF16BE.

**

** If pMem is not a string object, or the encoding of the string
** representation is already stored using the requested encoding, then this
** routine is a no-op.
**
** SQLITE_OK is returned if the conversion is successful (or not required).
** SQLITE_NOMEM may be returned if a malloc() fails during conversion
** between formats.
*/
int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){


  if( !(pMem->flags&MEM_Str) || pMem->enc==desiredEnc ){
    return SQLITE_OK;
  }










  return sqlite3VdbeMemTranslate(pMem, desiredEnc);



























}

/*
** Make the given Mem object MEM_Dyn.
**
** Return SQLITE_OK on success or SQLITE_NOMEM if malloc fails.
*/
401
402
403
404
405
406
407
408
409
410



411
412
413
414
415
416
417
418
419
420
421
422
423
424
        pMem->flags |= MEM_Term;
      }
      break;

    case SQLITE_UTF16LE:
    case SQLITE_UTF16BE:
      pMem->flags |= MEM_Str;
      if( n<0 ){
        pMem->n = sqlite3utf16ByteLen(z,-1);
        pMem->flags |= MEM_Term;



      }
      break;

    default:
      assert(0);
  }
  if( xDel==SQLITE_TRANSIENT ){
    return sqlite3VdbeMemMakeWriteable(pMem);
  }
  return SQLITE_OK;
}

/*
** Compare the values contained by the two memory cells, returning







|
|

>
>
>






|







361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
        pMem->flags |= MEM_Term;
      }
      break;

    case SQLITE_UTF16LE:
    case SQLITE_UTF16BE:
      pMem->flags |= MEM_Str;
      if( pMem->n<0 ){
        pMem->n = sqlite3utf16ByteLen(pMem->z,-1);
        pMem->flags |= MEM_Term;
      }
      if( sqlite3VdbeMemHandleBom(pMem) ){
        return SQLITE_NOMEM;
      }
      break;

    default:
      assert(0);
  }
  if( pMem->flags&MEM_Ephem ){
    return sqlite3VdbeMemMakeWriteable(pMem);
  }
  return SQLITE_OK;
}

/*
** Compare the values contained by the two memory cells, returning
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
      return -1;
    }

    assert( pMem1->enc==pMem2->enc );
    assert( pMem1->enc==SQLITE_UTF8 || 
            pMem1->enc==SQLITE_UTF16LE || pMem1->enc==SQLITE_UTF16BE );

    /* FIX ME: This may fail if the collation sequence is deleted after
    ** this vdbe program is compiled. We cannot just use BINARY in this
    ** case as this may lead to a segfault caused by traversing an index
    ** table incorrectly.  We need to return an error to the user in this
    ** case.
    */
    assert( !pColl || pColl->xCmp );

    if( pColl ){
      if( pMem1->enc==pColl->enc ){
        return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z);
      }else{







|
|
<
<
|







457
458
459
460
461
462
463
464
465


466
467
468
469
470
471
472
473
      return -1;
    }

    assert( pMem1->enc==pMem2->enc );
    assert( pMem1->enc==SQLITE_UTF8 || 
            pMem1->enc==SQLITE_UTF16LE || pMem1->enc==SQLITE_UTF16BE );

    /* This assert may fail if the collation sequence is deleted after this
    ** vdbe program is compiled. The documentation defines this as an


    ** undefined condition. A crash is usual result.
    */
    assert( !pColl || pColl->xCmp );

    if( pColl ){
      if( pMem1->enc==pColl->enc ){
        return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z);
      }else{
641
642
643
644
645
646
647

648

649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676






677
678
679
680

681
682
683
684
685
686
687
688
689
690
691
/* This function is only available internally, it is not part of the
** external API. It works in a similar way to sqlite3_value_text(),
** except the data returned is in the encoding specified by the second
** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or
** SQLITE_UTF8.
*/
const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){

  assert( enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE || enc==SQLITE_UTF8);

  if( pVal->flags&MEM_Null ){
    /* For a NULL return a NULL Pointer */
    return 0;
  }

  if( pVal->flags&MEM_Str ){
    /* If there is already a string representation, make sure it is in
    ** encoded in the required UTF-16 byte order.
    */
    sqlite3VdbeChangeEncoding(pVal, enc);
  }else if( !(pVal->flags&MEM_Blob) ){
    /* Otherwise, unless this is a blob, convert it to a UTF-16 string */
    sqlite3VdbeMemStringify(pVal, enc);
  }

  return (const void *)(pVal->z);
}

sqlite3_value* sqlite3ValueNew(){
  Mem *p = sqliteMalloc(sizeof(*p));
  if( p ){
    p->flags = MEM_Null;
    p->type = SQLITE_NULL;
  }
  return p;
}

void sqlite3ValueSetStr(sqlite3_value *v, int n, const void *z, u8 enc){






  sqlite3VdbeMemSetStr((Mem *)v, z, n, enc, SQLITE_STATIC);
}

void sqlite3ValueFree(sqlite3_value *v){

  sqlite3ValueSetStr(v, 0, 0, SQLITE_UTF8);
  sqliteFree(v);
}

int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){
  Mem *p = (Mem*)pVal;
  if( (p->flags & MEM_Blob)!=0 || sqlite3ValueText(pVal, enc) ){
    return p->n;
  }
  return 0;
}







>

>

<


<

<
<
<


<


<












|
>
>
>
>
>
>
|



>
|










602
603
604
605
606
607
608
609
610
611
612

613
614

615



616
617

618
619

620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
/* This function is only available internally, it is not part of the
** external API. It works in a similar way to sqlite3_value_text(),
** except the data returned is in the encoding specified by the second
** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or
** SQLITE_UTF8.
*/
const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){
  if( !pVal ) return 0;
  assert( enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE || enc==SQLITE_UTF8);

  if( pVal->flags&MEM_Null ){

    return 0;
  }

  if( pVal->flags&MEM_Str ){



    sqlite3VdbeChangeEncoding(pVal, enc);
  }else if( !(pVal->flags&MEM_Blob) ){

    sqlite3VdbeMemStringify(pVal, enc);
  }

  return (const void *)(pVal->z);
}

sqlite3_value* sqlite3ValueNew(){
  Mem *p = sqliteMalloc(sizeof(*p));
  if( p ){
    p->flags = MEM_Null;
    p->type = SQLITE_NULL;
  }
  return p;
}

void sqlite3ValueSetStr(
  sqlite3_value *v, 
  int n, 
  const void *z, 
  u8 enc,
  void (*xDel)(void*)
){
  if( v ) sqlite3VdbeMemSetStr((Mem *)v, z, n, enc, xDel);
}

void sqlite3ValueFree(sqlite3_value *v){
  if( !v ) return;
  sqlite3ValueSetStr(v, 0, 0, SQLITE_UTF8, SQLITE_STATIC);
  sqliteFree(v);
}

int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){
  Mem *p = (Mem*)pVal;
  if( (p->flags & MEM_Blob)!=0 || sqlite3ValueText(pVal, enc) ){
    return p->n;
  }
  return 0;
}
Changes to test/enc.test.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.1 2004/05/22 08:16:11 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

proc do_bincmp_test {testname got expect} {
  binary scan $expect \c* expectvals
  binary scan $got \c* gotvals







|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.2 2004/06/18 04:24:56 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

proc do_bincmp_test {testname got expect} {
  binary scan $expect \c* expectvals
  binary scan $got \c* gotvals
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

132
133
134
135

136
137
138
139
140
141
142
143
144
145
146
147
# sqlite_utf16to8 (steps 3, 4)
# sqlite_utf16to16le (step 5)
# sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {
 
  # Step 1.
  set utf16le_sqlite [sqlite_utf8to16le $str]
  set utf16le_tcl [encoding convertto unicode $str]
  append utf16le_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)!="littleEndian" } {
    set utf16le_tcl [swap_byte_order $utf16le_tcl]
  }
  do_bincmp_test $testname.1 $utf16le_sqlite $utf16le_tcl
  set utf16le $utf16le_tcl

  # Step 2.
  set utf16be_sqlite [sqlite_utf8to16be $str]
  set utf16be_tcl [encoding convertto unicode $str]
  append utf16be_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16be_tcl [swap_byte_order $utf16be_tcl]
  }
  do_bincmp_test $testname.2 $utf16be_sqlite $utf16be_tcl
  set utf16be $utf16be_tcl
 
  # Step 3.
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16 $utf16le
  } else {
    set utf16 $utf16be
  }
  set utf8_sqlite [sqlite_utf16to8 $utf16]
  do_bincmp_test $testname.3 $utf8_sqlite [binarize $str]

  # Step 4 (little endian).
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite [sqlite_utf16to8 $utf16le_bom]
  do_bincmp_test $testname.4.le $utf8_sqlite [binarize $str]

  # Step 4 (big endian).
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite [sqlite_utf16to8 $utf16be_bom]
  do_bincmp_test $testname.4.be $utf8_sqlite [binarize $str]

  # Step 5 (little endian to little endian).
  set utf16_sqlite [sqlite_utf16to16le $utf16le_bom]
  do_bincmp_test $testname.5.le.le $utf16_sqlite $utf16le

  # Step 5 (big endian to big endian).
  set utf16_sqlite [sqlite_utf16to16be $utf16be_bom]
  do_bincmp_test $testname.5.be.be $utf16_sqlite $utf16be

  # Step 5 (big endian to little endian).
  set utf16_sqlite [sqlite_utf16to16le $utf16be_bom]
  do_bincmp_test $testname.5.be.le $utf16_sqlite $utf16le

  # Step 5 (little endian to big endian).
  set utf16_sqlite [sqlite_utf16to16be $utf16le_bom]
  do_bincmp_test $testname.5.le.be $utf16_sqlite $utf16be
}



test_conversion enc-1 "hello world"
test_conversion enc-2 "sqlite"
test_conversion enc-3 ""

test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]

finish_test











|









|














|




|




|



|



|



|



|



>




>








<



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145

146
147
148
# sqlite_utf16to8 (steps 3, 4)
# sqlite_utf16to16le (step 5)
# sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {
 
  # Step 1.
  set utf16le_sqlite [test_translate $str UTF8 UTF16LE]
  set utf16le_tcl [encoding convertto unicode $str]
  append utf16le_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)!="littleEndian" } {
    set utf16le_tcl [swap_byte_order $utf16le_tcl]
  }
  do_bincmp_test $testname.1 $utf16le_sqlite $utf16le_tcl
  set utf16le $utf16le_tcl

  # Step 2.
  set utf16be_sqlite [test_translate $str UTF8 UTF16BE]
  set utf16be_tcl [encoding convertto unicode $str]
  append utf16be_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16be_tcl [swap_byte_order $utf16be_tcl]
  }
  do_bincmp_test $testname.2 $utf16be_sqlite $utf16be_tcl
  set utf16be $utf16be_tcl
 
  # Step 3.
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16 $utf16le
  } else {
    set utf16 $utf16be
  }
  set utf8_sqlite [test_translate $utf16 UTF16 UTF8]
  do_bincmp_test $testname.3 $utf8_sqlite [binarize $str]

  # Step 4 (little endian).
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite [test_translate $utf16le_bom UTF16 UTF8]
  do_bincmp_test $testname.4.le $utf8_sqlite [binarize $str]

  # Step 4 (big endian).
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite [test_translate $utf16be_bom UTF16 UTF8]
  do_bincmp_test $testname.4.be $utf8_sqlite [binarize $str]

  # Step 5 (little endian to little endian).
  set utf16_sqlite [test_translate $utf16le_bom UTF16LE UTF16LE]
  do_bincmp_test $testname.5.le.le $utf16_sqlite $utf16le

  # Step 5 (big endian to big endian).
  set utf16_sqlite [test_translate $utf16be_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.be.be $utf16_sqlite $utf16be

  # Step 5 (big endian to little endian).
  set utf16_sqlite [test_translate $utf16be_bom UTF16 UTF16LE]
  do_bincmp_test $testname.5.be.le $utf16_sqlite $utf16le

  # Step 5 (little endian to big endian).
  set utf16_sqlite [test_translate $utf16le_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.le.be $utf16_sqlite $utf16be
}

translate_selftest

test_conversion enc-1 "hello world"
test_conversion enc-2 "sqlite"
test_conversion enc-3 ""
test_conversion enc-X "\u0100"
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]

finish_test