/ Check-in [39a415ea]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Optimisation for unicode encoding conversion routines. (CVS 1614)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:39a415eaa65964742e40b7ea4d471fa04007c6c9
User & Date: danielk1977 2004-06-18 04:24:54
Context
2004-06-18
06:02
Fix a couple of gcc warnings. (CVS 1615) check-in: 960f55f3 user: danielk1977 tags: trunk
04:24
Optimisation for unicode encoding conversion routines. (CVS 1614) check-in: 39a415ea user: danielk1977 tags: trunk
2004-06-17
19:04
Documentation updates in preparation for the release of version 3.0.0. (CVS 1613) check-in: 9fb29f73 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/build.c.

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
...
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
....
2623
2624
2625
2626
2627
2628
2629












**     DROP INDEX
**     creating ID lists
**     BEGIN TRANSACTION
**     COMMIT
**     ROLLBACK
**     PRAGMA
**
** $Id: build.c,v 1.220 2004/06/17 06:13:34 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** This routine is called when a new SQL statement is beginning to
** be parsed.  Check to see if the schema for the database needs
................................................................................
  if( nName<0 ) nName = strlen(zName);
  if( db->xCollNeeded ){
    zExternal = sqliteStrNDup(zName, nName);
    if( !zExternal ) return;
      db->xCollNeeded(db->pCollNeededArg, db, (int)db->enc, zExternal);
  }
  if( db->xCollNeeded16 ){
    if( SQLITE_BIGENDIAN ){
      zExternal = sqlite3utf8to16be(zName, nName);
    }else{
      zExternal = sqlite3utf8to16le(zName, nName);
    }
    if( !zExternal ) return;
    db->xCollNeeded16(db->pCollNeededArg, db, (int)db->enc, zExternal);
  }
  if( zExternal ) sqliteFree(zExternal);
}

static int synthCollSeq(Parse *pParse, CollSeq *pColl){
  /* The collation factory failed to deliver a function but there may be
  ** other versions of this collation function (for other text encodings)
  ** available. Use one of these instead. Avoid a UTF-8 <-> UTF-16
  ** conversion if possible.
................................................................................
** sqlite3BeginWriteOperation() but there should only be a single
** call to sqlite3EndWriteOperation() at the conclusion of the statement.
*/
void sqlite3EndWriteOperation(Parse *pParse){
  /* No-op placeholder.  The body does nothing with pParse; the original
  ** author marked this routine for deletion ("Delete me!"). */
  (void)pParse;
}



















|







 







|
|
<
|
<



<







 







>
>
>
>
>
>
>
>
>
>
>
>
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
...
955
956
957
958
959
960
961
962
963

964

965
966
967

968
969
970
971
972
973
974
....
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
**     DROP INDEX
**     creating ID lists
**     BEGIN TRANSACTION
**     COMMIT
**     ROLLBACK
**     PRAGMA
**
** $Id: build.c,v 1.221 2004/06/18 04:24:54 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** This routine is called when a new SQL statement is beginning to
** be parsed.  Check to see if the schema for the database needs
................................................................................
  if( nName<0 ) nName = strlen(zName);
  if( db->xCollNeeded ){
    zExternal = sqliteStrNDup(zName, nName);
    if( !zExternal ) return;
      db->xCollNeeded(db->pCollNeededArg, db, (int)db->enc, zExternal);
  }
  if( db->xCollNeeded16 ){
    sqlite3_value *pTmp = sqlite3GetTransientValue(db);
    sqlite3ValueSetStr(pTmp, -1, zName, SQLITE_UTF8, SQLITE_STATIC);

    zExternal = sqlite3ValueText(pTmp, SQLITE_UTF16NATIVE);

    if( !zExternal ) return;
    db->xCollNeeded16(db->pCollNeededArg, db, (int)db->enc, zExternal);
  }

}

static int synthCollSeq(Parse *pParse, CollSeq *pColl){
  /* The collation factory failed to deliver a function but there may be
  ** other versions of this collation function (for other text encodings)
  ** available. Use one of these instead. Avoid a UTF-8 <-> UTF-16
  ** conversion if possible.
................................................................................
** sqlite3BeginWriteOperation() but there should only be a single
** call to sqlite3EndWriteOperation() at the conclusion of the statement.
*/
void sqlite3EndWriteOperation(Parse *pParse){
  /* No-op placeholder.  The body does nothing with pParse; the original
  ** author marked this routine for deletion ("Delete me!"). */
  (void)pParse;
}

/*
** Hand back the sqlite3_value object that is cached on the database
** handle in db->pValue and used for transient text-encoding conversions
** during SQL compilation.
**
** The object is created with sqlite3ValueNew() the first time this
** routine is called for a handle and is reused on every later call.
** Whatever sqlite3ValueNew() produced is stored and returned as-is, so
** the caller receives the same pointer that is kept in db->pValue.
*/
sqlite3_value *sqlite3GetTransientValue(sqlite *db){
  if( db->pValue ){
    /* Already allocated by an earlier call - reuse it. */
    return db->pValue;
  }
  db->pValue = sqlite3ValueNew();
  return db->pValue;
}

Changes to src/main.c.

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
...
511
512
513
514
515
516
517






518
519
520
521
522
523
524
...
745
746
747
748
749
750
751
752
753





754
755
756
757
758
759
760
761
762
763
764
765
766
...
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884




885


886
887
888
889
890
891
892
893
894
895





896
897
898
899
900
901
902
....
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052

1053
1054



1055
1056
1057
1058
1059
1060
1061
....
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
....
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150

1151
1152
1153
1154
1155
1156
1157
....
1173
1174
1175
1176
1177
1178
1179
1180
1181

1182
1183
1184
1185
1186
1187


1188
1189

1190
1191
1192
1193
1194




1195
1196
1197
1198
1199
1200
1201
....
1269
1270
1271
1272
1273
1274
1275
1276



1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
**
*************************************************************************
** Main file for the SQLite library.  The routines in this file
** implement the programmer interface to the library.  Routines in
** other files are for internal use by SQLite and should not be
** accessed by users of the library.
**
** $Id: main.c,v 1.224 2004/06/16 12:00:56 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>

/*
** A pointer to this structure is used to communicate information
................................................................................
    CollSeq *pColl = (CollSeq *)sqliteHashData(i);
    sqliteFree(pColl);
  }
  sqlite3HashClear(&db->aCollSeq);

  sqlite3HashClear(&db->aFunc);
  sqlite3Error(db, SQLITE_OK, 0); /* Deallocates any cached error strings. */






  sqliteFree(db);
}

/*
** Rollback all database files.
*/
void sqlite3RollbackAll(sqlite *db){
................................................................................
  int iCollateArg,
  void *pUserData,
  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
  void (*xFinal)(sqlite3_context*)
){
  int rc;
  char *zFunctionName8;
  zFunctionName8 = sqlite3utf16to8(zFunctionName, -1, SQLITE_BIGENDIAN);





  if( !zFunctionName8 ){
    return SQLITE_NOMEM;
  }
  rc = sqlite3_create_function(db, zFunctionName8, nArg, eTextRep, 
      iCollateArg, pUserData, xFunc, xStep, xFinal);
  sqliteFree(zFunctionName8);
  return rc;
}

/*
** Register a trace function.  The pArg from the previously registered trace
** is returned.  
**
................................................................................
}

/*
** Return the English-language description of the most recent error as a
** UTF-8 string.
**
** A NULL handle is treated as the aftermath of a malloc() failure inside
** sqlite3_open(), so the generic SQLITE_NOMEM text is returned.  Otherwise
** the message stored in db->zErrMsg is preferred; when no such message is
** stored, the generic text for db->errCode is returned instead.
*/
const char *sqlite3_errmsg(sqlite3 *db){
  if( db==0 ){
    return sqlite3ErrStr(SQLITE_NOMEM);
  }
  return db->zErrMsg ? db->zErrMsg : sqlite3ErrStr(db->errCode);
}

/*
** Return the English-language description of the most recent error as a
** UTF-16 string.
**
** For a valid handle the UTF-16 text is produced on demand from the
** UTF-8 message returned by sqlite3_errmsg() and kept in db->zErrMsg16,
** so the conversion is skipped while a converted copy is already stored.
**
** A NULL handle is treated as the aftermath of a malloc() failure inside
** sqlite3_open().  A static UTF-16 copy of "out of memory" is returned
** in that case.
*/
const void *sqlite3_errmsg16(sqlite3 *db){
  if( db ){
    if( db->zErrMsg16==0 ){
      char const *zUtf8 = sqlite3_errmsg(db);
      db->zErrMsg16 = SQLITE_BIGENDIAN ? sqlite3utf8to16be(zUtf8, -1)
                                       : sqlite3utf8to16le(zUtf8, -1);
    }
    return db->zErrMsg16;
  }else{
    /* Every character of the message lies in the unicode range
    ** 0x00-0xFF.  The big-endian encoding is padded with one extra
    ** zero byte, so the little-endian encoding of the same text starts
    ** one byte into the array.
    */
    static char outOfMemBe[] = {
      0, 'o', 0, 'u', 0, 't', 0, ' ', 
      0, 'o', 0, 'f', 0, ' ', 
      0, 'm', 0, 'e', 0, 'm', 0, 'o', 0, 'r', 0, 'y', 0, 0, 0
    };
    return (void *)&outOfMemBe[SQLITE_BIGENDIAN ? 0 : 1];
  }
}

/*
** Return the most recent error code recorded on the database handle.
** A NULL handle yields SQLITE_NOMEM.
*/
int sqlite3_errcode(sqlite3 *db){
  return db ? db->errCode : SQLITE_NOMEM;
}

................................................................................
  sqlite3_stmt **ppStmt,    /* OUT: A pointer to the prepared statement */
  const void **pzTail       /* OUT: End of parsed string */
){
  /* This function currently works by first transforming the UTF-16
  ** encoded string to UTF-8, then invoking sqlite3_prepare(). The
  ** tricky bit is figuring out the pointer to return in *pzTail.
  */
  char *zSql8 = 0;
  char const *zTail8 = 0;
  int rc;


  zSql8 = sqlite3utf16to8(zSql, nBytes, SQLITE_BIGENDIAN);



  if( !zSql8 ){
    sqlite3Error(db, SQLITE_NOMEM, 0);
    return SQLITE_NOMEM;
  }
  rc = sqlite3_prepare(db, zSql8, -1, ppStmt, &zTail8);

  if( zTail8 && pzTail ){
................................................................................
    ** equivalent pointer into the UTF-16 string by counting the unicode
    ** characters between zSql8 and zTail8, and then returning a pointer
    ** the same number of characters into the UTF-16 string.
    */
    int chars_parsed = sqlite3utf8CharLen(zSql8, zTail8-zSql8);
    *pzTail = (u8 *)zSql + sqlite3utf16ByteLen(zSql, chars_parsed);
  }
  sqliteFree(zSql8);
 
  return rc;
}

/*
** This routine does the work of opening a database on behalf of
** sqlite3_open() and sqlite3_open16(). The database filename "zFilename"  
................................................................................
    db->temp_store = 2;
    db->nMaster = 0;    /* Disable atomic multi-file commit for :memory: */
  }else{
    db->nMaster = -1;   /* Size of master journal filename initially unknown */
  }
  rc = sqlite3BtreeFactory(db, zFilename, 0, MAX_PAGES, &db->aDb[0].pBt);
  if( rc!=SQLITE_OK ){
    /* FIX ME: sqlite3BtreeFactory() should call sqlite3Error(). */
    sqlite3Error(db, rc, 0);
    db->magic = SQLITE_MAGIC_CLOSED;
    goto opendb_out;
  }
  db->aDb[0].zName = "main";
  db->aDb[1].zName = "temp";

  /* Register all built-in functions, but do not attempt to read the
  ** database schema yet. This is delayed until the first time the database
  ** is accessed.
  */
  sqlite3RegisterBuiltinFunctions(db);
  if( rc==SQLITE_OK ){

    db->magic = SQLITE_MAGIC_OPEN;
  }else{
    sqlite3Error(db, rc, "%s", zErrMsg, 0);
    if( zErrMsg ) sqliteFree(zErrMsg);
    db->magic = SQLITE_MAGIC_CLOSED;
  }

................................................................................
/*
** Open a new database handle.  The filename is supplied as UTF-16 text.
**
** The name is first converted to UTF-8 and then handed to openDatabase().
** If the conversion yields NULL, *ppDb is cleared and SQLITE_NOMEM is
** returned.  After a successful open the database text encoding is
** switched to UTF-16 with a PRAGMA; the result of that PRAGMA is not
** examined.  The temporary UTF-8 copy of the name is released before
** returning the result code of openDatabase().
*/
int sqlite3_open16(
  const void *zFilename, 
  sqlite3 **ppDb
){
  int rc = SQLITE_NOMEM;   /* Result code; stays NOMEM if conversion fails */
  char *zUtf8;             /* zFilename converted from UTF-16 to UTF-8 */

  assert( ppDb );

  zUtf8 = sqlite3utf16to8(zFilename, -1, SQLITE_BIGENDIAN);
  if( zUtf8==0 ){
    *ppDb = 0;
  }else{
    rc = openDatabase(zUtf8, ppDb);
    if( rc==SQLITE_OK && *ppDb!=0 ){
      sqlite3_exec(*ppDb, "PRAGMA encoding = 'UTF-16'", 0, 0, 0);
    }
    sqliteFree(zUtf8);
  }
  return rc;
}

/*
** The following routine destroys a virtual machine that is created by
** the sqlite3_compile() routine. The integer returned is an SQLITE_
................................................................................
  sqlite3* db, 
  const char *zName, 
  int enc, 
  void* pCtx,
  int(*xCompare)(void*,int,const void*,int,const void*)
){
  int rc;
  char *zName8 = sqlite3utf16to8(zName, -1, SQLITE_BIGENDIAN);



  rc = sqlite3_create_collation(db, zName8, enc, pCtx, xCompare);
  sqliteFree(zName8);
  return rc;
}

/*
** Register a collation sequence factory callback with the database handle
** db. Replace any previously installed collation sequence factory.
*/
int sqlite3_collation_needed(







|







 







>
>
>
>
>
>







 







|
<
>
>
>
>
>
|


|

<







 







|





|
|

|







<
<
<
<
<
|
|
|
|
|
|
|
|
|
|
<

<
<
<
<
>
>
>
>

>
>
|
<
<
<
<
<
<
|
|
<
>
>
>
>
>







 







|


>

<
>
>
>







 







<







 







<













>







 







|
|
>


<
<
<
|
>
>
|
<
>
|
|
|
|
<
>
>
>
>







 







|
>
>
>
|
<
<







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
...
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
...
751
752
753
754
755
756
757
758

759
760
761
762
763
764
765
766
767
768

769
770
771
772
773
774
775
...
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872





873
874
875
876
877
878
879
880
881
882

883




884
885
886
887
888
889
890
891






892
893

894
895
896
897
898
899
900
901
902
903
904
905
....
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057

1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
....
1069
1070
1071
1072
1073
1074
1075

1076
1077
1078
1079
1080
1081
1082
....
1135
1136
1137
1138
1139
1140
1141

1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
....
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189



1190
1191
1192
1193

1194
1195
1196
1197
1198

1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
....
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288


1289
1290
1291
1292
1293
1294
1295
**
*************************************************************************
** Main file for the SQLite library.  The routines in this file
** implement the programmer interface to the library.  Routines in
** other files are for internal use by SQLite and should not be
** accessed by users of the library.
**
** $Id: main.c,v 1.225 2004/06/18 04:24:54 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>

/*
** A pointer to this structure is used to communicate information
................................................................................
    CollSeq *pColl = (CollSeq *)sqliteHashData(i);
    sqliteFree(pColl);
  }
  sqlite3HashClear(&db->aCollSeq);

  sqlite3HashClear(&db->aFunc);
  sqlite3Error(db, SQLITE_OK, 0); /* Deallocates any cached error strings. */
  if( db->pValue ){
    sqlite3ValueFree(db->pValue);
  }
  if( db->pErr ){
    sqlite3ValueFree(db->pErr);
  }
  sqliteFree(db);
}

/*
** Rollback all database files.
*/
void sqlite3RollbackAll(sqlite *db){
................................................................................
  int iCollateArg,
  void *pUserData,
  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
  void (*xFinal)(sqlite3_context*)
){
  int rc;
  char const *zFunc8;


  sqlite3_value *pTmp = sqlite3GetTransientValue(db);
  sqlite3ValueSetStr(pTmp, -1, zFunctionName, SQLITE_UTF16NATIVE,SQLITE_STATIC);
  zFunc8 = sqlite3ValueText(pTmp, SQLITE_UTF8);

  if( !zFunc8 ){
    return SQLITE_NOMEM;
  }
  rc = sqlite3_create_function(db, zFunc8, nArg, eTextRep, 
      iCollateArg, pUserData, xFunc, xStep, xFinal);

  return rc;
}

/*
** Register a trace function.  The pArg from the previously registered trace
** is returned.  
**
................................................................................
}

/*
** Return the English-language description of the most recent error as a
** UTF-8 string.
**
** If the handle is NULL or carries no error value (db->pErr is NULL),
** the generic SQLITE_NOMEM text is returned; a NULL handle is assumed to
** result from a malloc() failure inside sqlite3_open().  Otherwise the
** text held by db->pErr is returned when it is available, and the
** generic text for db->errCode is returned when it is not.
*/
const char *sqlite3_errmsg(sqlite3 *db){
  if( db==0 || db->pErr==0 ){
    return sqlite3ErrStr(SQLITE_NOMEM);
  }
  if( sqlite3_value_text(db->pErr) ){
    return sqlite3_value_text(db->pErr);
  }
  /* No message text is stored; fall back on the error code's text. */
  return sqlite3ErrStr(db->errCode);
}

/*
** Return the English-language description of the most recent error as a
** UTF-16 string.
**
** When the handle and its error value (db->pErr) both exist, the UTF-16
** text of that value is returned.  If the value currently has no UTF-16
** text, it is first loaded with the generic UTF-8 text for db->errCode
** and the UTF-16 form is requested again.
**
** In every other case - NULL handle, NULL db->pErr, or no UTF-16 text
** even after the reload - a static UTF-16 copy of "out of memory" is
** returned.  A NULL handle is assumed to result from a malloc() failure
** inside sqlite3_open().
*/
const void *sqlite3_errmsg16(sqlite3 *db){
  /* Every character of the message lies in the unicode range
  ** 0x00-0xFF.  The big-endian encoding is padded with one extra zero
  ** byte, so the little-endian encoding of the same text starts one
  ** byte into the array.
  */
  static char outOfMemBe[] = {
    0, 'o', 0, 'u', 0, 't', 0, ' ', 
    0, 'o', 0, 'f', 0, ' ', 
    0, 'm', 0, 'e', 0, 'm', 0, 'o', 0, 'r', 0, 'y', 0, 0, 0
  };

  if( db!=0 && db->pErr!=0 ){
    sqlite3_value *pErr = db->pErr;
    if( sqlite3_value_text16(pErr)==0 ){
      /* No text available: substitute the generic text for the code. */
      sqlite3ValueSetStr(pErr, -1, sqlite3ErrStr(db->errCode),
          SQLITE_UTF8, SQLITE_STATIC);
    }
    if( sqlite3_value_text16(pErr)!=0 ){
      return sqlite3_value_text16(pErr);
    }
  }

  /* Fallback: pick the byte offset that matches the native byte order. */
  return (void *)(&outOfMemBe[SQLITE_UTF16NATIVE==SQLITE_UTF16LE?1:0]);
}

/*
** Return the most recent error code recorded on the database handle.
** A NULL handle yields SQLITE_NOMEM.
*/
int sqlite3_errcode(sqlite3 *db){
  return db ? db->errCode : SQLITE_NOMEM;
}

................................................................................
  sqlite3_stmt **ppStmt,    /* OUT: A pointer to the prepared statement */
  const void **pzTail       /* OUT: End of parsed string */
){
  /* This function currently works by first transforming the UTF-16
  ** encoded string to UTF-8, then invoking sqlite3_prepare(). The
  ** tricky bit is figuring out the pointer to return in *pzTail.
  */
  char const *zSql8 = 0;
  char const *zTail8 = 0;
  int rc;
  sqlite3_value *pTmp;


  pTmp = sqlite3GetTransientValue(db);
  sqlite3ValueSetStr(pTmp, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC);
  zSql8 = sqlite3ValueText(pTmp, SQLITE_UTF8);
  if( !zSql8 ){
    sqlite3Error(db, SQLITE_NOMEM, 0);
    return SQLITE_NOMEM;
  }
  rc = sqlite3_prepare(db, zSql8, -1, ppStmt, &zTail8);

  if( zTail8 && pzTail ){
................................................................................
    ** equivalent pointer into the UTF-16 string by counting the unicode
    ** characters between zSql8 and zTail8, and then returning a pointer
    ** the same number of characters into the UTF-16 string.
    */
    int chars_parsed = sqlite3utf8CharLen(zSql8, zTail8-zSql8);
    *pzTail = (u8 *)zSql + sqlite3utf16ByteLen(zSql, chars_parsed);
  }

 
  return rc;
}

/*
** This routine does the work of opening a database on behalf of
** sqlite3_open() and sqlite3_open16(). The database filename "zFilename"  
................................................................................
    db->temp_store = 2;
    db->nMaster = 0;    /* Disable atomic multi-file commit for :memory: */
  }else{
    db->nMaster = -1;   /* Size of master journal filename initially unknown */
  }
  rc = sqlite3BtreeFactory(db, zFilename, 0, MAX_PAGES, &db->aDb[0].pBt);
  if( rc!=SQLITE_OK ){

    sqlite3Error(db, rc, 0);
    db->magic = SQLITE_MAGIC_CLOSED;
    goto opendb_out;
  }
  db->aDb[0].zName = "main";
  db->aDb[1].zName = "temp";

  /* Register all built-in functions, but do not attempt to read the
  ** database schema yet. This is delayed until the first time the database
  ** is accessed.
  */
  sqlite3RegisterBuiltinFunctions(db);
  if( rc==SQLITE_OK ){
    sqlite3Error(db, SQLITE_OK, 0);
    db->magic = SQLITE_MAGIC_OPEN;
  }else{
    sqlite3Error(db, rc, "%s", zErrMsg, 0);
    if( zErrMsg ) sqliteFree(zErrMsg);
    db->magic = SQLITE_MAGIC_CLOSED;
  }

................................................................................
/*
** Open a new database handle.  The filename is supplied as UTF-16 text.
**
** *ppDb is cleared up front.  A private sqlite3_value is created to
** convert the name to UTF-8; if no UTF-8 text comes back, SQLITE_NOMEM
** is returned without attempting to open anything.  Otherwise the name
** is handed to openDatabase() and, after a successful open, the database
** text encoding is switched to UTF-16 with a PRAGMA whose result is not
** examined.  The conversion value is released before returning.
*/
int sqlite3_open16(
  const void *zFilename, 
  sqlite3 **ppDb
){
  sqlite3_value *pUtf16;    /* Holds zFilename for the encoding conversion */
  char const *zUtf8;        /* zFilename encoded in UTF-8 instead of UTF-16 */
  int rc;                   /* Result code handed back to the caller */

  assert( ppDb );
  *ppDb = 0;

  pUtf16 = sqlite3ValueNew();
  sqlite3ValueSetStr(pUtf16, -1, zFilename, SQLITE_UTF16NATIVE, SQLITE_STATIC);
  zUtf8 = sqlite3ValueText(pUtf16, SQLITE_UTF8);

  if( zUtf8==0 ){
    rc = SQLITE_NOMEM;
  }else{
    rc = openDatabase(zUtf8, ppDb);
    if( rc==SQLITE_OK && *ppDb!=0 ){
      sqlite3_exec(*ppDb, "PRAGMA encoding = 'UTF-16'", 0, 0, 0);
    }
  }

  /* zUtf8 is not used past this point, so the value can be released. */
  if( pUtf16!=0 ){
    sqlite3ValueFree(pUtf16);
  }
  return rc;
}

/*
** The following routine destroys a virtual machine that is created by
** the sqlite3_compile() routine. The integer returned is an SQLITE_
................................................................................
  sqlite3* db, 
  const char *zName, 
  int enc, 
  void* pCtx,
  int(*xCompare)(void*,int,const void*,int,const void*)
){
  int rc;
  char const *zName8;
  sqlite3_value *pTmp = sqlite3GetTransientValue(db);
  sqlite3ValueSetStr(pTmp, -1, zName, SQLITE_UTF16NATIVE, SQLITE_STATIC);
  zName8 = sqlite3ValueText(pTmp, SQLITE_UTF8);
  return sqlite3_create_collation(db, zName8, enc, pCtx, xCompare);


}

/*
** Register a collation sequence factory callback with the database handle
** db. Replace any previously installed collation sequence factory.
*/
int sqlite3_collation_needed(

Changes to src/sqliteInt.h.

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
190
191
192
193
194
195
196

197
198
199
200
201
202
203
...
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432





433
434
435
436
437
438
439
....
1209
1210
1211
1212
1213
1214
1215

1216
1217
1218
1219
1220
1221
1222
....
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
....
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
....
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417

**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.286 2004/06/17 05:36:44 danielk1977 Exp $
*/
#include "config.h"
#include "sqlite3.h"
#include "hash.h"
#include "parse.h"
#include <stdio.h>
#include <stdlib.h>
................................................................................
# define sqliteMallocRaw(X) sqlite3Malloc_(X,0,__FILE__,__LINE__)
# define sqliteFree(X)      sqlite3Free_(X,__FILE__,__LINE__)
# define sqliteRealloc(X,Y) sqlite3Realloc_(X,Y,__FILE__,__LINE__)
# define sqliteStrDup(X)    sqlite3StrDup_(X,__FILE__,__LINE__)
# define sqliteStrNDup(X,Y) sqlite3StrNDup_(X,Y,__FILE__,__LINE__)
  void sqlite3StrRealloc(char**);
#else

# define sqlite3Realloc_(X,Y) sqliteRealloc(X,Y)
# define sqlite3StrRealloc(X)
#endif

/*
** This variable gets set if malloc() ever fails.  After it gets set,
** the SQLite library shuts down permanently.
................................................................................
#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
  int (*xProgress)(void *);     /* The progress callback */
  void *pProgressArg;           /* Argument to the progress callback */
  int nProgressOps;             /* Number of opcodes for progress callback */
#endif

  int errCode;                  /* Most recent error code (SQLITE_*) */
  char *zErrMsg;                /* Most recent error message (UTF-8 encoded) */
  void *zErrMsg16;              /* Most recent error message (UTF-16 encoded) */
  u8 enc;                       /* Text encoding for this database. */
  u8 autoCommit;                /* The auto-commit flag. */
  int nMaster;                  /* Length of master journal name. -1=unknown */
  void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*);
  void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*);
  void *pCollNeededArg;





};

/*
** Possible values for the sqlite.flags and or Db.flags fields.
**
** On sqlite.flags, the SQLITE_InTrans value means that we have
** executed a BEGIN.  On Db.flags, SQLITE_InTrans means a statement
................................................................................
#ifdef SQLITE_DEBUG
  void *sqlite3Malloc_(int,int,char*,int);
  void sqlite3Free_(void*,char*,int);
  void *sqlite3Realloc_(void*,int,char*,int);
  char *sqlite3StrDup_(const char*,char*,int);
  char *sqlite3StrNDup_(const char*, int,char*,int);
  void sqlite3CheckMemory(void*,int);

#else
  void *sqliteMalloc(int);
  void *sqliteMallocRaw(int);
  void sqliteFree(void*);
  void *sqliteRealloc(void*,int);
  char *sqliteStrDup(const char*);
  char *sqliteStrNDup(const char*, int);
................................................................................
int sqlite3FixExprList(DbFixer*, ExprList*);
int sqlite3FixTriggerStep(DbFixer*, TriggerStep*);
double sqlite3AtoF(const char *z, const char **);
char *sqlite3_snprintf(int,char*,const char*,...);
int sqlite3GetInt32(const char *, int*);
int sqlite3GetInt64(const char *, i64*);
int sqlite3FitsIn64Bits(const char *);
unsigned char *sqlite3utf16to8(const void *pData, int N, int big_endian);
void *sqlite3utf8to16be(const unsigned char *pIn, int N);
void *sqlite3utf8to16le(const unsigned char *pIn, int N);
void sqlite3utf16to16le(void *pData, int N);
void sqlite3utf16to16be(void *pData, int N);
int sqlite3utf16ByteLen(const void *pData, int nChar);
int sqlite3utf8CharLen(const char *pData, int nByte);
int sqlite3utf8LikeCompare(const unsigned char*, const unsigned char*);
int sqlite3PutVarint(unsigned char *, u64);
int sqlite3GetVarint(const unsigned char *, u64 *);
int sqlite3GetVarint32(const unsigned char *, u32 *);
int sqlite3VarintLen(u64 v);
................................................................................
void sqlite3TableAffinityStr(Vdbe *, Table *);
char sqlite3CompareAffinity(Expr *pExpr, char aff2);
char const *sqlite3AffinityString(char affinity);
int sqlite3IndexAffinityOk(Expr *pExpr, char idx_affinity);
char sqlite3ExprAffinity(Expr *pExpr);
int sqlite3atoi64(const char*, i64*);
void sqlite3Error(sqlite *, int, const char*,...);
int sqlite3utfTranslate(const void *, int , u8 , void **, int *, u8);
u8 sqlite3UtfReadBom(const void *zData, int nData);
void *sqlite3HexToBlob(const char *z);
int sqlite3TwoPartName(Parse *, Token *, Token *, Token **);
const char *sqlite3ErrStr(int);
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold);
int sqlite3ReadSchema(sqlite *db, char **);
CollSeq *sqlite3FindCollSeq(sqlite *,u8 enc, const char *,int,int);
CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char *zName, int nName);
................................................................................
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
int sqlite3CheckCollSeq(Parse *, CollSeq *);
int sqlite3CheckIndexCollSeq(Parse *, Index *);
int sqlite3CheckObjectName(Parse *, const char *);

const void *sqlite3ValueText(sqlite3_value*, u8);
int sqlite3ValueBytes(sqlite3_value*, u8);
void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8);
void sqlite3ValueFree(sqlite3_value*);
sqlite3_value *sqlite3ValueNew();








|







 







>







 







<
<






>
>
>
>
>







 







>







 







<
<
<
<
<







 







<
<







 







|


>
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
...
419
420
421
422
423
424
425


426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
....
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
....
1376
1377
1378
1379
1380
1381
1382





1383
1384
1385
1386
1387
1388
1389
....
1392
1393
1394
1395
1396
1397
1398


1399
1400
1401
1402
1403
1404
1405
....
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.287 2004/06/18 04:24:54 danielk1977 Exp $
*/
#include "config.h"
#include "sqlite3.h"
#include "hash.h"
#include "parse.h"
#include <stdio.h>
#include <stdlib.h>
................................................................................
# define sqliteMallocRaw(X) sqlite3Malloc_(X,0,__FILE__,__LINE__)
# define sqliteFree(X)      sqlite3Free_(X,__FILE__,__LINE__)
# define sqliteRealloc(X,Y) sqlite3Realloc_(X,Y,__FILE__,__LINE__)
# define sqliteStrDup(X)    sqlite3StrDup_(X,__FILE__,__LINE__)
# define sqliteStrNDup(X,Y) sqlite3StrNDup_(X,Y,__FILE__,__LINE__)
  void sqlite3StrRealloc(char**);
#else
# define sqlite3FreeX sqliteFree
# define sqlite3Realloc_(X,Y) sqliteRealloc(X,Y)
# define sqlite3StrRealloc(X)
#endif

/*
** This variable gets set if malloc() ever fails.  After it gets set,
** the SQLite library shuts down permanently.
................................................................................
#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
  int (*xProgress)(void *);     /* The progress callback */
  void *pProgressArg;           /* Argument to the progress callback */
  int nProgressOps;             /* Number of opcodes for progress callback */
#endif

  int errCode;                  /* Most recent error code (SQLITE_*) */


  u8 enc;                       /* Text encoding for this database. */
  u8 autoCommit;                /* The auto-commit flag. */
  int nMaster;                  /* Length of master journal name. -1=unknown */
  void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*);
  void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*);
  void *pCollNeededArg;
  sqlite3_value *pValue;        /* Value used for transient conversions */
  sqlite3_value *pErr;          /* Most recent error message */

  char *zErrMsg;                /* Most recent error message (UTF-8 encoded) */
  char *zErrMsg16;              /* Most recent error message (UTF-16 encoded) */
};

/*
** Possible values for the sqlite.flags and or Db.flags fields.
**
** On sqlite.flags, the SQLITE_InTrans value means that we have
** executed a BEGIN.  On Db.flags, SQLITE_InTrans means a statement
................................................................................
#ifdef SQLITE_DEBUG
  void *sqlite3Malloc_(int,int,char*,int);
  void sqlite3Free_(void*,char*,int);
  void *sqlite3Realloc_(void*,int,char*,int);
  char *sqlite3StrDup_(const char*,char*,int);
  char *sqlite3StrNDup_(const char*, int,char*,int);
  void sqlite3CheckMemory(void*,int);
  void sqlite3FreeX(void *p);
#else
  void *sqliteMalloc(int);
  void *sqliteMallocRaw(int);
  void sqliteFree(void*);
  void *sqliteRealloc(void*,int);
  char *sqliteStrDup(const char*);
  char *sqliteStrNDup(const char*, int);
................................................................................
int sqlite3FixExprList(DbFixer*, ExprList*);
int sqlite3FixTriggerStep(DbFixer*, TriggerStep*);
double sqlite3AtoF(const char *z, const char **);
char *sqlite3_snprintf(int,char*,const char*,...);
int sqlite3GetInt32(const char *, int*);
int sqlite3GetInt64(const char *, i64*);
int sqlite3FitsIn64Bits(const char *);





int sqlite3utf16ByteLen(const void *pData, int nChar);
int sqlite3utf8CharLen(const char *pData, int nByte);
int sqlite3utf8LikeCompare(const unsigned char*, const unsigned char*);
int sqlite3PutVarint(unsigned char *, u64);
int sqlite3GetVarint(const unsigned char *, u64 *);
int sqlite3GetVarint32(const unsigned char *, u32 *);
int sqlite3VarintLen(u64 v);
................................................................................
void sqlite3TableAffinityStr(Vdbe *, Table *);
char sqlite3CompareAffinity(Expr *pExpr, char aff2);
char const *sqlite3AffinityString(char affinity);
int sqlite3IndexAffinityOk(Expr *pExpr, char idx_affinity);
char sqlite3ExprAffinity(Expr *pExpr);
int sqlite3atoi64(const char*, i64*);
void sqlite3Error(sqlite *, int, const char*,...);


void *sqlite3HexToBlob(const char *z);
int sqlite3TwoPartName(Parse *, Token *, Token *, Token **);
const char *sqlite3ErrStr(int);
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold);
int sqlite3ReadSchema(sqlite *db, char **);
CollSeq *sqlite3FindCollSeq(sqlite *,u8 enc, const char *,int,int);
CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char *zName, int nName);
................................................................................
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
int sqlite3CheckCollSeq(Parse *, CollSeq *);
int sqlite3CheckIndexCollSeq(Parse *, Index *);
int sqlite3CheckObjectName(Parse *, const char *);

const void *sqlite3ValueText(sqlite3_value*, u8);
int sqlite3ValueBytes(sqlite3_value*, u8);
void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8, void(*)(void*));
void sqlite3ValueFree(sqlite3_value*);
sqlite3_value *sqlite3ValueNew();
sqlite3_value *sqlite3GetTransientValue(sqlite *db);

Changes to src/test1.c.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the printf() interface to SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.77 2004/06/15 02:44:19 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include "os.h"
#include <stdlib.h>
#include <string.h>

................................................................................
      Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16BE",-1));
      break;
    default:
      assert(0);
  }

  pVal = sqlite3ValueNew();
  sqlite3ValueSetStr(pVal, nA, zA, encin);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
  sqlite3ValueSetStr(pVal, nB, zB, encin);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
  sqlite3ValueFree(pVal);

  Tcl_EvalObjEx(i, pX, 0);
  Tcl_DecrRefCount(pX);
  Tcl_GetIntFromObj(i, Tcl_GetObjResult(i), &res);
  return res;







|







 







|

|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the printf() interface to SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.78 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include "os.h"
#include <stdlib.h>
#include <string.h>

................................................................................
      Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16BE",-1));
      break;
    default:
      assert(0);
  }

  pVal = sqlite3ValueNew();
  sqlite3ValueSetStr(pVal, nA, zA, encin, SQLITE_STATIC);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
  sqlite3ValueSetStr(pVal, nB, zB, encin, SQLITE_STATIC);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
  sqlite3ValueFree(pVal);

  Tcl_EvalObjEx(i, pX, 0);
  Tcl_DecrRefCount(pX);
  Tcl_GetIntFromObj(i, Tcl_GetObjResult(i), &res);
  return res;

Changes to src/test5.c.

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
...
277
278
279
280
281
282
283
















284




















































285


















286
287
288
289
290
291
292
293
294
295
296
297
298
299


300
301
302
303
304
305
306
307
*************************************************************************
** Code for testing the utf.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library. Specifically, the code in this file
** is used for testing the SQLite routines for converting between
** the various supported unicode encodings.
**
** $Id: test5.c,v 1.10 2004/06/12 00:42:35 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "vdbeInt.h"
#include "os.h"         /* to get SQLITE_BIGENDIAN */
#include "tcl.h"
#include <stdlib.h>
#include <string.h>

/*
** Walk the buffer pZ two bytes at a time until a pair of bytes that are
** both 0x00 is found. The value returned is the number of bytes walked
** over, counting the two bytes of that terminating pair.
*/
static int utf16_length(const unsigned char *pZ){
  int nByte = 0;
  for(;;){
    if( pZ[nByte]==0 && pZ[nByte+1]==0 ) break;
    nByte += 2;
  }
  return nByte+2;
}

/*
** tclcmd:   sqlite_utf8to16le  STRING
** title:    Convert STRING from utf-8 to utf-16le
**
** Return the utf-16le encoded string as a byte array. The array length
** is computed by utf16_length(), so it includes the two 0x00 bytes that
** end the converted string.
**
** TCL_ERROR is returned when the argument count is wrong or when
** sqlite3utf8to16le() returns NULL.
*/
static int sqlite_utf8to16le(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  unsigned char *out;
  unsigned char *in;
  Tcl_Obj *res;

  if( objc!=2 ){
    /* The argument list of Tcl_AppendResult() has to be closed by a NULL
    ** pointer. A bare 0 is an int, which is not a portable terminator in
    ** a variable-length argument list.
    */
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), " <utf-8 encoded-string>",
        (char*)0);
    return TCL_ERROR;
  }

  in = Tcl_GetString(objv[1]);
  out = (unsigned char *)sqlite3utf8to16le(in, -1);
  if( !out ){
    /* Without this check utf16_length() would dereference NULL. */
    Tcl_AppendResult(interp, "utf-8 to utf-16le conversion failed", (char*)0);
    return TCL_ERROR;
  }
  res = Tcl_NewByteArrayObj(out, utf16_length(out));
  sqliteFree(out);

  Tcl_SetObjResult(interp, res);

  return TCL_OK;
}

/*
** tclcmd:   sqlite_utf8to16be  STRING
** title:    Convert STRING from utf-8 to utf-16be
**
** Return the utf-16be encoded string as a byte array. The array length
** is computed by utf16_length(), so it includes the two 0x00 bytes that
** end the converted string.
**
** TCL_ERROR is returned when the argument count is wrong or when
** sqlite3utf8to16be() returns NULL.
*/
static int sqlite_utf8to16be(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  unsigned char *out;
  unsigned char *in;
  Tcl_Obj *res;

  if( objc!=2 ){
    /* The argument list of Tcl_AppendResult() has to be closed by a NULL
    ** pointer. A bare 0 is an int, which is not a portable terminator in
    ** a variable-length argument list.
    */
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), " <utf-8 encoded-string>",
        (char*)0);
    return TCL_ERROR;
  }

  /* The input is UTF-8 text, so only the string representation of the
  ** argument is fetched.
  */
  in = Tcl_GetString(objv[1]);
  out = (unsigned char *)sqlite3utf8to16be(in, -1);
  if( !out ){
    /* Without this check utf16_length() would dereference NULL. */
    Tcl_AppendResult(interp, "utf-8 to utf-16be conversion failed", (char*)0);
    return TCL_ERROR;
  }
  res = Tcl_NewByteArrayObj(out, utf16_length(out));
  sqliteFree(out);

  Tcl_SetObjResult(interp, res);

  return TCL_OK;
}

/*
** tclcmd:   sqlite_utf16to16le  STRING
** title:    Convert STRING from utf-16 in native byte order to utf-16le
**
** Return the utf-16le encoded string.  If the input string contains
** a byte-order mark, then the byte order mark should override the
** native byte order.
**
** The argument is copied into a scratch buffer, converted in place by
** sqlite3utf16to16le() and returned as a byte array whose length is
** computed by utf16_length(). TCL_ERROR is returned when the argument
** count is wrong or the scratch buffer cannot be allocated.
*/
static int sqlite_utf16to16le(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  unsigned char *out;
  unsigned char *in;
  int in_len;
  Tcl_Obj *res;

  if( objc!=2 ){
    /* The argument list of Tcl_AppendResult() has to be closed by a NULL
    ** pointer. A bare 0 is an int, which is not a portable terminator in
    ** a variable-length argument list.
    */
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), " <utf-16 encoded-string>",
        (char*)0);
    return TCL_ERROR;
  }

  in = Tcl_GetByteArrayFromObj(objv[1], &in_len);

  /* Three extra zero bytes are appended to the copy. They guarantee that
  ** utf16_length() finds a 0x00 0x00 pair at an even offset inside the
  ** allocation even if the caller's byte array is unterminated or has an
  ** odd length.
  */
  out = (unsigned char *)sqliteMalloc(in_len+3);
  if( !out ){
    Tcl_AppendResult(interp, "out of memory", (char*)0);
    return TCL_ERROR;
  }
  memcpy(out, in, in_len);
  memset(&out[in_len], 0, 3);

  sqlite3utf16to16le(out, -1);
  res = Tcl_NewByteArrayObj(out, utf16_length(out));
  sqliteFree(out);

  Tcl_SetObjResult(interp, res);

  return TCL_OK;
}

/*
** tclcmd:   sqlite_utf16to16be  STRING
** title:    Convert STRING from utf-16 in native byte order to utf-16be
**
** Return the utf-16be encoded string.  If the input string contains
** a byte-order mark, then the byte order mark should override the
** native byte order.
**
** The argument is copied into a scratch buffer, converted in place by
** sqlite3utf16to16be() and returned as a byte array whose length is
** computed by utf16_length(). TCL_ERROR is returned when the argument
** count is wrong or the scratch buffer cannot be allocated.
*/
static int sqlite_utf16to16be(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  unsigned char *out;
  unsigned char *in;
  int in_len;
  Tcl_Obj *res;

  if( objc!=2 ){
    /* The argument list of Tcl_AppendResult() has to be closed by a NULL
    ** pointer. A bare 0 is an int, which is not a portable terminator in
    ** a variable-length argument list.
    */
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), " <utf-16 encoded-string>",
        (char*)0);
    return TCL_ERROR;
  }

  in = Tcl_GetByteArrayFromObj(objv[1], &in_len);

  /* Three extra zero bytes are appended to the copy. They guarantee that
  ** utf16_length() finds a 0x00 0x00 pair at an even offset inside the
  ** allocation even if the caller's byte array is unterminated or has an
  ** odd length.
  */
  out = (unsigned char *)sqliteMalloc(in_len+3);
  if( !out ){
    Tcl_AppendResult(interp, "out of memory", (char*)0);
    return TCL_ERROR;
  }
  memcpy(out, in, in_len);
  memset(&out[in_len], 0, 3);

  sqlite3utf16to16be(out, -1);
  res = Tcl_NewByteArrayObj(out, utf16_length(out));
  sqliteFree(out);

  Tcl_SetObjResult(interp, res);

  return TCL_OK;
}

/*
** tclcmd:   sqlite_utf16to8  STRING
** title:    Convert STRING from utf-16 in native byte order to utf-8
**
** Return the utf-8 encoded string.  If the input string contains
** a byte-order mark, then the byte order mark should override the
** native byte order.
**
** The result is a byte array of strlen()+1 bytes, i.e. it includes the
** terminating 0x00 byte. TCL_ERROR is returned when the argument count
** is wrong or when sqlite3utf16to8() returns NULL.
*/
static int sqlite_utf16to8(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  unsigned char *out;
  unsigned char *in;
  Tcl_Obj *res;

  if( objc!=2 ){
    /* The argument list of Tcl_AppendResult() has to be closed by a NULL
    ** pointer. A bare 0 is an int, which is not a portable terminator in
    ** a variable-length argument list.
    */
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), " <utf-16 encoded-string>",
        (char*)0);
    return TCL_ERROR;
  }

  in = Tcl_GetByteArrayFromObj(objv[1], 0);
  out = sqlite3utf16to8(in, -1, SQLITE_BIGENDIAN);
  if( !out ){
    /* Without this check strlen() would dereference NULL. */
    Tcl_AppendResult(interp, "utf-16 to utf-8 conversion failed", (char*)0);
    return TCL_ERROR;
  }
  res = Tcl_NewByteArrayObj(out, strlen(out)+1);
  sqliteFree(out);

  Tcl_SetObjResult(interp, res);

  return TCL_OK;
}

/*
** The first argument is a TCL UTF-8 string. Return the byte array
** object with the encoded representation of the string, including
** the NULL terminator.
*/
static int binarize(
  void * clientData,
................................................................................
      zVal = sqlite3_value_text(&val);
    }
  }

  return TCL_OK;
}






































































/*


















** Register commands with the TCL interpreter.
*/
int Sqlitetest5_Init(Tcl_Interp *interp){
  /* Table of command names and the procedures that implement them. */
  static struct {
    char *zName;
    Tcl_ObjCmdProc *xProc;
  } aCmd[] = {
    { "sqlite_utf16to8",         (Tcl_ObjCmdProc*)sqlite_utf16to8    },
    { "sqlite_utf8to16le",       (Tcl_ObjCmdProc*)sqlite_utf8to16le  },
    { "sqlite_utf8to16be",       (Tcl_ObjCmdProc*)sqlite_utf8to16be  },
    { "sqlite_utf16to16le",      (Tcl_ObjCmdProc*)sqlite_utf16to16le },
    { "sqlite_utf16to16be",      (Tcl_ObjCmdProc*)sqlite_utf16to16be },
    { "binarize",                (Tcl_ObjCmdProc*)binarize },
    { "test_value_overhead",     (Tcl_ObjCmdProc*)test_value_overhead },
  };
  /* The element count is computed once as an int so that the loop does
  ** not compare a signed index against an unsigned sizeof expression.
  */
  const int nCmd = (int)(sizeof(aCmd)/sizeof(aCmd[0]));
  int i;
  for(i=0; i<nCmd; i++){
    Tcl_CreateObjCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
  }
  /* This routine reports to a Tcl interpreter, so the status is given as
  ** a Tcl code, matching the command procedures registered above.
  */
  return TCL_OK;
}








|








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







<
<
<
<
<


>
>








11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26





























































































































































































27
28
29
30
31
32
33
..
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189





190
191
192
193
194
195
196
197
198
199
200
201
*************************************************************************
** Code for testing the utf.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library. Specifically, the code in this file
** is used for testing the SQLite routines for converting between
** the various supported unicode encodings.
**
** $Id: test5.c,v 1.11 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "vdbeInt.h"
#include "os.h"         /* to get SQLITE_BIGENDIAN */
#include "tcl.h"
#include <stdlib.h>
#include <string.h>






























































































































































































/*
** The first argument is a TCL UTF-8 string. Return the byte array
** object with the encoded representation of the string, including
** the NULL terminator.
*/
static int binarize(
  void * clientData,
................................................................................
      zVal = sqlite3_value_text(&val);
    }
  }

  return TCL_OK;
}

/*
** Translate the encoding name held in pObj into one of the SQLITE_UTF*
** constants. The names understood are "UTF8", "UTF16LE", "UTF16BE" and
** "UTF16"; they are compared against the argument with sqlite3StrICmp().
**
** If the name matches none of the table entries, the message
** "No such encoding: <name>" is appended to the interpreter result and
** 0 is returned.
*/
static u8 name_to_enc(Tcl_Interp *interp, Tcl_Obj *pObj){
  /* The table is deliberately an automatic (non-static) array:
  ** NOTE(review): SQLITE_UTF16NATIVE may not be a compile-time constant,
  ** in which case it could not appear in a static initializer - confirm
  ** before making this table static.
  */
  const struct EncName {
    const char *zName;
    u8 enc;
  } encnames[] = {
    { "UTF8", SQLITE_UTF8 },
    { "UTF16LE", SQLITE_UTF16LE },
    { "UTF16BE", SQLITE_UTF16BE },
    { "UTF16", SQLITE_UTF16NATIVE },
    { 0, 0 }
  };
  const struct EncName *pEnc;
  char *z = Tcl_GetString(pObj);
  for(pEnc=&encnames[0]; pEnc->zName; pEnc++){
    if( 0==sqlite3StrICmp(z, pEnc->zName) ){
      break;
    }
  }
  /* When the loop runs off the end, pEnc is left on the { 0, 0 } entry. */
  if( !pEnc->enc ){
    /* The argument list of Tcl_AppendResult() has to be closed by a NULL
    ** pointer; a bare 0 is an int and is not a portable terminator.
    */
    Tcl_AppendResult(interp, "No such encoding: ", z, (char*)0);
  }
  return pEnc->enc;
}

/*
** Usage: test_translate <string/blob> <from enc> <to enc>
**
** Load the first argument into an sqlite3_value using encoding <from enc>,
** read it back in encoding <to enc> and return the result as a byte array.
** The array holds sqlite3ValueBytes() bytes plus one extra byte when the
** target is UTF-8, or two extra bytes otherwise.
**
** TCL_ERROR is returned when the argument count is wrong, when either
** encoding name is rejected by name_to_enc(), or when sqlite3ValueText()
** returns NULL.
*/
static int test_translate(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  u8 enc_from;
  u8 enc_to;
  sqlite3_value *pVal;

  const char *z;
  int len;

  if( objc!=4 ){
    /* The argument list of Tcl_AppendResult() has to be closed by a NULL
    ** pointer; a bare 0 is an int and is not a portable terminator.
    */
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), 
        " <string/blob> <from enc> <to enc>", (char*)0
    );
    return TCL_ERROR;
  }

  enc_from = name_to_enc(interp, objv[2]);
  if( !enc_from ) return TCL_ERROR;
  enc_to = name_to_enc(interp, objv[3]);
  if( !enc_to ) return TCL_ERROR;

  pVal = sqlite3ValueNew();

  /* UTF-8 input is taken from the string representation of the argument,
  ** any other encoding from its byte-array representation. The value is
  ** then loaded the same way in both cases.
  */
  if( enc_from==SQLITE_UTF8 ){
    z = Tcl_GetString(objv[1]);
  }else{
    z = (const char *)Tcl_GetByteArrayFromObj(objv[1], 0);
  }
  sqlite3ValueSetStr(pVal, -1, z, enc_from, SQLITE_STATIC);

  z = sqlite3ValueText(pVal, enc_to);
  if( !z ){
    /* Do not build a byte array from a NULL pointer. */
    sqlite3ValueFree(pVal);
    Tcl_AppendResult(interp, "translation failed", (char*)0);
    return TCL_ERROR;
  }
  len = sqlite3ValueBytes(pVal, enc_to) + (enc_to==SQLITE_UTF8?1:2);
  Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(z, len));

  sqlite3ValueFree(pVal);

  return TCL_OK;
}

/*
** Usage: translate_selftest
**
** Call sqlite3utfSelfTest() to run the internal tests for unicode
** translation. If there is a problem an assert() will fail.
**
** The command takes no notice of its arguments and always reports
** success to the interpreter.
**/
void sqlite3utfSelfTest(void);
static int test_translate_selftest(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  sqlite3utfSelfTest();
  /* A Tcl command procedure reports its status with a Tcl code, as the
  ** other commands in this file do.
  */
  return TCL_OK;
}


/*
** Register commands with the TCL interpreter.
**
** Every entry of the aCmd[] table below is passed to
** Tcl_CreateObjCommand() with no client data and no delete procedure.
*/
int Sqlitetest5_Init(Tcl_Interp *interp){
  static struct {
    char *zName;
    Tcl_ObjCmdProc *xProc;
  } aCmd[] = {
    { "binarize",                (Tcl_ObjCmdProc*)binarize },
    { "test_value_overhead",     (Tcl_ObjCmdProc*)test_value_overhead },
    { "test_translate",          (Tcl_ObjCmdProc*)test_translate     },
    { "translate_selftest",      (Tcl_ObjCmdProc*)test_translate_selftest},
  };
  /* The element count is computed once as an int so that the loop does
  ** not compare a signed index against an unsigned sizeof expression.
  */
  const int nCmd = (int)(sizeof(aCmd)/sizeof(aCmd[0]));
  int i;
  for(i=0; i<nCmd; i++){
    Tcl_CreateObjCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
  }
  /* This routine reports to a Tcl interpreter, so the status is given as
  ** a Tcl code.
  */
  return TCL_OK;
}

Changes to src/tokenize.c.

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
...
697
698
699
700
701
702
703


704
705




706
707
708


709
710

*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.76 2004/05/31 23:56:43 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*
................................................................................

/*
** UTF-16 counterpart of sqlite3_complete(). The argument is converted
** with sqlite3utf16to8() (SQLITE_BIGENDIAN is passed as the byte-order
** argument) and the converted text is handed to sqlite3_complete(), whose
** result is returned. The temporary copy is released with sqliteFree().
**
** If the conversion returns NULL, 0 is returned.
*/
int sqlite3_complete16(const void *zSql){
  int rc = 0;
  char *zUtf8 = sqlite3utf16to8(zSql, -1, SQLITE_BIGENDIAN);
  if( zUtf8 ){
    rc = sqlite3_complete(zUtf8);
    sqliteFree(zUtf8);
  }
  return rc;
}








|







 







>
>
|
<
>
>
>
>
|
|
|
>
>


>
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
...
697
698
699
700
701
702
703
704
705
706

707
708
709
710
711
712
713
714
715
716
717
718
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.77 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*
................................................................................

/*
** This routine is the same as the sqlite3_complete() routine described
** above, except that the parameter is required to be UTF-16 encoded, not
** UTF-8.
**
** The text is loaded into a temporary sqlite3_value as SQLITE_UTF16NATIVE
** and read back as SQLITE_UTF8. If that read returns NULL, 0 is returned.
*/
int sqlite3_complete16(const void *zSql){
  sqlite3_value *pVal;
  const char *zSql8;
  int rc = 0;

  pVal = sqlite3ValueNew();
  sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC);
  zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8);
  if( zSql8 ){
    rc = sqlite3_complete(zSql8);
  }
  /* sqlite3ValueText() returns a const pointer, so the UTF-8 text is not
  ** released separately here; only the value object is freed.
  ** NOTE(review): presumably the text is owned by pVal and released by
  ** sqlite3ValueFree() - confirm against the sqlite3ValueText()
  ** implementation.
  */
  sqlite3ValueFree(pVal);
  return rc;
}

Changes to src/utf.c.

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
..
44
45
46
47
48
49
50









51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
..
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116










117




118
119
120

121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146


147
148
149
150
151
152


153
154
155
156
157
158
159












160
161
162
163
164











165
166
167
168
169
170


171
172
173
174
175















176
177
178
179
180
181
182
183



184






185
186
187
188
189
190










191
192
193
194
195
196
197
198
199
200








201
202
203
204
205
206
207








208
209
210
211
212



213
214
215
216
217

218
219
220
221
222









223
224



225
226



227
228

229
230
231



232
233

234
235
236
237
238
239
240
241
242
243



244
245
246
247










248
249
250
251






252
253
254



255
256
257
258
259
260











261
262

263
264






265







266
267
268
269
270
271
272
273
274
275
276
277
278
279

280
281


282




283







284
285


286
287
288
289
290
291
292
293
294






















295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356


357
358
359



360
361
362
363
364
365
366





367


368
369
370
371
372
373
374
375
376
377





378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400







401
402
403
404

405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436


437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496

497
498
499
500
501
502
503
504
505
506
507



508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701



702
703
704
705
706
707
708
709
710

711
712
713
714
715
716
717
718
719
720
721
...
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766















































**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.20 2004/06/17 05:36:44 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
** When converting malformed UTF-8 strings to UTF-16, one instance of the
** replacement character U+FFFD is emitted for each byte that cannot be
** interpreted as part of a valid unicode character.
**
** When converting malformed UTF-16 strings to UTF-8, one instance of the
** replacement character U+FFFD is emitted for each pair of bytes that
** cannot be interpreted as part of a valid unicode character.









*/
#include <assert.h>
#include "sqliteInt.h"

/*
** A string buffer together with a position inside it, as used by the
** read and write helper routines in this file.
*/
typedef struct UtfString {
  unsigned char *pZ;    /* Raw string data */
  int n;                /* Allocated length of pZ in bytes */
  int c;                /* Number of pZ bytes already read or written */
} UtfString;

/*
** These two macros are used to interpret the first two bytes of the 
** unsigned char array pZ as a 16-bit unsigned int. BE16() for a big-endian
** interpretation, LE16() for little-endian.
**
** pZ is evaluated twice by each macro, so it must not have side effects.
*/
#define BE16(pZ) (((u16)((pZ)[0])<<8) + (u16)((pZ)[1]))
#define LE16(pZ) (((u16)((pZ)[1])<<8) + (u16)((pZ)[0]))

/*
** READ_16 interprets the first two bytes of the unsigned char array pZ 
** as a 16-bit unsigned int. If big_endian is non-zero the interpretation
** is big-endian, otherwise little-endian.
**
** The big_endian argument is parenthesized in the expansion so that an
** argument containing an operator of lower precedence than ?: is still
** treated as a single condition.
*/
#define READ_16(pZ,big_endian) ((big_endian)?BE16(pZ):LE16(pZ))

/*
** The following macro, LOWERCASE(x), takes an integer representing a
** unicode code point. The value returned is the same code point folded to
** lower case, if applicable. SQLite currently understands the upper/lower
** case relationship between the 26 characters used in the English
** language only.
................................................................................
     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
    122,
};

/*
** The first parameter, zStr, points at a unicode string. This routine
** reads a single character from the string and returns the codepoint value
** of the character read.
**
** The value of *pEnc is the string encoding: SQLITE_UTF8, SQLITE_UTF16LE
** or SQLITE_UTF16BE. This routine only reads *pEnc; it does not look for
** a byte-order mark and never modifies *pEnc.
**
** The value of *pOffset is the byte-offset in zStr from which to begin
** reading. It is incremented by the number of bytes read by this function.
**
** If the fourth parameter, fold, is non-zero, then codepoint values are
** folded to lower-case before being returned. See comments for macro
** LOWERCASE(x) for details.
**
** When the input is malformed the replacement character U+FFFD is
** returned (without folding). The malformed cases detected are:
**
**   UTF-8:   a first byte that matches none of the four lead-byte
**            patterns, or a following byte that is not of the form
**            10xxxxxx.
**   UTF-16:  a trailing surrogate that is not preceded by a leading
**            surrogate, or a leading surrogate that is not followed by
**            a trailing surrogate.
**
** No length is passed in, so this routine cannot tell whether it is
** reading beyond the end of the string; that is up to the caller.
*/
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold){
  /* All bytes are examined through an unsigned pointer. Where plain char
  ** is signed, a byte >= 0x80 read through zStr would be sign-extended
  ** before READ_16() combines it with its neighbour.
  */
  const unsigned char *z = (const unsigned char *)zStr;
  int ret = 0;

  switch( *pEnc ){
    case SQLITE_UTF8: {
      struct Utf8TblRow {
        u8 b1_mask;          /* Bits of byte 1 that encode the length */
        u8 b1_masked_val;    /* Expected value of (byte 1 & b1_mask) */
        u8 b1_value_mask;    /* Bits of byte 1 that carry payload */
        int trailing_bytes;  /* Number of 10xxxxxx bytes that follow */
      };
      static const struct Utf8TblRow utf8tbl[] = {
        { 0x80, 0x00, 0x7F, 0 },    /* 0xxxxxxx */
        { 0xE0, 0xC0, 0x1F, 1 },    /* 110yyyyy */
        { 0xF0, 0xE0, 0x0F, 2 },    /* 1110zzzz */
        { 0xF8, 0xF0, 0x07, 3 },    /* 11110uuu: three payload bits */
        { 0, 0, 0, 0}               /* End of table */
      };

      u8 b1;   /* First byte of the potentially multi-byte utf-8 character */
      int ii;
      struct Utf8TblRow const *pRow = &(utf8tbl[0]);

      /* Find the table row that matches the lead byte */
      b1 = z[(*pOffset)++];
      while( pRow->b1_mask && (b1&pRow->b1_mask)!=pRow->b1_masked_val ){
        pRow++;
      }
      if( !pRow->b1_mask ){
        return (int)0xFFFD;
      }

      /* Accumulate six payload bits from each trailing byte */
      ret = (u32)(b1&pRow->b1_value_mask);
      for( ii=0; ii<pRow->trailing_bytes; ii++ ){
        u8 b = z[(*pOffset)++];
        if( (b&0xC0)!=0x80 ){
          return (int)0xFFFD;
        }
        ret = (ret<<6) + (u32)(b&0x3F);
      }
      break;
    }

    case SQLITE_UTF16LE:
    case SQLITE_UTF16BE: {
      u32 code_point;   /* the first code-point in the character */
      u32 code_point2;  /* the second code-point in the character, if any */

      code_point = READ_16(&z[*pOffset], (*pEnc==SQLITE_UTF16BE));
      *pOffset += 2;

      /* If this is a non-surrogate code-point, just cast it to an int and
      ** this is the code-point value. The surrogate range is 0xD800
      ** through 0xDFFF inclusive.
      */
      if( code_point<0xD800 || code_point>0xDFFF ){
        ret = code_point;
        break;
      }

      /* If this is a trailing surrogate code-point, then the string is
      ** malformed; return the replacement character.
      */
      if( code_point>0xDBFF ){
        return (int)0xFFFD;
      }

      /* The code-point just read is a leading surrogate code-point. If
      ** the next code-point is not a trailing surrogate, return the
      ** replacement character.
      */
      code_point2 = READ_16(&z[*pOffset], (*pEnc==SQLITE_UTF16BE));
      *pOffset += 2;
      if( code_point2<0xDC00 || code_point2>0xDFFF ){
        return (int)0xFFFD;
      }

      /* Leading surrogate:   110110ww wwxxxxxx
      ** Trailing surrogate:  110111yy yyyyyyyy
      ** Result:              uuuuu xxxxxx yyyyyyyyyy, with uuuuu = wwww+1
      **
      ** (code_point&0x03C0)+0x0040 is uuuuu positioned at bit 6, so a
      ** further shift of 10 places it at bit 16.
      */
      ret = ( 
          (((code_point&0x03C0)+0x0040)<<10) +   /* uuuuu */
          ((code_point&0x003F)<<10) +            /* xxxxxx */
          (code_point2&0x03FF)                   /* yy yyyyyyyy */
      );
      break;
    }

    default:
      assert(0);
  }

  if( fold ){
    return LOWERCASE(ret);
  }
  return ret;
}

/*
** Consume a byte-order mark from the start of *pStr, if one is present,
** and report the byte order of the string.
**
** Return values when a BOM is found (pStr->c is advanced past it):
**     1 -> big-endian string
**     0 -> little-endian string
**
** When no BOM is found, or the string holds fewer than two bytes, the
** value of parameter big_endian is returned unchanged and pStr->c is
** left alone.
*/
static int readUtf16Bom(UtfString *pStr, int big_endian){
  u8 bom;

  /* The BOM must be the first thing read from the string */
  assert( pStr->c==0 );

  /* A string of one byte or less has no room for a BOM */
  if( pStr->n<=1 ){
    return big_endian;
  }

  /* Check the first 2 bytes of the string for a BOM */
  bom = sqlite3UtfReadBom(pStr->pZ, 2);
  if( bom==0 ){
    return big_endian;
  }

  pStr->c += 2;
  return bom==SQLITE_UTF16LE ? 0 : 1;
}








/*
** Look for a byte-order mark in the first two bytes of zData, where
** nData is the length of zData in bytes.
**
**     0xFE 0xFF  ->  SQLITE_UTF16BE is returned
**     0xFF 0xFE  ->  SQLITE_UTF16LE is returned
**     otherwise  ->  0 is returned
**
** If nData is 0 or 1 the data is not examined and 0 is returned. A
** negative nData is handled the same way as a length of two or more.
*/
u8 sqlite3UtfReadBom(const void *zData, int nData){
  const u8 *aByte = (const u8 *)zData;

  if( nData==0 || nData==1 ){
    return 0;
  }
  if( aByte[0]==0xFE && aByte[1]==0xFF ){
    return SQLITE_UTF16BE;
  }
  if( aByte[0]==0xFF && aByte[1]==0xFE ){
    return SQLITE_UTF16LE;
  }
  return 0;
}


/*
** Read one unicode character from the UTF-8 encoded string *pStr by
** handing the buffer and the current position to sqlite3ReadUniChar()
** with case folding switched off. pStr->c is advanced by that routine.
** For malformed input the value returned may be the replacement
** character U+FFFD.
*/
static u32 readUtf8(UtfString *pStr){
  u8 enc = SQLITE_UTF8;
  int code = sqlite3ReadUniChar((const char *)pStr->pZ, &pStr->c, &enc, 0);
  return (u32)code;
}

/*
** Write the unicode character 'code' to the string pStr using UTF-8
** encoding. SQLITE_NOMEM may be returned if sqlite3Malloc() fails.
*/
static int writeUtf8(UtfString *pStr, u32 code){
  /* One row per UTF-8 sequence length: the largest code that fits, the
  ** number of continuation bytes, and the masks used to build the first
  ** byte. The all-zero row is a sentinel.
  */
  struct Utf8Form {
    u32 max_code;
    int trailing_bytes;
    u8 b1_and_mask;
    u8 b1_or_mask;
  };
  static const struct Utf8Form aForm[] = {
    {0x0000007F, 0, 0x7F, 0x00},
    {0x000007FF, 1, 0xDF, 0xC0},
    {0x0000FFFF, 2, 0xEF, 0xE0},
    {0x0010FFFF, 3, 0xF7, 0xF0},
    {0x00000000, 0, 0x00, 0x00}
  };
  const struct Utf8Form *pForm = aForm;
  int i;

  /* Pick the shortest form able to hold 'code'. */
  for(; code>pForm->max_code; pForm++){
    assert( pForm->max_code );
  }

  /* The caller must have left room for the whole sequence. */
  assert( (pStr->n-pStr->c)>=(pForm->trailing_bytes+1) );

  /* Continuation bytes are filled in from last to first, six bits each,
  ** leaving the remaining high bits in 'code' for the first byte.
  */
  for(i=pForm->trailing_bytes; i>0; i--){
    pStr->pZ[pStr->c+i] = (((u8)code)&0x3F)|0x80;
    code = code>>6;
  }
  pStr->pZ[pStr->c] = (((u8)code)&(pForm->b1_and_mask))|(pForm->b1_or_mask);

  pStr->c += (pForm->trailing_bytes + 1);
  return 0;
}

/*
** Read a single unicode character from the UTF-16 encoded string *pStr and
** advance the cursor pStr->c past the bytes consumed. The value returned
** is a unicode scalar value. For malformed input (a lone trailing byte, an
** unpaired surrogate, or a leading surrogate with no data after it) the
** unicode replacement character U+FFFD is returned.
**
** If big_endian is true, the string is assumed to be UTF-16BE encoded.
** Otherwise, it is UTF-16LE encoded.
*/
static u32 readUtf16(UtfString *pStr, int big_endian){
  u32 code_point;   /* the first code-point in the character */

  /* If there is only one byte of data left in the string, consume it and
  ** return the replacement character.
  */
  if( (pStr->n-pStr->c)==1 ){
    pStr->c++;
    return 0xFFFD;
  }

  code_point = READ_16(&(pStr->pZ[pStr->c]), big_endian);
  pStr->c += 2;

  /* Surrogates occupy 0xD800..0xDFFF inclusive. Anything outside that
  ** range (including U+E000) is a complete character on its own.
  */
  if( code_point<0xD800 || code_point>0xDFFF ){
    return code_point;
  }

  /* If this is a trailing surrogate code-point, then the string is
  ** malformed; return the replacement character.
  */
  if( code_point>0xDBFF ){
    return 0xFFFD;
  }

  /* The code-point just read is a leading surrogate. If there is not
  ** enough data left, return the replacement character.
  */
  if( (pStr->n-pStr->c)<=1 ){
    return 0xFFFD;
  }else{
    u32 code_point2 = READ_16(&pStr->pZ[pStr->c], big_endian);

    /* The second unit must be a trailing surrogate (0xDC00..0xDFFF). If it
    ** is not, leave it unconsumed so that it is read as the next character.
    */
    if( code_point2<0xDC00 || code_point2>0xDFFF ){
      return 0xFFFD;
    }
    pStr->c += 2;

    /* The leading surrogate is 110110ww wwxxxxxx and the trailing one is
    ** 110111yy yyyyyyyy. The result is uuuuu xxxxxx yyyyyyyyyy, where
    ** uuuuu = wwww+1. Both the uuuuu and xxxxxx fields are moved up by ten
    ** bits to sit above the ten y bits.
    */
    return (
        (((code_point&0x03C0)+0x0040)<<10) +   /* uuuuu */
        ((code_point&0x003F)<<10) +            /* xxxxxx */
        (code_point2&0x03FF)                   /* yy yyyyyyyy */
    );
  }
}

/*
** Write the unicode character 'code' to the string pStr using UTF-16,
** big-endian if big_endian is true and little-endian otherwise. Codes
** above 0xFFFF are written as a surrogate pair (4 bytes), everything else
** as a single 16-bit unit (2 bytes). The cursor pStr->c is advanced past
** the bytes written. Always returns 0.
*/
static int writeUtf16(UtfString *pStr, int code, int big_endian){
  const int nByte = (code>0x0000FFFF?4:2);  /* Bytes this character needs */
  unsigned char *zMsb;     /* Where the high byte of the first unit goes */
  unsigned char *zLsb;     /* Where the low byte of the first unit goes */
  u32 unit;                /* 16-bit unit currently being stored */

  /* Ensure there is enough room left in the output buffer to write
  ** this UTF-16 character.
  */
  assert( (pStr->n-pStr->c)>=nByte );

  /* The two bytes of a unit swap places depending on the byte order. */
  if( big_endian ){
    zMsb = &pStr->pZ[pStr->c];
    zLsb = &pStr->pZ[pStr->c+1];
  }else{
    zMsb = &pStr->pZ[pStr->c+1];
    zLsb = &pStr->pZ[pStr->c];
  }

  if( nByte==4 ){
    /* Leading surrogate, then the trailing surrogate two bytes later. */
    unit = ((((code&0x001F0000)-0x00010000)+(code&0x0000FC00))>>10)|0x0000D800;
    zMsb[0] = (u8)((unit&0x0000FF00)>>8);
    zLsb[0] = (u8)(unit&0x000000FF);

    unit = (code&0x000003FF)|0x0000DC00;
    zMsb[2] = (u8)((unit&0x0000FF00)>>8);
    zLsb[2] = (u8)(unit&0x000000FF);
  }else{
    zMsb[0] = (u8)((code&0x0000FF00)>>8);
    zLsb[0] = (u8)(code&0x000000FF);
  }

  pStr->c += nByte;
  return 0;
}

/*
** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,
** return the number of unicode characters in pZ up to (but not including)
** the first 0x00 byte. If nByte is not less than zero, return the
** number of unicode characters in the first nByte of pZ (or up to 
** the first 0x00, whichever comes first).
*/
int sqlite3utf8CharLen(const char *pZ, int nByte){
  UtfString str;     /* Cursor over the input */
  int nChar = 0;     /* Non-zero characters counted so far */
  u32 ch;            /* Character most recently read */

  str.pZ = (char *)pZ;
  str.n = nByte;
  str.c = 0;

  for(;;){
    /* A non-negative nByte bounds the scan; a negative one does not. */
    if( nByte>=0 && str.c>=str.n ){
      return nChar;
    }
    ch = readUtf8(&str);
    if( ch==0 ){
      /* The terminating 0x00 itself is not counted. */
      return nChar;
    }
    nChar++;
  }
}

/*
** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
** return the number of bytes up to (but not including), the first pair
** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
** then return the number of bytes in the first nChar unicode characters
** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
*/
int sqlite3utf16ByteLen(const void *pZ, int nChar){
  UtfString str;       /* Cursor over the input */
  u32 ch = 1;          /* Character most recently read */
  int big_endian;      /* Byte order used to decode the input */
  int nSeen;           /* Characters read so far */
  int nSkip;           /* Bytes consumed that are not part of the result */

  if( nChar<0 ){
    /* Unbounded: walk two bytes at a time until both bytes are zero. */
    const unsigned char *p = (unsigned char *)pZ;
    while( p[0] || p[1] ){
      p += 2;
    }
    return p-(unsigned char *)pZ;
  }

  str.pZ = (char *)pZ;
  str.c = 0;
  str.n = -1;

  /* Any bytes consumed as a BOM are excluded from the result.
  ** NOTE(review): str.n is -1 here, and readUtf16Bom() only looks for a
  ** BOM when the length exceeds 1, so it appears no BOM is ever skipped on
  ** this path. Confirm.
  */
  big_endian = readUtf16Bom(&str, 0);
  nSkip = str.c;

  for(nSeen=0; ch!=0 && nSeen<nChar; nSeen++){
    ch = readUtf16(&str, big_endian);
  }

  /* If the scan stopped on a 0x0000 terminator, do not count its bytes. */
  if( ch==0 ){
    nSkip += 2;
  }
  return str.c - nSkip;
}

/*
** Convert a UTF-16 string into a UTF-8 string. The byte order of the input
** is taken from a leading byte-order-mark ("BOM") if one is present, and
** from the big_endian parameter otherwise. The UTF-8 string is written
** into space obtained from sqliteMalloc() and must be released by the
** calling function.
**
** The parameter N is the number of bytes in the UTF-16 string.  If N is
** negative, the entire string up to the first \u0000 character is translated.
**
** The returned UTF-8 string is always \000 terminated. 0 is returned if
** the allocation fails.
*/
unsigned char *sqlite3utf16to8(const void *pData, int N, int big_endian){
  UtfString in;
  UtfString out;

  out.pZ = 0;

  in.pZ = (unsigned char *)pData;
  in.n = N;
  in.c = 0;

  if( in.n<0 ){
    in.n = sqlite3utf16ByteLen(in.pZ, -1);
  }

  /* Each 16-bit unit of input needs at most 3 bytes of UTF-8 (a surrogate
  ** pair is 4 bytes in and 4 bytes out). A stray odd byte at the end of
  ** the input is read as U+FFFD, which also needs 3 bytes, so the unit
  ** count is rounded up. One more byte is reserved for the terminator.
  */
  out.n = ((in.n+1)/2)*3 + 1;
  out.pZ = sqliteMalloc(out.n);
  if( !out.pZ ){
    return 0;
  }
  out.c = 0;

  big_endian = readUtf16Bom(&in, big_endian);
  while( in.c<in.n ){
    writeUtf8(&out, readUtf16(&in, big_endian));
  }

  /* Add the NULL-terminator character */
  assert( out.c<out.n );
  out.pZ[out.c] = 0x00;

  return out.pZ;
}

static void *utf8toUtf16(const unsigned char *pIn, int N, int big_endian){
  UtfString in;
  UtfString out;

  in.pZ = (unsigned char *)pIn;
  in.n = N;
  in.c = 0;

  if( in.n<0 ){
    in.n = strlen(in.pZ);
  }

  /* A UTF-16 encoding of a unicode string can require at most twice as
  ** much space to store as the same string encoded using UTF-8. Allocate
  ** this now.
  */
  out.n = (in.n*2) + 2;
  out.pZ = sqliteMalloc(out.n);
  if( !out.pZ ){
    return 0;
  }
  out.c = 0;

  while( in.c<in.n ){
    writeUtf16(&out, readUtf8(&in), big_endian);
  }

  /* Add the NULL-terminator character */
  assert( (out.c+1)<out.n );
  out.pZ[out.c] = 0x00;
  out.pZ[out.c+1] = 0x00;

  return out.pZ;
}

/*
** Translate UTF-8 to UTF-16BE or UTF-16LE
*/
void *sqlite3utf8to16be(const unsigned char *pIn, int N){
  /* The final argument 1 asks the shared worker for big-endian output. */
  void *pOut = utf8toUtf16(pIn, N, 1);
  return pOut;
}

/*
** Translate UTF-8 to UTF-16LE. See utf8toUtf16() for the details.
*/
void *sqlite3utf8to16le(const unsigned char *pIn, int N){
  /* The final argument 0 asks the shared worker for little-endian output. */
  void *pOut = utf8toUtf16(pIn, N, 0);
  return pOut;
}

/* 
** This routine does the work for sqlite3utf16to16le() and
** sqlite3utf16to16be(). If big_endian is 1 the input string is
** transformed in place to UTF-16BE encoding. If big_endian is 0 then
** the input is transformed to UTF-16LE.
**
** Unless the first two bytes of the input string is a BOM, the input is
** assumed to be UTF-16 encoded using the machines native byte ordering.
*/
static void utf16to16(void *pData, int N, int big_endian){
  UtfString s;     /* Cursor over the buffer being rewritten */
  int nBody;       /* Bytes of text left once any BOM is removed */

  s.pZ = (unsigned char *)pData;
  s.c = 0;
  s.n = N;
  if( s.n<0 ){
    s.n = sqlite3utf16ByteLen(s.pZ, -1);
  }

  /* readUtf16Bom() advances s.c past a BOM if it finds one. */
  if( readUtf16Bom(&s, SQLITE_BIGENDIAN)==big_endian ){
    /* Already in the requested byte order: only a BOM, if any, has to be
    ** squeezed out.
    */
    if( s.c ){
      memmove(s.pZ, &s.pZ[s.c], s.n-s.c);
    }
  }else{
    /* Wrong byte order: swap each byte pair, shifting the text down over
    ** the BOM at the same time.
    */
    int k;
    for(k=0; k<(s.n-s.c); k+=2){
      char first = s.pZ[k+s.c];
      char second = s.pZ[k+s.c+1];
      s.pZ[k] = second;
      s.pZ[k+1] = first;
    }
  }

  /* Write a two-byte terminator directly after the text. */
  nBody = s.n-s.c;
  s.pZ[nBody] = 0x00;
  s.pZ[nBody+1] = 0x00;
}

/*
** Convert a string in UTF-16 native byte or with a BOM into a UTF-16LE
** string.  The conversion occurs in-place.  The output overwrites the
** input.  N bytes are converted.  If N is negative everything is converted
** up to the first \u0000 character.
**
** If the native byte order is little-endian and there is no BOM, then
** this routine is a no-op.  If there is a BOM at the start of the string,
** it is removed.
**
** Translation from UTF-16LE to UTF-16BE and back again is accomplished
** by swapping each pair of bytes in place.
*/
void sqlite3utf16to16le(void *pData, int N){
  const int wantBigEndian = 0;   /* Requested output order: little-endian */
  utf16to16(pData, N, wantBigEndian);
}

/*
** Convert a string in UTF-16 native byte or with a BOM into a UTF-16BE
** string.  The conversion occurs in-place.  The output overwrites the
** input.  N bytes are converted.  If N is negative everything is converted
** up to the first \u0000 character.
**
** If the native byte order is big-endian and there is no BOM, then
** no bytes are swapped.  If there is a BOM at the start of the string,
** it is removed.
**
** Translation from UTF-16LE to UTF-16BE and back again is accomplished
** by swapping each pair of bytes in place.
*/
void sqlite3utf16to16be(void *pData, int N){
  const int wantBigEndian = 1;   /* Requested output order: big-endian */
  utf16to16(pData, N, wantBigEndian);
}

/*
** This function is used to translate between UTF-8 and UTF-16. The
** result is returned in dynamically allocated memory.
*/
int sqlite3utfTranslate(
  const void *zData, int nData,  /* Input string */
  u8 enc1,                       /* Encoding of zData */
  void **zOut, int *nOut,        /* Output string */
  u8 enc2                        /* Desired encoding of output */
){
  /* Exactly one side of the translation must be UTF-8 and the other one
  ** of the two UTF-16 byte orders.
  */
  assert( enc1==SQLITE_UTF8 || enc1==SQLITE_UTF16LE || enc1==SQLITE_UTF16BE );
  assert( enc2==SQLITE_UTF8 || enc2==SQLITE_UTF16LE || enc2==SQLITE_UTF16BE );
  assert( 
    (enc1==SQLITE_UTF8 && (enc2==SQLITE_UTF16LE || enc2==SQLITE_UTF16BE)) ||
    (enc2==SQLITE_UTF8 && (enc1==SQLITE_UTF16LE || enc1==SQLITE_UTF16BE))
  );

  if( enc1!=SQLITE_UTF8 ){
    /* UTF-16 in, UTF-8 out. The length is the byte count before the
    ** 0x00 terminator.
    */
    *zOut = sqlite3utf16to8(zData, nData, enc1==SQLITE_UTF16BE);
    if( *zOut==0 ){
      return SQLITE_NOMEM;
    }
    *nOut = strlen(*zOut);
    return SQLITE_OK;
  }

  /* UTF-8 in, UTF-16 out, in the requested byte order. */
  if( enc2==SQLITE_UTF16LE ){
    *zOut = sqlite3utf8to16le(zData, nData);
  }else{
    *zOut = sqlite3utf8to16be(zData, nData);
  }
  if( *zOut==0 ){
    return SQLITE_NOMEM;
  }
  *nOut = sqlite3utf16ByteLen(*zOut, -1);
  return SQLITE_OK;
}

/*
** Advance pointer X by at least one byte, then keep advancing while it
** points at a UTF-8 continuation byte (bit pattern 10xxxxxx).
*/
#define sqliteNextChar(X)  for( ++(X); (0xc0&*(X))==0x80; ++(X) ){}

/*
** Compare two UTF-8 strings for equality using the "LIKE" operator of
** SQL.  The '%' character matches any sequence of 0 or more
** characters and '_' matches any single character.  Case is
** not significant.
*/
int sqlite3utf8LikeCompare(
................................................................................

  while( (c = LOWERCASE(*zPattern))!=0 ){
    switch( c ){
      case '%': {
        while( (c=zPattern[1]) == '%' || c == '_' ){
          if( c=='_' ){
            if( *zString==0 ) return 0;
            sqliteNextChar(zString);
          }
          zPattern++;
        }
        if( c==0 ) return 1;
        c = LOWERCASE(c);
        while( (c2=LOWERCASE(*zString))!=0 ){
          while( c2 != 0 && c2 != c ){ 
            zString++;
            c2 = LOWERCASE(*zString); 
          }
          if( c2==0 ) return 0;
          if( sqlite3utf8LikeCompare(&zPattern[1],zString) ) return 1;
          sqliteNextChar(zString);
        }
        return 0;
      }
      case '_': {
        if( *zString==0 ) return 0;
        sqliteNextChar(zString);
        zPattern++;
        break;
      }
      default: {
        if( c != LOWERCASE(*zString) ) return 0;
        zPattern++;
        zString++;
        break;
      }
    }
  }
  return *zString==0;
}






















































|







 







>
>
>
>
>
>
>
>
>



<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<







 







|
|
|
<
<
<
<
<
<
<
<
<
<
<
<

<
<
>
>
>
>
>
>
>
>
>
>

>
>
>
>
|
<

>
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
>
>

<
<
<
<
<
>
>
|
<
<
<
<
<
<
>
>
>
>
>
>
>
>
>
>
>
>
|
|
<
<
<
>
>
>
>
>
>
>
>
>
>
>
|
<
|
<
<
<
>
>
|
<
<
|
<
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
<
<
|
<
|
>
>
>
|
>
>
>
>
>
>
|

<
<
<
<
>
>
>
>
>
>
>
>
>
>
|
<
<
|
<
<
<
<
<
<
>
>
>
>
>
>
>
>
|

<
<
<
<
<
>
>
>
>
>
>
>
>
|
|
<
<
<
>
>
>
|
<
<
<
<
>
|
<
<
<
<
>
>
>
>
>
>
>
>
>
|
<
>
>
>
|
<
>
>
>
|
|
>
|
<
<
>
>
>

<
>
|
<
<
<
<
<
<
<
<
<
>
>
>
|
<
<
<
>
>
>
>
>
>
>
>
>
>
|
<
<
<
>
>
>
>
>
>

<
<
>
>
>

<
<
<
<
<
>
>
>
>
>
>
>
>
>
>
>


>

<
>
>
>
>
>
>
|
>
>
>
>
>
>
>
|
<
<
<
<
<
<
<
<
<
<
<
<
|
>

<
>
>
|
>
>
>
>
|
>
>
>
>
>
>
>
|
<
>
>
|
|

<
<
<
<
<
<
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<



|
|
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

<
<
>
>

<
<
>
>
>

<
<
<
<
<
<
>
>
>
>
>
|
>
>
|
<
<
|
<
<
<
<
<
|
>
>
>
>
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
>
>
>
>
>
>
>
|
<
|
<
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
>
>
|
<
<
<

<
<
<
|









|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
<









|
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
>
|
<
<
<
<
<
<
<
<
<
<
>
>
>
|

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
>
>
>

<
<
<
<
<
<

<
>


<
<







 







|












|





|













>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
..
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62









63












64
65
66
67
68
69
70
..
80
81
82
83
84
85
86
87
88
89












90


91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106

107
108
109
110






















111

112
113
114





115
116
117






118
119
120
121
122
123
124
125
126
127
128
129
130
131



132
133
134
135
136
137
138
139
140
141
142
143

144



145
146
147


148

149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166


167

168
169
170
171
172
173
174
175
176
177
178
179
180




181
182
183
184
185
186
187
188
189
190
191


192






193
194
195
196
197
198
199
200
201
202





203
204
205
206
207
208
209
210
211
212



213
214
215
216




217
218




219
220
221
222
223
224
225
226
227
228

229
230
231
232

233
234
235
236
237
238
239


240
241
242
243

244
245









246
247
248
249



250
251
252
253
254
255
256
257
258
259
260



261
262
263
264
265
266
267


268
269
270
271





272
273
274
275
276
277
278
279
280
281
282
283
284
285
286

287
288
289
290
291
292
293
294
295
296
297
298
299
300
301












302
303
304

305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320

321
322
323
324
325






326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348

349
350
351
352
353
354
355


















































356


357
358
359


360
361
362
363






364
365
366
367
368
369
370
371
372


373





374
375
376
377
378
379
380






















381
382
383
384
385
386
387
388

389

390
391


























392




393
394
395



396



397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420


421
422
423
424
425
426
427
428
429
430













431


432
433










434
435
436
437
438






























































































































































































439

440
441
442
443






444

445
446
447


448
449
450
451
452
453
454
...
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.21 2004/06/18 04:24:55 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
** When converting malformed UTF-8 strings to UTF-16, one instance of the
** replacement character U+FFFD is written for each byte that cannot be
** interpreted as part of a valid unicode character.
**
** When converting malformed UTF-16 strings to UTF-8, one instance of the
** replacement character U+FFFD is written for each pair of bytes that
** cannot be interpreted as part of a valid unicode character.
**
** This file contains the following public routines:
**
** sqlite3VdbeMemTranslate() - Translate the encoding used by a Mem* string.
** sqlite3VdbeMemHandleBom() - Handle byte-order-marks in UTF16 Mem* strings.
** sqlite3utf16ByteLen()     - Calculate byte-length of a void* UTF16 string.
** sqlite3utf8CharLen()      - Calculate char-length of a char* UTF8 string.
** sqlite3utf8LikeCompare()  - Do a LIKE match given two UTF8 char* strings.
**
*/
#include <assert.h>
#include "sqliteInt.h"









#include "vdbeInt.h"













/*
** The following macro, LOWERCASE(x), takes an integer representing a
** unicode code point. The value returned is the same code point folded to
** lower case, if applicable. SQLite currently understands the upper/lower
** case relationship between the 26 characters used in the English
** language only.
................................................................................
     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
    122,
};

/*
** This table maps from the first byte of a UTF-8 character to the number
** of trailing bytes expected. A value '255' indicates that the table key
** is not a legal first byte for a UTF-8 character.












*/


/* Indexed by the first byte of a UTF-8 sequence. Each row below covers 16
** consecutive byte values. 255 marks a byte that cannot start a sequence.
*/
static const u8 xtra_utf8_bytes[256]  = {
/* 0xxxxxxx (0x00-0x7F): no trailing bytes */
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,

/* 10wwwwww (0x80-0xBF): continuation bytes, not legal as a first byte */
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,

/* 110yyyyy (0xC0-0xDF): one trailing byte */
1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,

/* 1110zzzz (0xE0-0xEF): two trailing bytes */
2, 2, 2, 2, 2, 2, 2, 2,     2, 2, 2, 2, 2, 2, 2, 2,

/* 11110yyy (0xF0-0xF7): three trailing bytes; 0xF8-0xFF are not legal */
3, 3, 3, 3, 3, 3, 3, 3,     255, 255, 255, 255, 255, 255, 255, 255,
};







/*
** This table maps from the number of trailing bytes in a UTF-8 character
** to an integer constant that is effectively calculated for each character
** read by a naive implementation of a UTF-8 character reader. The code
** in the READ_UTF8 macro explains things best.
*/
/* Indexed by the number of trailing bytes (0..3). Each entry is the sum of
** the marker bits carried by the lead byte and by every continuation byte
** once they have been shifted into place by READ_UTF8.
*/
static const int xtra_utf8_bits[4] =  {
  0,                                                /* 0 */
  (0xC0 << 6) + 0x80,                               /* 12416 */
  (0xE0 << 12) + (0x80 << 6) + 0x80,                /* 925824 */
  (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80  /* 63447168 */
};




/*
** Read one UTF-8 character starting at zIn, store its value in c and
** advance zIn past the bytes consumed.
**
** The first byte is looked up in xtra_utf8_bytes[] to find how many
** trailing bytes follow. A lookup result of 255 (not a legal first byte)
** stores U+FFFD in c and consumes only that one byte. Otherwise the switch
** falls through from the matching case so that one trailing byte is added
** per remaining case, and the marker bits accumulated along the way are
** removed in one step by subtracting xtra_utf8_bits[xtra]. A result of 0
** matches no case, leaving c equal to the byte read.
**
** The macro itself does not check that the trailing bytes are continuation
** bytes, nor that they lie inside the caller's buffer.
*/
#define READ_UTF8(zIn, c) { \
  int xtra;                                            \
  c = *(zIn)++;                                        \
  xtra = xtra_utf8_bytes[c];                           \
  switch( xtra ){                                      \
    case 255: c = (int)0xFFFD; break;                  \
    case 3: c = (c<<6) + *(zIn)++;                     \
    case 2: c = (c<<6) + *(zIn)++;                     \
    case 1: c = (c<<6) + *(zIn)++;                     \
    c -= xtra_utf8_bits[xtra];                         \
  }                                                    \
}





/*
** Advance zIn past one UTF-8 character without decoding it. The number of
** trailing bytes is taken from xtra_utf8_bytes[], indexed by the byte that
** zIn currently points at.
**
** A table value of 255 marks a byte that cannot start a character. Such a
** byte is skipped on its own, which matches READ_UTF8 consuming exactly
** one byte for it. Adding the raw table value would move zIn forward by
** 256 bytes.
*/
#define SKIP_UTF8(zIn) {                                         \
  u8 xtraSkip_ = xtra_utf8_bytes[*(u8 *)(zIn)];                  \
  (zIn) += ((xtraSkip_==255) ? 1 : (xtraSkip_ + 1));             \
}




/*
** Append character c to the buffer at zOut as UTF-8 and advance zOut past
** the bytes written: 1 byte below 0x80, 2 below 0x800, 3 below 0x10000 and
** 4 otherwise.
*/
#define WRITE_UTF8(zOut, c) {                          \
  if( (c)<0x00080 ){                                   \
    *(zOut)++ = ((c)&0xFF);                            \
  }else if( (c)<0x00800 ){                             \
    *(zOut)++ = 0xC0 | (((c)>>6)&0x1F);                \
    *(zOut)++ = 0x80 | ((c)&0x3F);                     \
  }else if( (c)<0x10000 ){                             \
    *(zOut)++ = 0xE0 | (((c)>>12)&0x0F);               \
    *(zOut)++ = 0x80 | (((c)>>6)&0x3F);                \
    *(zOut)++ = 0x80 | ((c)&0x3F);                     \
  }else{                                               \
    *(zOut)++ = 0xF0 | (((c)>>18)&0x07);               \
    *(zOut)++ = 0x80 | (((c)>>12)&0x3F);               \
    *(zOut)++ = 0x80 | (((c)>>6)&0x3F);                \
    *(zOut)++ = 0x80 | ((c)&0x3F);                     \
  }                                                    \
}


/*
** Append character c to the buffer at zOut as little-endian UTF-16 and
** advance zOut. Values above 0xFFFF are written as a surrogate pair
** (4 bytes), everything else as one 16-bit unit (2 bytes).
*/
#define WRITE_UTF16LE(zOut, c) {                                    \
  if( (c)>0xFFFF ){                                                 \
    *(zOut)++ = ((((c)-0x10000)>>10)&0x00C0) | (((c)>>10)&0x003F);  \
    *(zOut)++ = 0x00D8 | ((((c)-0x10000)>>18)&0x03);                \
    *(zOut)++ = ((c)&0x00FF);                                       \
    *(zOut)++ = 0x00DC | (((c)>>8)&0x03);                           \
  }else{                                                            \
    *(zOut)++ = ((c)&0x00FF);                                       \
    *(zOut)++ = (((c)>>8)&0x00FF);                                  \
  }                                                                 \
}





/*
** Append character c to the buffer at zOut as big-endian UTF-16 and
** advance zOut. Values above 0xFFFF are written as a surrogate pair
** (4 bytes), everything else as one 16-bit unit (2 bytes).
*/
#define WRITE_UTF16BE(zOut, c) {                                    \
  if( (c)>0xFFFF ){                                                 \
    *(zOut)++ = 0x00D8 | ((((c)-0x10000)>>18)&0x03);                \
    *(zOut)++ = ((((c)-0x10000)>>10)&0x00C0) | (((c)>>10)&0x003F);  \
    *(zOut)++ = 0x00DC | (((c)>>8)&0x03);                           \
    *(zOut)++ = ((c)&0x00FF);                                       \
  }else{                                                            \
    *(zOut)++ = (((c)>>8)&0x00FF);                                  \
    *(zOut)++ = ((c)&0x00FF);                                       \
  }                                                                 \
}









/*
** Read one character from the little-endian UTF-16 text at zIn, store its
** value in c and advance zIn past the bytes consumed. zIn should point at
** unsigned bytes so that values of 0x80 and above are not sign-extended.
**
** If the first 16-bit unit is a surrogate (0xD800..0xDFFF inclusive) a
** second unit is read and the two are combined into one value. U+E000 is
** the first value after the surrogate range and is returned as is.
*/
#define READ_UTF16LE(zIn, c){                                         \
  c = (*zIn++);                                                       \
  c += ((*zIn++)<<8);                                                 \
  if( c>=0xD800 && c<0xE000 ){                                        \
    int c2 = (*zIn++);                                                \
    c2 += ((*zIn++)<<8);                                              \
    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
  }                                                                   \
}






/*
** Read one character from the big-endian UTF-16 text at zIn, store its
** value in c and advance zIn past the bytes consumed. zIn should point at
** unsigned bytes so that values of 0x80 and above are not sign-extended.
**
** If the first 16-bit unit is a surrogate (0xD800..0xDFFF inclusive) a
** second unit is read and the two are combined into one value. U+E000 is
** the first value after the surrogate range and is returned as is.
*/
#define READ_UTF16BE(zIn, c){                                         \
  c = ((*zIn++)<<8);                                                  \
  c += (*zIn++);                                                      \
  if( c>=0xD800 && c<0xE000 ){                                        \
    int c2 = ((*zIn++)<<8);                                           \
    c2 += (*zIn++);                                                   \
    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
  }                                                                   \
}




/*
** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
*/ 




/* #define TRANSLATE_TRACE 1 */





/*
** This routine transforms the internal text encoding used by pMem to
** desiredEnc. It is an error if the string is already of the desired
** encoding, or if *pMem does not contain a string value.
*/
/*
** Returns SQLITE_OK on success, or SQLITE_NOMEM if the output buffer cannot
** be allocated. On success pMem->enc is desiredEnc. Except for the
** UTF-16 byte-swap case, which rewrites the existing buffer, pMem->z and
** pMem->n are replaced by the converted, terminated string.
*/
int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
  unsigned char zShort[NBFS]; /* Temporary short output buffer */
  int len;                    /* Maximum length of output string in bytes */
  unsigned char *zOut;                  /* Output buffer */
  unsigned char *zIn;                   /* Input iterator */
  unsigned char *zTerm;                 /* End of input */
  unsigned char *z;                     /* Output iterator */
  int c;                                /* Character being translated */

  assert( pMem->flags&MEM_Str );
  assert( pMem->enc!=desiredEnc );
  assert( pMem->enc!=0 );
  assert( pMem->n>=0 );

#ifdef TRANSLATE_TRACE
  {
    char zBuf[100];
    sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100);
    fprintf(stderr, "INPUT:  %s\n", zBuf);
  }
#endif

  /* If the translation is between UTF-16 little and big endian, then 
  ** all that is required is to swap the byte order. This case is handled
  ** differently from the others.
  **
  ** NOTE(review): the result of sqlite3VdbeMemMakeWriteable() is not
  ** examined here. If that routine can fail, the failure is not reported
  ** to the caller. Confirm.
  **
  ** NOTE(review): the loop swaps whole byte pairs, so it assumes pMem->n
  ** is even. With an odd length the last iteration touches the byte at
  ** zTerm.
  */
  if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){
    u8 temp;
    sqlite3VdbeMemMakeWriteable(pMem);
    zIn = pMem->z;
    zTerm = &zIn[pMem->n];
    while( zIn<zTerm ){
      temp = *zIn;
      *zIn = *(zIn+1);
      zIn++;
      *zIn++ = temp;
    }
    pMem->enc = desiredEnc;
    goto translate_out;
  }

  /* Set zIn to point at the start of the input buffer and zTerm to point 1
  ** byte past the end.
  **
  ** Variable zOut is set to point at the output buffer. This may be space
  ** obtained from malloc(), or Mem.zShort, if it is large enough and not in
  ** use, or the zShort array on the stack (see above).
  **
  ** The size requested is twice the input length plus two bytes.
  */
  zIn = pMem->z;
  zTerm = &zIn[pMem->n];
  len = pMem->n*2 + 2;
  if( len>NBFS ){
    zOut = sqliteMallocRaw(len);
    if( !zOut ) return SQLITE_NOMEM;
  }else{
    if( pMem->z==pMem->zShort ){
      /* Mem.zShort currently holds the input, so write to the stack. */
      zOut = zShort;
    }else{
      zOut = pMem->zShort;
    }
  }
  z = zOut;

  /* Each branch below writes a terminating zero character after the text
  ** and then sets pMem->n to the length without that terminator.
  */
  if( pMem->enc==SQLITE_UTF8 ){
    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){
        READ_UTF8(zIn, c); 
        WRITE_UTF16LE(z, c);
      }
      WRITE_UTF16LE(z, 0);
      pMem->n = (z-zOut)-2;
    }else if( desiredEnc==SQLITE_UTF16BE ){
      /* UTF-8 -> UTF-16 Big-endian */
      while( zIn<zTerm ){
        READ_UTF8(zIn, c); 
        WRITE_UTF16BE(z, c);
      }
      WRITE_UTF16BE(z, 0);
      pMem->n = (z-zOut)-2;
    }
  }else{
    assert( desiredEnc==SQLITE_UTF8 );
    if( pMem->enc==SQLITE_UTF16LE ){
      /* UTF-16 Little-endian -> UTF-8 */
      while( zIn<zTerm ){
        READ_UTF16LE(zIn, c); 
        WRITE_UTF8(z, c);
      }
      WRITE_UTF8(z, 0);
      pMem->n = (z-zOut)-1;
    }else{
      /* UTF-16 Big-endian -> UTF-8 */
      while( zIn<zTerm ){
        READ_UTF16BE(zIn, c); 
        WRITE_UTF8(z, c);
      }
      WRITE_UTF8(z, 0);
      pMem->n = (z-zOut)-1;
    }
  }

  /* Release the old value, then record where the new string lives. Output
  ** that was staged in the stack buffer is copied into Mem.zShort first.
  */
  sqlite3VdbeMemRelease(pMem);
  pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
  pMem->enc = desiredEnc;
  if( (char *)zOut==pMem->zShort ){
    pMem->flags |= (MEM_Term|MEM_Short);
  }else if( zOut==zShort ){
    memcpy(pMem->zShort, zOut, len);
    zOut = pMem->zShort;
    pMem->flags |= (MEM_Term|MEM_Short);
  }else{
    pMem->flags |= (MEM_Term|MEM_Dyn);
  }
  pMem->z = zOut;

translate_out:
#ifdef TRANSLATE_TRACE
  {
    char zBuf[100];
    sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100);
    fprintf(stderr, "OUTPUT: %s\n", zBuf);
  }
#endif
  return SQLITE_OK;

}

/*
** This routine checks for a byte-order mark at the beginning of the 
** UTF-16 string stored in *pMem. If one is present, it is removed and
** the encoding of the Mem adjusted. This routine does not do any
** byte-swapping, it just sets Mem.enc appropriately.


















































**


** The allocation (static, dynamic etc.) and encoding of the Mem may be
** changed by this function.
*/


int sqlite3VdbeMemHandleBom(Mem *pMem){
  u8 bom = 0;    /* Encoding named by the BOM; 0 if no BOM was found */
  int rc;        /* Result of re-storing the string without its BOM */

  /* Look at the first two bytes unless the length is exactly 0 or 1. */
  if( pMem->n<0 || pMem->n>1 ){
    u8 b1 = *(u8 *)pMem->z;
    u8 b2 = *(((u8 *)pMem->z) + 1);
    if( b1==0xFF && b2==0xFE ){
      bom = SQLITE_UTF16LE;
    }else if( b1==0xFE && b2==0xFF ){
      bom = SQLITE_UTF16BE;
    }
  }

  /* No BOM: the Mem is left exactly as it was. */
  if( bom==0 ){
    return SQLITE_OK;
  }

  /* String held in Mem.zShort: slide the buffer down over the BOM. */
  if( pMem->flags & MEM_Short ){
    memmove(pMem->zShort, &pMem->zShort[2], NBFS-2);
    pMem->n -= 2;
    pMem->enc = bom;
    return SQLITE_OK;
  }

  /* MEM_Dyn string: detach the buffer from the Mem, store a copy of the
  ** text that follows the BOM, then dispose of the old buffer with its
  ** own destructor if it has one, or sqliteFree() otherwise.
  */
  if( pMem->flags & MEM_Dyn ){
    char *zOld = pMem->z;
    void (*xFree)(void*) = pMem->xDel;
    pMem->z = 0;
    pMem->xDel = 0;
    rc = sqlite3VdbeMemSetStr(pMem, &zOld[2], pMem->n-2, bom, SQLITE_TRANSIENT);
    if( xFree ){
      xFree(zOld);
    }else{
      sqliteFree(zOld);
    }
    return rc;
  }

  /* Any other kind of string: store a copy of the text after the BOM. */
  rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, 
      SQLITE_TRANSIENT);
  return rc;
}

/*
** z is a UTF-8 encoded unicode string. If nByte is less than zero,
** return the number of unicode characters in z up to (but not including)
** the first 0x00 byte. If nByte is not less than zero, return the
** number of unicode characters in the first nByte of z (or up to 
** the first 0x00, whichever comes first). In particular, nByte==0 yields 0
** without reading any byte of z.
*/
int sqlite3utf8CharLen(const char *z, int nByte){
  int r = 0;              /* Characters counted so far */
  const char *zTerm;      /* Scanning stops when z reaches this address */

  if( nByte>=0 ){
    zTerm = &z[nByte];
  }else{
    /* No length limit: use the highest possible address as the bound. */
    zTerm = (const char *)(-1);
  }
  assert( z<=zTerm );

  /* Test the bound before dereferencing, so that the byte at zTerm itself
  ** is never read.
  */
  while( z<zTerm && *z!=0 ){
    SKIP_UTF8(z);
    r++;
  }
  return r;
}

/*
** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
** return the number of bytes up to (but not including), the first pair
** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
** then return the number of bytes in the first nChar unicode characters
** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
*/
int sqlite3utf16ByteLen(const void *zIn, int nChar){
  char const *zCsr = zIn;  /* Read cursor, moved forward by the READ macros */
  int ch = 1;              /* Most recent character read; 0 ends the scan */
  int nRead = 0;           /* Number of characters consumed so far */
  int nByte;               /* Value to return */
  int isBigEndian = (SQLITE_UTF16NATIVE==SQLITE_UTF16BE);

  /* Consume characters in the native byte order until either a 0x0000
  ** character has been read or nChar characters have been consumed
  ** (no limit applies when nChar is negative).
  */
  while( ch && (nChar<0 || nRead<nChar) ){
    if( isBigEndian ){
      READ_UTF16BE(zCsr, ch);
    }else{
      READ_UTF16LE(zCsr, ch);
    }
    nRead++;
  }

  /* If the scan stopped on the terminator, the two bytes of the
  ** terminator itself are not part of the reported length.
  */
  nByte = zCsr-(char const *)zIn;
  if( ch==0 ){
    nByte -= 2;
  }
  return nByte;
}



/*
** Compare two UTF-8 strings for equality using the "LIKE" operator of
** SQL.  The '%' character matches any sequence of 0 or more
** characters and '_' matches any single character.  Case is
** not significant.
*/
int sqlite3utf8LikeCompare(
................................................................................

  while( (c = LOWERCASE(*zPattern))!=0 ){
    switch( c ){
      case '%': {
        while( (c=zPattern[1]) == '%' || c == '_' ){
          if( c=='_' ){
            if( *zString==0 ) return 0;
            SKIP_UTF8(zString);
          }
          zPattern++;
        }
        if( c==0 ) return 1;
        c = LOWERCASE(c);
        while( (c2=LOWERCASE(*zString))!=0 ){
          while( c2 != 0 && c2 != c ){ 
            zString++;
            c2 = LOWERCASE(*zString); 
          }
          if( c2==0 ) return 0;
          if( sqlite3utf8LikeCompare(&zPattern[1],zString) ) return 1;
          SKIP_UTF8(zString);
        }
        return 0;
      }
      case '_': {
        if( *zString==0 ) return 0;
        SKIP_UTF8(zString);
        zPattern++;
        break;
      }
      default: {
        if( c != LOWERCASE(*zString) ) return 0;
        zPattern++;
        zString++;
        break;
      }
    }
  }
  return *zString==0;
}

#ifndef NDEBUG
/*
** This routine is called from the TCL test function "translate_selftest".
** It checks that the primitives for serializing and deserializing
** characters in each encoding are inverses of each other.
*/
void sqlite3utfSelfTest(){
  unsigned char zBuf[20];   /* Scratch space for one encoded character */
  unsigned char *zCsr;      /* Cursor handed to the WRITE_/READ_ macros */
  int cp;                   /* Code point under test */
  int nWritten;             /* Bytes produced by the WRITE_ macro */
  int decoded;              /* Value recovered by the READ_ macro */

  /* UTF-8 round trip. The constant 0 in the loop condition means this
  ** loop body is never executed.
  */
  for(cp=0; 0 && cp<0x00110000; cp++){
    zCsr = zBuf;
    WRITE_UTF8(zCsr, cp);
    nWritten = zCsr-zBuf;
    zCsr = zBuf;
    READ_UTF8(zCsr, decoded);
    assert( decoded==cp );
    assert( (zCsr-zBuf)==nWritten );
  }

  /* UTF-16LE round trip; values 0xD800 through 0xE000 are skipped. */
  for(cp=0; cp<0x00110000; cp++){
    if( cp<0xD800 || cp>0xE000 ){
      zCsr = zBuf;
      WRITE_UTF16LE(zCsr, cp);
      nWritten = zCsr-zBuf;
      zCsr = zBuf;
      READ_UTF16LE(zCsr, decoded);
      assert( decoded==cp );
      assert( (zCsr-zBuf)==nWritten );
    }
  }

  /* UTF-16BE round trip; values 0xD800 through 0xE000 are skipped. */
  for(cp=0; cp<0x00110000; cp++){
    if( cp<0xD800 || cp>0xE000 ){
      zCsr = zBuf;
      WRITE_UTF16BE(zCsr, cp);
      nWritten = zCsr-zBuf;
      zCsr = zBuf;
      READ_UTF16BE(zCsr, decoded);
      assert( decoded==cp );
      assert( (zCsr-zBuf)==nWritten );
    }
  }
}
#endif


Changes to src/util.c.

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
...
252
253
254
255
256
257
258







259
260
261
262
263
264
265
...
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461

462
463
464
465




466
467
468
469
470
471
472
**
*************************************************************************
** Utility functions used throughout sqlite.
**
** This file contains functions for allocating memory, comparing
** strings, and stuff like that.
**
** $Id: util.c,v 1.102 2004/06/16 07:45:29 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <stdarg.h>
#include <ctype.h>

#if SQLITE_DEBUG>2 && defined(__GLIBC__)
#include <execinfo.h>
................................................................................
  zNew = sqlite3Malloc_(n+1, 0, zFile, line);
  if( zNew ){
    memcpy(zNew, z, n);
    zNew[n] = 0;
  }
  return zNew;
}







#endif /* SQLITE_DEBUG */

/*
** The following versions of malloc() and free() are for use in a
** normal build.
*/
#if !defined(SQLITE_DEBUG)
................................................................................
** encoded in UTF-8.
**
** To clear the most recent error for sqlite handle "db", sqlite3Error
** should be called with err_code set to SQLITE_OK and zFormat set
** to NULL.
*/
void sqlite3Error(sqlite *db, int err_code, const char *zFormat, ...){
  va_list args;

  /* Discard the messages left behind by any earlier call, in both the
  ** zErrMsg and zErrMsg16 slots.
  */
  if( db->zErrMsg!=0 ){
    sqliteFree(db->zErrMsg);
    db->zErrMsg = 0;
  }
  if( db->zErrMsg16!=0 ){
    sqliteFree(db->zErrMsg16);
    db->zErrMsg16 = 0;
  }

  /* Record the new error code. A NULL format means there is no message
  ** to build, so the handle is left with the code only.
  */
  db->errCode = err_code;
  if( zFormat==0 ){
    return;
  }

  /* Format the new message from the variable arguments. */
  va_start(args, zFormat);
  db->zErrMsg = sqlite3VMPrintf(zFormat, args);
  va_end(args);
}

/*
** Add an error message to pParse->zErrMsg and increment pParse->nErr.
** The following formatting characters are allowed:
**







|







 







>
>
>
>
>
>
>







 







|
<
<
<
<
<
<
<
<
<
<
|
|
>
|
|
|
|
>
>
>
>







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
...
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
...
449
450
451
452
453
454
455
456










457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
**
*************************************************************************
** Utility functions used throughout sqlite.
**
** This file contains functions for allocating memory, comparing
** strings, and stuff like that.
**
** $Id: util.c,v 1.103 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <stdarg.h>
#include <ctype.h>

#if SQLITE_DEBUG>2 && defined(__GLIBC__)
#include <execinfo.h>
................................................................................
  zNew = sqlite3Malloc_(n+1, 0, zFile, line);
  if( zNew ){
    memcpy(zNew, z, n);
    zNew[n] = 0;
  }
  return zNew;
}

/*
** A version of sqliteFree that is always a function, not a macro.
**
** Because it is a real function its address can be taken, which lets it
** be handed to routines that expect a void(*)(void*) destructor
** callback (sqlite3Error() passes it to sqlite3ValueSetStr() that way).
** It simply forwards p to sqliteFree().
*/
void sqlite3FreeX(void *p){
  sqliteFree(p);
}
#endif /* SQLITE_DEBUG */

/*
** The following versions of malloc() and free() are for use in a
** normal build.
*/
#if !defined(SQLITE_DEBUG)
................................................................................
** encoded in UTF-8.
**
** To clear the most recent error for sqlite handle "db", sqlite3Error
** should be called with err_code set to SQLITE_OK and zFormat set
** to NULL.
*/
void sqlite3Error(sqlite *db, int err_code, const char *zFormat, ...){
  char *zMsg;
  va_list args;

  /* Nothing can be recorded without a handle, or without a value object
  ** to hold the message. The value object is created on first use.
  */
  if( !db ){
    return;
  }
  if( !db->pErr ){
    db->pErr = sqlite3ValueNew();
    if( !db->pErr ){
      return;
    }
  }

  db->errCode = err_code;
  if( !zFormat ){
    /* No format: store a NULL string in place of any previous message. */
    sqlite3ValueSetStr(db->pErr, 0, 0, SQLITE_UTF8, SQLITE_STATIC);
    return;
  }

  /* Build the UTF-8 message and hand it to the value object together
  ** with sqlite3FreeX as the destructor for the formatted buffer.
  */
  va_start(args, zFormat);
  zMsg = sqlite3VMPrintf(zFormat, args);
  va_end(args);
  sqlite3ValueSetStr(db->pErr, -1, zMsg, SQLITE_UTF8, sqlite3FreeX);
}

/*
** Add an error message to pParse->zErrMsg and increment pParse->nErr.
** The following formatting characters are allowed:
**

Changes to src/vdbe.c.

39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
...
357
358
359
360
361
362
363
364
365
366


367
368
369
370
371
372
373
...
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428

429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
...
730
731
732
733
734
735
736
737
738
739
740
741
742

743
744
745
746
747

748
749
750
751
752
753
754
755
756
757
....
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
**
** Various scripts scan this source file in order to generate HTML
** documentation, headers files, or other derived files.  The formatting
** of the code in this file is, therefore, important.  See other comments
** in this file for details.  If in doubt, do not deviate from existing
** commenting and indentation practices when changing or adding code.
**
** $Id: vdbe.c,v 1.378 2004/06/17 07:53:03 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include "vdbeInt.h"

/*
................................................................................
}

#ifndef NDEBUG
/*
** Write a nice string representation of the contents of cell pMem
** into buffer zBuf, length nBuf.
*/
void prettyPrintMem(Mem *pMem, char *zBuf, int nBuf){
  char *zCsr = zBuf;
  int f = pMem->flags;



  if( f&MEM_Blob ){
    int i;
    char c;
    if( f & MEM_Dyn ){
      c = 'z';
      assert( (f & (MEM_Static|MEM_Ephem))==0 );
................................................................................
      zBuf[1] = 's';
    }
    k = 2;
    k += sprintf(&zBuf[k], "%d", pMem->n);
    zBuf[k++] = '[';
    for(j=0; j<15 && j<pMem->n; j++){
      u8 c = pMem->z[j];
/*
      if( c==0 && j==pMem->n-1 ) break;
            zBuf[k++] = "0123456789ABCDEF"[c>>4];
            zBuf[k++] = "0123456789ABCDEF"[c&0xf];
*/
      if( c>=0x20 && c<0x7f ){
        zBuf[k++] = c;
      }else{
        zBuf[k++] = '.';
      }
    }
    zBuf[k++] = ']';

    zBuf[k++] = 0;
  }
}

/* Temporary - this is useful in conjunction with prettyPrintMem whilst
** debugging. 
*/
char zGdbBuf[100];
#endif


#ifdef VDBE_PROFILE
/*
** The following routine only works on pentium-class processors.
** It uses the RDTSC opcode to read cycle count value out of the
................................................................................
** P3 points to a nul terminated UTF-8 string. This opcode is transformed
** into an OP_String before it is executed for the first time.
*/
case OP_String8: {
  /* Rewrite the opcode in place so that later executions of this
  ** instruction go straight to OP_String. */
  pOp->opcode = OP_String;

  if( db->enc!=SQLITE_UTF8 && pOp->p3 ){
    /* The database encoding is not UTF-8: replace P3 with a UTF-16
    ** copy in the byte order selected by db->enc. The -1 length is
    ** presumably "nul-terminated" -- confirm against the converters. */
    char *z = pOp->p3;
    if( db->enc==SQLITE_UTF16LE ){
      pOp->p3 = sqlite3utf8to16le(z, -1);
    }else{
      pOp->p3 = sqlite3utf8to16be(z, -1);
    }

    /* Release the original UTF-8 text only if this opcode owned it. */
    if( pOp->p3type==P3_DYNAMIC ){
      sqliteFree(z);
    }
    /* P3 is marked dynamic before the failure check, so the type is
    ** already P3_DYNAMIC when a failed conversion jumps to no_mem. */
    pOp->p3type = P3_DYNAMIC;
    if( !pOp->p3 ) goto no_mem;

  }

  /* Fall through to the next case, OP_String */
}
  
/* Opcode: String * * P3
**
** The string value P3 is pushed onto the stack.  If P3==0 then a
** NULL is pushed onto the stack. P3 is assumed to be a nul terminated
** string encoded with the database native encoding.
................................................................................
          fprintf(p->trace, " si:%lld", pTos[i].i);
        }else if( pTos[i].flags & MEM_Int ){
          fprintf(p->trace, " i:%lld", pTos[i].i);
        }else if( pTos[i].flags & MEM_Real ){
          fprintf(p->trace, " r:%g", pTos[i].r);
        }else{
          char zBuf[100];
          prettyPrintMem(&pTos[i], zBuf, 100);
          fprintf(p->trace, " ");
          fprintf(p->trace, zBuf);
        }
      }
      if( rc!=0 ) fprintf(p->trace," rc=%d",rc);
      fprintf(p->trace,"\n");
    }







|







 







|


>
>







 







<
<
<
<
<







>



<
<
<
<
<







 







|
|
|
|
|
<
>

|


|
>

<
|







 







|







39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
...
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
...
412
413
414
415
416
417
418





419
420
421
422
423
424
425
426
427
428
429





430
431
432
433
434
435
436
...
723
724
725
726
727
728
729
730
731
732
733
734

735
736
737
738
739
740
741
742

743
744
745
746
747
748
749
750
....
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
**
** Various scripts scan this source file in order to generate HTML
** documentation, headers files, or other derived files.  The formatting
** of the code in this file is, therefore, important.  See other comments
** in this file for details.  If in doubt, do not deviate from existing
** commenting and indentation practices when changing or adding code.
**
** $Id: vdbe.c,v 1.379 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include "vdbeInt.h"

/*
................................................................................
}

#ifndef NDEBUG
/*
** Write a nice string representation of the contents of cell pMem
** into buffer zBuf, length nBuf.
*/
void sqlite3VdbeMemPrettyPrint(Mem *pMem, char *zBuf, int nBuf){
  char *zCsr = zBuf;
  int f = pMem->flags;

  static const char *encnames[] = {"(X)", "(8)", "(16LE)", "(16BE)"};

  if( f&MEM_Blob ){
    int i;
    char c;
    if( f & MEM_Dyn ){
      c = 'z';
      assert( (f & (MEM_Static|MEM_Ephem))==0 );
................................................................................
      zBuf[1] = 's';
    }
    k = 2;
    k += sprintf(&zBuf[k], "%d", pMem->n);
    zBuf[k++] = '[';
    for(j=0; j<15 && j<pMem->n; j++){
      u8 c = pMem->z[j];





      if( c>=0x20 && c<0x7f ){
        zBuf[k++] = c;
      }else{
        zBuf[k++] = '.';
      }
    }
    zBuf[k++] = ']';
    k += sprintf(&zBuf[k], encnames[pMem->enc]);
    zBuf[k++] = 0;
  }
}





#endif


#ifdef VDBE_PROFILE
/*
** The following routine only works on pentium-class processors.
** It uses the RDTSC opcode to read cycle count value out of the
................................................................................
** P3 points to a nul terminated UTF-8 string. This opcode is transformed
** into an OP_String before it is executed for the first time.
*/
case OP_String8: {
  /* Rewrite the opcode in place so that later executions of this
  ** instruction go straight to OP_String. */
  pOp->opcode = OP_String;

  if( db->enc!=SQLITE_UTF8 && pOp->p3 ){
    /* Push a new stack entry holding the UTF-8 text of P3, then convert
    ** that entry to the database encoding. */
    pTos++;
    sqlite3VdbeMemSetStr(pTos, pOp->p3, -1, SQLITE_UTF8, SQLITE_STATIC);
    if( SQLITE_OK!=sqlite3VdbeChangeEncoding(pTos, db->enc) ) goto no_mem;
    if( SQLITE_OK!=sqlite3VdbeMemDynamicify(pTos) ) goto no_mem;
    /* Re-flag the converted buffer as static on the stack entry; it is
    ** handed to pOp->p3 below. Presumably this keeps the stack entry from
    ** freeing memory the opcode now refers to -- TODO confirm. */
    pTos->flags &= ~(MEM_Dyn);

    pTos->flags |= MEM_Static;
    /* Release the original UTF-8 text only if this opcode owned it. */
    if( pOp->p3type==P3_DYNAMIC ){
      sqliteFree(pOp->p3);
    }
    pOp->p3type = P3_DYNAMIC;
    pOp->p3 = pTos->z;
    /* The converted value is already on the stack, so do not fall
    ** through to OP_String this time. */
    break;
  }

  /* Otherwise fall through to the next case, OP_String */
}
  
/* Opcode: String * * P3
**
** The string value P3 is pushed onto the stack.  If P3==0 then a
** NULL is pushed onto the stack. P3 is assumed to be a nul terminated
** string encoded with the database native encoding.
................................................................................
          fprintf(p->trace, " si:%lld", pTos[i].i);
        }else if( pTos[i].flags & MEM_Int ){
          fprintf(p->trace, " i:%lld", pTos[i].i);
        }else if( pTos[i].flags & MEM_Real ){
          fprintf(p->trace, " r:%g", pTos[i].r);
        }else{
          char zBuf[100];
          sqlite3VdbeMemPrettyPrint(&pTos[i], zBuf, 100);
          fprintf(p->trace, " ");
          fprintf(p->trace, zBuf);
        }
      }
      if( rc!=0 ) fprintf(p->trace," rc=%d",rc);
      fprintf(p->trace,"\n");
    }

Changes to src/vdbeInt.h.

386
387
388
389
390
391
392


int sqlite3VdbeMemIntegerify(Mem*);
int sqlite3VdbeMemRealify(Mem*);
int sqlite3VdbeMemFromBtree(BtCursor*,int,int,int,Mem*);
void sqlite3VdbeMemRelease(Mem *p);
#ifndef NDEBUG
void sqlite3VdbeMemSanity(Mem*, u8);
#endif









>
>
386
387
388
389
390
391
392
393
394
int sqlite3VdbeMemIntegerify(Mem*);
int sqlite3VdbeMemRealify(Mem*);
int sqlite3VdbeMemFromBtree(BtCursor*,int,int,int,Mem*);
void sqlite3VdbeMemRelease(Mem *p);
#ifndef NDEBUG
void sqlite3VdbeMemSanity(Mem*, u8);
#endif
int sqlite3VdbeMemTranslate(Mem*, u8);
void sqlite3VdbeMemPrettyPrint(Mem *pMem, char *zBuf, int nBuf);

Changes to src/vdbeapi.c.

514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540

  rc = vdbeUnbind(p, i);
  if( rc ){
    return rc;
  }
  pVar = &p->apVar[i-1];

  /* There may or may not be a byte order mark at the start of the UTF-16.
  ** Either way set 'txt_enc' to the SQLITE_UTF16* value indicating the 
  ** actual byte order used by this string. If the string does happen
  ** to contain a BOM, then move zData so that it points to the first
  ** byte after the BOM.
  */
  txt_enc = sqlite3UtfReadBom(zData, nData);
  if( txt_enc ){
    zData = (void *)(((u8 *)zData) + 2);
    nData -= 2;
  }else{
    txt_enc = SQLITE_BIGENDIAN?SQLITE_UTF16BE:SQLITE_UTF16LE;
  }
  rc = sqlite3VdbeMemSetStr(pVar, zData, nData, txt_enc, xDel);
  if( rc ){
    return rc;
  }
  rc = sqlite3VdbeChangeEncoding(pVar, p->db->enc);
  return rc;
}







<
<
<
<
<
<
<
<
<
<
<
<
<
|






514
515
516
517
518
519
520













521
522
523
524
525
526
527

  rc = vdbeUnbind(p, i);
  if( rc ){
    return rc;
  }
  pVar = &p->apVar[i-1];














  rc = sqlite3VdbeMemSetStr(pVar, zData, nData, SQLITE_UTF16NATIVE, xDel);
  if( rc ){
    return rc;
  }
  rc = sqlite3VdbeChangeEncoding(pVar, p->db->enc);
  return rc;
}

Changes to src/vdbemem.c.

17
18
19
20
21
22
23
24
25
26
27
28

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
...
401
402
403
404
405
406
407
408
409
410



411
412
413
414
415
416
417
418
419
420
421
422
423
424
...
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
...
641
642
643
644
645
646
647

648

649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676






677
678
679
680

681
682
683
684
685
686
687
688
689
690
691
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include "vdbeInt.h"

/*
** If pMem is a string object, this routine sets the encoding of the string
** (to one of UTF-8 or UTF16) and whether or not the string is
** nul-terminated. If pMem is not a string object, then this routine is
** a no-op.
**

** The second argument, "desiredEnc" is one of TEXT_Utf8, TEXT_Utf16le
** or TEXT_Utf16be.  This routine changes the encoding of pMem to match
** desiredEnc.
**
** SQLITE_OK is returned if the conversion is successful (or not required).
** SQLITE_NOMEM may be returned if a malloc() fails during conversion
** between formats.
*/
int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){
  int rc;

  /* If this is not a string, or if it is a string but the encoding is
  ** already correct, do nothing. */
  if( !(pMem->flags&MEM_Str) || pMem->enc==desiredEnc ){
    return SQLITE_OK;
  }

  if( pMem->enc==SQLITE_UTF8 || desiredEnc==SQLITE_UTF8 ){
    /* One side of the conversion is UTF-8, so a real translation between
    ** encodings is required.
    */
    char *z;
    int n;

    rc = sqlite3utfTranslate(pMem->z, pMem->n, pMem->enc, (void **)&z, 
        &n, desiredEnc);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    sqlite3VdbeMemRelease(pMem);

    /* Result of sqlite3utfTranslate is currently always dynamically
    ** allocated and nul terminated. This might be altered as a performance
    ** enhancement later.
    */
    pMem->z = z;
    pMem->n = n;
    pMem->flags &= ~(MEM_Ephem | MEM_Short | MEM_Static);
    pMem->flags |= MEM_Str | MEM_Dyn | MEM_Term;
    pMem->xDel = 0;
  }else{
    /* Must be translating between UTF-16le and UTF-16be: swap the two
    ** bytes of every 16-bit unit in place.
    */
    int i;
    u8 *zByte;

    /* The swap writes into the buffer, so it must not proceed unless the
    ** Mem was successfully made writeable.
    */
    rc = sqlite3VdbeMemMakeWriteable(pMem);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    zByte = (u8 *)pMem->z;

    /* Only complete byte pairs are swapped; a trailing odd byte is left
    ** alone rather than being exchanged with the byte beyond pMem->n.
    */
    for(i=0; i+1<pMem->n; i+=2){
      u8 temp = zByte[i];
      zByte[i] = zByte[i+1];
      zByte[i+1] = temp;
    }
  }
  pMem->enc = desiredEnc;
  return SQLITE_OK;
}

/*
** Make the given Mem object MEM_Dyn.
**
** Return SQLITE_OK on success or SQLITE_NOMEM if malloc fails.
*/
................................................................................
        pMem->flags |= MEM_Term;
      }
      break;

    case SQLITE_UTF16LE:
    case SQLITE_UTF16BE:
      pMem->flags |= MEM_Str;
      if( n<0 ){
        pMem->n = sqlite3utf16ByteLen(z,-1);
        pMem->flags |= MEM_Term;



      }
      break;

    default:
      assert(0);
  }
  if( xDel==SQLITE_TRANSIENT ){
    return sqlite3VdbeMemMakeWriteable(pMem);
  }
  return SQLITE_OK;
}

/*
** Compare the values contained by the two memory cells, returning
................................................................................
      return -1;
    }

    assert( pMem1->enc==pMem2->enc );
    assert( pMem1->enc==SQLITE_UTF8 || 
            pMem1->enc==SQLITE_UTF16LE || pMem1->enc==SQLITE_UTF16BE );

    /* FIX ME: This may fail if the collation sequence is deleted after
    ** this vdbe program is compiled. We cannot just use BINARY in this
    ** case as this may lead to a segfault caused by traversing an index
    ** table incorrectly.  We need to return an error to the user in this
    ** case.
    */
    assert( !pColl || pColl->xCmp );

    if( pColl ){
      if( pMem1->enc==pColl->enc ){
        return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z);
      }else{
................................................................................
/* This function is only available internally, it is not part of the
** external API. It works in a similar way to sqlite3_value_text(),
** except the data returned is in the encoding specified by the second
** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or
** SQLITE_UTF8.
*/
const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){

  assert( enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE || enc==SQLITE_UTF8);

  if( pVal->flags&MEM_Null ){
    /* For a NULL return a NULL Pointer */
    return 0;
  }

  if( pVal->flags&MEM_Str ){
    /* If there is already a string representation, make sure it is
    ** encoded in the requested encoding. If the conversion fails the
    ** stored text is still in the old encoding, so return NULL rather
    ** than hand back bytes the caller would misinterpret.
    */
    if( sqlite3VdbeChangeEncoding(pVal, enc)!=SQLITE_OK ){
      return 0;
    }
  }else if( !(pVal->flags&MEM_Blob) ){
    /* Otherwise, unless this is a blob, convert it to a string in the
    ** requested encoding. */
    sqlite3VdbeMemStringify(pVal, enc);
  }

  return (const void *)(pVal->z);
}

/*
** Allocate a new value object and initialise it to hold NULL.
** Returns 0 if the allocation fails.
*/
sqlite3_value* sqlite3ValueNew(){
  Mem *pNew = sqliteMalloc(sizeof(*pNew));
  if( !pNew ){
    return pNew;
  }
  pNew->flags = MEM_Null;
  pNew->type = SQLITE_NULL;
  return pNew;
}

/*
** Store the string z in value v by forwarding to sqlite3VdbeMemSetStr().
** n and enc are passed through as that routine's length and encoding
** arguments (note that n and z are in the opposite order here). The
** destructor argument is always SQLITE_STATIC.
*/
void sqlite3ValueSetStr(sqlite3_value *v, int n, const void *z, u8 enc){
  sqlite3VdbeMemSetStr((Mem *)v, z, n, enc, SQLITE_STATIC);
}

/*
** Dispose of a value object obtained from sqlite3ValueNew().
*/
void sqlite3ValueFree(sqlite3_value *v){
  /* Overwrite the value with a NULL string before freeing the object;
  ** presumably this releases any string buffer v still owns -- confirm
  ** against sqlite3VdbeMemSetStr(). */
  sqlite3ValueSetStr(v, 0, 0, SQLITE_UTF8);
  sqliteFree(v);
}

/*
** Return the byte count (Mem.n) of pVal. A blob reports its size
** directly. Any other value is first converted to text in encoding enc,
** and 0 is returned if that conversion yields a NULL pointer.
*/
int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){
  Mem *pMem = (Mem*)pVal;
  if( (pMem->flags & MEM_Blob)==0 && sqlite3ValueText(pVal, enc)==0 ){
    return 0;
  }
  return pMem->n;
}







|
|
|
<

>
|
|
<






<
<



<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|
|

>
>
>






|







 







|
|
|
<
<







 







>

>

<


<

<
<
<


<


<












|
>
>
>
>
>
>
|



>
|










17
18
19
20
21
22
23
24
25
26

27
28
29
30

31
32
33
34
35
36


37
38
39










40



























41
42
43
44
45
46
47
...
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
...
457
458
459
460
461
462
463
464
465
466


467
468
469
470
471
472
473
...
602
603
604
605
606
607
608
609
610
611
612

613
614

615



616
617

618
619

620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include "vdbeInt.h"

/*
** If pMem is an object with a valid string representation, this routine
** ensures the internal encoding for the string representation is
** 'desiredEnc', one of SQLITE_UTF8, SQLITE_UTF16LE or SQLITE_UTF16BE.

**
** If pMem is not a string object, or the encoding of the string
** representation is already stored using the requested encoding, then this
** routine is a no-op.

**
** SQLITE_OK is returned if the conversion is successful (or not required).
** SQLITE_NOMEM may be returned if a malloc() fails during conversion
** between formats.
*/
int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){
  /* Translation is needed only for a string whose current encoding
  ** differs from the requested one; everything else is a no-op.
  */
  if( (pMem->flags&MEM_Str) && pMem->enc!=desiredEnc ){
    return sqlite3VdbeMemTranslate(pMem, desiredEnc);
  }
  return SQLITE_OK;
}

/*
** Make the given Mem object MEM_Dyn.
**
** Return SQLITE_OK on success or SQLITE_NOMEM if malloc fails.
*/
................................................................................
        pMem->flags |= MEM_Term;
      }
      break;

    case SQLITE_UTF16LE:
    case SQLITE_UTF16BE:
      pMem->flags |= MEM_Str;
      if( pMem->n<0 ){
        pMem->n = sqlite3utf16ByteLen(pMem->z,-1);
        pMem->flags |= MEM_Term;
      }
      if( sqlite3VdbeMemHandleBom(pMem) ){
        return SQLITE_NOMEM;
      }
      break;

    default:
      assert(0);
  }
  if( pMem->flags&MEM_Ephem ){
    return sqlite3VdbeMemMakeWriteable(pMem);
  }
  return SQLITE_OK;
}

/*
** Compare the values contained by the two memory cells, returning
................................................................................
      return -1;
    }

    assert( pMem1->enc==pMem2->enc );
    assert( pMem1->enc==SQLITE_UTF8 || 
            pMem1->enc==SQLITE_UTF16LE || pMem1->enc==SQLITE_UTF16BE );

    /* This assert may fail if the collation sequence is deleted after this
    ** vdbe program is compiled. The documentation defines this as an
    ** undefined condition. A crash is usual result.


    */
    assert( !pColl || pColl->xCmp );

    if( pColl ){
      if( pMem1->enc==pColl->enc ){
        return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z);
      }else{
................................................................................
/* This function is only available internally, it is not part of the
** external API. It works in a similar way to sqlite3_value_text(),
** except the data returned is in the encoding specified by the second
** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or
** SQLITE_UTF8.
*/
const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){
  if( !pVal ) return 0;
  assert( enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE || enc==SQLITE_UTF8);

  if( pVal->flags&MEM_Null ){
    /* A NULL value has no text: return a NULL pointer. */
    return 0;
  }

  if( pVal->flags&MEM_Str ){
    /* A string representation already exists; make sure it uses the
    ** requested encoding. If the conversion fails, return NULL rather
    ** than text that may still be in the previous encoding.
    */
    if( sqlite3VdbeChangeEncoding(pVal, enc)!=SQLITE_OK ){
      return 0;
    }
  }else if( !(pVal->flags&MEM_Blob) ){
    /* Neither a string nor a blob: build a string representation in the
    ** requested encoding. */
    sqlite3VdbeMemStringify(pVal, enc);
  }

  return (const void *)(pVal->z);
}

/*
** Allocate a new value object and initialise it to hold NULL.
** Returns 0 if the allocation fails.
*/
sqlite3_value* sqlite3ValueNew(){
  Mem *pNew = sqliteMalloc(sizeof(*pNew));
  if( !pNew ){
    return pNew;
  }
  pNew->flags = MEM_Null;
  pNew->type = SQLITE_NULL;
  return pNew;
}

/*
** Store the string z in value v by forwarding to sqlite3VdbeMemSetStr().
** n, enc and xDel are passed through unchanged (note that n and z are in
** the opposite order here). A NULL v is silently ignored.
*/
void sqlite3ValueSetStr(
  sqlite3_value *v, 
  int n, 
  const void *z, 
  u8 enc,
  void (*xDel)(void*)
){
  if( !v ){
    return;
  }
  sqlite3VdbeMemSetStr((Mem *)v, z, n, enc, xDel);
}

/*
** Dispose of a value object obtained from sqlite3ValueNew(). A NULL v is
** a no-op.
*/
void sqlite3ValueFree(sqlite3_value *v){
  if( v ){
    /* Overwrite the value with a NULL string first, then free the
    ** object itself. */
    sqlite3ValueSetStr(v, 0, 0, SQLITE_UTF8, SQLITE_STATIC);
    sqliteFree(v);
  }
}

/*
** Return the number of bytes held by pVal when it is viewed in the
** encoding enc.
**
** For a blob the stored length is returned as-is. For any other value
** the text representation is first obtained via sqlite3ValueText(); if
** that yields a non-null pointer the resulting length is returned,
** otherwise 0.
**
** A null pVal yields 0, matching the null tolerance of
** sqlite3ValueText(), sqlite3ValueSetStr() and sqlite3ValueFree().
*/
int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){
  Mem *p = (Mem*)pVal;
  if( !p ) return 0;  /* guard: p->flags below would dereference null */
  if( (p->flags & MEM_Blob)!=0 || sqlite3ValueText(pVal, enc) ){
    return p->n;
  }
  return 0;
}

Changes to test/enc.test.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
..
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
..
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

132
133
134
135

136
137
138
139
140
141
142
143
144
145
146
147
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.1 2004/05/22 08:16:11 danielk1977 Exp $

# The test directory is the directory containing the running script
# ($argv0); tester.tcl is loaded from there.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

proc do_bincmp_test {testname got expect} {
  binary scan $expect \c* expectvals
  binary scan $got \c* gotvals
................................................................................
# sqlite_utf16to8 (steps 3, 4)
# sqlite_utf16to16le (step 5)
# sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {
  # Cross-check SQLite's encoding conversions of $str against reference
  # values built with Tcl's [encoding convertto]. Every comparison goes
  # through do_bincmp_test under a name derived from $testname.
 
  # Step 1: UTF-8 -> UTF-16LE.
  # The Tcl reference gets two zero bytes appended and is byte-swapped on
  # hosts that are not little-endian, so it is always little-endian.
  set utf16le_sqlite [sqlite_utf8to16le $str]
  set utf16le_tcl [encoding convertto unicode $str]
  append utf16le_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)!="littleEndian" } {
    set utf16le_tcl [swap_byte_order $utf16le_tcl]
  }
  do_bincmp_test $testname.1 $utf16le_sqlite $utf16le_tcl
  set utf16le $utf16le_tcl

  # Step 2: UTF-8 -> UTF-16BE.
  # Same as step 1, but the reference is byte-swapped on little-endian
  # hosts so that it is always big-endian.
  set utf16be_sqlite [sqlite_utf8to16be $str]
  set utf16be_tcl [encoding convertto unicode $str]
  append utf16be_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16be_tcl [swap_byte_order $utf16be_tcl]
  }
  do_bincmp_test $testname.2 $utf16be_sqlite $utf16be_tcl
  set utf16be $utf16be_tcl
................................................................................
 
  # Step 3: UTF-16 in the host's byte order (no byte-order mark) -> UTF-8.
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16 $utf16le
  } else {
    set utf16 $utf16be
  }
  set utf8_sqlite [sqlite_utf16to8 $utf16]
  do_bincmp_test $testname.3 $utf8_sqlite [binarize $str]

  # Step 4 (little endian): UTF-16 prefixed with the bytes FF FE -> UTF-8.
  # utf16le_bom does not exist yet; [append] creates it.
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite [sqlite_utf16to8 $utf16le_bom]
  do_bincmp_test $testname.4.le $utf8_sqlite [binarize $str]

  # Step 4 (big endian): UTF-16 prefixed with the bytes FE FF -> UTF-8.
  # utf16be_bom is likewise created by [append].
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite [sqlite_utf16to8 $utf16be_bom]
  do_bincmp_test $testname.4.be $utf8_sqlite [binarize $str]

  # Step 5: UTF-16 -> UTF-16 with a fixed output byte order. The input is
  # the prefixed text from step 4; the expected output ($utf16le or
  # $utf16be) carries no such prefix.

  # Step 5 (little endian to little endian).
  set utf16_sqlite [sqlite_utf16to16le $utf16le_bom]
  do_bincmp_test $testname.5.le.le $utf16_sqlite $utf16le

  # Step 5 (big endian to big endian).
  set utf16_sqlite [sqlite_utf16to16be $utf16be_bom]
  do_bincmp_test $testname.5.be.be $utf16_sqlite $utf16be

  # Step 5 (big endian to little endian).
  set utf16_sqlite [sqlite_utf16to16le $utf16be_bom]
  do_bincmp_test $testname.5.be.le $utf16_sqlite $utf16le

  # Step 5 (little endian to big endian).
  set utf16_sqlite [sqlite_utf16to16be $utf16le_bom]
  do_bincmp_test $testname.5.le.be $utf16_sqlite $utf16be
}



# ASCII-only inputs, including the empty string.
test_conversion enc-1 "hello world"
test_conversion enc-2 "sqlite"
test_conversion enc-3 ""

# Inputs containing characters outside the ASCII range.
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
# enc-7: long input mixing ASCII with U+00EF, U+00EE and U+FFFC.
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
# enc-8: code points on either side of U+0080, where UTF-8 switches from
# one-byte to two-byte sequences.
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
# enc-9: code points on either side of U+0800, where UTF-8 switches from
# two-byte to three-byte sequences, plus U+FFF0.
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]

# finish_test is not defined in this file; presumably it comes from
# tester.tcl -- confirm.
finish_test











|







 







|









|







 







|




|




|



|



|



|



|



>




>








<



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
..
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
..
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145

146
147
148
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.2 2004/06/18 04:24:56 danielk1977 Exp $

# The test directory is the directory containing the running script
# ($argv0); tester.tcl is loaded from there.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

proc do_bincmp_test {testname got expect} {
  binary scan $expect \c* expectvals
  binary scan $got \c* gotvals
................................................................................
# sqlite_utf16to8 (steps 3, 4)
# sqlite_utf16to16le (step 5)
# sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {
  # Cross-check SQLite's encoding conversions of $str against reference
  # values built with Tcl's [encoding convertto]. Every comparison goes
  # through do_bincmp_test under a name derived from $testname.
  # test_translate is not defined in this file; from its use here its
  # arguments appear to be: data, source encoding, target encoding --
  # confirm against its definition.
 
  # Step 1: UTF-8 -> UTF-16LE.
  # The Tcl reference gets two zero bytes appended and is byte-swapped on
  # hosts that are not little-endian, so it is always little-endian.
  set utf16le_sqlite [test_translate $str UTF8 UTF16LE]
  set utf16le_tcl [encoding convertto unicode $str]
  append utf16le_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)!="littleEndian" } {
    set utf16le_tcl [swap_byte_order $utf16le_tcl]
  }
  do_bincmp_test $testname.1 $utf16le_sqlite $utf16le_tcl
  set utf16le $utf16le_tcl

  # Step 2: UTF-8 -> UTF-16BE.
  # Same as step 1, but the reference is byte-swapped on little-endian
  # hosts so that it is always big-endian.
  set utf16be_sqlite [test_translate $str UTF8 UTF16BE]
  set utf16be_tcl [encoding convertto unicode $str]
  append utf16be_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16be_tcl [swap_byte_order $utf16be_tcl]
  }
  do_bincmp_test $testname.2 $utf16be_sqlite $utf16be_tcl
  set utf16be $utf16be_tcl
................................................................................
 
  # Step 3: UTF-16 in the host's byte order (no byte-order mark) -> UTF-8.
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16 $utf16le
  } else {
    set utf16 $utf16be
  }
  set utf8_sqlite [test_translate $utf16 UTF16 UTF8]
  do_bincmp_test $testname.3 $utf8_sqlite [binarize $str]

  # Step 4 (little endian): UTF-16 prefixed with the bytes FF FE -> UTF-8.
  # utf16le_bom does not exist yet; [append] creates it.
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite [test_translate $utf16le_bom UTF16 UTF8]
  do_bincmp_test $testname.4.le $utf8_sqlite [binarize $str]

  # Step 4 (big endian): UTF-16 prefixed with the bytes FE FF -> UTF-8.
  # utf16be_bom is likewise created by [append].
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite [test_translate $utf16be_bom UTF16 UTF8]
  do_bincmp_test $testname.4.be $utf8_sqlite [binarize $str]

  # Step 5: UTF-16 -> UTF-16 with a fixed output byte order. The input is
  # the prefixed text from step 4; the expected output ($utf16le or
  # $utf16be) carries no such prefix.

  # Step 5 (little endian to little endian).
  # NOTE(review): this case names the source encoding UTF16LE while the
  # other three step-5 cases use UTF16 -- confirm this is intentional.
  set utf16_sqlite [test_translate $utf16le_bom UTF16LE UTF16LE]
  do_bincmp_test $testname.5.le.le $utf16_sqlite $utf16le

  # Step 5 (big endian to big endian).
  set utf16_sqlite [test_translate $utf16be_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.be.be $utf16_sqlite $utf16be

  # Step 5 (big endian to little endian).
  set utf16_sqlite [test_translate $utf16be_bom UTF16 UTF16LE]
  do_bincmp_test $testname.5.be.le $utf16_sqlite $utf16le

  # Step 5 (little endian to big endian).
  set utf16_sqlite [test_translate $utf16le_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.le.be $utf16_sqlite $utf16be
}

# translate_selftest is not defined in this file; presumably it is a
# self-check command supplied by the test harness -- confirm.
translate_selftest

# ASCII-only inputs, including the empty string.
test_conversion enc-1 "hello world"
test_conversion enc-2 "sqlite"
test_conversion enc-3 ""
# Inputs containing characters outside the ASCII range.
test_conversion enc-X "\u0100"
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
# enc-7: long input mixing ASCII with U+00EF, U+00EE and U+FFFC.
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
# enc-8: code points on either side of U+0080, where UTF-8 switches from
# one-byte to two-byte sequences.
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
# enc-9: code points on either side of U+0800, where UTF-8 switches from
# two-byte to three-byte sequences, plus U+FFF0.
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]

# finish_test is not defined in this file; presumably it comes from
# tester.tcl -- confirm.
finish_test