/ Check-in [c17b8641]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a bug with UTF-16 byte-order-marks on big-endian hosts. (CVS 1522)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: c17b864103fe5e6def0f650eadb7b2cc6e87144f
User & Date: danielk1977 2004-06-02 00:29:24
Context
2004-06-02
00:41
Remove the sqlite3_libencoding() api and the ISO8859 encoding option. (CVS 1523) check-in: b53640ed user: drh tags: trunk
00:29
Fix a bug with UTF-16 byte-order-marks on big-endian hosts. (CVS 1522) check-in: c17b8641 user: danielk1977 tags: trunk
00:08
Fix another website typo. (CVS 1521) check-in: 24e2bbd0 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/utf.c.

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
..
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
...
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.15 2004/05/31 18:51:58 drh Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
typedef struct UtfString UtfString;
struct UtfString {
  unsigned char *pZ;    /* Raw string data */
  int n;                /* Allocated length of pZ in bytes */
  int c;                /* Number of pZ bytes already read or written */
};

/* TODO: Implement this macro in os.h. It should be 1 on big-endian
** machines, and 0 on little-endian.
*/
#define SQLITE_NATIVE_BIGENDIAN 0

#if SQLITE_NATIVE_BIGENDIAN == 1
#define BOM_BIGENDIAN 0x0000FFFE
#define BOM_LITTLEENDIAN 0x0000FEFF
#else
#define BOM_BIGENDIAN 0x0000FEFF
#define BOM_LITTLEENDIAN 0x0000FFFE
#endif

/*
** These two macros are used to interpret the first two bytes of the 
** unsigned char array pZ as a 16-bit unsigned int. BE16() for a big-endian
** interpretation, LE16() for little-endian.
*/
#define BE16(pZ) (((u16)((pZ)[0])<<8) + (u16)((pZ)[1]))
#define LE16(pZ) (((u16)((pZ)[1])<<8) + (u16)((pZ)[0]))
................................................................................
  ** difference anyway. In this case just fall through to the default case
  ** and return the native byte-order for this machine.
  **
  ** Otherwise, check the first 2 bytes of the string to see if a BOM is
  ** present.
  */
  if( pStr->n>1 ){
    u32 bom = BE16(pStr->pZ);
    if( bom==BOM_BIGENDIAN ){
      pStr->c = 2;
      return 1;
    }
    if( bom==BOM_LITTLEENDIAN ){
      pStr->c = 2;
      return 0;
    }
  }

  return big_endian;
}

/*







|







 







<
<
<
<
<
<
<
<
<
<
<
<
<







 







|
|
|
|
<
<
<
<







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
..
55
56
57
58
59
60
61













62
63
64
65
66
67
68
..
91
92
93
94
95
96
97
98
99
100
101




102
103
104
105
106
107
108
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.16 2004/06/02 00:29:24 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
typedef struct UtfString UtfString;
struct UtfString {
  unsigned char *pZ;    /* Raw string data */
  int n;                /* Allocated length of pZ in bytes */
  int c;                /* Number of pZ bytes already read or written */
};














/*
** These two macros are used to interpret the first two bytes of the 
** unsigned char array pZ as a 16-bit unsigned int. BE16() for a big-endian
** interpretation, LE16() for little-endian.
*/
#define BE16(pZ) (((u16)((pZ)[0])<<8) + (u16)((pZ)[1]))
#define LE16(pZ) (((u16)((pZ)[1])<<8) + (u16)((pZ)[0]))
................................................................................
  ** difference anyway. In this case just fall through to the default case
  ** and return the native byte-order for this machine.
  **
  ** Otherwise, check the first 2 bytes of the string to see if a BOM is
  ** present.
  */
  if( pStr->n>1 ){
    u8 bom = sqlite3UtfReadBom(pStr->pZ, 2);
    if( bom ){
      pStr->c += 2;
      return (bom==TEXT_Utf16le)?0:1;




    }
  }

  return big_endian;
}

/*