SQLite

Check-in [c17b864103]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a bug with UTF-16 byte-order-marks on big-endian hosts. (CVS 1522)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: c17b864103fe5e6def0f650eadb7b2cc6e87144f
User & Date: danielk1977 2004-06-02 00:29:24.000
Context
2004-06-02
00:41
Remove the sqlite3_libencoding() api and the ISO8859 encoding option. (CVS 1523) (check-in: b53640ed22 user: drh tags: trunk)
00:29
Fix a bug with UTF-16 byte-order-marks on big-endian hosts. (CVS 1522) (check-in: c17b864103 user: danielk1977 tags: trunk)
00:08
Fix another website typo. (CVS 1521) (check-in: 24e2bbd0f2 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/utf.c.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.15 2004/05/31 18:51:58 drh Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx







|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.16 2004/06/02 00:29:24 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
typedef struct UtfString UtfString;
struct UtfString {
  unsigned char *pZ;    /* Raw string data */
  int n;                /* Allocated length of pZ in bytes */
  int c;                /* Number of pZ bytes already read or written */
};

/* TODO: Implement this macro in os.h. It should be 1 on big-endian
** machines, and 0 on little-endian.
*/
#define SQLITE_NATIVE_BIGENDIAN 0

#if SQLITE_NATIVE_BIGENDIAN == 1
#define BOM_BIGENDIAN 0x0000FFFE
#define BOM_LITTLEENDIAN 0x0000FEFF
#else
#define BOM_BIGENDIAN 0x0000FEFF
#define BOM_LITTLEENDIAN 0x0000FFFE
#endif

/*
** These two macros are used to interpret the first two bytes of the 
** unsigned char array pZ as a 16-bit unsigned int. BE16() for a big-endian
** interpretation, LE16() for little-endian.
*/
#define BE16(pZ) (((u16)((pZ)[0])<<8) + (u16)((pZ)[1]))
#define LE16(pZ) (((u16)((pZ)[1])<<8) + (u16)((pZ)[0]))







<
<
<
<
<
<
<
<
<
<
<
<
<







55
56
57
58
59
60
61













62
63
64
65
66
67
68
typedef struct UtfString UtfString;
struct UtfString {
  unsigned char *pZ;    /* Raw string data */
  int n;                /* Allocated length of pZ in bytes */
  int c;                /* Number of pZ bytes already read or written */
};














/*
** These two macros are used to interpret the first two bytes of the 
** unsigned char array pZ as a 16-bit unsigned int. BE16() for a big-endian
** interpretation, LE16() for little-endian.
*/
#define BE16(pZ) (((u16)((pZ)[0])<<8) + (u16)((pZ)[1]))
#define LE16(pZ) (((u16)((pZ)[1])<<8) + (u16)((pZ)[0]))
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
  ** difference anyway. In this case just fall through to the default case
  ** and return the native byte-order for this machine.
  **
  ** Otherwise, check the first 2 bytes of the string to see if a BOM is
  ** present.
  */
  if( pStr->n>1 ){
    u32 bom = BE16(pStr->pZ);
    if( bom==BOM_BIGENDIAN ){
      pStr->c = 2;
      return 1;
    }
    if( bom==BOM_LITTLEENDIAN ){
      pStr->c = 2;
      return 0;
    }
  }

  return big_endian;
}

/*







|
|
|
|
<
<
<
<







91
92
93
94
95
96
97
98
99
100
101




102
103
104
105
106
107
108
  ** difference anyway. In this case just fall through to the default case
  ** and return the native byte-order for this machine.
  **
  ** Otherwise, check the first 2 bytes of the string to see if a BOM is
  ** present.
  */
  if( pStr->n>1 ){
    u8 bom = sqlite3UtfReadBom(pStr->pZ, 2);
    if( bom ){
      pStr->c += 2;
      return (bom==TEXT_Utf16le)?0:1;




    }
  }

  return big_endian;
}

/*