/ Check-in [7173b392]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Minor change to sqlite3Utf8Read() to make consistent with READ_UTF8() usage and avoid implementation defined usages of <<. Added some additional UTF-8 test cases.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 7173b3929fae4e678223b0e978a2da7fa50a9005
User & Date: shaneh 2011-03-24 17:43:18
Context
2011-03-28
19:10
Add test infrastructure (and some tests) to inject faults into os_unix.c using the new xSetSystemCall interface. check-in: 0e1d20df user: dan tags: trunk
2011-03-26
15:05
Skeleton code for the word-fuzzer virtual table. check-in: ea3a4ee1 user: drh tags: word-fuzzer
2011-03-24
17:43
Minor change to sqlite3Utf8Read() to make consistent with READ_UTF8() usage and avoid implementation defined usages of <<. Added some additional UTF-8 test cases. check-in: 7173b392 user: shaneh tags: trunk
17:37
Fix problem with tableapi.test on Windows. check-in: 69fe0c87 user: shaneh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/utf.c.

163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
...
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
        || (c&0xFFFFF800)==0xD800                          \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
  }
int sqlite3Utf8Read(
  const unsigned char *zIn,       /* First byte of UTF-8 character */
  const unsigned char **pzNext    /* Write first byte past UTF-8 char here */
){
  int c;

  /* Same as READ_UTF8() above but without the zTerm parameter.
  ** For this routine, we assume the UTF8 string is always zero-terminated.
  */
  c = *(zIn++);
  if( c>=0xc0 ){
    c = sqlite3Utf8Trans1[c-0xc0];
................................................................................
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
/*
** Translate UTF-8 to UTF-8.
**
** This has the effect of making sure that the string is well-formed
** UTF-8.  Miscoded characters are removed.
**
** The translation is done in-place (since it is impossible for the
** correct UTF-8 encoding to be longer than a malformed encoding).
*/
int sqlite3Utf8To8(unsigned char *zIn){
  unsigned char *zOut = zIn;
  unsigned char *zStart = zIn;
  u32 c;

  while( zIn[0] ){
    c = sqlite3Utf8Read(zIn, (const u8**)&zIn);
    if( c!=0xfffd ){
      WRITE_UTF8(zOut, c);
    }
  }
  *zOut = 0;
  return (int)(zOut - zStart);







|







 







|
|






|







163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
...
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
        || (c&0xFFFFF800)==0xD800                          \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
  }
int sqlite3Utf8Read(
  const unsigned char *zIn,       /* First byte of UTF-8 character */
  const unsigned char **pzNext    /* Write first byte past UTF-8 char here */
){
  unsigned int c;

  /* Same as READ_UTF8() above but without the zTerm parameter.
  ** For this routine, we assume the UTF8 string is always zero-terminated.
  */
  c = *(zIn++);
  if( c>=0xc0 ){
    c = sqlite3Utf8Trans1[c-0xc0];
................................................................................
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
/*
** Translate UTF-8 to UTF-8.
**
** This has the effect of making sure that the string is well-formed
** UTF-8.  Miscoded characters are removed.
**
** The translation is done in-place and aborted if the output
** overruns the input.
*/
int sqlite3Utf8To8(unsigned char *zIn){
  unsigned char *zOut = zIn;
  unsigned char *zStart = zIn;
  u32 c;

  while( zIn[0] && zOut<=zIn ){
    c = sqlite3Utf8Read(zIn, (const u8**)&zIn);
    if( c!=0xfffd ){
      WRITE_UTF8(zOut, c);
    }
  }
  *zOut = 0;
  return (int)(zOut - zStart);

Added test/badutf2.test.















































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# 2011 March 15
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library. 
#
# This file checks to make sure SQLite is able to gracefully
# handle malformed UTF-8.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Render the string s as a sequence of \uXXXX escapes, one per character,
# where XXXX is the character code in uppercase hex padded to at least
# four digits.  An empty input produces an empty result.
proc utf8_to_ustr2 {s} {
  set escaped ""
  foreach ch [split $s ""] {
    # Fetch the numeric code of this single character.
    scan $ch %c code
    append escaped [format \\u%04.4X $code]
  }
  return $escaped
}

# Convert a string of two-character groups into percent-escaped form:
# every pair XX becomes %XX (for example C3BF -> %C3%BF).
# The intermediate text is passed through subst, so the input is expected
# to contain only plain characters such as hex digits.
proc utf8_to_hstr {in} {
  # Replacement template: wrap each matched pair in a format call that
  # subst will evaluate afterwards.
  set pairSpec {%[format "%s" \1]}
  regsub -all -- {(..)} $in $pairSpec tmpl
  return [subst $tmpl]
}

# Convert a string of two-character groups into a run of literal \xXX
# escapes: every pair XX becomes a backslash, the letter x, and the pair
# itself (for example C3BF -> \xC3\xBF as plain text).
# The intermediate text is passed through subst, so the input is expected
# to contain only plain characters such as hex digits.
proc utf8_to_xstr {in} {
  # The doubled backslashes survive regsub as two backslashes, which subst
  # then collapses to the single literal backslash of the result.
  set pairSpec {\\\\x[format "%s" \1]}
  regsub -all -- {(..)} $in $pairSpec tmpl
  return [subst $tmpl]
}

# Convert a string of hex digit pairs into a run of literal \uXXXX
# escapes: every pair XX is read as a hex number and emitted as a
# backslash, the letter u, and that number in uppercase hex padded to four
# digits (for example C3BF -> \u00C3\u00BF as plain text).
# The intermediate text is passed through subst, so the input is expected
# to contain only hex digits.
proc utf8_to_ustr {in} {
  # The doubled backslashes survive regsub as two backslashes, which subst
  # then collapses to the single literal backslash of the result.
  set pairSpec {\\\\u[format "%04.4X" 0x\1]}
  regsub -all -- {(..)} $in $pairSpec tmpl
  return [subst $tmpl]
}

# Close the current db handle, remove test.db, reopen it, and set the
# encoding pragma to UTF-8.
do_test badutf2-1.0 {
  db close
  forcedelete test.db
  sqlite3 db test.db
  db eval "PRAGMA encoding = 'UTF-8'"
} {}

# Prepare a one-parameter statement and keep its handle in S.  The handle
# is reset, bound and stepped by the badutf2-4.1.* cases and finalized by
# badutf2-4.2.
do_test badutf2-4.0 {
  set S [sqlite3_prepare_v2 db "SELECT ?" -1 dummy]
  sqlite3_expired $S
} {0}
        
# Table-driven cases.  Each row of the list supplies one test vector:
#   i     - suffix appended to the test case names
#   len   - length argument passed to sqlite3_bind_text along with xstr
#   uval  - hex digits used as a blob literal, as the expected hex()
#           output, and as the argument to utf8_to_utf8
#   xstr  - text bound to the parameter of the prepared statement S
#   ustr  - expected utf8_to_ustr2 rendering of the column text read back
#   u2u   - expected result of utf8_to_utf8 applied to uval
foreach { i len uval xstr ustr u2u } {
1 1 00     \x00         {}        {}
2 1 01     \x01         "\\u0001" 01
3 1 3F     \x3F         "\\u003F" 3F
4 1 7F     \x7F         "\\u007F" 7F
5 1 80     \x80         "\\u0080" C280
6 1 C3BF   \xFF         "\\u00FF" C3BF
7 3 EFBFBD \xEF\xBF\xBD "\\uFFFD" {}
} {

  # Percent-escaped form of uval, e.g. C3BF becomes %C3%BF.
  # NOTE(review): presumably the harness sqlite3_exec command decodes
  # these %XX escapes before running the SQL - confirm.
  set hstr [ utf8_to_hstr $uval ]

  # The cases in this group use x'...' blob literals and run only when
  # the bloblit capability is reported.
  ifcapable bloblit {
    # The %00 vector is excluded from the cases that embed hstr in the
    # SQL text.  NOTE(review): presumably because a NUL byte would end the
    # SQL string early - confirm.
    if {$hstr != "%00"} {
      # Text literal compared against the blob literal cast to text.
      do_test badutf2-2.1.$i {
        set sql "SELECT '$hstr'=CAST(x'$uval' AS text) AS x;"
        set res [ sqlite3_exec db $sql ]
        lindex [ lindex $res 1] 1
      } {1}
      # Text literal cast to blob compared against the blob literal.
      do_test badutf2-2.2.$i {
        set sql "SELECT CAST('$hstr' AS blob)=x'$uval' AS x;"
        set res [ sqlite3_exec db $sql ]
        lindex [ lindex $res 1] 1
      } {1}
    }
    # hex() of the blob literal cast to text must give back uval.
    do_test badutf2-2.3.$i {
      set sql "SELECT hex(CAST(x'$uval' AS text)) AS x;"
      set res [ sqlite3_exec db $sql ]
      lindex [ lindex $res 1] 1
    } $uval
    # NOTE(review): this case repeats badutf2-2.3 verbatim; confirm
    # whether a different query was intended here.
    do_test badutf2-2.4.$i {
      set sql "SELECT hex(CAST(x'$uval' AS text)) AS x;"
      set res [ sqlite3_exec db $sql ]
      lindex [ lindex $res 1] 1
    } $uval
  }

  # hex() of the escaped text literal must give back uval; skipped for
  # the %00 vector like the literal-based cases above.
  if {$hstr != "%00"} {
    do_test badutf2-3.1.$i {
      set sql "SELECT hex('$hstr') AS x;"
      set res [ sqlite3_exec db $sql ]
      lindex [ lindex $res 1] 1
    } $uval
  }

  # Bind xstr to the prepared statement, step it, and compare the
  # \uXXXX rendering of the returned column text with ustr.
  do_test badutf2-4.1.$i {
    sqlite3_reset $S
    sqlite3_bind_text $S 1 $xstr $len
    sqlite3_step $S
    utf8_to_ustr2 [ sqlite3_column_text $S 0 ]
  } $ustr

  # NOTE(review): utf8_to_utf8 is not defined in this file; presumably it
  # is supplied by the test harness - confirm it is available in every
  # build configuration this script runs under.
  do_test badutf2-5.1.$i {
    utf8_to_utf8 $uval
  } $u2u

}

# Finalize the prepared statement whose handle was stored in S by
# badutf2-4.0.
do_test badutf2-4.2 {
  sqlite3_finalize $S
} {SQLITE_OK}


# Hand control back to the test harness.
finish_test