Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | Back out the change from [7fab1393c2b22b1f] that tries to convert invalid surrogate characters in UTF16 into the replacement character 0xfffd, as we find that this breaks some software. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: | 4218c7b71fb6b227dbe4b852718584c1 |
User & Date: | drh 2020-05-20 15:02:04.035 |
Context
2020-05-21
| ||
19:13 | Avoid another potential OOB read in sqlite3expert.c. (check-in: 0ccea80092 user: dan tags: trunk) | |
2020-05-20
| ||
15:02 | Back out the change from [7fab1393c2b22b1f] that tries to convert invalid surrogate characters in UTF16 into the replacement character 0xfffd, as we find that this breaks some software. (check-in: 4218c7b71f user: drh tags: trunk) | |
2020-05-19
| ||
15:51 | Add compiler hints to disable TSAN for the routines that access the -shm file header in WAL mode using a double-read with memory barrier. (check-in: 3117c1b5a9 user: drh tags: trunk) | |
Changes
Changes to src/utf.c.
︙ | ︙ | |||
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 | assert( desiredEnc==SQLITE_UTF8 ); if( pMem->enc==SQLITE_UTF16LE ){ /* UTF-16 Little-endian -> UTF-8 */ while( zIn<zTerm ){ c = *(zIn++); c += (*(zIn++))<<8; if( c>=0xd800 && c<0xe000 ){ if( c>=0xdc00 || zIn>=zTerm ){ c = 0xfffd; }else{ int c2 = *(zIn++); c2 += (*(zIn++))<<8; if( c2<0xdc00 || c2>=0xe000 ){ zIn -= 2; c = 0xfffd; }else{ c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000; } } } WRITE_UTF8(z, c); } }else{ /* UTF-16 Big-endian -> UTF-8 */ while( zIn<zTerm ){ c = (*(zIn++))<<8; c += *(zIn++); if( c>=0xd800 && c<0xe000 ){ if( c>=0xdc00 || zIn>=zTerm ){ c = 0xfffd; }else{ int c2 = (*(zIn++))<<8; c2 += *(zIn++); if( c2<0xdc00 || c2>=0xe000 ){ zIn -= 2; c = 0xfffd; }else{ c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000; } } } WRITE_UTF8(z, c); } } pMem->n = (int)(z - zOut); } *z = 0; | > > > > > > > > > > > > > > > > | 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 | assert( desiredEnc==SQLITE_UTF8 ); if( pMem->enc==SQLITE_UTF16LE ){ /* UTF-16 Little-endian -> UTF-8 */ while( zIn<zTerm ){ c = *(zIn++); c += (*(zIn++))<<8; if( c>=0xd800 && c<0xe000 ){ #ifdef SQLITE_REPLACE_INVALID_UTF if( c>=0xdc00 || zIn>=zTerm ){ c = 0xfffd; }else{ int c2 = *(zIn++); c2 += (*(zIn++))<<8; if( c2<0xdc00 || c2>=0xe000 ){ zIn -= 2; c = 0xfffd; }else{ c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000; } } #else if( zIn<zTerm ){ int c2 = (*zIn++); c2 += ((*zIn++)<<8); c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); } #endif } WRITE_UTF8(z, c); } }else{ /* UTF-16 Big-endian -> UTF-8 */ while( zIn<zTerm ){ c = (*(zIn++))<<8; c += *(zIn++); if( c>=0xd800 && c<0xe000 ){ #ifdef SQLITE_REPLACE_INVALID_UTF if( c>=0xdc00 || zIn>=zTerm ){ c = 0xfffd; }else{ int c2 = (*(zIn++))<<8; c2 += *(zIn++); if( c2<0xdc00 || c2>=0xe000 ){ zIn -= 2; c = 0xfffd; }else{ c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000; } } #else if( zIn<zTerm ){ int c2 = ((*zIn++)<<8); c2 += (*zIn++); c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); } #endif } WRITE_UTF8(z, c); } } pMem->n = (int)(z - zOut); } *z = 0; |
︙ | ︙ |
Changes to test/tkt-3fe897352e.test.
︙ | ︙ | |||
29 30 31 32 33 34 35 | sqlite3 db :memory: db eval { PRAGMA encoding=UTF8; CREATE TABLE t1(x); INSERT INTO t1 VALUES(hex_to_utf16be('D800')); SELECT hex(x) FROM t1; } | | | | | | 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | sqlite3 db :memory: db eval { PRAGMA encoding=UTF8; CREATE TABLE t1(x); INSERT INTO t1 VALUES(hex_to_utf16be('D800')); SELECT hex(x) FROM t1; } } {EDA080} do_test tkt-3fe89-1.2 { db eval { DELETE FROM t1; INSERT INTO t1 VALUES(hex_to_utf16le('00D8')); SELECT hex(x) FROM t1; } } {EDA080} do_test tkt-3fe89-1.3 { db eval { DELETE FROM t1; INSERT INTO t1 VALUES(hex_to_utf16be('DFFF')); SELECT hex(x) FROM t1; } } {EDBFBF} do_test tkt-3fe89-1.4 { db eval { DELETE FROM t1; INSERT INTO t1 VALUES(hex_to_utf16le('FFDF')); SELECT hex(x) FROM t1; } } {EDBFBF} finish_test |