/ Check-in [4218c7b7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Back out the change from [7fab1393c2b22b1f] that tries to convert invalid surrogate characters in UTF16 into the replacement character 0xfffd, as we find that this breaks some software.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 4218c7b71fb6b227dbe4b852718584c150164af2d84e067cb810aa602554a609
User & Date: drh 2020-05-20 15:02:04
Context
2020-05-21
19:13
Avoid another potential OOB read in sqlite3expert.c. (check-in: 0ccea800 user: dan tags: trunk)
2020-05-20
15:02
Back out the change from [7fab1393c2b22b1f] that tries to convert invalid surrogate characters in UTF16 into the replacement character 0xfffd, as we find that this breaks some software. (check-in: 4218c7b7 user: drh tags: trunk)
2020-05-19
15:51
Add compiler hints to disable TSAN for the routines that access the -shm file header in WAL mode using a double-read with memory barrier. (check-in: 3117c1b5 user: drh tags: trunk)
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/utf.c.

280
281
282
283
284
285
286

287
288
289
290
291
292
293
294
295
296
297
298







299
300
301
302
303
304
305
306
307

308
309
310
311
312
313
314
315
316
317
318
319







320
321
322
323
324
325
326
    assert( desiredEnc==SQLITE_UTF8 );
    if( pMem->enc==SQLITE_UTF16LE ){
      /* UTF-16 Little-endian -> UTF-8 */
      while( zIn<zTerm ){
        c = *(zIn++);
        c += (*(zIn++))<<8;
        if( c>=0xd800 && c<0xe000 ){

          if( c>=0xdc00 || zIn>=zTerm ){
            c = 0xfffd;
          }else{
            int c2 = *(zIn++);
            c2 += (*(zIn++))<<8;
            if( c2<0xdc00 || c2>=0xe000 ){
              zIn -= 2;
              c = 0xfffd;
            }else{
              c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000;
            }
          }







        }
        WRITE_UTF8(z, c);
      }
    }else{
      /* UTF-16 Big-endian -> UTF-8 */
      while( zIn<zTerm ){
        c = (*(zIn++))<<8;
        c += *(zIn++);
        if( c>=0xd800 && c<0xe000 ){

          if( c>=0xdc00 || zIn>=zTerm ){
            c = 0xfffd;
          }else{
            int c2 = (*(zIn++))<<8;
            c2 += *(zIn++);
            if( c2<0xdc00 || c2>=0xe000 ){
              zIn -= 2;
              c = 0xfffd;
            }else{
              c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000;
            }
          }







        }
        WRITE_UTF8(z, c);
      }
    }
    pMem->n = (int)(z - zOut);
  }
  *z = 0;







>












>
>
>
>
>
>
>









>












>
>
>
>
>
>
>







280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
    assert( desiredEnc==SQLITE_UTF8 );
    if( pMem->enc==SQLITE_UTF16LE ){
      /* UTF-16 Little-endian -> UTF-8 */
      while( zIn<zTerm ){
        c = *(zIn++);
        c += (*(zIn++))<<8;
        if( c>=0xd800 && c<0xe000 ){
#ifdef SQLITE_REPLACE_INVALID_UTF
          if( c>=0xdc00 || zIn>=zTerm ){
            c = 0xfffd;
          }else{
            int c2 = *(zIn++);
            c2 += (*(zIn++))<<8;
            if( c2<0xdc00 || c2>=0xe000 ){
              zIn -= 2;
              c = 0xfffd;
            }else{
              c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000;
            }
          }
#else
          if( zIn<zTerm ){
            int c2 = (*zIn++);
            c2 += ((*zIn++)<<8);
            c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);
          }
#endif
        }
        WRITE_UTF8(z, c);
      }
    }else{
      /* UTF-16 Big-endian -> UTF-8 */
      while( zIn<zTerm ){
        c = (*(zIn++))<<8;
        c += *(zIn++);
        if( c>=0xd800 && c<0xe000 ){
#ifdef SQLITE_REPLACE_INVALID_UTF
          if( c>=0xdc00 || zIn>=zTerm ){
            c = 0xfffd;
          }else{
            int c2 = (*(zIn++))<<8;
            c2 += *(zIn++);
            if( c2<0xdc00 || c2>=0xe000 ){
              zIn -= 2;
              c = 0xfffd;
            }else{
              c = ((c&0x3ff)<<10) + (c2&0x3ff) + 0x10000;
            }
          }
#else
          if( zIn<zTerm ){
            int c2 = ((*zIn++)<<8);
            c2 += (*zIn++);
            c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);
          }
#endif
        }
        WRITE_UTF8(z, c);
      }
    }
    pMem->n = (int)(z - zOut);
  }
  *z = 0;

Changes to test/tkt-3fe897352e.test.

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  sqlite3 db :memory:
  db eval {
    PRAGMA encoding=UTF8;
    CREATE TABLE t1(x);
    INSERT INTO t1 VALUES(hex_to_utf16be('D800'));
    SELECT hex(x) FROM t1;
  }
} {EFBFBD}
do_test tkt-3fe89-1.2 {
  db eval {
    DELETE FROM t1;
    INSERT INTO t1 VALUES(hex_to_utf16le('00D8'));
    SELECT hex(x) FROM t1;
  }
} {EFBFBD}
do_test tkt-3fe89-1.3 {
  db eval {
    DELETE FROM t1;
    INSERT INTO t1 VALUES(hex_to_utf16be('DFFF'));
    SELECT hex(x) FROM t1;
  }
} {EFBFBD}
do_test tkt-3fe89-1.4 {
  db eval {
    DELETE FROM t1;
    INSERT INTO t1 VALUES(hex_to_utf16le('FFDF'));
    SELECT hex(x) FROM t1;
  }
} {EFBFBD}


finish_test







|






|






|






|



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  sqlite3 db :memory:
  db eval {
    PRAGMA encoding=UTF8;
    CREATE TABLE t1(x);
    INSERT INTO t1 VALUES(hex_to_utf16be('D800'));
    SELECT hex(x) FROM t1;
  }
} {EDA080}
do_test tkt-3fe89-1.2 {
  db eval {
    DELETE FROM t1;
    INSERT INTO t1 VALUES(hex_to_utf16le('00D8'));
    SELECT hex(x) FROM t1;
  }
} {EDA080}
do_test tkt-3fe89-1.3 {
  db eval {
    DELETE FROM t1;
    INSERT INTO t1 VALUES(hex_to_utf16be('DFFF'));
    SELECT hex(x) FROM t1;
  }
} {EDBFBF}
do_test tkt-3fe89-1.4 {
  db eval {
    DELETE FROM t1;
    INSERT INTO t1 VALUES(hex_to_utf16le('FFDF'));
    SELECT hex(x) FROM t1;
  }
} {EDBFBF}


finish_test