SQLite

Check-in [51027f08c0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Proper surrogate pair decoding added to JSON functions. See the mailing list bug report and https://bugs.python.org/issue38749. More test cases needed here, but it seems to work so far.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 51027f08c0478f1bf9d7545d9e268c772c0a5cd5dda4b03d78f16c7d94f2f50d
User & Date: drh 2019-11-10 11:09:06.218
Context
2019-11-11
15:13
Remove an asm() block from build tool mksourceid.c, as it causes build failures on some systems and performance is not important at build-time. (check-in: 8e100e6c35 user: dan tags: trunk)
2019-11-10
11:09
Proper surrogate pair decoding added to JSON functions. See the mailing list bug report and https://bugs.python.org/issue38749. More test cases needed here, but it seems to work so far. (check-in: 51027f08c0 user: drh tags: trunk)
10:08
Remove an incorrect ALWAYS() macro. (check-in: f7a74f89db user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/misc/json1.c.
517
518
519
520
521
522
523































524
525
526
527
528
529
530
){
  JsonString s;
  jsonInit(&s, pCtx);
  jsonRenderNode(pNode, &s, aReplace);
  jsonResult(&s);
  sqlite3_result_subtype(pCtx, JSON_SUBTYPE);
}
































/*
** Make the JsonNode the return value of the function.
*/
static void jsonReturn(
  JsonNode *pNode,            /* Node to return */
  sqlite3_context *pCtx,      /* Return value for this function */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
){
  JsonString s;
  jsonInit(&s, pCtx);
  jsonRenderNode(pNode, &s, aReplace);
  jsonResult(&s);
  sqlite3_result_subtype(pCtx, JSON_SUBTYPE);
}

/*
** Translate a single byte of Hex into an integer.
** This routine only works if h really is a valid hexadecimal
** character:  0..9a..fA..F
*/
static u8 jsonHexToInt(int h){
  assert( (h>='0' && h<='9') ||  (h>='a' && h<='f') ||  (h>='A' && h<='F') );
#ifdef SQLITE_EBCDIC
  h += 9*(1&~(h>>4));
#else
  h += 9*(1&(h>>6));
#endif
  return (u8)(h & 0xf);
}

/*
** Convert a 4-byte hex string into an integer
*/
static u32 jsonHexToInt4(const char *z){
  u32 v;
  assert( safe_isxdigit(z[0]) );
  assert( safe_isxdigit(z[1]) );
  assert( safe_isxdigit(z[2]) );
  assert( safe_isxdigit(z[3]) );
  v = (jsonHexToInt(z[0])<<12)
    + (jsonHexToInt(z[1])<<8)
    + (jsonHexToInt(z[2])<<4)
    + jsonHexToInt(z[3]);
  return v;
}

/*
** Make the JsonNode the return value of the function.
*/
static void jsonReturn(
  JsonNode *pNode,            /* Node to return */
  sqlite3_context *pCtx,      /* Return value for this function */
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633















634
635
636

637
638
639
640
641
642
643
        for(i=1, j=0; i<n-1; i++){
          char c = z[i];
          if( c!='\\' ){
            zOut[j++] = c;
          }else{
            c = z[++i];
            if( c=='u' ){
              u32 v = 0, k;
              for(k=0; k<4; i++, k++){
                assert( i<n-2 );
                c = z[i+1];
                assert( safe_isxdigit(c) );
                if( c<='9' ) v = v*16 + c - '0';
                else if( c<='F' ) v = v*16 + c - 'A' + 10;
                else v = v*16 + c - 'a' + 10;
              }
              if( v==0 ) break;
              if( v<=0x7f ){
                zOut[j++] = (char)v;
              }else if( v<=0x7ff ){
                zOut[j++] = (char)(0xc0 | (v>>6));
                zOut[j++] = 0x80 | (v&0x3f);
              }else{















                zOut[j++] = (char)(0xe0 | (v>>12));
                zOut[j++] = 0x80 | ((v>>6)&0x3f);
                zOut[j++] = 0x80 | (v&0x3f);

              }
            }else{
              if( c=='b' ){
                c = '\b';
              }else if( c=='f' ){
                c = '\f';
              }else if( c=='n' ){







|
<
<
|
<
<
<
<
<







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
>







642
643
644
645
646
647
648
649


650





651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
        for(i=1, j=0; i<n-1; i++){
          char c = z[i];
          if( c!='\\' ){
            zOut[j++] = c;
          }else{
            c = z[++i];
            if( c=='u' ){
              u32 v = jsonHexToInt4(z+i+1);


              i += 4;





              if( v==0 ) break;
              if( v<=0x7f ){
                zOut[j++] = (char)v;
              }else if( v<=0x7ff ){
                zOut[j++] = (char)(0xc0 | (v>>6));
                zOut[j++] = 0x80 | (v&0x3f);
              }else{
                u32 vlo;
                if( (v&0xfc00)==0xd800
                  && i<n-6
                  && z[i+1]=='\\'
                  && z[i+2]=='u'
                  && ((vlo = jsonHexToInt4(z+i+3))&0xfc00)==0xdc00
                ){
                  /* We have a surrogate pair */
                  v = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000;
                  i += 6;
                  zOut[j++] = 0xf0 | (v>>18);
                  zOut[j++] = 0x80 | ((v>>12)&0x3f);
                  zOut[j++] = 0x80 | ((v>>6)&0x3f);
                  zOut[j++] = 0x80 | (v&0x3f);
                }else{
                  zOut[j++] = 0xe0 | (v>>12);
                  zOut[j++] = 0x80 | ((v>>6)&0x3f);
                  zOut[j++] = 0x80 | (v&0x3f);
                }
              }
            }else{
              if( c=='b' ){
                c = '\b';
              }else if( c=='f' ){
                c = '\f';
              }else if( c=='n' ){
Changes to test/json101.test.
827
828
829
830
831
832
833















834
835
} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}
do_execsql_test json-15.120 {
  SELECT * FROM (JSON_EACH('{"a":1, "b":2}'));
} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}
do_execsql_test json-15.130 {
  SELECT xyz.* FROM (JSON_EACH('{"a":1, "b":2}')) AS xyz;
} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}
















finish_test







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}
do_execsql_test json-15.120 {
  SELECT * FROM (JSON_EACH('{"a":1, "b":2}'));
} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}
do_execsql_test json-15.130 {
  SELECT xyz.* FROM (JSON_EACH('{"a":1, "b":2}')) AS xyz;
} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}

# 2019-11-10
# Mailing list bug report on the handling of surrogate pairs
# in JSON.
#
do_execsql_test json-16.10 {
  SELECT length(json_extract('"abc\uD834\uDD1Exyz"','$'));
} {7}
do_execsql_test json-16.20 {
  SELECT length(json_extract('"\uD834\uDD1E"','$'));
} {1}
do_execsql_test json-16.30 {
  SELECT unicode(json_extract('"\uD834\uDD1E"','$'));
} {119070}


finish_test