/ Check-in [19064d7c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Make sure that UTF16 to UTF8 conversions do not read past the end of the UTF16 input buffer if the last two bytes of the UTF16 happen to be the first half of a surrogate pair. Ticket [3fe897352e]
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 19064d7cea838e1a93fe63743ed247f440679e97
User & Date: drh 2009-10-23 18:15:46
References
2009-10-24
01:55
Check-in [19064d7cea] broke the pTail return on sqlite3_prepare16() when the SQL contained surrogates. This check-in restores correct function. Part of ticket [3fe897352e]. check-in: 65b1e3a4 user: drh tags: trunk
01:48 Ticket [3fe89735] Malformed UTF16 leads to a 2-byte buffer overread status still Fixed with 1 other change artifact: 1b24ca61 user: shane
Context
2009-10-24
01:55
Check-in [19064d7cea] broke the pTail return on sqlite3_prepare16() when the SQL contained surrogates. This check-in restores correct function. Part of ticket [3fe897352e]. check-in: 65b1e3a4 user: drh tags: trunk
2009-10-23
18:15
Make sure that UTF16 to UTF8 conversions do not read past the end of the UTF16 input buffer if the last two bytes of the UTF16 happen to be the first half of a surrogate pair. Ticket [3fe897352e] check-in: 19064d7c user: drh tags: trunk
01:27
In shell, ensure that do_meta_command() returns consistent error values. Adjusted the text of some error message to be more consistent. Ticket [beb2dd69ad]. check-in: 1ebac9ed user: shane tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/test_func.c.

   308    308       assert( pStmt==0 );
   309    309       zErr = sqlite3_mprintf("sqlite3_prepare_v2() error: %s",sqlite3_errmsg(db));
   310    310       sqlite3_result_text(pCtx, zErr, -1, sqlite3_free);
   311    311       sqlite3_result_error_code(pCtx, rc);
   312    312     }
   313    313   }
   314    314   
          315  +
          316  +/*
          317  +** convert one character from hex to binary
          318  +*/
          319  +static int testHexChar(char c){
          320  +  if( c>='0' && c<='9' ){
          321  +    return c - '0';
          322  +  }else if( c>='a' && c<='f' ){
          323  +    return c - 'a' + 10;
          324  +  }else if( c>='A' && c<='F' ){
          325  +    return c - 'A' + 10;
          326  +  }
          327  +  return 0;
          328  +}
          329  +
          330  +/*
          331  +** Convert hex to binary.
          332  +*/
          333  +static void testHexToBin(const char *zIn, char *zOut){
          334  +  while( zIn[0] && zIn[1] ){
          335  +    *(zOut++) = (testHexChar(zIn[0])<<4) + testHexChar(zIn[1]);
          336  +    zIn += 2;
          337  +  }
          338  +}
          339  +
          340  +/*
          341  +**      hex_to_utf16be(HEX)
          342  +**
          343  +** Convert the input string from HEX into binary.  Then return the
          344  +** result using sqlite3_result_text16be().
          345  +*/
          346  +static void testHexToUtf16be(
          347  +  sqlite3_context *pCtx, 
          348  +  int nArg,
          349  +  sqlite3_value **argv
          350  +){
          351  +  int n;
          352  +  const char *zIn;
          353  +  char *zOut;
          354  +  assert( nArg==1 );
          355  +  n = sqlite3_value_bytes(argv[0]);
          356  +  zIn = (const char*)sqlite3_value_text(argv[0]);
          357  +  zOut = sqlite3_malloc( n/2 );
          358  +  if( zOut==0 ){
          359  +    sqlite3_result_error_nomem(pCtx);
          360  +  }else{
          361  +    testHexToBin(zIn, zOut);
          362  +    sqlite3_result_text16be(pCtx, zOut, n/2, sqlite3_free);
          363  +  }
          364  +}
          365  +
          366  +/*
          367  +**      hex_to_utf8(HEX)
          368  +**
          369  +** Convert the input string from HEX into binary.  Then return the
          370  +** result using sqlite3_result_text().
          371  +*/
          372  +static void testHexToUtf8(
          373  +  sqlite3_context *pCtx, 
          374  +  int nArg,
          375  +  sqlite3_value **argv
          376  +){
          377  +  int n;
          378  +  const char *zIn;
          379  +  char *zOut;
          380  +  assert( nArg==1 );
          381  +  n = sqlite3_value_bytes(argv[0]);
          382  +  zIn = (const char*)sqlite3_value_text(argv[0]);
          383  +  zOut = sqlite3_malloc( n/2 );
          384  +  if( zOut==0 ){
          385  +    sqlite3_result_error_nomem(pCtx);
          386  +  }else{
          387  +    testHexToBin(zIn, zOut);
          388  +    sqlite3_result_text(pCtx, zOut, n/2, sqlite3_free);
          389  +  }
          390  +}
          391  +
          392  +/*
          393  +**      hex_to_utf16le(HEX)
          394  +**
          395  +** Convert the input string from HEX into binary.  Then return the
          396  +** result using sqlite3_result_text16le().
          397  +*/
          398  +static void testHexToUtf16le(
          399  +  sqlite3_context *pCtx, 
          400  +  int nArg,
          401  +  sqlite3_value **argv
          402  +){
          403  +  int n;
          404  +  const char *zIn;
          405  +  char *zOut;
          406  +  assert( nArg==1 );
          407  +  n = sqlite3_value_bytes(argv[0]);
          408  +  zIn = (const char*)sqlite3_value_text(argv[0]);
          409  +  zOut = sqlite3_malloc( n/2 );
          410  +  if( zOut==0 ){
          411  +    sqlite3_result_error_nomem(pCtx);
          412  +  }else{
          413  +    testHexToBin(zIn, zOut);
          414  +    sqlite3_result_text16le(pCtx, zOut, n/2, sqlite3_free);
          415  +  }
          416  +}
   315    417   
   316    418   static int registerTestFunctions(sqlite3 *db){
   317    419     static const struct {
   318    420        char *zName;
   319    421        signed char nArg;
   320    422        unsigned char eTextRep; /* 1: UTF-16.  0: UTF-8 */
   321    423        void (*xFunc)(sqlite3_context*,int,sqlite3_value **);
   322    424     } aFuncs[] = {
   323    425       { "randstr",               2, SQLITE_UTF8, randStr    },
   324    426       { "test_destructor",       1, SQLITE_UTF8, test_destructor},
   325    427   #ifndef SQLITE_OMIT_UTF16
   326    428       { "test_destructor16",     1, SQLITE_UTF8, test_destructor16},
          429  +    { "hex_to_utf16be",        1, SQLITE_UTF8, testHexToUtf16be},
          430  +    { "hex_to_utf16le",        1, SQLITE_UTF8, testHexToUtf16le},
   327    431   #endif
          432  +    { "hex_to_utf8",           1, SQLITE_UTF8, testHexToUtf8},
   328    433       { "test_destructor_count", 0, SQLITE_UTF8, test_destructor_count},
   329    434       { "test_auxdata",         -1, SQLITE_UTF8, test_auxdata},
   330    435       { "test_error",            1, SQLITE_UTF8, test_error},
   331    436       { "test_error",            2, SQLITE_UTF8, test_error},
   332    437       { "test_eval",             1, SQLITE_UTF8, test_eval},
   333    438       { "test_isolation",        2, SQLITE_UTF8, test_isolation},
   334    439       { "test_counter",          1, SQLITE_UTF8, counterFunc},
................................................................................
   442    547     return TCL_OK;
   443    548   
   444    549   abuse_err:
   445    550     Tcl_AppendResult(interp, "sqlite3_create_function abused test failed", 
   446    551                      (char*)0);
   447    552     return TCL_ERROR;
   448    553   }
   449         -
   450         -
   451    554   
   452    555   /*
   453    556   ** Register commands with the TCL interpreter.
   454    557   */
   455    558   int Sqlitetest_func_Init(Tcl_Interp *interp){
   456    559     static struct {
   457    560        char *zName;

Changes to src/utf.c.

   103    103       *zOut++ = (u8)(0x00D8 + (((c-0x10000)>>18)&0x03));              \
   104    104       *zOut++ = (u8)(((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0));  \
   105    105       *zOut++ = (u8)(0x00DC + ((c>>8)&0x03));                         \
   106    106       *zOut++ = (u8)(c&0x00FF);                                       \
   107    107     }                                                                 \
   108    108   }
   109    109   
   110         -#define READ_UTF16LE(zIn, c){                                         \
          110  +#define READ_UTF16LE(zIn, zTerm, c){                                  \
   111    111     c = (*zIn++);                                                       \
   112    112     c += ((*zIn++)<<8);                                                 \
   113         -  if( c>=0xD800 && c<0xE000 ){                                        \
          113  +  if( c>=0xD800 && c<0xE000 && zIn<zTerm ){                           \
   114    114       int c2 = (*zIn++);                                                \
   115    115       c2 += ((*zIn++)<<8);                                              \
   116    116       c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
   117    117     }                                                                   \
   118    118   }
   119    119   
   120         -#define READ_UTF16BE(zIn, c){                                         \
          120  +#define READ_UTF16BE(zIn, zTerm, c){                                  \
   121    121     c = ((*zIn++)<<8);                                                  \
   122    122     c += (*zIn++);                                                      \
   123         -  if( c>=0xD800 && c<0xE000 ){                                        \
          123  +  if( c>=0xD800 && c<0xE000 && zIn<zTerm ){                           \
   124    124       int c2 = ((*zIn++)<<8);                                           \
   125    125       c2 += (*zIn++);                                                   \
   126    126       c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
   127    127     }                                                                   \
   128    128   }
   129    129   
   130    130   /*
................................................................................
   301    301       pMem->n = (int)(z - zOut);
   302    302       *z++ = 0;
   303    303     }else{
   304    304       assert( desiredEnc==SQLITE_UTF8 );
   305    305       if( pMem->enc==SQLITE_UTF16LE ){
   306    306         /* UTF-16 Little-endian -> UTF-8 */
   307    307         while( zIn<zTerm ){
   308         -        READ_UTF16LE(zIn, c); 
          308  +        READ_UTF16LE(zIn, zTerm, c); 
   309    309           WRITE_UTF8(z, c);
   310    310         }
   311    311       }else{
   312    312         /* UTF-16 Big-endian -> UTF-8 */
   313    313         while( zIn<zTerm ){
   314         -        READ_UTF16BE(zIn, c); 
          314  +        READ_UTF16BE(zIn, zTerm, c); 
   315    315           WRITE_UTF8(z, c);
   316    316         }
   317    317       }
   318    318       pMem->n = (int)(z - zOut);
   319    319     }
   320    320     *z = 0;
   321    321     assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );
................................................................................
   484    484   ** zIn is a UTF-16 encoded unicode string at least nChar characters long.
   485    485   ** Return the number of bytes in the first nChar unicode characters
   486    486   ** in zIn.  nChar must be non-negative.
   487    487   */
   488    488   int sqlite3Utf16ByteLen(const void *zIn, int nChar){
   489    489     int c;
   490    490     unsigned char const *z = zIn;
          491  +  unsigned char const *zTerm = &z[nChar];
   491    492     int n = 0;
   492    493     if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
   493    494       /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here
   494    495       ** and in other parts of this file means that one branch will
   495    496       ** not be covered by coverage testing on any single host. But coverage
   496    497       ** will be complete if the tests are run on both a little-endian and 
   497    498       ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE
   498    499       ** macros are constant at compile time the compiler can determine
   499    500       ** which branch will be followed. It is therefore assumed that no runtime
   500    501       ** penalty is paid for this "if" statement.
   501    502       */
   502    503       while( n<nChar ){
   503         -      READ_UTF16BE(z, c);
          504  +      READ_UTF16BE(z, zTerm, c);
   504    505         n++;
   505    506       }
   506    507     }else{
   507    508       while( n<nChar ){
   508         -      READ_UTF16LE(z, c);
          509  +      READ_UTF16LE(z, zTerm, c);
   509    510         n++;
   510    511       }
   511    512     }
   512    513     return (int)(z-(unsigned char const *)zIn);
   513    514   }
   514    515   
   515    516   #if defined(SQLITE_TEST)
................................................................................
   543    544       if( i>=0xD800 && i<0xE000 ) continue;
   544    545       z = zBuf;
   545    546       WRITE_UTF16LE(z, i);
   546    547       n = (int)(z-zBuf);
   547    548       assert( n>0 && n<=4 );
   548    549       z[0] = 0;
   549    550       z = zBuf;
   550         -    READ_UTF16LE(z, c);
          551  +    READ_UTF16LE(z, &zBuf[n], c);
   551    552       assert( c==i );
   552    553       assert( (z-zBuf)==n );
   553    554     }
   554    555     for(i=0; i<0x00110000; i++){
   555    556       if( i>=0xD800 && i<0xE000 ) continue;
   556    557       z = zBuf;
   557    558       WRITE_UTF16BE(z, i);
   558    559       n = (int)(z-zBuf);
   559    560       assert( n>0 && n<=4 );
   560    561       z[0] = 0;
   561    562       z = zBuf;
   562         -    READ_UTF16BE(z, c);
          563  +    READ_UTF16BE(z, &zBuf[n], c);
   563    564       assert( c==i );
   564    565       assert( (z-zBuf)==n );
   565    566     }
   566    567   }
   567    568   #endif /* SQLITE_TEST */
   568    569   #endif /* SQLITE_OMIT_UTF16 */

Added test/tkt-3fe897352e.test.

            1  +# 2009 October 23
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library.
           12  +#
           13  +# This file implements tests to verify that ticket [3fe897352e8d8] has been
           14  +# fixed.  
           15  +#
           16  +
           17  +set testdir [file dirname $argv0]
           18  +source $testdir/tester.tcl
           19  +
           20  +do_test tkt-3fe89-1.1 {
           21  +  db close
           22  +  sqlite3 db :memory:
           23  +  db eval {
           24  +    PRAGMA encoding=UTF8;
           25  +    CREATE TABLE t1(x);
           26  +    INSERT INTO t1 VALUES(hex_to_utf16be('D800'));
           27  +    SELECT hex(x) FROM t1;
           28  +  }
           29  +} {EDA080}
           30  +do_test tkt-3fe89-1.2 {
           31  +  db eval {
           32  +    DELETE FROM t1;
           33  +    INSERT INTO t1 VALUES(hex_to_utf16le('00D8'));
           34  +    SELECT hex(x) FROM t1;
           35  +  }
           36  +} {EDA080}
           37  +do_test tkt-3fe89-1.3 {
           38  +  db eval {
           39  +    DELETE FROM t1;
           40  +    INSERT INTO t1 VALUES(hex_to_utf16be('DFFF'));
           41  +    SELECT hex(x) FROM t1;
           42  +  }
           43  +} {EDBFBF}
           44  +do_test tkt-3fe89-1.4 {
           45  +  db eval {
           46  +    DELETE FROM t1;
           47  +    INSERT INTO t1 VALUES(hex_to_utf16le('FFDF'));
           48  +    SELECT hex(x) FROM t1;
           49  +  }
           50  +} {EDBFBF}
           51  +
           52  +
           53  +finish_test