/ Check-in [766bb7e5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a bug in the sqlite3Utf16ByteLen() function so that it computes the correct length even for strings that contain surrogate pairs. Ticket #3766. (CVS 6427)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 766bb7e59c28884e40ce13e3fc55c870d06d7e34
User & Date: drh 2009-04-01 18:40:32
Context
2009-04-01
19:07
Remove an unreachable branch from lockBtree(). Add comments. (CVS 6428) check-in: 85979295 user: danielk1977 tags: trunk
18:40
Fix a bug in the sqlite3Utf16ByteLen() function so that it computes the correct length even for strings that contain surrogate pairs. Ticket #3766. (CVS 6427) check-in: 766bb7e5 user: drh tags: trunk
18:25
Test that two database connections that use different VFS implementations may not share a cache. (CVS 6426) check-in: 3a92c956 user: danielk1977 tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/test1.c.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
....
3257
3258
3259
3260
3261
3262
3263

3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275

3276
3277
3278
3279
3280
3281
3282
....
3949
3950
3951
3952
3953
3954
3955



3956
3957
3958
3959
3960
3961
3962
3963
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing all sorts of SQLite interfaces.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.349 2009/03/27 12:32:55 drh Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>

/*
................................................................................
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
#ifndef SQLITE_OMIT_UTF16
  sqlite3 *db;
  const void *zErr;

  int bytes = 0;

  if( objc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", 
       Tcl_GetString(objv[0]), " DB", 0);
    return TCL_ERROR;
  }
  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;

  zErr = sqlite3_errmsg16(db);
  if( zErr ){
    bytes = sqlite3Utf16ByteLen(zErr, -1);

  }
  Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(zErr, bytes));
#endif /* SQLITE_OMIT_UTF16 */
  return TCL_OK;
}

/*
................................................................................
  }

  if( getStmtPointer(interp, Tcl_GetString(objv[1]), &pStmt) ) return TCL_ERROR;
  if( Tcl_GetIntFromObj(interp, objv[2], &col) ) return TCL_ERROR;

  zName16 = xFunc(pStmt, col);
  if( zName16 ){



    pRet = Tcl_NewByteArrayObj(zName16, sqlite3Utf16ByteLen(zName16, -1)+2);
    Tcl_SetObjResult(interp, pRet);
  }
#endif /* SQLITE_OMIT_UTF16 */

  return TCL_OK;
}








|







 







>











|
>







 







>
>
>
|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
....
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
....
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing all sorts of SQLite interfaces.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.350 2009/04/01 18:40:32 drh Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>

/*
................................................................................
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
#ifndef SQLITE_OMIT_UTF16
  sqlite3 *db;
  const void *zErr;
  const char *z;
  int bytes = 0;

  if( objc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", 
       Tcl_GetString(objv[0]), " DB", 0);
    return TCL_ERROR;
  }
  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;

  zErr = sqlite3_errmsg16(db);
  if( zErr ){
    z = zErr;
    for(bytes=0; z[bytes] || z[bytes+1]; bytes+=2){}
  }
  Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(zErr, bytes));
#endif /* SQLITE_OMIT_UTF16 */
  return TCL_OK;
}

/*
................................................................................
  }

  if( getStmtPointer(interp, Tcl_GetString(objv[1]), &pStmt) ) return TCL_ERROR;
  if( Tcl_GetIntFromObj(interp, objv[2], &col) ) return TCL_ERROR;

  zName16 = xFunc(pStmt, col);
  if( zName16 ){
    int n;
    const char *z = zName16;
    for(n=0; z[n] || z[n+1]; n+=2){}
    pRet = Tcl_NewByteArrayObj(zName16, n+2);
    Tcl_SetObjResult(interp, pRet);
  }
#endif /* SQLITE_OMIT_UTF16 */

  return TCL_OK;
}

Changes to src/utf.c.

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
...
451
452
453
454
455
456
457
458
459
460
461
462

463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.72 2009/04/01 16:33:38 drh Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
  }
  assert( (m.flags & MEM_Term)!=0 || db->mallocFailed );
  assert( (m.flags & MEM_Str)!=0 || db->mallocFailed );
  return (m.flags & MEM_Dyn)!=0 ? m.z : sqlite3DbStrDup(db, m.z);
}

/*
** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
** return the number of bytes up to (but not including), the first pair
** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
** then return the number of bytes in the first nChar unicode characters
** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).

*/
int sqlite3Utf16ByteLen(const void *zIn, int nChar){
  unsigned int c = 1;
  char const *z = zIn;
  int n = 0;
  if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
    /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here
    ** and in other parts of this file means that at one branch will
    ** not be covered by coverage testing on any single host. But coverage
    ** will be complete if the tests are run on both a little-endian and 
    ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE
    ** macros are constant at compile time the compiler can determine
    ** which branch will be followed. It is therefore assumed that no runtime
    ** penalty is paid for this "if" statement.
    */
    while( c && ((nChar<0) || n<nChar) ){
      READ_UTF16BE(z, c);
      n++;
    }
  }else{
    while( c && ((nChar<0) || n<nChar) ){
      READ_UTF16LE(z, c);
      n++;
    }
  }
  return (int)(z-(char const *)zIn)-((c==0)?2:0);
}

#if defined(SQLITE_TEST)
/*
** This routine is called from the TCL test function "translate_selftest".
** It checks that the primitives for serializing and deserializing
** characters in each encoding are inverses of each other.







|







 







|
<
<
|
<
>


|
|











|




|




|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
...
451
452
453
454
455
456
457
458


459

460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.73 2009/04/01 18:40:32 drh Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
  }
  assert( (m.flags & MEM_Term)!=0 || db->mallocFailed );
  assert( (m.flags & MEM_Str)!=0 || db->mallocFailed );
  return (m.flags & MEM_Dyn)!=0 ? m.z : sqlite3DbStrDup(db, m.z);
}

/*
** pZ is a UTF-16 encoded unicode string at least nChar characters long.


** Return the number of bytes in the first nChar unicode characters

** in pZ.  nChar must be non-negative.
*/
int sqlite3Utf16ByteLen(const void *zIn, int nChar){
  int c;
  unsigned char const *z = zIn;
  int n = 0;
  if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
    /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here
    ** and in other parts of this file means that at one branch will
    ** not be covered by coverage testing on any single host. But coverage
    ** will be complete if the tests are run on both a little-endian and 
    ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE
    ** macros are constant at compile time the compiler can determine
    ** which branch will be followed. It is therefore assumed that no runtime
    ** penalty is paid for this "if" statement.
    */
    while( n<nChar ){
      READ_UTF16BE(z, c);
      n++;
    }
  }else{
    while( n<nChar ){
      READ_UTF16LE(z, c);
      n++;
    }
  }
  return (int)(z-(unsigned char const *)zIn);
}

#if defined(SQLITE_TEST)
/*
** This routine is called from the TCL test function "translate_selftest".
** It checks that the primitives for serializing and deserializing
** characters in each encoding are inverses of each other.