SQLite

Check-in [53c553c760]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve test coverage of utf.c. (CVS 1678)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 53c553c760a84e213c2277588bcc292892c2b0ce
User & Date: danielk1977 2004-06-23 13:46:32.000
Context
2004-06-24
00:20
Apply (1679) to version 3. Ticket #777. (CVS 1680) (check-in: 0a26b91580 user: danielk1977 tags: trunk)
2004-06-23
13:46
Improve test coverage of utf.c. (CVS 1678) (check-in: 53c553c760 user: danielk1977 tags: trunk)
12:35
Fix a bug in the previous checkin. (CVS 1677) (check-in: f31c84a64f user: danielk1977 tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/test5.c.
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
*************************************************************************
** Code for testing the utf.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library. Specifically, the code in this file
** is used for testing the SQLite routines for converting between
** the various supported unicode encodings.
**
** $Id: test5.c,v 1.11 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "vdbeInt.h"
#include "os.h"         /* to get SQLITE_BIGENDIAN */
#include "tcl.h"
#include <stdlib.h>
#include <string.h>







|







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
*************************************************************************
** Code for testing the utf.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library. Specifically, the code in this file
** is used for testing the SQLite routines for converting between
** the various supported unicode encodings.
**
** $Id: test5.c,v 1.12 2004/06/23 13:46:32 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "vdbeInt.h"
#include "os.h"         /* to get SQLITE_BIGENDIAN */
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
112
113
114
115
116
117
118




119
120
121
122
123
124
125
126
127
128
129
130

131
132
133
134
135
136
137
138



139
140
141
142
143
144
145
146
147
148



149
150
151





152
153
154
155
156
157
158
159
  }
  if( !pEnc->enc ){
    Tcl_AppendResult(interp, "No such encoding: ", z, 0);
  }
  return pEnc->enc;
}





static int test_translate(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  u8 enc_from;
  u8 enc_to;
  sqlite3_value *pVal;

  const char *z;
  int len;


  if( objc!=4 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), 
        " <string/blob> <from enc> <to enc>", 0
    );
    return TCL_ERROR;
  }




  enc_from = name_to_enc(interp, objv[2]);
  if( !enc_from ) return TCL_ERROR;
  enc_to = name_to_enc(interp, objv[3]);
  if( !enc_to ) return TCL_ERROR;

  pVal = sqlite3ValueNew();

  if( enc_from==SQLITE_UTF8 ){
    z = Tcl_GetString(objv[1]);



    sqlite3ValueSetStr(pVal, -1, z, enc_from, SQLITE_STATIC);
  }else{
    z = Tcl_GetByteArrayFromObj(objv[1], &len);





    sqlite3ValueSetStr(pVal, -1, z, enc_from, SQLITE_STATIC);
  }

  z = sqlite3ValueText(pVal, enc_to);
  len = sqlite3ValueBytes(pVal, enc_to) + (enc_to==SQLITE_UTF8?1:2);
  Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(z, len));

  sqlite3ValueFree(pVal);







>
>
>
>










|

>

|






>
>
>










>
>
>
|


>
>
>
>
>
|







112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
  }
  if( !pEnc->enc ){
    Tcl_AppendResult(interp, "No such encoding: ", z, 0);
  }
  return pEnc->enc;
}

/*
** Usage:   test_translate <string/blob> <from enc> <to enc> ?<transient>?
**
*/
static int test_translate(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  u8 enc_from;
  u8 enc_to;
  sqlite3_value *pVal;

  char *z;
  int len;
  void (*xDel)(void *p) = SQLITE_STATIC;

  if( objc!=4 && objc!=5 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), 
        " <string/blob> <from enc> <to enc>", 0
    );
    return TCL_ERROR;
  }
  if( objc==5 ){
    xDel = sqlite3FreeX;
  }

  enc_from = name_to_enc(interp, objv[2]);
  if( !enc_from ) return TCL_ERROR;
  enc_to = name_to_enc(interp, objv[3]);
  if( !enc_to ) return TCL_ERROR;

  pVal = sqlite3ValueNew();

  if( enc_from==SQLITE_UTF8 ){
    z = Tcl_GetString(objv[1]);
    if( objc==5 ){
      z = sqliteStrDup(z);
    }
    sqlite3ValueSetStr(pVal, -1, z, enc_from, xDel);
  }else{
    z = Tcl_GetByteArrayFromObj(objv[1], &len);
    if( objc==5 ){
      char *zTmp = z;
      z = sqliteMalloc(len);
      memcpy(z, zTmp, len);
    }
    sqlite3ValueSetStr(pVal, -1, z, enc_from, xDel);
  }

  z = sqlite3ValueText(pVal, enc_to);
  len = sqlite3ValueBytes(pVal, enc_to) + (enc_to==SQLITE_UTF8?1:2);
  Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(z, len));

  sqlite3ValueFree(pVal);
Changes to src/utf.c.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.24 2004/06/23 00:23:49 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx







|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.25 2004/06/23 13:46:32 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
  */
  zIn = pMem->z;
  zTerm = &zIn[pMem->n];
  if( len>NBFS ){
    zOut = sqliteMallocRaw(len);
    if( !zOut ) return SQLITE_NOMEM;
  }else{
    if( pMem->z==pMem->zShort ){
      zOut = zShort;
    }else{
      zOut = pMem->zShort;
    }
  }
  z = zOut;

  if( pMem->enc==SQLITE_UTF8 ){
    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){







<
|
<
<
<







288
289
290
291
292
293
294

295



296
297
298
299
300
301
302
  */
  zIn = pMem->z;
  zTerm = &zIn[pMem->n];
  if( len>NBFS ){
    zOut = sqliteMallocRaw(len);
    if( !zOut ) return SQLITE_NOMEM;
  }else{

    zOut = zShort;



  }
  z = zOut;

  if( pMem->enc==SQLITE_UTF8 ){
    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
    }
  }
  assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );

  sqlite3VdbeMemRelease(pMem);
  pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
  pMem->enc = desiredEnc;
  if( (char *)zOut==pMem->zShort ){
    pMem->flags |= (MEM_Term|MEM_Short);
  }else if( zOut==zShort ){
    memcpy(pMem->zShort, zOut, len);
    zOut = pMem->zShort;
    pMem->flags |= (MEM_Term|MEM_Short);
  }else{
    pMem->flags |= (MEM_Term|MEM_Dyn);
  }
  pMem->z = zOut;







<
<
|







335
336
337
338
339
340
341


342
343
344
345
346
347
348
349
    }
  }
  assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );

  sqlite3VdbeMemRelease(pMem);
  pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
  pMem->enc = desiredEnc;


  if( zOut==zShort ){
    memcpy(pMem->zShort, zOut, len);
    zOut = pMem->zShort;
    pMem->flags |= (MEM_Term|MEM_Short);
  }else{
    pMem->flags |= (MEM_Term|MEM_Dyn);
  }
  pMem->z = zOut;
386
387
388
389
390
391
392
393

394


395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
    }
    if( b1==0xFF && b2==0xFE ){
      bom = SQLITE_UTF16LE;
    }
  }
  
  if( bom ){
    if( pMem->flags & MEM_Short ){

      memmove(pMem->zShort, &pMem->zShort[2], NBFS-2);


      pMem->n -= 2;
      pMem->enc = bom;
    }
    else if( pMem->flags & MEM_Dyn ){
      void (*xDel)(void*) = pMem->xDel;
      char *z = pMem->z;
      pMem->z = 0;
      pMem->xDel = 0;
      rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);
      if( xDel ){
        xDel(z);
      }else{
        sqliteFree(z);
      }
    }else{
      rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, 
          SQLITE_TRANSIENT);
    }
  }
  return rc;
}

/*
** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,
** return the number of unicode characters in pZ up to (but not including)
** the first 0x00 byte. If nByte is not less than zero, return the
** number of unicode characters in the first nByte of pZ (or up to 
** the first 0x00, whichever comes first).
*/
int sqlite3utf8CharLen(const char *z, int nByte){
  int r = 0;
  const char *zTerm;
  if( nByte>0 ){
    zTerm = &z[nByte];
  }else{
    zTerm = (const char *)(-1);
  }
  assert( z<=zTerm );
  while( *z!=0 && z<zTerm ){
    SKIP_UTF8(z);







|
>
|
>
>
|
|
<
|





<
|
<
<
<


















|







380
381
382
383
384
385
386
387
388
389
390
391
392
393

394
395
396
397
398
399

400



401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
    }
    if( b1==0xFF && b2==0xFE ){
      bom = SQLITE_UTF16LE;
    }
  }
  
  if( bom ){
    /* This function is called as soon as a string is stored in a Mem*,
    ** from within sqlite3VdbeMemSetStr(). At that point it is not possible
    ** for the string to be stored in Mem.zShort, or for it to be stored
    ** in dynamic memory with no destructor.
    */
    assert( !(pMem->flags&MEM_Short) );
    assert( !(pMem->flags&MEM_Dyn) || pMem->xDel );

    if( pMem->flags & MEM_Dyn ){
      void (*xDel)(void*) = pMem->xDel;
      char *z = pMem->z;
      pMem->z = 0;
      pMem->xDel = 0;
      rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);

      xDel(z);



    }else{
      rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, 
          SQLITE_TRANSIENT);
    }
  }
  return rc;
}

/*
** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,
** return the number of unicode characters in pZ up to (but not including)
** the first 0x00 byte. If nByte is not less than zero, return the
** number of unicode characters in the first nByte of pZ (or up to 
** the first 0x00, whichever comes first).
*/
int sqlite3utf8CharLen(const char *z, int nByte){
  int r = 0;
  const char *zTerm;
  if( nByte>=0 ){
    zTerm = &z[nByte];
  }else{
    zTerm = (const char *)(-1);
  }
  assert( z<=zTerm );
  while( *z!=0 && z<zTerm ){
    SKIP_UTF8(z);
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
void sqlite3utfSelfTest(){
  int i;
  unsigned char zBuf[20];
  unsigned char *z;
  int n;
  int c;

  for(i=0; 0 && i<0x00110000; i++){
    z = zBuf;
    WRITE_UTF8(z, i);
    n = z-zBuf;
    z = zBuf;
    READ_UTF8(z, c);
    assert( c==i );
    assert( (z-zBuf)==n );







|







516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
void sqlite3utfSelfTest(){
  int i;
  unsigned char zBuf[20];
  unsigned char *z;
  int n;
  int c;

  for(i=0; i<0x00110000; i++){
    z = zBuf;
    WRITE_UTF8(z, i);
    n = z-zBuf;
    z = zBuf;
    READ_UTF8(z, c);
    assert( c==i );
    assert( (z-zBuf)==n );
Changes to test/enc.test.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.3 2004/06/19 00:16:31 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

proc do_bincmp_test {testname got expect} {
  binary scan $expect \c* expectvals
  binary scan $got \c* gotvals







|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.4 2004/06/23 13:46:33 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

proc do_bincmp_test {testname got expect} {
  binary scan $expect \c* expectvals
  binary scan $got \c* gotvals
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
    set utf16 $utf16be
  }
  set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
  do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]

  # Step 4 (little endian).
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8]
  do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]

  # Step 4 (big endian).
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
  do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]








|







100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
    set utf16 $utf16be
  }
  set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
  do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]

  # Step 4 (little endian).
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1]
  do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]

  # Step 4 (big endian).
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
  do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]