/ Check-in [53c553c7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve test coverage of utf.c. (CVS 1678)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 53c553c760a84e213c2277588bcc292892c2b0ce
User & Date: danielk1977 2004-06-23 13:46:32
Context
2004-06-24
00:20
Apply (1679) to version 3. Ticket #777. (CVS 1680) check-in: 0a26b915 user: danielk1977 tags: trunk
2004-06-23
13:46
Improve test coverage of utf.c. (CVS 1678) check-in: 53c553c7 user: danielk1977 tags: trunk
12:35
Fix a bug in the previous checkin. (CVS 1677) check-in: f31c84a6 user: danielk1977 tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/test5.c.

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
...
112
113
114
115
116
117
118




119
120
121
122
123
124
125
126
127
128
129
130

131
132
133
134
135
136
137
138



139
140
141
142
143
144
145
146
147
148



149
150
151





152
153
154
155
156
157
158
159
*************************************************************************
** Code for testing the utf.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library. Specifically, the code in this file
** is used for testing the SQLite routines for converting between
** the various supported unicode encodings.
**
** $Id: test5.c,v 1.11 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "vdbeInt.h"
#include "os.h"         /* to get SQLITE_BIGENDIAN */
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
................................................................................
  }
  if( !pEnc->enc ){
    Tcl_AppendResult(interp, "No such encoding: ", z, 0);
  }
  return pEnc->enc;
}





static int test_translate(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  u8 enc_from;
  u8 enc_to;
  sqlite3_value *pVal;

  const char *z;
  int len;


  if( objc!=4 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), 
        " <string/blob> <from enc> <to enc>", 0
    );
    return TCL_ERROR;
  }




  enc_from = name_to_enc(interp, objv[2]);
  if( !enc_from ) return TCL_ERROR;
  enc_to = name_to_enc(interp, objv[3]);
  if( !enc_to ) return TCL_ERROR;

  pVal = sqlite3ValueNew();

  if( enc_from==SQLITE_UTF8 ){
    z = Tcl_GetString(objv[1]);



    sqlite3ValueSetStr(pVal, -1, z, enc_from, SQLITE_STATIC);
  }else{
    z = Tcl_GetByteArrayFromObj(objv[1], &len);





    sqlite3ValueSetStr(pVal, -1, z, enc_from, SQLITE_STATIC);
  }

  z = sqlite3ValueText(pVal, enc_to);
  len = sqlite3ValueBytes(pVal, enc_to) + (enc_to==SQLITE_UTF8?1:2);
  Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(z, len));

  sqlite3ValueFree(pVal);







|







 







>
>
>
>










|

>

|






>
>
>










>
>
>
|


>
>
>
>
>
|







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
...
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
*************************************************************************
** Code for testing the utf.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library. Specifically, the code in this file
** is used for testing the SQLite routines for converting between
** the various supported unicode encodings.
**
** $Id: test5.c,v 1.12 2004/06/23 13:46:32 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "vdbeInt.h"
#include "os.h"         /* to get SQLITE_BIGENDIAN */
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
................................................................................
  }
  if( !pEnc->enc ){
    Tcl_AppendResult(interp, "No such encoding: ", z, 0);
  }
  return pEnc->enc;
}

/*
** Usage:   test_translate <string/blob> <from enc> <to enc> ?<transient>?
**
*/
static int test_translate(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  u8 enc_from;
  u8 enc_to;
  sqlite3_value *pVal;

  char *z;
  int len;
  void (*xDel)(void *p) = SQLITE_STATIC;

  if( objc!=4 && objc!=5 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), 
        " <string/blob> <from enc> <to enc>", 0
    );
    return TCL_ERROR;
  }
  if( objc==5 ){
    xDel = sqlite3FreeX;
  }

  enc_from = name_to_enc(interp, objv[2]);
  if( !enc_from ) return TCL_ERROR;
  enc_to = name_to_enc(interp, objv[3]);
  if( !enc_to ) return TCL_ERROR;

  pVal = sqlite3ValueNew();

  if( enc_from==SQLITE_UTF8 ){
    z = Tcl_GetString(objv[1]);
    if( objc==5 ){
      z = sqliteStrDup(z);
    }
    sqlite3ValueSetStr(pVal, -1, z, enc_from, xDel);
  }else{
    z = Tcl_GetByteArrayFromObj(objv[1], &len);
    if( objc==5 ){
      char *zTmp = z;
      z = sqliteMalloc(len);
      memcpy(z, zTmp, len);
    }
    sqlite3ValueSetStr(pVal, -1, z, enc_from, xDel);
  }

  z = sqlite3ValueText(pVal, enc_to);
  len = sqlite3ValueBytes(pVal, enc_to) + (enc_to==SQLITE_UTF8?1:2);
  Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(z, len));

  sqlite3ValueFree(pVal);

Changes to src/utf.c.

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
...
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
...
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
...
386
387
388
389
390
391
392
393
394
395
396
397



398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
...
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
...
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.24 2004/06/23 00:23:49 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
  */
  zIn = pMem->z;
  zTerm = &zIn[pMem->n];
  if( len>NBFS ){
    zOut = sqliteMallocRaw(len);
    if( !zOut ) return SQLITE_NOMEM;
  }else{
    if( pMem->z==pMem->zShort ){
      zOut = zShort;
    }else{
      zOut = pMem->zShort;
    }
  }
  z = zOut;

  if( pMem->enc==SQLITE_UTF8 ){
    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){
................................................................................
    }
  }
  assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );

  sqlite3VdbeMemRelease(pMem);
  pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
  pMem->enc = desiredEnc;
  if( (char *)zOut==pMem->zShort ){
    pMem->flags |= (MEM_Term|MEM_Short);
  }else if( zOut==zShort ){
    memcpy(pMem->zShort, zOut, len);
    zOut = pMem->zShort;
    pMem->flags |= (MEM_Term|MEM_Short);
  }else{
    pMem->flags |= (MEM_Term|MEM_Dyn);
  }
  pMem->z = zOut;
................................................................................
    }
    if( b1==0xFF && b2==0xFE ){
      bom = SQLITE_UTF16LE;
    }
  }
  
  if( bom ){
    if( pMem->flags & MEM_Short ){
      memmove(pMem->zShort, &pMem->zShort[2], NBFS-2);
      pMem->n -= 2;
      pMem->enc = bom;
    }



    else if( pMem->flags & MEM_Dyn ){
      void (*xDel)(void*) = pMem->xDel;
      char *z = pMem->z;
      pMem->z = 0;
      pMem->xDel = 0;
      rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);
      if( xDel ){
        xDel(z);
      }else{
        sqliteFree(z);
      }
    }else{
      rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, 
          SQLITE_TRANSIENT);
    }
  }
  return rc;
}
................................................................................
** the first 0x00 byte. If nByte is not less than zero, return the
** number of unicode characters in the first nByte of pZ (or up to 
** the first 0x00, whichever comes first).
*/
int sqlite3utf8CharLen(const char *z, int nByte){
  int r = 0;
  const char *zTerm;
  if( nByte>0 ){
    zTerm = &z[nByte];
  }else{
    zTerm = (const char *)(-1);
  }
  assert( z<=zTerm );
  while( *z!=0 && z<zTerm ){
    SKIP_UTF8(z);
................................................................................
void sqlite3utfSelfTest(){
  int i;
  unsigned char zBuf[20];
  unsigned char *z;
  int n;
  int c;

  for(i=0; 0 && i<0x00110000; i++){
    z = zBuf;
    WRITE_UTF8(z, i);
    n = z-zBuf;
    z = zBuf;
    READ_UTF8(z, c);
    assert( c==i );
    assert( (z-zBuf)==n );







|







 







<
|
<
<
<







 







<
<
|







 







|
|
|
|
<
>
>
>
|





<
|
<
<
<







 







|







 







|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
...
288
289
290
291
292
293
294

295



296
297
298
299
300
301
302
...
335
336
337
338
339
340
341


342
343
344
345
346
347
348
349
...
380
381
382
383
384
385
386
387
388
389
390

391
392
393
394
395
396
397
398
399

400



401
402
403
404
405
406
407
...
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
...
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.25 2004/06/23 13:46:32 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
  */
  zIn = pMem->z;
  zTerm = &zIn[pMem->n];
  if( len>NBFS ){
    zOut = sqliteMallocRaw(len);
    if( !zOut ) return SQLITE_NOMEM;
  }else{

    zOut = zShort;



  }
  z = zOut;

  if( pMem->enc==SQLITE_UTF8 ){
    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){
................................................................................
    }
  }
  assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );

  sqlite3VdbeMemRelease(pMem);
  pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
  pMem->enc = desiredEnc;


  if( zOut==zShort ){
    memcpy(pMem->zShort, zOut, len);
    zOut = pMem->zShort;
    pMem->flags |= (MEM_Term|MEM_Short);
  }else{
    pMem->flags |= (MEM_Term|MEM_Dyn);
  }
  pMem->z = zOut;
................................................................................
    }
    if( b1==0xFF && b2==0xFE ){
      bom = SQLITE_UTF16LE;
    }
  }
  
  if( bom ){
    /* This function is called as soon as a string is stored in a Mem*,
    ** from within sqlite3VdbeMemSetStr(). At that point it is not possible
    ** for the string to be stored in Mem.zShort, or for it to be stored
    ** in dynamic memory with no destructor.

    */
    assert( !(pMem->flags&MEM_Short) );
    assert( !(pMem->flags&MEM_Dyn) || pMem->xDel );
    if( pMem->flags & MEM_Dyn ){
      void (*xDel)(void*) = pMem->xDel;
      char *z = pMem->z;
      pMem->z = 0;
      pMem->xDel = 0;
      rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);

      xDel(z);



    }else{
      rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, 
          SQLITE_TRANSIENT);
    }
  }
  return rc;
}
................................................................................
** the first 0x00 byte. If nByte is not less than zero, return the
** number of unicode characters in the first nByte of pZ (or up to 
** the first 0x00, whichever comes first).
*/
int sqlite3utf8CharLen(const char *z, int nByte){
  int r = 0;
  const char *zTerm;
  if( nByte>=0 ){
    zTerm = &z[nByte];
  }else{
    zTerm = (const char *)(-1);
  }
  assert( z<=zTerm );
  while( *z!=0 && z<zTerm ){
    SKIP_UTF8(z);
................................................................................
void sqlite3utfSelfTest(){
  int i;
  unsigned char zBuf[20];
  unsigned char *z;
  int n;
  int c;

  for(i=0; i<0x00110000; i++){
    z = zBuf;
    WRITE_UTF8(z, i);
    n = z-zBuf;
    z = zBuf;
    READ_UTF8(z, c);
    assert( c==i );
    assert( (z-zBuf)==n );

Changes to test/enc.test.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.3 2004/06/19 00:16:31 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

proc do_bincmp_test {testname got expect} {
  binary scan $expect \c* expectvals
  binary scan $got \c* gotvals
................................................................................
    set utf16 $utf16be
  }
  set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
  do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]

  # Step 4 (little endian).
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8]
  do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]

  # Step 4 (big endian).
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
  do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]








|







 







|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.4 2004/06/23 13:46:33 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

proc do_bincmp_test {testname got expect} {
  binary scan $expect \c* expectvals
  binary scan $got \c* gotvals
................................................................................
    set utf16 $utf16be
  }
  set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
  do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]

  # Step 4 (little endian).
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1]
  do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]

  # Step 4 (big endian).
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
  do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]