Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Improve test coverage of utf.c. (CVS 1678) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
53c553c760a84e213c2277588bcc2928 |
User & Date: | danielk1977 2004-06-23 13:46:32.000 |
Context
2004-06-24
| ||
00:20 | Apply (1679) to version 3. Ticket #777. (CVS 1680) (check-in: 0a26b91580 user: danielk1977 tags: trunk) | |
2004-06-23
| ||
13:46 | Improve test coverage of utf.c. (CVS 1678) (check-in: 53c553c760 user: danielk1977 tags: trunk) | |
12:35 | Fix a bug in the previous checkin. (CVS 1677) (check-in: f31c84a64f user: danielk1977 tags: trunk) | |
Changes
Changes to src/test5.c.
︙ | ︙ | |||
11 12 13 14 15 16 17 | ************************************************************************* ** Code for testing the utf.c module in SQLite. This code ** is not included in the SQLite library. It is used for automated ** testing of the SQLite library. Specifically, the code in this file ** is used for testing the SQLite routines for converting between ** the various supported unicode encodings. ** | | | 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 | ************************************************************************* ** Code for testing the utf.c module in SQLite. This code ** is not included in the SQLite library. It is used for automated ** testing of the SQLite library. Specifically, the code in this file ** is used for testing the SQLite routines for converting between ** the various supported unicode encodings. ** ** $Id: test5.c,v 1.12 2004/06/23 13:46:32 danielk1977 Exp $ */ #include "sqliteInt.h" #include "vdbeInt.h" #include "os.h" /* to get SQLITE_BIGENDIAN */ #include "tcl.h" #include <stdlib.h> #include <string.h> |
︙ | ︙ | |||
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | } if( !pEnc->enc ){ Tcl_AppendResult(interp, "No such encoding: ", z, 0); } return pEnc->enc; } static int test_translate( void * clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ u8 enc_from; u8 enc_to; sqlite3_value *pVal; | > > > > | > | > > > > > > | > > > > > | | 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | } if( !pEnc->enc ){ Tcl_AppendResult(interp, "No such encoding: ", z, 0); } return pEnc->enc; } /* ** Usage: test_translate <string/blob> <from enc> <to enc> ?<transient>? ** */ static int test_translate( void * clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ u8 enc_from; u8 enc_to; sqlite3_value *pVal; char *z; int len; void (*xDel)(void *p) = SQLITE_STATIC; if( objc!=4 && objc!=5 ){ Tcl_AppendResult(interp, "wrong # args: should be \"", Tcl_GetStringFromObj(objv[0], 0), " <string/blob> <from enc> <to enc>", 0 ); return TCL_ERROR; } if( objc==5 ){ xDel = sqlite3FreeX; } enc_from = name_to_enc(interp, objv[2]); if( !enc_from ) return TCL_ERROR; enc_to = name_to_enc(interp, objv[3]); if( !enc_to ) return TCL_ERROR; pVal = sqlite3ValueNew(); if( enc_from==SQLITE_UTF8 ){ z = Tcl_GetString(objv[1]); if( objc==5 ){ z = sqliteStrDup(z); } sqlite3ValueSetStr(pVal, -1, z, enc_from, xDel); }else{ z = Tcl_GetByteArrayFromObj(objv[1], &len); if( objc==5 ){ char *zTmp = z; z = sqliteMalloc(len); memcpy(z, zTmp, len); } sqlite3ValueSetStr(pVal, -1, z, enc_from, xDel); } z = sqlite3ValueText(pVal, enc_to); len = sqlite3ValueBytes(pVal, enc_to) + (enc_to==SQLITE_UTF8?1:2); Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(z, len)); sqlite3ValueFree(pVal); |
︙ | ︙ |
Changes to src/utf.c.
︙ | ︙ | |||
8 9 10 11 12 13 14 | ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file contains routines used to translate between UTF-8, ** UTF-16, UTF-16BE, and UTF-16LE. ** | | | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file contains routines used to translate between UTF-8, ** UTF-16, UTF-16BE, and UTF-16LE. ** ** $Id: utf.c,v 1.25 2004/06/23 13:46:32 danielk1977 Exp $ ** ** Notes on UTF-8: ** ** Byte-0 Byte-1 Byte-2 Byte-3 Value ** 0xxxxxxx 00000000 00000000 0xxxxxxx ** 110yyyyy 10xxxxxx 00000000 00000yyy yyxxxxxx ** 1110zzzz 10yyyyyy 10xxxxxx 00000000 zzzzyyyy yyxxxxxx |
︙ | ︙ | |||
288 289 290 291 292 293 294 | */ zIn = pMem->z; zTerm = &zIn[pMem->n]; if( len>NBFS ){ zOut = sqliteMallocRaw(len); if( !zOut ) return SQLITE_NOMEM; }else{ | < | < < < | 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 | */ zIn = pMem->z; zTerm = &zIn[pMem->n]; if( len>NBFS ){ zOut = sqliteMallocRaw(len); if( !zOut ) return SQLITE_NOMEM; }else{ zOut = zShort; } z = zOut; if( pMem->enc==SQLITE_UTF8 ){ if( desiredEnc==SQLITE_UTF16LE ){ /* UTF-8 -> UTF-16 Little-endian */ while( zIn<zTerm ){ |
︙ | ︙ | |||
339 340 341 342 343 344 345 | } } assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len ); sqlite3VdbeMemRelease(pMem); pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short); pMem->enc = desiredEnc; | < < | | 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 | } } assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len ); sqlite3VdbeMemRelease(pMem); pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short); pMem->enc = desiredEnc; if( zOut==zShort ){ memcpy(pMem->zShort, zOut, len); zOut = pMem->zShort; pMem->flags |= (MEM_Term|MEM_Short); }else{ pMem->flags |= (MEM_Term|MEM_Dyn); } pMem->z = zOut; |
︙ | ︙ | |||
386 387 388 389 390 391 392 | } if( b1==0xFF && b2==0xFE ){ bom = SQLITE_UTF16LE; } } if( bom ){ | | > | > > | | < | < | < < < | | 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 | } if( b1==0xFF && b2==0xFE ){ bom = SQLITE_UTF16LE; } } if( bom ){ /* This function is called as soon as a string is stored in a Mem*, ** from within sqlite3VdbeMemSetStr(). At that point it is not possible ** for the string to be stored in Mem.zShort, or for it to be stored ** in dynamic memory with no destructor. */ assert( !(pMem->flags&MEM_Short) ); assert( !(pMem->flags&MEM_Dyn) || pMem->xDel ); if( pMem->flags & MEM_Dyn ){ void (*xDel)(void*) = pMem->xDel; char *z = pMem->z; pMem->z = 0; pMem->xDel = 0; rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT); xDel(z); }else{ rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, SQLITE_TRANSIENT); } } return rc; } /* ** pZ is a UTF-8 encoded unicode string. If nByte is less than zero, ** return the number of unicode characters in pZ up to (but not including) ** the first 0x00 byte. If nByte is not less than zero, return the ** number of unicode characters in the first nByte of pZ (or up to ** the first 0x00, whichever comes first). */ int sqlite3utf8CharLen(const char *z, int nByte){ int r = 0; const char *zTerm; if( nByte>=0 ){ zTerm = &z[nByte]; }else{ zTerm = (const char *)(-1); } assert( z<=zTerm ); while( *z!=0 && z<zTerm ){ SKIP_UTF8(z); |
︙ | ︙ | |||
524 525 526 527 528 529 530 | void sqlite3utfSelfTest(){ int i; unsigned char zBuf[20]; unsigned char *z; int n; int c; | | | 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 | void sqlite3utfSelfTest(){ int i; unsigned char zBuf[20]; unsigned char *z; int n; int c; for(i=0; i<0x00110000; i++){ z = zBuf; WRITE_UTF8(z, i); n = z-zBuf; z = zBuf; READ_UTF8(z, c); assert( c==i ); assert( (z-zBuf)==n ); |
︙ | ︙ |
Changes to test/enc.test.
︙ | ︙ | |||
9 10 11 12 13 14 15 | # #*********************************************************************** # This file implements regression tests for SQLite library. The focus of # this file is testing the SQLite routines used for converting between the # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and # UTF-16be). # | | | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | # #*********************************************************************** # This file implements regression tests for SQLite library. The focus of # this file is testing the SQLite routines used for converting between the # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and # UTF-16be). # # $Id: enc.test,v 1.4 2004/06/23 13:46:33 danielk1977 Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl proc do_bincmp_test {testname got expect} { binary scan $expect \c* expectvals binary scan $got \c* gotvals |
︙ | ︙ | |||
100 101 102 103 104 105 106 | set utf16 $utf16be } set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8] do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str] # Step 4 (little endian). append utf16le_bom "\xFF\xFE" $utf16le | | | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | set utf16 $utf16be } set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8] do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str] # Step 4 (little endian). append utf16le_bom "\xFF\xFE" $utf16le set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1] do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str] # Step 4 (big endian). append utf16be_bom "\xFE\xFF" $utf16be set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8] do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str] |
︙ | ︙ |