# 2002 May 24 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # This file implements regression tests for SQLite library. The focus of # this file is testing the SQLite routines used for converting between the # various suported unicode encodings (UTF-8, UTF-16, UTF-16le and # UTF-16be). # # $Id: enc2.test,v 1.28 2006/09/23 20:36:03 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl # If UTF16 support is disabled, ignore the tests in this file # ifcapable {!utf16} { finish_test return } # The rough organisation of tests in this file is: # # enc2.1.*: Simple tests with a UTF-8 db. # enc2.2.*: Simple tests with a UTF-16LE db. # enc2.3.*: Simple tests with a UTF-16BE db. # enc2.4.*: Test that attached databases must have the same text encoding # as the main database. # enc2.5.*: Test the behaviour of the library when a collation sequence is # not available for the most desirable text encoding. # enc2.6.*: Similar test for user functions. # enc2.7.*: Test that the VerifyCookie opcode protects against assuming the # wrong text encoding for the database. # enc2.8.*: Test sqlite3_complete16() # db close # Return the UTF-8 representation of the supplied UTF-16 string $str. proc utf8 {str} { # If $str ends in two 0x00 0x00 bytes, knock these off before # converting to UTF-8 using TCL. binary scan $str \c* vals if {[lindex $vals end]==0 && [lindex $vals end-1]==0} { set str [binary format \c* [lrange $vals 0 end-2]] } set r [encoding convertfrom unicode $str] return $r } # # This proc contains all the tests in this file. It is run # three times. Each time the file 'test.db' contains a database # with the following contents: set dbcontents { CREATE TABLE t1(a PRIMARY KEY, b, c); INSERT INTO t1 VALUES('one', 'I', 1); } # This proc tests that we can open and manipulate the test.db # database, and that it is possible to retreive values in # various text encodings. # proc run_test_script {t enc} { # Open the database and pull out a (the) row. do_test $t.1 { sqlite3 db test.db; set DB [sqlite3_connection_pointer db] execsql {SELECT * FROM t1} } {one I 1} # Insert some data do_test $t.2 { execsql {INSERT INTO t1 VALUES('two', 'II', 2);} execsql {SELECT * FROM t1} } {one I 1 two II 2} # Insert some data do_test $t.3 { execsql { INSERT INTO t1 VALUES('three','III',3); INSERT INTO t1 VALUES('four','IV',4); INSERT INTO t1 VALUES('five','V',5); } execsql {SELECT * FROM t1} } {one I 1 two II 2 three III 3 four IV 4 five V 5} # Use the index do_test $t.4 { execsql { SELECT * FROM t1 WHERE a = 'one'; } } {one I 1} do_test $t.5 { execsql { SELECT * FROM t1 WHERE a = 'four'; } } {four IV 4} ifcapable subquery { do_test $t.6 { execsql { SELECT * FROM t1 WHERE a IN ('one', 'two'); } } {one I 1 two II 2} } # Now check that we can retrieve data in both UTF-16 and UTF-8 do_test $t.7 { set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL] sqlite3_step $STMT sqlite3_column_text $STMT 0 } {four} do_test $t.8 { sqlite3_step $STMT utf8 [sqlite3_column_text16 $STMT 0] } {five} do_test $t.9 { sqlite3_finalize $STMT } SQLITE_OK ifcapable vacuum { execsql VACUUM } do_test $t.10 { db eval {PRAGMA encoding} } $enc } # The three unicode encodings understood by SQLite. set encodings [list UTF-8 UTF-16le UTF-16be] set sqlite_os_trace 0 set i 1 foreach enc $encodings { file delete -force test.db sqlite3 db test.db db eval "PRAGMA encoding = \"$enc\"" execsql $dbcontents do_test enc2-$i.0.1 { db eval {PRAGMA encoding} } $enc do_test enc2-$i.0.2 { db eval {PRAGMA encoding=UTF8} db eval {PRAGMA encoding} } $enc do_test enc2-$i.0.3 { db eval {PRAGMA encoding=UTF16le} db eval {PRAGMA encoding} } $enc do_test enc2-$i.0.4 { db eval {PRAGMA encoding=UTF16be} db eval {PRAGMA encoding} } $enc db close run_test_script enc2-$i $enc db close incr i } # Test that it is an error to try to attach a database with a different # encoding to the main database. do_test enc2-4.1 { file delete -force test.db sqlite3 db test.db db eval "PRAGMA encoding = 'UTF-8'" db eval "CREATE TABLE abc(a, b, c);" } {} do_test enc2-4.2 { file delete -force test2.db sqlite3 db2 test2.db db2 eval "PRAGMA encoding = 'UTF-16'" db2 eval "CREATE TABLE abc(a, b, c);" } {} do_test enc2-4.3 { catchsql { ATTACH 'test2.db' as aux; } } {1 {attached databases must use the same text encoding as main database}} db2 close db close # The following tests - enc2-5.* - test that SQLite selects the correct # collation sequence when more than one is available. set ::values [list one two three four five] set ::test_collate_enc INVALID proc test_collate {enc lhs rhs} { set ::test_collate_enc $enc set l [lsearch -exact $::values $lhs] set r [lsearch -exact $::values $rhs] set res [expr $l - $r] # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res" return $res } file delete -force test.db sqlite3 db test.db; set DB [sqlite3_connection_pointer db] do_test enc2-5.0 { execsql { CREATE TABLE t5(a); INSERT INTO t5 VALUES('one'); INSERT INTO t5 VALUES('two'); INSERT INTO t5 VALUES('five'); INSERT INTO t5 VALUES('three'); INSERT INTO t5 VALUES('four'); } } {} do_test enc2-5.1 { add_test_collate $DB 1 1 1 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}] lappend res $::test_collate_enc } {one two three four five UTF-8} do_test enc2-5.2 { add_test_collate $DB 0 1 0 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] lappend res $::test_collate_enc } {one two three four five UTF-16LE} do_test enc2-5.3 { add_test_collate $DB 0 0 1 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] lappend res $::test_collate_enc } {one two three four five UTF-16BE} db close file delete -force test.db sqlite3 db test.db; set DB [sqlite3_connection_pointer db] execsql {pragma encoding = 'UTF-16LE'} do_test enc2-5.4 { execsql { CREATE TABLE t5(a); INSERT INTO t5 VALUES('one'); INSERT INTO t5 VALUES('two'); INSERT INTO t5 VALUES('five'); INSERT INTO t5 VALUES('three'); INSERT INTO t5 VALUES('four'); } } {} do_test enc2-5.5 { add_test_collate $DB 1 1 1 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] lappend res $::test_collate_enc } {one two three four five UTF-16LE} do_test enc2-5.6 { add_test_collate $DB 1 0 1 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] lappend res $::test_collate_enc } {one two three four five UTF-16BE} do_test enc2-5.7 { add_test_collate $DB 1 0 0 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] lappend res $::test_collate_enc } {one two three four five UTF-8} db close file delete -force test.db sqlite3 db test.db; set DB [sqlite3_connection_pointer db] execsql {pragma encoding = 'UTF-16BE'} do_test enc2-5.8 { execsql { CREATE TABLE t5(a); INSERT INTO t5 VALUES('one'); INSERT INTO t5 VALUES('two'); INSERT INTO t5 VALUES('five'); INSERT INTO t5 VALUES('three'); INSERT INTO t5 VALUES('four'); } } {} do_test enc2-5.9 { add_test_collate $DB 1 1 1 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] lappend res $::test_collate_enc } {one two three four five UTF-16BE} do_test enc2-5.10 { add_test_collate $DB 1 1 0 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] lappend res $::test_collate_enc } {one two three four five UTF-16LE} do_test enc2-5.11 { add_test_collate $DB 1 0 0 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] lappend res $::test_collate_enc } {one two three four five UTF-8} # Also test that a UTF-16 collation factory works. do_test enc2-5-12 { add_test_collate $DB 0 0 0 catchsql { SELECT * FROM t5 ORDER BY 1 COLLATE test_collate } } {1 {no such collation sequence: test_collate}} do_test enc2-5.13 { add_test_collate_needed $DB set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }] lappend res $::test_collate_enc } {one two three four five UTF-16BE} do_test enc2-5.14 { set ::sqlite_last_needed_collation } test_collate db close file delete -force test.db do_test enc2-5.15 { sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db] add_test_collate_needed $::DB set ::sqlite_last_needed_collation } {} do_test enc2-5.16 { execsql {CREATE TABLE t1(a varchar collate test_collate);} } {} do_test enc2-5.17 { set ::sqlite_last_needed_collation } {test_collate} # The following tests - enc2-6.* - test that SQLite selects the correct # user function when more than one is available. proc test_function {enc arg} { return "$enc $arg" } db close file delete -force test.db sqlite3 db test.db; set DB [sqlite3_connection_pointer db] execsql {pragma encoding = 'UTF-8'} do_test enc2-6.0 { execsql { CREATE TABLE t5(a); INSERT INTO t5 VALUES('one'); } } {} do_test enc2-6.1 { add_test_function $DB 1 1 1 execsql { SELECT test_function('sqlite') } } {{UTF-8 sqlite}} db close sqlite3 db test.db; set DB [sqlite3_connection_pointer db] do_test enc2-6.2 { add_test_function $DB 0 1 0 execsql { SELECT test_function('sqlite') } } {{UTF-16LE sqlite}} db close sqlite3 db test.db; set DB [sqlite3_connection_pointer db] do_test enc2-6.3 { add_test_function $DB 0 0 1 execsql { SELECT test_function('sqlite') } } {{UTF-16BE sqlite}} db close file delete -force test.db sqlite3 db test.db; set DB [sqlite3_connection_pointer db] execsql {pragma encoding = 'UTF-16LE'} do_test enc2-6.3 { execsql { CREATE TABLE t5(a); INSERT INTO t5 VALUES('sqlite'); } } {} do_test enc2-6.4 { add_test_function $DB 1 1 1 execsql { SELECT test_function('sqlite') } } {{UTF-16LE sqlite}} db close sqlite3 db test.db; set DB [sqlite3_connection_pointer db] do_test enc2-6.5 { add_test_function $DB 0 1 0 execsql { SELECT test_function('sqlite') } } {{UTF-16LE sqlite}} db close sqlite3 db test.db; set DB [sqlite3_connection_pointer db] do_test enc2-6.6 { add_test_function $DB 0 0 1 execsql { SELECT test_function('sqlite') } } {{UTF-16BE sqlite}} db close file delete -force test.db sqlite3 db test.db; set DB [sqlite3_connection_pointer db] execsql {pragma encoding = 'UTF-16BE'} do_test enc2-6.7 { execsql { CREATE TABLE t5(a); INSERT INTO t5 VALUES('sqlite'); } } {} do_test enc2-6.8 { add_test_function $DB 1 1 1 execsql { SELECT test_function('sqlite') } } {{UTF-16BE sqlite}} db close sqlite3 db test.db; set DB [sqlite3_connection_pointer db] do_test enc2-6.9 { add_test_function $DB 0 1 0 execsql { SELECT test_function('sqlite') } } {{UTF-16LE sqlite}} db close sqlite3 db test.db; set DB [sqlite3_connection_pointer db] do_test enc2-6.10 { add_test_function $DB 0 0 1 execsql { SELECT test_function('sqlite') } } {{UTF-16BE sqlite}} db close file delete -force test.db # The following tests - enc2-7.* - function as follows: # # 1: Open an empty database file assuming UTF-16 encoding. # 2: Open the same database with a different handle assuming UTF-8. Create # a table using this handle. # 3: Read the sqlite_master table from the first handle. # 4: Ensure the first handle recognises the database encoding is UTF-8. # do_test enc2-7.1 { sqlite3 db test.db execsql { PRAGMA encoding = 'UTF-16'; SELECT * FROM sqlite_master; } } {} do_test enc2-7.2 { set enc [execsql { PRAGMA encoding; }] string range $enc 0 end-2 ;# Chop off the "le" or "be" } {UTF-16} do_test enc2-7.3 { sqlite3 db2 test.db execsql { PRAGMA encoding = 'UTF-8'; CREATE TABLE abc(a, b, c); } db2 } {} do_test enc2-7.4 { execsql { SELECT * FROM sqlite_master; } } "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}" do_test enc2-7.5 { execsql { PRAGMA encoding; } } {UTF-8} db close db2 close proc utf16 {utf8} { set utf16 [encoding convertto unicode $utf8] append utf16 "\x00\x00" return $utf16 } ifcapable {complete} { do_test enc2-8.1 { sqlite3_complete16 [utf16 "SELECT * FROM t1;"] } {1} do_test enc2-8.2 { sqlite3_complete16 [utf16 "SELECT * FROM"] } {0} } # Test that the encoding of an empty database may still be set after the # (empty) schema has been initialized. file delete -force test.db do_test enc2-9.1 { sqlite3 db test.db execsql { PRAGMA encoding = 'UTF-8'; PRAGMA encoding; } } {UTF-8} do_test enc2-9.2 { sqlite3 db test.db execsql { PRAGMA encoding = 'UTF-16le'; PRAGMA encoding; } } {UTF-16le} do_test enc2-9.3 { sqlite3 db test.db execsql { SELECT * FROM sqlite_master; PRAGMA encoding = 'UTF-8'; PRAGMA encoding; } } {UTF-8} do_test enc2-9.4 { sqlite3 db test.db execsql { PRAGMA encoding = 'UTF-16le'; CREATE TABLE abc(a, b, c); PRAGMA encoding; } } {UTF-16le} do_test enc2-9.5 { sqlite3 db test.db execsql { PRAGMA encoding = 'UTF-8'; PRAGMA encoding; } } {UTF-16le} # Ticket #1987. # Disallow encoding changes once the encoding has been set. # do_test enc2-10.1 { db close file delete -force test.db test.db-journal sqlite3 db test.db db eval { PRAGMA encoding=UTF16; CREATE TABLE t1(a); PRAGMA encoding=UTF8; CREATE TABLE t2(b); } db close sqlite3 db test.db db eval { SELECT name FROM sqlite_master } } {t1 t2} finish_test