SQLite
Check-in [46f7c9301eae7a4d80a5282aa8538a500639755b]
Not logged in
Overview
SHA1 Hash:46f7c9301eae7a4d80a5282aa8538a500639755b
Date: 2013-01-26 19:26:11
User: drh
Comment:Add a single test case to fts4unicode.test to verify that title-case maps to lower case.
Tags And Properties
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to test/fts4unicode.test

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68



69
70
71
72
73
74
75
...
379
380
381
382
383
384
385
386
387
    append sql "'"
  }
  append sql ")"
  uplevel [list do_execsql_test $tn $sql [list [list {*}$res]]]
}

do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}
do_unicode_token_test 1.1 {  } {0   1   2  }
do_unicode_token_test 1.2 {xx xx xx} {0 xx xx 1 xx xx 2 xx xx}

# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "0  \u1E9E"
do_unicode_token_test 1.5 "\u1E9E" "0 \uDF \u1E9E"

do_unicode_token_test 1.6 "The quick brown fox" {
  0 the The 1 quick quick 2 brown brown 3 fox fox
}
do_unicode_token_test 1.7 "The\u00bfquick\u224ebrown\u2263fox" {
  0 the The 1 quick quick 2 brown brown 3 fox fox
}

do_unicode_token_test2 1.8  {a B c D} {0 a a 1 b B 2 c c 3 d D}
do_unicode_token_test2 1.9  {  } {0 a  1 o  2 u }
do_unicode_token_test2 1.10 {xx xx xx} {0 xax xx 1 xox xx 2 xux xx}

# Check that diacritics are removed if remove_diacritics=1 is specified.
# And that they do not break tokens.
do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx"




#-------------------------------------------------------------------------
#
set docs [list {
  Enhance the INSERT syntax to allow multiple rows to be inserted via the
  VALUES clause.
} {
................................................................................
  do_isspace_test 6.$T.19 $T   {8196 8197 8198 8199}
  do_isspace_test 6.$T.19 $T   {8200 8201 8202 8239}
  do_isspace_test 6.$T.19 $T   {8287 12288}
}


finish_test









|
|



|










|
|



|
>
>
>







 







<
<
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
...
382
383
384
385
386
387
388


    append sql "'"
  }
  append sql ")"
  uplevel [list do_execsql_test $tn $sql [list [list {*}$res]]]
}

do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}
do_unicode_token_test 1.1 {Ä Ö Ü} {0 ä Ä 1 ö Ö 2 ü Ü}
do_unicode_token_test 1.2 {xÄx xÖx xÜx} {0 xäx xÄx 1 xöx xÖx 2 xüx xÜx}

# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "0 ß \u1E9E"
do_unicode_token_test 1.5 "\u1E9E" "0 \uDF \u1E9E"

do_unicode_token_test 1.6 "The quick brown fox" {
  0 the The 1 quick quick 2 brown brown 3 fox fox
}
do_unicode_token_test 1.7 "The\u00bfquick\u224ebrown\u2263fox" {
  0 the The 1 quick quick 2 brown brown 3 fox fox
}

do_unicode_token_test2 1.8  {a B c D} {0 a a 1 b B 2 c c 3 d D}
do_unicode_token_test2 1.9  {Ä Ö Ü} {0 a Ä 1 o Ö 2 u Ü}
do_unicode_token_test2 1.10 {xÄx xÖx xÜx} {0 xax xÄx 1 xox xÖx 2 xux xÜx}

# Check that diacritics are removed if remove_diacritics=1 is specified.
# And that they do not break tokens.
do_unicode_token_test2 1.11 "xx\u0301xx" "0 xxxx xx\u301xx"

# Title-case mappings work
do_unicode_token_test 1.12 "\u01c5" "0 \u01c6 \u01c5"

#-------------------------------------------------------------------------
#
set docs [list {
  Enhance the INSERT syntax to allow multiple rows to be inserted via the
  VALUES clause.
} {
................................................................................
  do_isspace_test 6.$T.19 $T   {8196 8197 8198 8199}
  do_isspace_test 6.$T.19 $T   {8200 8201 8202 8239}
  do_isspace_test 6.$T.19 $T   {8287 12288}
}


finish_test