Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch fts4-unicode Excluding Merge-Ins
This is equivalent to a diff from 6d326d44 to e71495a8
2012-05-26
| ||
18:42 | Merge fts4-unicode branch with trunk. (check-in: 25ba1f84 user: dan tags: trunk) | |
18:28 | If SQLITE_DISABLE_FTS3_UNICODE is defined, do not build the "unicode61" tokenizer. (Closed-Leaf check-in: e71495a8 user: dan tags: fts4-unicode) | |
18:06 | Enable the use of shared cache for an in-memory database, so that separate database connections can share the same in-memory database. (check-in: 4590e433 user: drh tags: shared-cache-memdb) | |
17:57 | Change the format of the tables used by sqlite3FtsUnicodeTolower() to make them a little smaller. (check-in: b89d3834 user: dan tags: fts4-unicode) | |
2012-05-25
| ||
17:50 | Add an experimental tokenizer to fts4 - "unicode". This tokenizer works in the same way except that it understands unicode "simple case folding" and recognizes all characters not classified as "Letters" or "Numbers" by unicode as token separators. (check-in: 0c13570e user: dan tags: fts4-unicode) | |
2012-05-22
| ||
17:39 | Merge the 3.7.12.1 trunk changes into the WinRT branch. (check-in: b9ed0b26 user: drh tags: winrt) | |
13:11 | Version 3.7.12.1 (check-in: 972e75bb user: drh tags: apple-osx) | |
13:01 | Version 3.7.12.1 (check-in: d07b7b67 user: drh tags: sessions) | |
02:45 | Version 3.7.12.1 (check-in: 6d326d44 user: drh tags: trunk, release, version-3.7.12.1) | |
2012-05-21
| ||
22:45 | Increase the version number to 3.7.12.1. Minor changes to test scripts. (check-in: 5519cc5e user: drh tags: trunk) | |
Changes to ext/fts3/README.tokenizers.
1 2 3 4 5 6 7 8 9 10 11 12 13 | 1. FTS3 Tokenizers When creating a new full-text table, FTS3 allows the user to select the text tokenizer implementation to be used when indexing text by specifying a "tokenize" clause as part of the CREATE VIRTUAL TABLE statement: CREATE VIRTUAL TABLE <table-name> USING fts3( <columns ...> [, tokenize <tokenizer-name> [<tokenizer-args>]] ); The built-in tokenizers (valid values to pass as <tokenizer name>) are | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | 1. FTS3 Tokenizers When creating a new full-text table, FTS3 allows the user to select the text tokenizer implementation to be used when indexing text by specifying a "tokenize" clause as part of the CREATE VIRTUAL TABLE statement: CREATE VIRTUAL TABLE <table-name> USING fts3( <columns ...> [, tokenize <tokenizer-name> [<tokenizer-args>]] ); The built-in tokenizers (valid values to pass as <tokenizer name>) are "simple", "porter" and "unicode". <tokenizer-args> should consist of zero or more white-space separated arguments to pass to the selected tokenizer implementation. The interpretation of the arguments, if any, depends on the individual tokenizer. 2. Custom Tokenizers |
︙ | ︙ |
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 | ** ** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed ** to by the argument to point to the "simple" tokenizer implementation. ** And so on. */ void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); #ifdef SQLITE_ENABLE_ICU void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); #endif /* ** Initialise the fts3 extension. If this extension is built as part ** of the sqlite library, then this function is called directly by ** SQLite. If fts3 is built as a dynamically loadable extension, this ** function is called by the sqlite3_extension_init() entry point. */ int sqlite3Fts3Init(sqlite3 *db){ int rc = SQLITE_OK; Fts3Hash *pHash = 0; const sqlite3_tokenizer_module *pSimple = 0; const sqlite3_tokenizer_module *pPorter = 0; #ifdef SQLITE_ENABLE_ICU const sqlite3_tokenizer_module *pIcu = 0; sqlite3Fts3IcuTokenizerModule(&pIcu); #endif #ifdef SQLITE_TEST rc = sqlite3Fts3InitTerm(db); if( rc!=SQLITE_OK ) return rc; #endif rc = sqlite3Fts3InitAux(db); | > > > > > > > > > > | 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 | ** ** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed ** to by the argument to point to the "simple" tokenizer implementation. ** And so on. */ void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); #ifndef SQLITE_DISABLE_FTS3_UNICODE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule); #endif #ifdef SQLITE_ENABLE_ICU void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); #endif /* ** Initialise the fts3 extension. If this extension is built as part ** of the sqlite library, then this function is called directly by ** SQLite. If fts3 is built as a dynamically loadable extension, this ** function is called by the sqlite3_extension_init() entry point. */ int sqlite3Fts3Init(sqlite3 *db){ int rc = SQLITE_OK; Fts3Hash *pHash = 0; const sqlite3_tokenizer_module *pSimple = 0; const sqlite3_tokenizer_module *pPorter = 0; #ifndef SQLITE_DISABLE_FTS3_UNICODE const sqlite3_tokenizer_module *pUnicode = 0; #endif #ifdef SQLITE_ENABLE_ICU const sqlite3_tokenizer_module *pIcu = 0; sqlite3Fts3IcuTokenizerModule(&pIcu); #endif #ifndef SQLITE_DISABLE_FTS3_UNICODE sqlite3Fts3UnicodeTokenizer(&pUnicode); #endif #ifdef SQLITE_TEST rc = sqlite3Fts3InitTerm(db); if( rc!=SQLITE_OK ) return rc; #endif rc = sqlite3Fts3InitAux(db); |
︙ | ︙ | |||
3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 | sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); } /* Load the built-in tokenizers into the hash table */ if( rc==SQLITE_OK ){ if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple) || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) #ifdef SQLITE_ENABLE_ICU || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu)) #endif ){ rc = SQLITE_NOMEM; } } | > > > > | 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 | sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); } /* Load the built-in tokenizers into the hash table */ if( rc==SQLITE_OK ){ if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple) || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) #ifndef SQLITE_DISABLE_FTS3_UNICODE || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode) #endif #ifdef SQLITE_ENABLE_ICU || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu)) #endif ){ rc = SQLITE_NOMEM; } } |
︙ | ︙ |
Changes to ext/fts3/fts3Int.h.
︙ | ︙ | |||
536 537 538 539 540 541 542 543 544 545 | int sqlite3Fts3MsrIncrNext( Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *); int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **); int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *); int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *); #endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */ #endif /* _FTSINT_H */ | > > > > | 536 537 538 539 540 541 542 543 544 545 546 547 548 549 | int sqlite3Fts3MsrIncrNext( Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *); int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **); int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *); int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *); /* fts3_unicode2.c (functions generated by parsing unicode text files) */ int sqlite3FtsUnicodeTolower(int); int sqlite3FtsUnicodeIsalnum(int); #endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */ #endif /* _FTSINT_H */ |
Added ext/fts3/fts3_unicode.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 | /* ** 2012 May 24 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** Implementation of the "unicode" full-text-search tokenizer. */ #ifndef SQLITE_DISABLE_FTS3_UNICODE #include "fts3Int.h" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #include <assert.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #include "fts3_tokenizer.h" /* ** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied ** from the sqlite3 source file utf.c. If this file is compiled as part ** of the amalgamation, they are not required. */ #ifndef SQLITE_AMALGAMATION static const unsigned char sqlite3Utf8Trans1[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, }; #define READ_UTF8(zIn, zTerm, c) \ c = *(zIn++); \ if( c>=0xc0 ){ \ c = sqlite3Utf8Trans1[c-0xc0]; \ while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ c = (c<<6) + (0x3f & *(zIn++)); \ } \ if( c<0x80 \ || (c&0xFFFFF800)==0xD800 \ || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ } #define WRITE_UTF8(zOut, c) { \ if( c<0x00080 ){ \ *zOut++ = (u8)(c&0xFF); \ } \ else if( c<0x00800 ){ \ *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \ *zOut++ = 0x80 + (u8)(c & 0x3F); \ } \ else if( c<0x10000 ){ \ *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \ *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ *zOut++ = 0x80 + (u8)(c & 0x3F); \ }else{ \ *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \ *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \ *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ *zOut++ = 0x80 + (u8)(c & 0x3F); \ } \ } #endif /* ifndef SQLITE_AMALGAMATION */ typedef struct unicode_tokenizer unicode_tokenizer; typedef struct unicode_cursor unicode_cursor; struct unicode_tokenizer { sqlite3_tokenizer base; }; struct unicode_cursor { sqlite3_tokenizer_cursor base; const unsigned char *aInput; /* Input text being tokenized */ int nInput; /* Size of aInput[] in bytes */ int iOff; /* Current offset within aInput[] */ int iToken; /* Index of next token to be returned */ char *zToken; /* storage for current token */ int nAlloc; /* space allocated at zToken */ }; /* ** Create a new tokenizer instance. */ static int unicodeCreate( int nArg, /* Size of array argv[] */ const char * const *azArg, /* Tokenizer creation arguments */ sqlite3_tokenizer **pp /* OUT: New tokenizer handle */ ){ unicode_tokenizer *pNew; /* New tokenizer object */ pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer)); if( pNew==NULL ){ return SQLITE_NOMEM; } memset(pNew, 0, sizeof(unicode_tokenizer)); *pp = &pNew->base; return SQLITE_OK; } /* ** Destroy a tokenizer allocated by unicodeCreate(). */ static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){ sqlite3_free(pTokenizer); return SQLITE_OK; } /* ** Prepare to begin tokenizing a particular string. The input ** string to be tokenized is pInput[0..nBytes-1]. A cursor ** used to incrementally tokenize this string is returned in ** *ppCursor. */ static int unicodeOpen( sqlite3_tokenizer *p, /* The tokenizer */ const char *aInput, /* Input string */ int nInput, /* Size of string aInput in bytes */ sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */ ){ unicode_cursor *pCsr; pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor)); if( pCsr==0 ){ return SQLITE_NOMEM; } memset(pCsr, 0, sizeof(unicode_cursor)); pCsr->aInput = (const unsigned char *)aInput; if( aInput==0 ){ pCsr->nInput = 0; }else if( nInput<0 ){ pCsr->nInput = (int)strlen(aInput); }else{ pCsr->nInput = nInput; } *pp = &pCsr->base; UNUSED_PARAMETER(p); return SQLITE_OK; } /* ** Close a tokenization cursor previously opened by a call to ** simpleOpen() above. */ static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){ unicode_cursor *pCsr = (unicode_cursor *) pCursor; sqlite3_free(pCsr->zToken); sqlite3_free(pCsr); return SQLITE_OK; } /* ** Extract the next token from a tokenization cursor. The cursor must ** have been opened by a prior call to simpleOpen(). */ static int unicodeNext( sqlite3_tokenizer_cursor *p, /* Cursor returned by simpleOpen */ const char **paToken, /* OUT: Token text */ int *pnToken, /* OUT: Number of bytes at *paToken */ int *piStart, /* OUT: Starting offset of token */ int *piEnd, /* OUT: Ending offset of token */ int *piPos /* OUT: Position integer of token */ ){ unicode_cursor *pCsr = (unicode_cursor *)p; int iCode; char *zOut; const unsigned char *z = &pCsr->aInput[pCsr->iOff]; const unsigned char *zStart = z; const unsigned char *zEnd; const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput]; /* Scan past any delimiter characters before the start of the next token. ** Return SQLITE_DONE early if this takes us all the way to the end of ** the input. */ while( z<zTerm ){ READ_UTF8(z, zTerm, iCode); if( sqlite3FtsUnicodeIsalnum(iCode) ) break; zStart = z; } if( zStart>=zTerm ) return SQLITE_DONE; zOut = pCsr->zToken; do { /* Grow the output buffer if required. */ if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){ char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64); if( !zNew ) return SQLITE_NOMEM; zOut = &zNew[zOut - pCsr->zToken]; pCsr->zToken = zNew; pCsr->nAlloc += 64; } /* Write the folded case of the last character read to the output */ zEnd = z; WRITE_UTF8(zOut, sqlite3FtsUnicodeTolower(iCode)); /* If the cursor is not at EOF, read the next character */ if( z>=zTerm ) break; READ_UTF8(z, zTerm, iCode); }while( sqlite3FtsUnicodeIsalnum(iCode) ); /* Set the output variables and return. */ pCsr->iOff = (z - pCsr->aInput); *paToken = pCsr->zToken; *pnToken = zOut - pCsr->zToken; *piStart = (zStart - pCsr->aInput); *piEnd = (zEnd - pCsr->aInput); *piPos = pCsr->iToken++; return SQLITE_OK; } /* ** Set *ppModule to a pointer to the sqlite3_tokenizer_module ** structure for the unicode tokenizer. */ void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){ static const sqlite3_tokenizer_module module = { 0, unicodeCreate, unicodeDestroy, unicodeOpen, unicodeClose, unicodeNext, 0, }; *ppModule = &module; } #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ #endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */ |
Added ext/fts3/fts3_unicode2.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 | /* ** 2012 May 25 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** */ /* ** DO NOT EDIT THIS MACHINE GENERATED FILE. */ #ifndef SQLITE_DISABLE_FTS3_UNICODE #include <assert.h> /* ** Return true if the argument corresponds to a unicode codepoint ** classified as either a letter or a number. Otherwise false. ** ** The results are undefined if the value passed to this function ** is less than zero. */ int sqlite3FtsUnicodeIsalnum(int c){ /* Each unsigned integer in the following array corresponds to a contiguous ** range of unicode codepoints that are not either letters or numbers (i.e. ** codepoints for which this function should return 0). ** ** The most significant 22 bits in each 32-bit value contain the first ** codepoint in the range. The least significant 10 bits are used to store ** the size of the range (always at least 1). In other words, the value ** ((C<<22) + N) represents a range of N codepoints starting with codepoint ** C. It is not possible to represent a range larger than 1023 codepoints ** using this format. */ const static unsigned int aEntry[] = { 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01, 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01, 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802, 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F, 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401, 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804, 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403, 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812, 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001, 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802, 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805, 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401, 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03, 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807, 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001, 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01, 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804, 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001, 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802, 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01, 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06, 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007, 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006, 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417, 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14, 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07, 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01, 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001, 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802, 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F, 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002, 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802, 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006, 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D, 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802, 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027, 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403, 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805, 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04, 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401, 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005, 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B, 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A, 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001, 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59, 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807, 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01, 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E, 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100, 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10, 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402, 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804, 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012, 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004, 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002, 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803, 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, 0x037FFC02, 0x03E3FC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01, 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001, 0x43FFF401, }; static const unsigned int aAscii[4] = { 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, }; if( c<128 ){ return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; int iRes; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; if( key >= aEntry[iTest] ){ iRes = iTest; iLo = iTest+1; }else{ iHi = iTest-1; } } assert( aEntry[0]<key ); assert( key>=aEntry[iRes] ); return (c >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); } return 1; } /* ** Interpret the argument as a unicode codepoint. If the codepoint ** is an upper case character that has a lower case equivalent, ** return the codepoint corresponding to the lower case version. ** Otherwise, return a copy of the argument. ** ** The results are undefined if the value passed to this function ** is less than zero. */ int sqlite3FtsUnicodeTolower(int c){ /* Each entry in the following array defines a rule for folding a range ** of codepoints to lower case. The rule applies to a range of nRange ** codepoints starting at codepoint iCode. ** ** If the least significant bit in flags is clear, then the rule applies ** to all nRange codepoints (i.e. all nRange codepoints are upper case and ** need to be folded). Or, if it is set, then the rule only applies to ** every second codepoint in the range, starting with codepoint C. ** ** The 7 most significant bits in flags are an index into the aiOff[] ** array. If a specific codepoint C does require folding, then its lower ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). ** ** The contents of this array are generated by parsing the CaseFolding.txt ** file distributed as part of the "Unicode Character Database". See ** http://www.unicode.org for details. */ static const struct TableEntry { unsigned short iCode; unsigned char flags; unsigned char nRange; } aEntry[] = { {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2}, {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2}, {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, {65313, 14, 26}, }; static const unsigned short aiOff[] = { 1, 2, 8, 15, 16, 26, 28, 32, 37, 38, 40, 48, 63, 64, 69, 71, 79, 80, 116, 202, 203, 205, 206, 207, 209, 210, 211, 213, 214, 217, 218, 219, 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 65514, 65521, 65527, 65528, 65529, }; int ret = c; assert( c>=0 ); assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); if( c<128 ){ if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); }else if( c<65536 ){ int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; int iRes = -1; while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; int cmp = (c - aEntry[iTest].iCode); if( cmp>=0 ){ iRes = iTest; iLo = iTest+1; }else{ iHi = iTest-1; } } assert( iRes<0 || c>=aEntry[iRes].iCode ); if( iRes>=0 ){ const struct TableEntry *p = &aEntry[iRes]; if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; assert( ret>0 ); } } } else if( c>=66560 && c<66600 ){ ret = c + 40; } return ret; } #endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */ |
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
3170 3171 3172 3173 3174 3175 3176 | if( sqlite3_step(pStmt)==SQLITE_ROW ){ fts3DecodeIntArray(nStat, a, sqlite3_column_blob(pStmt, 0), sqlite3_column_bytes(pStmt, 0)); }else{ memset(a, 0, sizeof(u32)*(nStat) ); } | | > > > > > | 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 | if( sqlite3_step(pStmt)==SQLITE_ROW ){ fts3DecodeIntArray(nStat, a, sqlite3_column_blob(pStmt, 0), sqlite3_column_bytes(pStmt, 0)); }else{ memset(a, 0, sizeof(u32)*(nStat) ); } rc = sqlite3_reset(pStmt); if( rc!=SQLITE_OK ){ sqlite3_free(a); *pRC = rc; return; } if( nChng<0 && a[0]<(u32)(-nChng) ){ a[0] = 0; }else{ a[0] += nChng; } for(i=0; i<p->nColumn+1; i++){ u32 x = a[i+1]; |
︙ | ︙ |
Added ext/fts3/unicode/CaseFolding.txt.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 | # CaseFolding-6.1.0.txt # Date: 2011-07-25, 21:21:56 GMT [MD] # # Unicode Character Database # Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # # Case Folding Properties # # This file is a supplement to the UnicodeData file. # It provides a case folding mapping generated from the Unicode Character Database. # If all characters are mapped according to the full mapping below, then # case differences (according to UnicodeData.txt and SpecialCasing.txt) # are eliminated. # # The data supports both implementations that require simple case foldings # (where string lengths don't change), and implementations that allow full case folding # (where string lengths may grow). Note that where they can be supported, the # full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. # # All code points not listed in this file map to themselves. # # NOTE: case folding does not preserve normalization formats! # # For information on case folding, including how to have case folding # preserve normalization formats, see Section 3.13 Default Case Algorithms in # The Unicode Standard, Version 5.0. # # ================================================================================ # Format # ================================================================================ # The entries in this file are in the following machine-readable format: # # <code>; <status>; <mapping>; # <name> # # The status field is: # C: common case folding, common mappings shared by both simple and full mappings. # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. # S: simple case folding, mappings to single characters where different from F. # T: special case for uppercase I and dotted uppercase I # - For non-Turkic languages, this mapping is normally not used. # - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. # Note that the Turkic mappings do not maintain canonical equivalence without additional processing. # See the discussions of case mapping in the Unicode Standard for more information. # # Usage: # A. To do a simple case folding, use the mappings with status C + S. # B. To do a full case folding, use the mappings with status C + F. # # The mappings with status T can be used or omitted depending on the desired case-folding # behavior. (The default option is to exclude them.) # # ================================================================= # Property: Case_Folding # All code points not explicitly listed for Case_Folding # have the value C for the status field, and the code point itself for the mapping field. # @missing: 0000..10FFFF; C; <code point> # ================================================================= 0041; C; 0061; # LATIN CAPITAL LETTER A 0042; C; 0062; # LATIN CAPITAL LETTER B 0043; C; 0063; # LATIN CAPITAL LETTER C 0044; C; 0064; # LATIN CAPITAL LETTER D 0045; C; 0065; # LATIN CAPITAL LETTER E 0046; C; 0066; # LATIN CAPITAL LETTER F 0047; C; 0067; # LATIN CAPITAL LETTER G 0048; C; 0068; # LATIN CAPITAL LETTER H 0049; C; 0069; # LATIN CAPITAL LETTER I 0049; T; 0131; # LATIN CAPITAL LETTER I 004A; C; 006A; # LATIN CAPITAL LETTER J 004B; C; 006B; # LATIN CAPITAL LETTER K 004C; C; 006C; # LATIN CAPITAL LETTER L 004D; C; 006D; # LATIN CAPITAL LETTER M 004E; C; 006E; # LATIN CAPITAL LETTER N 004F; C; 006F; # LATIN CAPITAL LETTER O 0050; C; 0070; # LATIN CAPITAL LETTER P 0051; C; 0071; # LATIN CAPITAL LETTER Q 0052; C; 0072; # LATIN CAPITAL LETTER R 0053; C; 0073; # LATIN CAPITAL LETTER S 0054; C; 0074; # LATIN CAPITAL LETTER T 0055; C; 0075; # LATIN CAPITAL LETTER U 0056; C; 0076; # LATIN CAPITAL LETTER V 0057; C; 0077; # LATIN CAPITAL LETTER W 0058; C; 0078; # LATIN CAPITAL LETTER X 0059; C; 0079; # LATIN CAPITAL LETTER Y 005A; C; 007A; # LATIN CAPITAL LETTER Z 00B5; C; 03BC; # MICRO SIGN 00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE 00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE 00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE 00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS 00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE 00C6; C; 00E6; # LATIN CAPITAL LETTER AE 00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA 00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE 00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE 00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS 00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE 00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE 00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS 00D0; C; 00F0; # LATIN CAPITAL LETTER ETH 00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE 00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE 00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE 00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE 00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS 00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE 00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE 00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE 00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS 00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE 00DE; C; 00FE; # LATIN CAPITAL LETTER THORN 00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S 0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON 0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE 0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK 0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE 0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX 010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE 010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON 010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON 0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE 0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON 0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE 0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE 0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK 011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON 011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX 011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE 0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE 0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA 0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX 0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE 0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE 012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON 012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE 012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE 0132; C; 0133; # LATIN CAPITAL LIGATURE IJ 0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX 0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA 0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE 013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA 013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON 013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT 0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE 0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE 0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA 0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON 0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 014A; C; 014B; # LATIN CAPITAL LETTER ENG 014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON 014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE 0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 0152; C; 0153; # LATIN CAPITAL LIGATURE OE 0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE 0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA 0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON 015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE 015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX 015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA 0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON 0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA 0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON 0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE 0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE 016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON 016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE 016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE 0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK 0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX 0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX 0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS 0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE 017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE 017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON 017F; C; 0073; # LATIN SMALL LETTER LONG S 0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK 0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR 0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX 0186; C; 0254; # LATIN CAPITAL LETTER OPEN O 0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK 0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D 018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK 018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR 018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E 018F; C; 0259; # LATIN CAPITAL LETTER SCHWA 0190; C; 025B; # LATIN CAPITAL LETTER OPEN E 0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK 0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK 0194; C; 0263; # LATIN CAPITAL LETTER GAMMA 0196; C; 0269; # LATIN CAPITAL LETTER IOTA 0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE 0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK 019C; C; 026F; # LATIN CAPITAL LETTER TURNED M 019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK 019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE 01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN 01A2; C; 01A3; # LATIN CAPITAL LETTER OI 01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK 01A6; C; 0280; # LATIN LETTER YR 01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO 01A9; C; 0283; # LATIN CAPITAL LETTER ESH 01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK 01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK 01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN 01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON 01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK 01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK 01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE 01B7; C; 0292; # LATIN CAPITAL LETTER EZH 01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED 01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE 01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON 01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON 01C7; C; 01C9; # LATIN CAPITAL LETTER LJ 01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J 01CA; C; 01CC; # LATIN CAPITAL LETTER NJ 01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J 01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON 01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON 01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON 01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON 01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON 01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE 01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON 01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE 01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON 01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON 01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON 01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE 01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON 01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON 01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK 01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON 01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON 01F1; C; 01F3; # LATIN CAPITAL LETTER DZ 01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z 01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE 01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR 01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN 01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE 01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE 01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE 01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE 0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE 0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE 0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE 0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE 0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE 020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE 020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE 020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE 0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE 0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE 0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE 0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE 0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW 021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW 021C; C; 021D; # LATIN CAPITAL LETTER YOGH 021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON 0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG 0222; C; 0223; # LATIN CAPITAL LETTER OU 0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK 0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE 0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA 022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON 022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON 022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE 0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON 0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON 023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE 023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE 023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR 023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE 0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP 0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE 0244; C; 0289; # LATIN CAPITAL LETTER U BAR 0245; C; 028C; # LATIN CAPITAL LETTER TURNED V 0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE 0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE 024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL 024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE 024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE 0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI 0370; C; 0371; # GREEK CAPITAL LETTER HETA 0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI 0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA 0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS 0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS 0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS 038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS 038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS 038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS 038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS 0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA 0392; C; 03B2; # GREEK CAPITAL LETTER BETA 0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA 0394; C; 03B4; # GREEK CAPITAL LETTER DELTA 0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON 0396; C; 03B6; # GREEK CAPITAL LETTER ZETA 0397; C; 03B7; # GREEK CAPITAL LETTER ETA 0398; C; 03B8; # GREEK CAPITAL LETTER THETA 0399; C; 03B9; # GREEK CAPITAL LETTER IOTA 039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA 039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA 039C; C; 03BC; # GREEK CAPITAL LETTER MU 039D; C; 03BD; # GREEK CAPITAL LETTER NU 039E; C; 03BE; # GREEK CAPITAL LETTER XI 039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON 03A0; C; 03C0; # GREEK CAPITAL LETTER PI 03A1; C; 03C1; # GREEK CAPITAL LETTER RHO 03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA 03A4; C; 03C4; # GREEK CAPITAL LETTER TAU 03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON 03A6; C; 03C6; # GREEK CAPITAL LETTER PHI 03A7; C; 03C7; # GREEK CAPITAL LETTER CHI 03A8; C; 03C8; # GREEK CAPITAL LETTER PSI 03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA 03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA 03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA 03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA 03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL 03D0; C; 03B2; # GREEK BETA SYMBOL 03D1; C; 03B8; # GREEK THETA SYMBOL 03D5; C; 03C6; # GREEK PHI SYMBOL 03D6; C; 03C0; # GREEK PI SYMBOL 03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA 03DA; C; 03DB; # GREEK LETTER STIGMA 03DC; C; 03DD; # GREEK LETTER DIGAMMA 03DE; C; 03DF; # GREEK LETTER KOPPA 03E0; C; 03E1; # GREEK LETTER SAMPI 03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI 03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI 03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI 03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI 03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA 03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA 03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI 03F0; C; 03BA; # GREEK KAPPA SYMBOL 03F1; C; 03C1; # GREEK RHO SYMBOL 03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL 03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL 03F7; C; 03F8; # GREEK CAPITAL LETTER SHO 03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL 03FA; C; 03FB; # GREEK CAPITAL LETTER SAN 03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL 03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL 03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL 0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE 0401; C; 0451; # CYRILLIC CAPITAL LETTER IO 0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE 0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE 0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE 0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I 0407; C; 0457; # CYRILLIC CAPITAL LETTER YI 0408; C; 0458; # CYRILLIC CAPITAL LETTER JE 0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE 040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE 040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE 040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE 040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE 040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U 040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE 0410; C; 0430; # CYRILLIC CAPITAL LETTER A 0411; C; 0431; # CYRILLIC CAPITAL LETTER BE 0412; C; 0432; # CYRILLIC CAPITAL LETTER VE 0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE 0414; C; 0434; # CYRILLIC CAPITAL LETTER DE 0415; C; 0435; # CYRILLIC CAPITAL LETTER IE 0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE 0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE 0418; C; 0438; # CYRILLIC CAPITAL LETTER I 0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I 041A; C; 043A; # CYRILLIC CAPITAL LETTER KA 041B; C; 043B; # CYRILLIC CAPITAL LETTER EL 041C; C; 043C; # CYRILLIC CAPITAL LETTER EM 041D; C; 043D; # CYRILLIC CAPITAL LETTER EN 041E; C; 043E; # CYRILLIC CAPITAL LETTER O 041F; C; 043F; # CYRILLIC CAPITAL LETTER PE 0420; C; 0440; # CYRILLIC CAPITAL LETTER ER 0421; C; 0441; # CYRILLIC CAPITAL LETTER ES 0422; C; 0442; # CYRILLIC CAPITAL LETTER TE 0423; C; 0443; # CYRILLIC CAPITAL LETTER U 0424; C; 0444; # CYRILLIC CAPITAL LETTER EF 0425; C; 0445; # CYRILLIC CAPITAL LETTER HA 0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE 0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE 0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA 0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA 042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN 042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU 042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN 042D; C; 044D; # CYRILLIC CAPITAL LETTER E 042E; C; 044E; # CYRILLIC CAPITAL LETTER YU 042F; C; 044F; # CYRILLIC CAPITAL LETTER YA 0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA 0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT 0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E 0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS 0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS 046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS 046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS 046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI 0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI 0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA 0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA 0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT 0478; C; 0479; # CYRILLIC CAPITAL LETTER UK 047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA 047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO 047E; C; 047F; # CYRILLIC CAPITAL LETTER OT 0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA 048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL 048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN 048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK 0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN 0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE 0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK 0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER 0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER 049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER 049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE 049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE 04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA 04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER 04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE 04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK 04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA 04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER 04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER 04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U 04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE 04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER 04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE 04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER 04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE 04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA 04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE 04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER 04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA 04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE 04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK 04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL 04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK 04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL 04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE 04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL 04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE 04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS 04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE 04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE 04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA 04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS 04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS 04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS 04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE 04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON 04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS 04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS 04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O 04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS 04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS 04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON 04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS 04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE 04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS 04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER 04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS 04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK 04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK 04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE 0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE 0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE 0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE 0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE 0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE 050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE 050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE 050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE 0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE 0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK 0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA 0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA 0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE 051A; C; 051B; # CYRILLIC CAPITAL LETTER QA 051C; C; 051D; # CYRILLIC CAPITAL LETTER WE 051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA 0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK 0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK 0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER 0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB 0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN 0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM 0534; C; 0564; # ARMENIAN CAPITAL LETTER DA 0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH 0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA 0537; C; 0567; # ARMENIAN CAPITAL LETTER EH 0538; C; 0568; # ARMENIAN CAPITAL LETTER ET 0539; C; 0569; # ARMENIAN CAPITAL LETTER TO 053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE 053B; C; 056B; # ARMENIAN CAPITAL LETTER INI 053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN 053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH 053E; C; 056E; # ARMENIAN CAPITAL LETTER CA 053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN 0540; C; 0570; # ARMENIAN CAPITAL LETTER HO 0541; C; 0571; # ARMENIAN CAPITAL LETTER JA 0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD 0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH 0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN 0545; C; 0575; # ARMENIAN CAPITAL LETTER YI 0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW 0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA 0548; C; 0578; # ARMENIAN CAPITAL LETTER VO 0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA 054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH 054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH 054C; C; 057C; # ARMENIAN CAPITAL LETTER RA 054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH 054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW 054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN 0550; C; 0580; # ARMENIAN CAPITAL LETTER REH 0551; C; 0581; # ARMENIAN CAPITAL LETTER CO 0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN 0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR 0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH 0555; C; 0585; # ARMENIAN CAPITAL LETTER OH 0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH 0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN 10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN 10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN 10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN 10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON 10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN 10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN 10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN 10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN 10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN 10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN 10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS 10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN 10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR 10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON 10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR 10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR 10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE 10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN 10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR 10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN 10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR 10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR 10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN 10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR 10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN 10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN 10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN 10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL 10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL 10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR 10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN 10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN 10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE 10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE 10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE 10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE 10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR 10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE 10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN 10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW 1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW 1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE 1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE 1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW 1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW 1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA 1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW 1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE 1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE 1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW 1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW 1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE 1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE 1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON 1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE 1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW 1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS 1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA 1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW 1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW 1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE 1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE 1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW 1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW 1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW 1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON 1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW 1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW 1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE 1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE 1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW 1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE 1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW 1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW 1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW 1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE 1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS 1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE 1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE 1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE 1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE 1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE 1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW 1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON 1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW 1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE 1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW 1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE 1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE 1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE 1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE 1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW 1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW 1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW 1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW 1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW 1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW 1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE 1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS 1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE 1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW 1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE 1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE 1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS 1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE 1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW 1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE 1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS 1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE 1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX 1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW 1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW 1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW 1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS 1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE 1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE 1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING 1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE 1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S 1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S 1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW 1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE 1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE 1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE 1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE 1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE 1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW 1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE 1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE 1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE 1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE 1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW 1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW 1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE 1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE 1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE 1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE 1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE 1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE 1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW 1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE 1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW 1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW 1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE 1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE 1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE 1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE 1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE 1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW 1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE 1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE 1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE 1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE 1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW 1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW 1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE 1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE 1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE 1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE 1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE 1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW 1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE 1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW 1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE 1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE 1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL 1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V 1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP 1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI 1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA 1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA 1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA 1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA 1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA 1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI 1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI 1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI 1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA 1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA 1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA 1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA 1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI 1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA 1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA 1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA 1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA 1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA 1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI 1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI 1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI 1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA 1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA 1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA 1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA 1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA 1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI 1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI 1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI 1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA 1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA 1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA 1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA 1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA 1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI 1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA 1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA 1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI 1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA 1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA 1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA 1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI 1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI 1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA 1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA 1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA 1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA 1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA 1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI 1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI 1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI 1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI 1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI 1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI 1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI 1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI 1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI 1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI 1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI 1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI 1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY 1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON 1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA 1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA 1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 1FBE; C; 03B9; # GREEK PROSGEGRAMMENI 1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI 1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI 1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI 1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI 1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA 1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA 1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA 1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA 1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI 1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY 1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON 1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA 1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA 1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI 1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI 1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY 1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON 1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA 1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA 1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA 1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI 1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI 1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI 1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI 1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA 1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA 1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA 1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA 1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2126; C; 03C9; # OHM SIGN 212A; C; 006B; # KELVIN SIGN 212B; C; 00E5; # ANGSTROM SIGN 2132; C; 214E; # TURNED CAPITAL F 2160; C; 2170; # ROMAN NUMERAL ONE 2161; C; 2171; # ROMAN NUMERAL TWO 2162; C; 2172; # ROMAN NUMERAL THREE 2163; C; 2173; # ROMAN NUMERAL FOUR 2164; C; 2174; # ROMAN NUMERAL FIVE 2165; C; 2175; # ROMAN NUMERAL SIX 2166; C; 2176; # ROMAN NUMERAL SEVEN 2167; C; 2177; # ROMAN NUMERAL EIGHT 2168; C; 2178; # ROMAN NUMERAL NINE 2169; C; 2179; # ROMAN NUMERAL TEN 216A; C; 217A; # ROMAN NUMERAL ELEVEN 216B; C; 217B; # ROMAN NUMERAL TWELVE 216C; C; 217C; # ROMAN NUMERAL FIFTY 216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED 216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED 216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND 2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED 24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A 24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B 24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C 24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D 24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E 24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F 24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G 24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H 24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I 24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J 24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K 24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L 24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M 24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N 24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O 24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P 24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q 24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R 24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S 24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T 24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U 24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V 24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W 24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X 24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y 24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z 2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU 2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY 2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE 2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI 2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO 2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU 2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE 2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO 2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA 2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE 2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE 2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I 2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI 2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO 2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE 2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE 2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI 2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU 2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI 2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI 2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO 2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO 2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU 2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU 2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU 2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU 2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE 2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA 2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI 2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI 2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA 2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU 2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI 2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI 2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA 2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU 2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS 2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL 2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO 2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS 2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS 2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS 2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA 2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA 2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC 2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A 2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR 2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE 2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE 2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL 2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER 2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER 2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER 2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA 2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK 2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A 2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA 2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK 2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H 2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL 2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL 2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA 2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA 2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA 2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA 2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE 2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU 2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA 2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE 2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE 2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA 2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA 2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA 2C98; C; 2C99; # COPTIC CAPITAL LETTER MI 2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI 2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI 2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O 2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI 2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO 2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA 2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU 2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA 2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI 2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI 2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI 2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU 2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF 2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN 2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE 2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA 2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI 2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI 2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU 2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI 2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI 2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI 2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH 2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI 2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI 2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI 2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA 2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA 2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI 2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT 2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA 2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA 2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA 2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA 2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI 2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI 2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA 2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O A680; C; A681; # CYRILLIC CAPITAL LETTER DWE A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE A694; C; A695; # CYRILLIC CAPITAL LETTER HWE A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN A726; C; A727; # LATIN CAPITAL LETTER HENG A728; C; A729; # LATIN CAPITAL LETTER TZ A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA A732; C; A733; # LATIN CAPITAL LETTER AA A734; C; A735; # LATIN CAPITAL LETTER AO A736; C; A737; # LATIN CAPITAL LETTER AU A738; C; A739; # LATIN CAPITAL LETTER AV A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR A73C; C; A73D; # LATIN CAPITAL LETTER AY A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE A746; C; A747; # LATIN CAPITAL LETTER BROKEN L A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP A74E; C; A74F; # LATIN CAPITAL LETTER OO A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE A760; C; A761; # LATIN CAPITAL LETTER VY A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER A768; C; A769; # LATIN CAPITAL LETTER VEND A76A; C; A76B; # LATIN CAPITAL LETTER ET A76C; C; A76D; # LATIN CAPITAL LETTER IS A76E; C; A76F; # LATIN CAPITAL LETTER CON A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G A780; C; A781; # LATIN CAPITAL LETTER TURNED L A782; C; A783; # LATIN CAPITAL LETTER INSULAR R A784; C; A785; # LATIN CAPITAL LETTER INSULAR S A786; C; A787; # LATIN CAPITAL LETTER INSULAR T A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 10400; C; 10428; # DESERET CAPITAL LETTER LONG I 10401; C; 10429; # DESERET CAPITAL LETTER LONG E 10402; C; 1042A; # DESERET CAPITAL LETTER LONG A 10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH 10404; C; 1042C; # DESERET CAPITAL LETTER LONG O 10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO 10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I 10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E 10408; C; 10430; # DESERET CAPITAL LETTER SHORT A 10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH 1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O 1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO 1040C; C; 10434; # DESERET CAPITAL LETTER AY 1040D; C; 10435; # DESERET CAPITAL LETTER OW 1040E; C; 10436; # DESERET CAPITAL LETTER WU 1040F; C; 10437; # DESERET CAPITAL LETTER YEE 10410; C; 10438; # DESERET CAPITAL LETTER H 10411; C; 10439; # DESERET CAPITAL LETTER PEE 10412; C; 1043A; # DESERET CAPITAL LETTER BEE 10413; C; 1043B; # DESERET CAPITAL LETTER TEE 10414; C; 1043C; # DESERET CAPITAL LETTER DEE 10415; C; 1043D; # DESERET CAPITAL LETTER CHEE 10416; C; 1043E; # DESERET CAPITAL LETTER JEE 10417; C; 1043F; # DESERET CAPITAL LETTER KAY 10418; C; 10440; # DESERET CAPITAL LETTER GAY 10419; C; 10441; # DESERET CAPITAL LETTER EF 1041A; C; 10442; # DESERET CAPITAL LETTER VEE 1041B; C; 10443; # DESERET CAPITAL LETTER ETH 1041C; C; 10444; # DESERET CAPITAL LETTER THEE 1041D; C; 10445; # DESERET CAPITAL LETTER ES 1041E; C; 10446; # DESERET CAPITAL LETTER ZEE 1041F; C; 10447; # DESERET CAPITAL LETTER ESH 10420; C; 10448; # DESERET CAPITAL LETTER ZHEE 10421; C; 10449; # DESERET CAPITAL LETTER ER 10422; C; 1044A; # DESERET CAPITAL LETTER EL 10423; C; 1044B; # DESERET CAPITAL LETTER EM 10424; C; 1044C; # DESERET CAPITAL LETTER EN 10425; C; 1044D; # DESERET CAPITAL LETTER ENG 10426; C; 1044E; # DESERET CAPITAL LETTER OI 10427; C; 1044F; # DESERET CAPITAL LETTER EW |
Added ext/fts3/unicode/UnicodeData.txt.
more than 10,000 changes
Added ext/fts3/unicode/mkunicode.tcl.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 | # Parameter $zName must be a path to the file UnicodeData.txt. This command # reads the file and returns a list of codepoints (integers). The list # contains all codepoints in the UnicodeData.txt assigned to any "General # Category" that is not a "Letter" or "Number". # proc an_load_unicodedata_text {zName} { set fd [open $zName] set lField { code character_name general_category canonical_combining_classes bidirectional_category character_decomposition_mapping decimal_digit_value digit_value numeric_value mirrored unicode_1_name iso10646_comment_field uppercase_mapping lowercase_mapping titlecase_mapping } set lRet [list] while { ![eof $fd] } { set line [gets $fd] if {$line == ""} continue set fields [split $line ";"] if {[llength $fields] != [llength $lField]} { error "parse error: $line" } foreach $lField $fields {} set iCode [expr "0x$code"] set bAlnum [expr {[lsearch {L N} [string range $general_category 0 0]]>=0}] if { !$bAlnum } { lappend lRet $iCode } } close $fd set lRet } proc an_load_separator_ranges {} { global unicodedata.txt set lSep [an_load_unicodedata_text ${unicodedata.txt}] unset -nocomplain iFirst unset -nocomplain nRange set lRange [list] foreach sep $lSep { if {0==[info exists iFirst]} { set iFirst $sep set nRange 1 } elseif { $sep == ($iFirst+$nRange) } { incr nRange } else { lappend lRange [list $iFirst $nRange] set iFirst $sep set nRange 1 } } lappend lRange [list $iFirst $nRange] set lRange } proc an_print_range_array {lRange} { set iFirstMax 0 set nRangeMax 0 foreach range $lRange { foreach {iFirst nRange} $range {} if {$iFirst > $iFirstMax} {set iFirstMax $iFirst} if {$nRange > $nRangeMax} {set nRangeMax $nRange} } if {$iFirstMax >= (1<<22)} {error "first-max is too large for format"} if {$nRangeMax >= (1<<10)} {error "range-max is too large for format"} puts -nonewline " " puts [string trim { /* Each unsigned integer in the following array corresponds to a contiguous ** range of unicode codepoints that are not either letters or numbers (i.e. ** codepoints for which this function should return 0). ** ** The most significant 22 bits in each 32-bit value contain the first ** codepoint in the range. The least significant 10 bits are used to store ** the size of the range (always at least 1). In other words, the value ** ((C<<22) + N) represents a range of N codepoints starting with codepoint ** C. It is not possible to represent a range larger than 1023 codepoints ** using this format. */ }] puts -nonewline " const static unsigned int aEntry\[\] = \{" set i 0 foreach range $lRange { foreach {iFirst nRange} $range {} set u32 [format "0x%08X" [expr ($iFirst<<10) + $nRange]] if {($i % 5)==0} {puts "" ; puts -nonewline " "} puts -nonewline " $u32," incr i } puts "" puts " \};" } proc an_print_ascii_bitmap {lRange} { foreach range $lRange { foreach {iFirst nRange} $range {} for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { if {$i<=127} { set a($i) 1 } } } set aAscii [list 0 0 0 0] foreach key [array names a] { set idx [expr $key >> 5] lset aAscii $idx [expr [lindex $aAscii $idx] | (1 << ($key&0x001F))] } puts " static const unsigned int aAscii\[4\] = \{" puts -nonewline " " foreach v $aAscii { puts -nonewline [format " 0x%08X," $v] } puts "" puts " \};" } proc print_isalnum {zFunc lRange} { puts "/*" puts "** Return true if the argument corresponds to a unicode codepoint" puts "** classified as either a letter or a number. Otherwise false." puts "**" puts "** The results are undefined if the value passed to this function" puts "** is less than zero." puts "*/" puts "int ${zFunc}\(int c)\{" an_print_range_array $lRange an_print_ascii_bitmap $lRange puts { if( c<128 ){ return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; int iRes; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; if( key >= aEntry[iTest] ){ iRes = iTest; iLo = iTest+1; }else{ iHi = iTest-1; } } assert( aEntry[0]<key ); assert( key>=aEntry[iRes] ); return (c >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); } return 1;} puts "\}" } proc print_test_isalnum {zFunc lRange} { foreach range $lRange { foreach {iFirst nRange} $range {} for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { set a($i) 1 } } puts "static int isalnum_test(int *piCode)\{" puts -nonewline " unsigned char aAlnum\[\] = \{" for {set i 0} {$i < 70000} {incr i} { if {($i % 32)==0} { puts "" ; puts -nonewline " " } set bFlag [expr ![info exists a($i)]] puts -nonewline "${bFlag}," } puts "" puts " \};" puts -nonewline " int aLargeSep\[\] = \{" set i 0 foreach iSep [lsort -integer [array names a]] { if {$iSep<70000} continue if {($i % 8)==0} { puts "" ; puts -nonewline " " } puts -nonewline " $iSep," incr i } puts "" puts " \};" puts -nonewline " int aLargeOther\[\] = \{" set i 0 foreach iSep [lsort -integer [array names a]] { if {$iSep<70000} continue if {[info exists a([expr $iSep-1])]==0} { if {($i % 8)==0} { puts "" ; puts -nonewline " " } puts -nonewline " [expr $iSep-1]," incr i } if {[info exists a([expr $iSep+1])]==0} { if {($i % 8)==0} { puts "" ; puts -nonewline " " } puts -nonewline " [expr $iSep+1]," incr i } } puts "" puts " \};" puts [subst -nocommands { int i; for(i=0; i<sizeof(aAlnum)/sizeof(aAlnum[0]); i++){ if( ${zFunc}(i)!=aAlnum[i] ){ *piCode = i; return 1; } } for(i=0; i<sizeof(aLargeSep)/sizeof(aLargeSep[0]); i++){ if( ${zFunc}(aLargeSep[i])!=0 ){ *piCode = aLargeSep[i]; return 1; } } for(i=0; i<sizeof(aLargeOther)/sizeof(aLargeOther[0]); i++){ if( ${zFunc}(aLargeOther[i])!=1 ){ *piCode = aLargeOther[i]; return 1; } } }] puts " return 0;" puts "\}" } #------------------------------------------------------------------------- proc tl_load_casefolding_txt {zName} { global tl_lookup_table set fd [open $zName] while { ![eof $fd] } { set line [gets $fd] if {[string range $line 0 0] == "#"} continue if {$line == ""} continue foreach x {a b c d} {unset -nocomplain $x} foreach {a b c d} [split $line ";"] {} set a2 [list] set c2 [list] foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] } foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] } set b [string trim $b] set d [string trim $d] if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 } } } proc tl_create_records {} { global tl_lookup_table set iFirst "" set nOff 0 set nRange 0 set nIncr 0 set lRecord [list] foreach code [lsort -integer [array names tl_lookup_table]] { set mapping $tl_lookup_table($code) if {$iFirst == ""} { set iFirst $code set nOff [expr $mapping - $code] set nRange 1 set nIncr 1 } else { set diff [expr $code - ($iFirst + ($nIncr * ($nRange - 1)))] if { $nRange==1 && ($diff==1 || $diff==2) } { set nIncr $diff } if {$diff != $nIncr || ($mapping - $code)!=$nOff} { if { $nRange==1 } {set nIncr 1} lappend lRecord [list $iFirst $nIncr $nRange $nOff] set iFirst $code set nOff [expr $mapping - $code] set nRange 1 set nIncr 1 } else { incr nRange } } } lappend lRecord [list $iFirst $nIncr $nRange $nOff] set lRecord } proc tl_print_table_header {} { puts -nonewline " " puts [string trim { /* Each entry in the following array defines a rule for folding a range ** of codepoints to lower case. The rule applies to a range of nRange ** codepoints starting at codepoint iCode. ** ** If the least significant bit in flags is clear, then the rule applies ** to all nRange codepoints (i.e. all nRange codepoints are upper case and ** need to be folded). Or, if it is set, then the rule only applies to ** every second codepoint in the range, starting with codepoint C. ** ** The 7 most significant bits in flags are an index into the aiOff[] ** array. If a specific codepoint C does require folding, then its lower ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). ** ** The contents of this array are generated by parsing the CaseFolding.txt ** file distributed as part of the "Unicode Character Database". See ** http://www.unicode.org for details. */ }] puts " static const struct TableEntry \{" puts " unsigned short iCode;" puts " unsigned char flags;" puts " unsigned char nRange;" puts " \} aEntry\[\] = \{" } proc tl_print_table_entry {togglevar entry liOff} { upvar $togglevar t foreach {iFirst nIncr nRange nOff} $entry {} if {$iFirst > (1<<16)} { return 1 } if {[info exists t]==0} {set t 0} if {$t==0} { puts -nonewline " " } set flags 0 if {$nIncr==2} { set flags 1 ; set nRange [expr $nRange * 2]} if {$nOff<0} { incr nOff [expr (1<<16)] } set idx [lsearch $liOff $nOff] if {$idx<0} {error "malfunction generating aiOff"} set flags [expr $flags + $idx*2] set txt "{$iFirst, $flags, $nRange}," if {$t==2} { puts $txt } else { puts -nonewline [format "% -23s" $txt] } set t [expr ($t+1)%3] return 0 } proc tl_print_table_footer {togglevar} { upvar $togglevar t if {$t!=0} {puts ""} puts " \};" } proc tl_print_if_entry {entry} { foreach {iFirst nIncr nRange nOff} $entry {} if {$nIncr==2} {error "tl_print_if_entry needs improvement!"} puts " else if( c>=$iFirst && c<[expr $iFirst+$nRange] )\{" puts " ret = c + $nOff;" puts " \}" } proc tl_generate_ioff_table {lRecord} { foreach entry $lRecord { foreach {iFirst nIncr nRange iOff} $entry {} if {$iOff<0} { incr iOff [expr (1<<16)] } if {[info exists a($iOff)]} continue set a($iOff) 1 } set liOff [lsort -integer [array names a]] if {[llength $liOff]>128} { error "Too many distinct ioffs" } return $liOff } proc tl_print_ioff_table {liOff} { puts -nonewline " static const unsigned short aiOff\[\] = \{" set i 0 foreach off $liOff { if {($i % 8)==0} {puts "" ; puts -nonewline " "} puts -nonewline [format "% -7s" "$off,"] incr i } puts "" puts " \};" } proc print_tolower {zFunc} { set lRecord [tl_create_records] set lHigh [list] puts "/*" puts "** Interpret the argument as a unicode codepoint. If the codepoint" puts "** is an upper case character that has a lower case equivalent," puts "** return the codepoint corresponding to the lower case version." puts "** Otherwise, return a copy of the argument." puts "**" puts "** The results are undefined if the value passed to this function" puts "** is less than zero." puts "*/" puts "int ${zFunc}\(int c)\{" set liOff [tl_generate_ioff_table $lRecord] tl_print_table_header foreach entry $lRecord { if {[tl_print_table_entry toggle $entry $liOff]} { lappend lHigh $entry } } tl_print_table_footer toggle tl_print_ioff_table $liOff puts { int ret = c; assert( c>=0 ); assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); if( c<128 ){ if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); }else if( c<65536 ){ int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; int iRes = -1; while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; int cmp = (c - aEntry[iTest].iCode); if( cmp>=0 ){ iRes = iTest; iLo = iTest+1; }else{ iHi = iTest-1; } } assert( iRes<0 || c>=aEntry[iRes].iCode ); if( iRes>=0 ){ const struct TableEntry *p = &aEntry[iRes]; if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; assert( ret>0 ); } } } } foreach entry $lHigh { tl_print_if_entry $entry } puts "" puts " return ret;" puts "\}" } proc print_tolower_test {zFunc} { global tl_lookup_table puts "static int tolower_test(int *piCode)\{" puts -nonewline " static int aLookup\[\] = \{" for {set i 0} {$i < 70000} {incr i} { set expected $i catch { set expected $tl_lookup_table($i) } if {($i % 8)==0} { puts "" ; puts -nonewline " " } puts -nonewline "$expected, " } puts " \};" puts " int i;" puts " for(i=0; i<sizeof(aLookup)/sizeof(aLookup\[0\]); i++)\{" puts " if( ${zFunc}\(i)!=aLookup\[i\] )\{" puts " *piCode = i;" puts " return 1;" puts " \}" puts " \}" puts " return 0;" puts "\}" } proc print_fileheader {} { puts [string trim { /* ** 2012 May 25 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** */ /* ** DO NOT EDIT THIS MACHINE GENERATED FILE. */ }] puts "" puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE" puts "" puts "#include <assert.h>" puts "" } proc print_test_main {} { puts "" puts "#include <stdio.h>" puts "" puts "int main(int argc, char **argv)\{" puts " int r1, r2;" puts " int code;" puts " r1 = isalnum_test(&code);" puts " if( r1 ) printf(\"isalnum(): Problem with code %d\\n\",code);" puts " else printf(\"isalnum(): test passed\\n\");" puts " r2 = tolower_test(&code);" puts " if( r2 ) printf(\"tolower(): Problem with code %d\\n\",code);" puts " else printf(\"tolower(): test passed\\n\");" puts " return (r1 || r2);" puts "\}" } # Proces the command line arguments. Exit early if they are not to # our liking. # proc usage {} { puts -nonewline stderr "Usage: $::argv0 ?-test? i" puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>" exit 1 } if {[llength $argv]!=2 && [llength $argv]!=3} usage if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage set unicodedata.txt [lindex $argv end] set casefolding.txt [lindex $argv end-1] set generate_test_code [expr {[llength $argv]==3}] # Print the isalnum() function to stdout. # print_fileheader set lRange [an_load_separator_ranges] print_isalnum sqlite3FtsUnicodeIsalnum $lRange # Leave a gap between the two generated C functions. # puts "" puts "" # Print the tolower() function to stdout. # tl_load_casefolding_txt ${casefolding.txt} print_tolower sqlite3FtsUnicodeTolower # Print the test routines and main() function to stdout, if -test # was specified. # if {$::generate_test_code} { print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange print_tolower_test sqlite3FtsUnicodeTolower print_test_main } puts "#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */" |
Changes to main.mk.
︙ | ︙ | |||
51 52 53 54 55 56 57 58 59 60 61 62 63 64 | # Object files for the SQLite library. # LIBOBJ+= alter.o analyze.o attach.o auth.o \ backup.o bitvec.o btmutex.o btree.o build.o \ callback.o complete.o ctime.o date.o delete.o expr.o fault.o fkey.o \ fts3.o fts3_aux.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \ fts3_snippet.o fts3_tokenizer.o fts3_tokenizer1.o \ fts3_write.o func.o global.o hash.o \ icu.o insert.o journal.o legacy.o loadext.o \ main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \ memjournal.o \ mutex.o mutex_noop.o mutex_os2.o mutex_unix.o mutex_w32.o \ notify.o opcodes.o os.o os_os2.o os_unix.o os_win.o \ pager.o parse.o pcache.o pcache1.o pragma.o prepare.o printf.o \ | > | 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | # Object files for the SQLite library. # LIBOBJ+= alter.o analyze.o attach.o auth.o \ backup.o bitvec.o btmutex.o btree.o build.o \ callback.o complete.o ctime.o date.o delete.o expr.o fault.o fkey.o \ fts3.o fts3_aux.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \ fts3_snippet.o fts3_tokenizer.o fts3_tokenizer1.o \ fts3_unicode.o fts3_unicode2.o \ fts3_write.o func.o global.o hash.o \ icu.o insert.o journal.o legacy.o loadext.o \ main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \ memjournal.o \ mutex.o mutex_noop.o mutex_os2.o mutex_unix.o mutex_w32.o \ notify.o opcodes.o os.o os_os2.o os_unix.o os_win.o \ pager.o parse.o pcache.o pcache1.o pragma.o prepare.o printf.o \ |
︙ | ︙ | |||
194 195 196 197 198 199 200 201 202 203 204 205 206 207 | $(TOP)/ext/fts3/fts3_hash.h \ $(TOP)/ext/fts3/fts3_icu.c \ $(TOP)/ext/fts3/fts3_porter.c \ $(TOP)/ext/fts3/fts3_snippet.c \ $(TOP)/ext/fts3/fts3_tokenizer.h \ $(TOP)/ext/fts3/fts3_tokenizer.c \ $(TOP)/ext/fts3/fts3_tokenizer1.c \ $(TOP)/ext/fts3/fts3_write.c SRC += \ $(TOP)/ext/icu/sqliteicu.h \ $(TOP)/ext/icu/icu.c SRC += \ $(TOP)/ext/rtree/rtree.h \ $(TOP)/ext/rtree/rtree.c | > > | 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 | $(TOP)/ext/fts3/fts3_hash.h \ $(TOP)/ext/fts3/fts3_icu.c \ $(TOP)/ext/fts3/fts3_porter.c \ $(TOP)/ext/fts3/fts3_snippet.c \ $(TOP)/ext/fts3/fts3_tokenizer.h \ $(TOP)/ext/fts3/fts3_tokenizer.c \ $(TOP)/ext/fts3/fts3_tokenizer1.c \ $(TOP)/ext/fts3/fts3_unicode.c \ $(TOP)/ext/fts3/fts3_unicode2.c \ $(TOP)/ext/fts3/fts3_write.c SRC += \ $(TOP)/ext/icu/sqliteicu.h \ $(TOP)/ext/icu/icu.c SRC += \ $(TOP)/ext/rtree/rtree.h \ $(TOP)/ext/rtree/rtree.c |
︙ | ︙ | |||
503 504 505 506 507 508 509 510 511 512 513 514 515 516 | $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_porter.c fts3_tokenizer.o: $(TOP)/ext/fts3/fts3_tokenizer.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_tokenizer.c fts3_tokenizer1.o: $(TOP)/ext/fts3/fts3_tokenizer1.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_tokenizer1.c fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c | > > > > > > | 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 | $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_porter.c fts3_tokenizer.o: $(TOP)/ext/fts3/fts3_tokenizer.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_tokenizer.c fts3_tokenizer1.o: $(TOP)/ext/fts3/fts3_tokenizer1.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_tokenizer1.c fts3_unicode.o: $(TOP)/ext/fts3/fts3_unicode.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_unicode.c fts3_unicode2.o: $(TOP)/ext/fts3/fts3_unicode2.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_unicode2.c fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c |
︙ | ︙ |
Changes to src/test_config.c.
︙ | ︙ | |||
302 303 304 305 306 307 308 309 310 311 312 313 314 315 | #endif #ifdef SQLITE_ENABLE_FTS3 Tcl_SetVar2(interp, "sqlite_options", "fts3", "1", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY); #endif #ifdef SQLITE_OMIT_GET_TABLE Tcl_SetVar2(interp, "sqlite_options", "gettable", "0", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "gettable", "1", TCL_GLOBAL_ONLY); #endif | > > > > > > | 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 | #endif #ifdef SQLITE_ENABLE_FTS3 Tcl_SetVar2(interp, "sqlite_options", "fts3", "1", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY); #endif #if !defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_DISABLE_FTS3_UNICODE) Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "0", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "1", TCL_GLOBAL_ONLY); #endif #ifdef SQLITE_OMIT_GET_TABLE Tcl_SetVar2(interp, "sqlite_options", "gettable", "0", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "gettable", "1", TCL_GLOBAL_ONLY); #endif |
︙ | ︙ |
Changes to test/fts3fault2.test.
︙ | ︙ | |||
126 127 128 129 130 131 132 133 134 | faultsim_restore_and_reopen db eval {SELECT * FROM sqlite_master} } -body { execsql { INSERT INTO ft(ft) VALUES('rebuild') } } -test { faultsim_test_result {0 {}} } finish_test | > > > > > > > > > > > > > > > > > > > > > > > > | 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | faultsim_restore_and_reopen db eval {SELECT * FROM sqlite_master} } -body { execsql { INSERT INTO ft(ft) VALUES('rebuild') } } -test { faultsim_test_result {0 {}} } ifcapable fts3_unicode { do_test 5.0 { faultsim_delete_and_reopen execsql { CREATE VIRTUAL TABLE ft USING fts4(a, tokenize=unicode61); } faultsim_save_and_close } {} do_faultsim_test 5.1 -faults oom* -prep { faultsim_restore_and_reopen db eval {SELECT * FROM sqlite_master} } -body { execsql { INSERT INTO ft VALUES('the quick brown fox'); } execsql { INSERT INTO ft VALUES( 'theunusuallylongtokenthatjustdragsonandonandonandthendragsonsomemoreeof' ); } execsql { SELECT docid FROM ft WHERE ft MATCH 'th*' } } -test { faultsim_test_result {0 {1 2}} } } finish_test |
Added test/fts4unicode.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 | # 2012 May 25 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # # The tests in this file focus on testing the "unicode" FTS tokenizer. # set testdir [file dirname $argv0] source $testdir/tester.tcl ifcapable !fts3_unicode { finish_test ; return } set ::testprefix fts4unicode proc do_unicode_token_test {tn input res} { set input [string map {' ''} $input] uplevel [list do_execsql_test $tn " SELECT fts3_tokenizer_test('unicode61', '$input'); " [list [list {*}$res]]] } do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D} do_unicode_token_test 1.1 {Ä Ö Ü} {0 ä Ä 1 ö Ö 2 ü Ü} do_unicode_token_test 1.2 {xÄx xÖx xÜx} {0 xäx xÄx 1 xöx xÖx 2 xüx xÜx} # 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s. do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF" do_unicode_token_test 1.4 "\u1E9E" "0 ß \u1E9E" do_unicode_token_test 1.5 "\u1E9E" "0 \uDF \u1E9E" do_unicode_token_test 1.6 "The quick brown fox" { 0 the The 1 quick quick 2 brown brown 3 fox fox } do_unicode_token_test 1.7 "The\u00bfquick\u224ebrown\u2263fox" { 0 the The 1 quick quick 2 brown brown 3 fox fox } #------------------------------------------------------------------------- # set docs [list { Enhance the INSERT syntax to allow multiple rows to be inserted via the VALUES clause. } { Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause. } { Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp(). } { Added the sqlite3_db_readonly() interface. } { Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the ability to add new PRAGMA statements or to override built-in PRAGMAs. } { Queries of the form: "SELECT max(x), y FROM table" returns the value of y on the same row that contains the maximum x value. } { Added support for the FTS4 languageid option. } { Documented support for the FTS4 content option. This feature has actually been in the code since version 3.7.9 but is only now considered to be officially supported. } { Pending statements no longer block ROLLBACK. Instead, the pending statement will return SQLITE_ABORT upon next access after the ROLLBACK. } { Improvements to the handling of CSV inputs in the command-line shell } { Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be incorrectly converted into an INNER JOIN if the WHERE clause indexable terms connected by OR. }] set map(a) [list "\u00C4" "\u00E4"] ; # LATIN LETTER A WITH DIAERESIS set map(e) [list "\u00CB" "\u00EB"] ; # LATIN LETTER E WITH DIAERESIS set map(i) [list "\u00CF" "\u00EF"] ; # LATIN LETTER I WITH DIAERESIS set map(o) [list "\u00D6" "\u00F6"] ; # LATIN LETTER O WITH DIAERESIS set map(u) [list "\u00DC" "\u00FC"] ; # LATIN LETTER U WITH DIAERESIS set map(y) [list "\u0178" "\u00FF"] ; # LATIN LETTER Y WITH DIAERESIS set map(h) [list "\u1E26" "\u1E27"] ; # LATIN LETTER H WITH DIAERESIS set map(w) [list "\u1E84" "\u1E85"] ; # LATIN LETTER W WITH DIAERESIS set map(x) [list "\u1E8C" "\u1E8D"] ; # LATIN LETTER X WITH DIAERESIS foreach k [array names map] { lappend mappings [string toupper $k] [lindex $map($k) 0] lappend mappings $k [lindex $map($k) 1] } proc mapdoc {doc} { set doc [regsub -all {[[:space:]]+} $doc " "] string map $::mappings [string trim $doc] } do_test 2.0 { execsql { CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61, x); } foreach doc $docs { set d [mapdoc $doc] execsql { INSERT INTO t2 VALUES($d) } } } {} do_test 2.1 { set q [mapdoc "row"] execsql { SELECT * FROM t2 WHERE t2 MATCH $q } } [list [mapdoc { Queries of the form: "SELECT max(x), y FROM table" returns the value of y on the same row that contains the maximum x value. }]] foreach {tn query snippet} { 2 "row" { ...returns the value of y on the same [row] that contains the maximum x value. } 3 "ROW" { ...returns the value of y on the same [row] that contains the maximum x value. } 4 "rollback" { ...[ROLLBACK]. Instead, the pending statement will return SQLITE_ABORT upon next access after the [ROLLBACK]. } 5 "rOllback" { ...[ROLLBACK]. Instead, the pending statement will return SQLITE_ABORT upon next access after the [ROLLBACK]. } 6 "lang*" { Added support for the FTS4 [languageid] option. } } { do_test 2.$tn { set q [mapdoc $query] execsql { SELECT snippet(t2, '[', ']', '...') FROM t2 WHERE t2 MATCH $q } } [list [mapdoc $snippet]] } #------------------------------------------------------------------------- # Make sure the unicode61 tokenizer does not crash if it is passed a # NULL pointer. reset_db do_execsql_test 3.1 { CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x, y); INSERT INTO t1 VALUES(NULL, 'a b c'); } do_execsql_test 3.2 { SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'b' } {{a [b] c}} do_execsql_test 3.3 { BEGIN; DELETE FROM t1; INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b'); INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 VALUES('a b c', NULL); INSERT INTO t1 VALUES('a x c', NULL); COMMIT; } do_execsql_test 3.4 { SELECT * FROM t1 WHERE t1 MATCH 'a b'; } {{a b c} {}} #------------------------------------------------------------------------- # reset_db do_test 4.1 { set a "abc\uFFFEdef" set b "abc\uD800def" set c "\uFFFEdef" set d "\uD800def" execsql { CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x); INSERT INTO t1 VALUES($a); INSERT INTO t1 VALUES($b); INSERT INTO t1 VALUES($c); INSERT INTO t1 VALUES($d); } } {} do_test 4.2 { set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}] set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}] set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] execsql { INSERT INTO t1 VALUES($a); INSERT INTO t1 VALUES($b); INSERT INTO t1 VALUES($c); INSERT INTO t1 VALUES($d); } } {} do_test 4.3 { set a [binary format c* {0xF7 0xBF 0xBF 0xBF}] set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}] set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}] set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}] execsql { INSERT INTO t1 VALUES($a); INSERT INTO t1 VALUES($b); INSERT INTO t1 VALUES($c); INSERT INTO t1 VALUES($d); } } {} finish_test |
Changes to test/permutations.test.
︙ | ︙ | |||
181 182 183 184 185 186 187 | fts3near.test fts3query.test fts3shared.test fts3snippet.test fts3sort.test fts3fault.test fts3malloc.test fts3matchinfo.test fts3aux1.test fts3comp1.test fts3auto.test fts4aa.test fts4content.test fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test | | | 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | fts3near.test fts3query.test fts3shared.test fts3snippet.test fts3sort.test fts3fault.test fts3malloc.test fts3matchinfo.test fts3aux1.test fts3comp1.test fts3auto.test fts4aa.test fts4content.test fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test fts4check.test fts4unicode.test } lappend ::testsuitelist xxx #------------------------------------------------------------------------- # Define the coverage related test suites: # |
︙ | ︙ |
Changes to tool/mksqlite3c.tcl.
︙ | ︙ | |||
312 313 314 315 316 317 318 319 320 321 322 323 324 325 | fts3_expr.c fts3_hash.c fts3_porter.c fts3_tokenizer.c fts3_tokenizer1.c fts3_write.c fts3_snippet.c rtree.c icu.c fts3_icu.c } { copy_file tsrc/$file } | > > | 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 | fts3_expr.c fts3_hash.c fts3_porter.c fts3_tokenizer.c fts3_tokenizer1.c fts3_write.c fts3_snippet.c fts3_unicode.c fts3_unicode2.c rtree.c icu.c fts3_icu.c } { copy_file tsrc/$file } |
︙ | ︙ |