Check-in [7adfa4a5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Fix the spellfix1_scriptcode() function to ignore whitespace and punctuation, and to recognize Hebrew and Arabic scripts.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:7adfa4a5794e47f97491c08abeaaac90e826b331
User & Date: drh 2015-12-17 14:18:21
Context
2015-12-18
16:29
Micro-optimizations and comment fixes on the mem5.c memory allocator module. check-in: 8bf5e056 user: drh tags: trunk
03:59
Reduce the size of the CellInfo object from 32 to 24 bytes on 64-bit machines. Closed-Leaf check-in: 78507154 user: drh tags: optimize-cellinfo
2015-12-17
20:36
Add the "offsets=0" option to fts5, to create a smaller index without term offset information. A few things are currently broken on this branch. check-in: 40b5bbf0 user: dan tags: fts5-offsets
17:30
Reduce the size of the VdbeCursor object by a pointer (the pBt pointer used for ephemeral tables). check-in: 98b710c3 user: drh tags: optimize-vdbecursor
14:18
Fix the spellfix1_scriptcode() function to ignore whitespace and punctuation, and to recognize Hebrew and Arabic scripts. check-in: 7adfa4a5 user: drh tags: trunk
13:28
Fixes for harmless compiler warnings. check-in: 85ebd46c user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/misc/spellfix.c.

  1713   1713     int nIn = sqlite3_value_bytes(argv[0]);
  1714   1714     int c, sz;
  1715   1715     int scriptMask = 0;
  1716   1716     int res;
  1717   1717   # define SCRIPT_LATIN       0x0001
  1718   1718   # define SCRIPT_CYRILLIC    0x0002
  1719   1719   # define SCRIPT_GREEK       0x0004
         1720  +# define SCRIPT_HEBREW      0x0008
         1721  +# define SCRIPT_ARABIC      0x0010
  1720   1722   
  1721   1723     while( nIn>0 ){
  1722   1724       c = utf8Read(zIn, nIn, &sz);
  1723   1725       zIn += sz;
  1724   1726       nIn -= sz;
  1725         -    if( c<0x02af ){
         1727  +    if( c<0x02af && (c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT) ){
  1726   1728         scriptMask |= SCRIPT_LATIN;
  1727   1729       }else if( c>=0x0400 && c<=0x04ff ){
  1728   1730         scriptMask |= SCRIPT_CYRILLIC;
  1729   1731       }else if( c>=0x0386 && c<=0x03ce ){
  1730   1732         scriptMask |= SCRIPT_GREEK;
         1733  +    }else if( c>=0x0590 && c<=0x05ff ){
         1734  +      scriptMask |= SCRIPT_HEBREW;
         1735  +    }else if( c>=0x0600 && c<=0x06ff ){
         1736  +      scriptMask |= SCRIPT_ARABIC;
  1731   1737       }
  1732   1738     }
  1733   1739     switch( scriptMask ){
  1734   1740       case 0:                res = 999; break;
  1735   1741       case SCRIPT_LATIN:     res = 215; break;
  1736   1742       case SCRIPT_CYRILLIC:  res = 220; break;
  1737   1743       case SCRIPT_GREEK:     res = 200; break;
         1744  +    case SCRIPT_HEBREW:    res = 125; break;
         1745  +    case SCRIPT_ARABIC:    res = 160; break;
  1738   1746       default:               res = 998; break;
  1739   1747     }
  1740   1748     sqlite3_result_int(context, res);
  1741   1749   }
  1742   1750   
  1743   1751   /* End transliterate
  1744   1752   ******************************************************************************

Added test/spellfix3.test.

            1  +# 2015-12-17
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +#
           12  +
           13  +set testdir [file dirname $argv0]
           14  +source $testdir/tester.tcl
           15  +set testprefix spellfix3
           16  +
           17  +ifcapable !vtab { finish_test ; return }
           18  +
           19  +load_static_extension db spellfix
           20  +
           21  +do_execsql_test 100 {
           22  +  SELECT spellfix1_scriptcode('And God said, “Let there be light”');
           23  +} {215}
           24  +do_execsql_test 110 {
           25  +  SELECT spellfix1_scriptcode('Бог сказал: "Да будет свет"');
           26  +} {220}
           27  +do_execsql_test 120 {
           28  +  SELECT spellfix1_scriptcode('και ειπεν ο θεος γενηθητω φως και εγενετο φως');
           29  +} {200}
           30  +do_execsql_test 130 {
           31  +  SELECT spellfix1_scriptcode('וַיֹּ֥אמֶר אֱלֹהִ֖ים יְהִ֣י א֑וֹר וַֽיְהִי־אֽוֹר׃');
           32  +} {125}
           33  +do_execsql_test 140 {
           34  +  SELECT spellfix1_scriptcode('فِي ذَلِكَ الوَقتِ، قالَ اللهُ: لِيَكُنْ نُورٌ. فَصَارَ نُورٌ.');
           35  +} {160}
           36  +do_execsql_test 200 {
           37  +  SELECT spellfix1_scriptcode('+3.14159');
           38  +} {999}
           39  +do_execsql_test 210 {
           40  +  SELECT spellfix1_scriptcode('And God said: "Да будет свет"');
           41  +} {998}
           42  +
           43  +finish_test