SQLite

Check-in [7adfa4a579]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Fix the spellfix1_scriptcode() function to ignore whitespace and punctuation, and to recognize Hebrew and Arabic scripts.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 7adfa4a5794e47f97491c08abeaaac90e826b331
User & Date: drh 2015-12-17 14:18:21.904
Context
2015-12-18
16:29
Micro-optimizations and comment fixes on the mem5.c memory allocator module. (check-in: 8bf5e056eb user: drh tags: trunk)
03:59
Reduce the size of the CellInfo object from 32 to 24 bytes on 64-bit machines. (Closed-Leaf check-in: 7850715406 user: drh tags: optimize-cellinfo)
2015-12-17
20:36
Add the "offsets=0" option to fts5, to create a smaller index without term offset information. A few things are currently broken on this branch. (check-in: 40b5bbf02a user: dan tags: fts5-offsets)
17:30
Reduce the size of the VdbeCursor object by a pointer (the pBt pointer used for ephemeral tables). (check-in: 98b710c363 user: drh tags: optimize-vdbecursor)
14:18
Fix the spellfix1_scriptcode() function to ignore whitespace and punctuation, and to recognize Hebrew and Arabic scripts. (check-in: 7adfa4a579 user: drh tags: trunk)
13:28
Fixes for harmless compiler warnings. (check-in: 85ebd46c70 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/misc/spellfix.c.
1713
1714
1715
1716
1717
1718
1719


1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730




1731
1732
1733
1734
1735
1736
1737


1738
1739
1740
1741
1742
1743
1744
  int nIn = sqlite3_value_bytes(argv[0]);
  int c, sz;
  int scriptMask = 0;
  int res;
# define SCRIPT_LATIN       0x0001
# define SCRIPT_CYRILLIC    0x0002
# define SCRIPT_GREEK       0x0004



  while( nIn>0 ){
    c = utf8Read(zIn, nIn, &sz);
    zIn += sz;
    nIn -= sz;
    if( c<0x02af ){
      scriptMask |= SCRIPT_LATIN;
    }else if( c>=0x0400 && c<=0x04ff ){
      scriptMask |= SCRIPT_CYRILLIC;
    }else if( c>=0x0386 && c<=0x03ce ){
      scriptMask |= SCRIPT_GREEK;




    }
  }
  switch( scriptMask ){
    case 0:                res = 999; break;
    case SCRIPT_LATIN:     res = 215; break;
    case SCRIPT_CYRILLIC:  res = 220; break;
    case SCRIPT_GREEK:     res = 200; break;


    default:               res = 998; break;
  }
  sqlite3_result_int(context, res);
}

/* End transliterate
******************************************************************************







>
>





|





>
>
>
>







>
>







1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
  int nIn = sqlite3_value_bytes(argv[0]);
  int c, sz;
  int scriptMask = 0;
  int res;
# define SCRIPT_LATIN       0x0001
# define SCRIPT_CYRILLIC    0x0002
# define SCRIPT_GREEK       0x0004
# define SCRIPT_HEBREW      0x0008
# define SCRIPT_ARABIC      0x0010

  while( nIn>0 ){
    c = utf8Read(zIn, nIn, &sz);
    zIn += sz;
    nIn -= sz;
    if( c<0x02af && (c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT) ){
      scriptMask |= SCRIPT_LATIN;
    }else if( c>=0x0400 && c<=0x04ff ){
      scriptMask |= SCRIPT_CYRILLIC;
    }else if( c>=0x0386 && c<=0x03ce ){
      scriptMask |= SCRIPT_GREEK;
    }else if( c>=0x0590 && c<=0x05ff ){
      scriptMask |= SCRIPT_HEBREW;
    }else if( c>=0x0600 && c<=0x06ff ){
      scriptMask |= SCRIPT_ARABIC;
    }
  }
  switch( scriptMask ){
    case 0:                res = 999; break;
    case SCRIPT_LATIN:     res = 215; break;
    case SCRIPT_CYRILLIC:  res = 220; break;
    case SCRIPT_GREEK:     res = 200; break;
    case SCRIPT_HEBREW:    res = 125; break;
    case SCRIPT_ARABIC:    res = 160; break;
    default:               res = 998; break;
  }
  sqlite3_result_int(context, res);
}

/* End transliterate
******************************************************************************
Added test/spellfix3.test.






















































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# 2015-12-17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#

# Test preamble: find the directory holding this script, load the shared
# test harness from it, and set the prefix used to name the test cases
# in this file.
set testdir [file dirname $argv0]
source [file join $testdir tester.tcl]
set testprefix spellfix3

# The whole file is skipped when the "vtab" capability is not available.
ifcapable !vtab {
  finish_test
  return
}

# Register the statically linked spellfix extension on the "db" handle so
# that spellfix1_scriptcode() can be called from SQL below.
load_static_extension db spellfix

# Tests 100-140: each input is written in a single script, surrounded by
# spaces and punctuation.  spellfix1_scriptcode() is expected to ignore
# the whitespace and punctuation and report the numeric code of the one
# script it saw.  NOTE(review): the expected values look like ISO 15924
# numeric script codes - confirm against the spellfix1 documentation.

# Latin text with ASCII punctuation and typographic quotes -> 215 (Latin).
do_execsql_test 100 {
  SELECT spellfix1_scriptcode('And God said, “Let there be light”');
} {215}

# Cyrillic text with a colon and ASCII double quotes -> 220 (Cyrillic).
do_execsql_test 110 {
  SELECT spellfix1_scriptcode('Бог сказал: "Да будет свет"');
} {220}

# Greek text separated by spaces only -> 200 (Greek).
do_execsql_test 120 {
  SELECT spellfix1_scriptcode('και ειπεν ο θεος γενηθητω φως και εγενετο φως');
} {200}

# Pointed Hebrew text (letters with combining marks) -> 125 (Hebrew).
do_execsql_test 130 {
  SELECT spellfix1_scriptcode('וַיֹּ֥אמֶר אֱלֹהִ֖ים יְהִ֣י א֑וֹר וַֽיְהִי־אֽוֹר׃');
} {125}

# Vocalized Arabic text with Arabic and ASCII punctuation -> 160 (Arabic).
do_execsql_test 140 {
  SELECT spellfix1_scriptcode('فِي ذَلِكَ الوَقتِ، قالَ اللهُ: لِيَكُنْ نُورٌ. فَصَارَ نُورٌ.');
} {160}
# Tests 200-210: inputs that do not resolve to exactly one script.

# Only a sign, digits and a decimal point: no script is detected, so the
# expected result is 999.
do_execsql_test 200 {
  SELECT spellfix1_scriptcode('+3.14159');
} {999}

# Latin and Cyrillic letters in the same string: more than one script is
# detected, so the expected result is 998.
do_execsql_test 210 {
  SELECT spellfix1_scriptcode('And God said: "Да будет свет"');
} {998}

# Signal the harness that this test file is complete.
finish_test