/ Check-in [75361013]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:In the spellfix phonehash() function, add letter W into the same character class as V.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:7536101317c00fbb5bf555120feb55b9bc40e8ba
User & Date: drh 2012-08-31 14:55:05
Context
2012-09-03
11:39
Add resources describing the version number and other information to EXEs and DLLs generated using MSVC. check-in: a15a7084 user: drh tags: trunk
10:32
Add Win32 version resources to the applicable binaries built by the MSVC makefile. check-in: e2f27d28 user: mistachkin tags: win32Resources
2012-08-31
14:55
In the spellfix phonehash() function, add letter W into the same character class as V. check-in: 75361013 user: drh tags: trunk
12:31
Changes for ERROR_PATH_NOT_FOUND in addition to ERROR_FILE_NOT_FOUND in winAccess(). check-in: 527340ab user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/test_spellfix.c.

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
..
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
...
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#include <ctype.h>

/*
** Character classes for ASCII characters:
**
**   0   ''        Silent letters:   H W
**   1   'A'       Any vowel:   A E I O U (Y)
**   2   'B'       A bilabeal stop or fricative:  B F P V
**   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
**   4   'D'       Alveolar stops:  D T
**   5   'H'       Letter H at the beginning of a word
**   6   'L'       Glide:  L
**   7   'R'       Semivowel:  R
**   8   'M'       Nasals:  M N
**   9   'W'       Letter W at the beginning of a word
**   10  'Y'       Letter Y at the beginning of a word.
**   11  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
**   12  ' '       White space
**   13  '?'       Other.
*/
#define CCLASS_SILENT         0
#define CCLASS_VOWEL          1
#define CCLASS_B              2
#define CCLASS_C              3
#define CCLASS_D              4
#define CCLASS_H              5
#define CCLASS_L              6
#define CCLASS_R              7
#define CCLASS_M              8
#define CCLASS_W              9
#define CCLASS_Y             10
#define CCLASS_DIGIT         11
#define CCLASS_SPACE         12
#define CCLASS_OTHER         13

/*
** The following table gives the character class for non-initial ASCII
** characters.
*/
static const unsigned char midClass[] = {
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
................................................................................
 /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
 /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
 /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
 /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
 /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
 /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
 /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
 /* W */ CCLASS_SILENT,   /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
 /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
 /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
 /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
 /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
 /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
 /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
 /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
 /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
 /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
 /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_SILENT,
 /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
 /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
 /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   
};
/* 
** This tables gives the character class for ASCII characters that form the
** initial character of a word.  The only difference from midClass is with
................................................................................
 /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
 /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
 /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
 /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
 /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
 /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
 /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
 /* W */ CCLASS_W,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
 /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
 /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
 /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
 /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
 /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
 /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
 /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
 /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
 /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
 /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_W,
 /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
 /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
 /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   
};

/*
** Mapping from the character class number (0-13) to a symbol for each
** character class.  Note that initClass[] can be used to map the class
** symbol back into the class number.
*/
static const unsigned char className[] = ".ABCDHLRMWY9 ?";

/*
** Generate a "phonetic hash" from a string of ASCII characters
** in zIn[0..nIn-1].
**
**   * Map characters by character class as defined above.
**   * Omit double-letters







|






<
|
|
|
|










<
|
|
|
|







 







|









|







 







|









|










|







26
27
28
29
30
31
32
33
34
35
36
37
38
39

40
41
42
43
44
45
46
47
48
49
50
51
52
53

54
55
56
57
58
59
60
61
62
63
64
..
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
...
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#include <ctype.h>

/*
** Character classes for ASCII characters:
**
**   0   ''        Silent letters:   H W
**   1   'A'       Any vowel:   A E I O U (Y)
**   2   'B'       A bilabeal stop or fricative:  B F P V W
**   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
**   4   'D'       Alveolar stops:  D T
**   5   'H'       Letter H at the beginning of a word
**   6   'L'       Glide:  L
**   7   'R'       Semivowel:  R
**   8   'M'       Nasals:  M N

**   9   'Y'       Letter Y at the beginning of a word.
**   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
**   11  ' '       White space
**   12  '?'       Other.
*/
#define CCLASS_SILENT         0
#define CCLASS_VOWEL          1
#define CCLASS_B              2
#define CCLASS_C              3
#define CCLASS_D              4
#define CCLASS_H              5
#define CCLASS_L              6
#define CCLASS_R              7
#define CCLASS_M              8

#define CCLASS_Y              9
#define CCLASS_DIGIT         10
#define CCLASS_SPACE         11
#define CCLASS_OTHER         12

/*
** The following table gives the character class for non-initial ASCII
** characters.
*/
static const unsigned char midClass[] = {
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
................................................................................
 /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
 /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
 /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
 /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
 /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
 /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
 /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
 /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
 /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
 /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
 /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
 /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
 /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
 /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
 /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
 /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
 /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
 /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
 /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
 /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
 /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   
};
/* 
** This tables gives the character class for ASCII characters that form the
** initial character of a word.  The only difference from midClass is with
................................................................................
 /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
 /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
 /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
 /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
 /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
 /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
 /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
 /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
 /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
 /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
 /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
 /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
 /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
 /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
 /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
 /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
 /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
 /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
 /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
 /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
 /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   
};

/*
** Mapping from the character class number (0-13) to a symbol for each
** character class.  Note that initClass[] can be used to map the class
** symbol back into the class number.
*/
static const unsigned char className[] = ".ABCDHLRMY9 ?";

/*
** Generate a "phonetic hash" from a string of ASCII characters
** in zIn[0..nIn-1].
**
**   * Map characters by character class as defined above.
**   * Omit double-letters

Changes to test/spellfix.test.

64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
  execsql { CREATE VIRTUAL TABLE t1 USING spellfix1 }
  foreach word $vocab {
    execsql { INSERT INTO t1(word) VALUES($word) }
  }
} {}

foreach {tn word res} {
  1   raxpi*     {rasping 5 rasped 5 raspberry 6 rasp 4 rasps 4}
  2   ril*       {rail 4 railed 4 railer 4 railers 4 railing 4}
  3   rilis*     {realism 6 realist 6 realistic 6 realistically 6 realists 6}
  4   reail*     {real 3 realest 3 realign 3 realigned 3 realigning 3}
  5   ras*       {rascal 3 rascally 3 rascals 3 rash 3 rasher 3}
  6   realistss* {realists 8 realigns 8 realistic 9 realistically 9 realest 7}
  7   realistss  {realists 8 realist 7 realigns 8 realistic 9 realest 7}
  8   rllation*  {realities 9 reality 7 rallied 7 railed 4}







|







64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
  execsql { CREATE VIRTUAL TABLE t1 USING spellfix1 }
  foreach word $vocab {
    execsql { INSERT INTO t1(word) VALUES($word) }
  }
} {}

foreach {tn word res} {
  1   raxpi*     {rasping 5 rasped 5 ragweed 5 raspberry 6 rasp 4}
  2   ril*       {rail 4 railed 4 railer 4 railers 4 railing 4}
  3   rilis*     {realism 6 realist 6 realistic 6 realistically 6 realists 6}
  4   reail*     {real 3 realest 3 realign 3 realigned 3 realigning 3}
  5   ras*       {rascal 3 rascally 3 rascals 3 rash 3 rasher 3}
  6   realistss* {realists 8 realigns 8 realistic 9 realistically 9 realest 7}
  7   realistss  {realists 8 realist 7 realigns 8 realistic 9 realest 7}
  8   rllation*  {realities 9 reality 7 rallied 7 railed 4}