Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Added the nextchar.c extension. Minor changes to the spellfix.c extension so that it can be appended to an amalgamation and compiled without duplicating symbols. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
56b9a417f5451631f11c5206d625f114 |
User & Date: | drh 2013-04-25 19:31:33.149 |
Context
2013-04-26
| ||
12:08 | Reduce the default SQLITE_MAX_MMAP_SIZE slightly so that it fits in a signed 32-bit integer. (check-in: 460752b857 user: drh tags: trunk) | |
06:58 | Merge latest trunk changes. (check-in: 4d08e74d34 user: dan tags: fts3-expr-rebalance) | |
2013-04-25
| ||
19:31 | Added the nextchar.c extension. Minor changes to the spellfix.c extension so that it can be appended to an amalgamation and compiled without duplicating symbols. (check-in: 56b9a417f5 user: drh tags: trunk) | |
17:27 | Fix the tool/build-shell.sh script to remove references to files that are now loadable extensions. (check-in: aabeea98f5 user: drh tags: trunk) | |
Changes
Changes to Makefile.in.
︙ | ︙ | |||
388 389 390 391 392 393 394 395 396 397 398 399 400 401 | # Statically linked extensions # TESTSRC += \ $(TOP)/ext/misc/amatch.c \ $(TOP)/ext/misc/closure.c \ $(TOP)/ext/misc/fuzzer.c \ $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/regexp.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/wholenumber.c # Source code to the library files needed by the test fixture # TESTSRC2 = \ | > | 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 | # Statically linked extensions # TESTSRC += \ $(TOP)/ext/misc/amatch.c \ $(TOP)/ext/misc/closure.c \ $(TOP)/ext/misc/fuzzer.c \ $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/nextchar.c \ $(TOP)/ext/misc/regexp.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/wholenumber.c # Source code to the library files needed by the test fixture # TESTSRC2 = \ |
︙ | ︙ |
Changes to Makefile.msc.
︙ | ︙ | |||
708 709 710 711 712 713 714 715 716 717 718 719 720 721 | # Statically linked extensions # TESTEXT = \ $(TOP)\ext\misc\amatch.c \ $(TOP)\ext\misc\closure.c \ $(TOP)\ext\misc\fuzzer.c \ $(TOP)\ext\misc\ieee754.c \ $(TOP)\ext\misc\regexp.c \ $(TOP)\ext\misc\spellfix.c \ $(TOP)\ext\misc\wholenumber.c # Source code to the library files needed by the test fixture # | > | 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 | # Statically linked extensions # TESTEXT = \ $(TOP)\ext\misc\amatch.c \ $(TOP)\ext\misc\closure.c \ $(TOP)\ext\misc\fuzzer.c \ $(TOP)\ext\misc\ieee754.c \ $(TOP)\ext\misc\nextchar.c \ $(TOP)\ext\misc\regexp.c \ $(TOP)\ext\misc\spellfix.c \ $(TOP)\ext\misc\wholenumber.c # Source code to the library files needed by the test fixture # |
︙ | ︙ |
Added ext/misc/nextchar.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 | /* ** 2013-02-28 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains code to implement the next_char(A,T,F,W) SQL function. 
** ** The next_char(A,T,F,H) function finds all valid "next" characters for ** string A given the vocabulary in T.F. The T.F field should be indexed. ** If the W value exists and is a non-empty string, then it is an SQL ** expression that limits the entries in T.F that will be considered. ** ** For example, suppose an application has a dictionary like this: ** ** CREATE TABLE dictionary(word TEXT UNIQUE); ** ** Further suppose that for user keypad entry, it is desired to disable ** (gray out) keys that are not valid as the next character. If the ** the user has previously entered (say) 'cha' then to find all allowed ** next characters (and thereby determine when keys should not be grayed ** out) run the following query: ** ** SELECT next_char('cha','dictionary','word'); */ #include "sqlite3ext.h" SQLITE_EXTENSION_INIT1 #include <string.h> /* ** A structure to hold context of the next_char() computation across ** nested function calls. */ typedef struct nextCharContext nextCharContext; struct nextCharContext { sqlite3 *db; /* Database connection */ sqlite3_stmt *pStmt; /* Prepared statement used to query */ const unsigned char *zPrefix; /* Prefix to scan */ int nPrefix; /* Size of zPrefix in bytes */ int nAlloc; /* Space allocated to aResult */ int nUsed; /* Space used in aResult */ unsigned int *aResult; /* Array of next characters */ int mallocFailed; /* True if malloc fails */ int otherError; /* True for any other failure */ }; /* ** Append a result character if the character is not already in the ** result. */ static void nextCharAppend(nextCharContext *p, unsigned c){ int i; for(i=0; i<p->nUsed; i++){ if( p->aResult[i]==c ) return; } if( p->nUsed+1 > p->nAlloc ){ unsigned int *aNew; int n = p->nAlloc*2 + 30; aNew = sqlite3_realloc(p->aResult, n*sizeof(unsigned int)); if( aNew==0 ){ p->mallocFailed = 1; return; }else{ p->aResult = aNew; p->nAlloc = n; } } p->aResult[p->nUsed++] = c; } /* ** Write a character into z[] as UTF8. 
Return the number of bytes needed ** to hold the character */ static int writeUtf8(unsigned char *z, unsigned c){ if( c<0x00080 ){ z[0] = (unsigned char)(c&0xff); return 1; } if( c<0x00800 ){ z[0] = 0xC0 + (unsigned char)((c>>6)&0x1F); z[1] = 0x80 + (unsigned char)(c & 0x3F); return 2; } if( c<0x10000 ){ z[0] = 0xE0 + (unsigned char)((c>>12)&0x0F); z[1] = 0x80 + (unsigned char)((c>>6) & 0x3F); z[2] = 0x80 + (unsigned char)(c & 0x3F); return 3; } z[0] = 0xF0 + (unsigned char)((c>>18) & 0x07); z[1] = 0x80 + (unsigned char)((c>>12) & 0x3F); z[2] = 0x80 + (unsigned char)((c>>6) & 0x3F); z[3] = 0x80 + (unsigned char)(c & 0x3F); return 4; } /* ** Read a UTF8 character out of z[] and write it into *pOut. Return ** the number of bytes in z[] that were used to construct the character. */ static int readUtf8(const unsigned char *z, unsigned *pOut){ static const unsigned char validBits[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, }; unsigned c = z[0]; if( c<0xc0 ){ *pOut = c; return 1; }else{ int n = 1; c = validBits[c-0xc0]; while( (z[n] & 0xc0)==0x80 ){ c = (c<<6) + (0x3f & z[n++]); } if( c<0x80 || (c&0xFFFFF800)==0xD800 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } *pOut = c; return n; } } /* ** The nextCharContext structure has been set up. Add all "next" characters ** to the result set. 
*/ static void findNextChars(nextCharContext *p){ unsigned cPrev = 0; unsigned char zPrev[8]; int n, rc; for(;;){ sqlite3_bind_text(p->pStmt, 1, (char*)p->zPrefix, p->nPrefix, SQLITE_STATIC); n = writeUtf8(zPrev, cPrev+1); sqlite3_bind_text(p->pStmt, 2, (char*)zPrev, n, SQLITE_STATIC); rc = sqlite3_step(p->pStmt); if( rc==SQLITE_DONE ){ sqlite3_reset(p->pStmt); return; }else if( rc!=SQLITE_ROW ){ p->otherError = rc; return; }else{ const unsigned char *zOut = sqlite3_column_text(p->pStmt, 0); unsigned cNext; n = readUtf8(zOut+p->nPrefix, &cNext); sqlite3_reset(p->pStmt); nextCharAppend(p, cNext); cPrev = cNext; if( p->mallocFailed ) return; } } } /* ** next_character(A,T,F,W) ** ** Return a string composted of all next possible characters after ** A for elements of T.F. If W is supplied, then it is an SQL expression ** that limits the elements in T.F that are considered. */ static void nextCharFunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ nextCharContext c; const unsigned char *zTable = sqlite3_value_text(argv[1]); const unsigned char *zField = sqlite3_value_text(argv[2]); const unsigned char *zWhere; char *zSql; int rc; memset(&c, 0, sizeof(c)); c.db = sqlite3_context_db_handle(context); c.zPrefix = sqlite3_value_text(argv[0]); c.nPrefix = sqlite3_value_bytes(argv[0]); if( zTable==0 || zField==0 || c.zPrefix==0 ) return; if( argc<4 || (zWhere = sqlite3_value_text(argv[3]))==0 || zWhere[0]==0 ){ zSql = sqlite3_mprintf( "SELECT \"%w\" FROM \"%w\"" " WHERE \"%w\">=(?1 || ?2)" " AND \"%w\"<=(?1 || char(1114111))" /* 1114111 == 0x10ffff */ " ORDER BY 1 ASC LIMIT 1", zField, zTable, zField, zField); }else{ zSql = sqlite3_mprintf( "SELECT \"%w\" FROM \"%w\"" " WHERE \"%w\">=(?1 || ?2)" " AND \"%w\"<=(?1 || char(1114111))" /* 1114111 == 0x10ffff */ " AND (%s)" " ORDER BY 1 ASC LIMIT 1", zField, zTable, zField, zField, zWhere); } if( zSql==0 ){ sqlite3_result_error_nomem(context); return; } rc = sqlite3_prepare_v2(c.db, zSql, -1, &c.pStmt, 0); 
sqlite3_free(zSql); if( rc ){ sqlite3_result_error(context, sqlite3_errmsg(c.db), -1); return; } findNextChars(&c); if( c.mallocFailed ){ sqlite3_result_error_nomem(context); }else{ unsigned char *pRes; pRes = sqlite3_malloc( c.nUsed*4 + 1 ); if( pRes==0 ){ sqlite3_result_error_nomem(context); }else{ int i; int n = 0; for(i=0; i<c.nUsed; i++){ n += writeUtf8(pRes+n, c.aResult[i]); } pRes[n] = 0; sqlite3_result_text(context, (const char*)pRes, n, sqlite3_free); } } sqlite3_finalize(c.pStmt); sqlite3_free(c.aResult); } #ifdef _WIN32 __declspec(dllexport) #endif int sqlite3_nextchar_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi ){ int rc = SQLITE_OK; SQLITE_EXTENSION_INIT2(pApi); (void)pzErrMsg; /* Unused parameter */ rc = sqlite3_create_function(db, "next_char", 3, SQLITE_UTF8, 0, nextCharFunc, 0, 0); if( rc==SQLITE_OK ){ rc = sqlite3_create_function(db, "next_char", 4, SQLITE_UTF8, 0, nextCharFunc, 0, 0); } return rc; } |
Changes to ext/misc/spellfix.c.
︙ | ︙ | |||
13 14 15 16 17 18 19 | ** This module implements the spellfix1 VIRTUAL TABLE that can be used ** to search a large vocabulary for close matches. See separate ** documentation files (spellfix1.wiki and editdist3.wiki) for details. */ #include "sqlite3ext.h" SQLITE_EXTENSION_INIT1 | > | | | | | | | | | > | 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | ** This module implements the spellfix1 VIRTUAL TABLE that can be used ** to search a large vocabulary for close matches. See separate ** documentation files (spellfix1.wiki and editdist3.wiki) for details. */ #include "sqlite3ext.h" SQLITE_EXTENSION_INIT1 #ifndef SQLITE_AMALGAMATION # include <string.h> # include <stdio.h> # include <stdlib.h> # include <assert.h> # define ALWAYS(X) 1 # define NEVER(X) 0 typedef unsigned char u8; typedef unsigned short u16; # include <ctype.h> #endif /* ** Character classes for ASCII characters: ** ** 0 '' Silent letters: H W ** 1 'A' Any vowel: A E I O U (Y) ** 2 'B' A bilabial stop or fricative: B F P V W |
︙ | ︙ |
Changes to main.mk.
︙ | ︙ | |||
270 271 272 273 274 275 276 277 278 279 280 281 282 283 | # Extensions to be statically loaded. # TESTSRC += \ $(TOP)/ext/misc/amatch.c \ $(TOP)/ext/misc/closure.c \ $(TOP)/ext/misc/fuzzer.c \ $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/regexp.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/wholenumber.c #TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c #TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c | > | 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 | # Extensions to be statically loaded. # TESTSRC += \ $(TOP)/ext/misc/amatch.c \ $(TOP)/ext/misc/closure.c \ $(TOP)/ext/misc/fuzzer.c \ $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/nextchar.c \ $(TOP)/ext/misc/regexp.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/wholenumber.c #TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c #TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c |
︙ | ︙ |
Changes to src/test1.c.
︙ | ︙ | |||
6043 6044 6045 6046 6047 6048 6049 | } sqlite3_test_control(SQLITE_TESTCTRL_OPTIMIZATIONS, db, mask); return TCL_OK; } typedef struct sqlite3_api_routines sqlite3_api_routines; /* | | | > > | | | > | | | | | | | | | | | | | | > | 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 | } sqlite3_test_control(SQLITE_TESTCTRL_OPTIMIZATIONS, db, mask); return TCL_OK; } typedef struct sqlite3_api_routines sqlite3_api_routines; /* ** load_static_extension DB NAME ... ** ** Load one or more statically linked extensions. */ static int tclLoadStaticExtensionCmd( void * clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ extern int sqlite3_amatch_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_closure_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_fuzzer_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_ieee_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_nextchar_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_regexp_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_spellfix_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_wholenumber_init(sqlite3*,char**,const sqlite3_api_routines*); static const struct { const char *zExtName; int (*pInit)(sqlite3*,char**,const sqlite3_api_routines*); } aExtension[] = { { "amatch", sqlite3_amatch_init }, { "closure", sqlite3_closure_init }, { "fuzzer", sqlite3_fuzzer_init }, { "ieee754", sqlite3_ieee_init }, { "nextchar", sqlite3_nextchar_init }, { "regexp", sqlite3_regexp_init }, { "spellfix", sqlite3_spellfix_init }, { "wholenumber", sqlite3_wholenumber_init }, }; sqlite3 *db; const 
char *zName; int i, j, rc; char *zErrMsg = 0; if( objc<3 ){ Tcl_WrongNumArgs(interp, 1, objv, "DB NAME ..."); return TCL_ERROR; } if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR; for(j=2; j<objc; j++){ zName = Tcl_GetString(objv[j]); for(i=0; i<ArraySize(aExtension); i++){ if( strcmp(zName, aExtension[i].zExtName)==0 ) break; } if( i>=ArraySize(aExtension) ){ Tcl_AppendResult(interp, "no such extension: ", zName, (char*)0); return TCL_ERROR; } rc = aExtension[i].pInit(db, &zErrMsg, 0); if( rc!=SQLITE_OK || zErrMsg ){ Tcl_AppendResult(interp, "initialization of ", zName, " failed: ", zErrMsg, (char*)0); sqlite3_free(zErrMsg); return TCL_ERROR; } } return TCL_OK; } /* ** Register commands with the TCL interpreter. |
︙ | ︙ |
Changes to test/spellfix.test.
︙ | ︙ | |||
12 13 14 15 16 17 18 | set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix spellfix ifcapable !vtab { finish_test ; return } | | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix spellfix ifcapable !vtab { finish_test ; return } load_static_extension db spellfix nextchar set vocab { rabbi rabbit rabbits rabble rabid rabies raccoon raccoons race raced racer racers races racetrack racial racially racing rack racked racket racketeer racketeering racketeers rackets racking racks radar radars radial radially radian radiance radiant radiantly radiate radiated radiates radiating radiation radiations radiator radiators radical radically radicals radices radii radio |
︙ | ︙ | |||
80 81 82 83 84 85 86 87 88 89 90 91 92 93 | } { do_execsql_test 1.2.$tn { SELECT word, matchlen FROM t1 WHERE word MATCH $word ORDER BY score, word LIMIT 5 } $res } do_execsql_test 2.1 { CREATE VIRTUAL TABLE t2 USING spellfix1; INSERT INTO t2 (word, soundslike) VALUES('school', 'skuul'); INSERT INTO t2 (word, soundslike) VALUES('psalm', 'sarm'); SELECT word, matchlen FROM t2 WHERE word MATCH 'sar*' LIMIT 5; } {psalm 4} | > > > > > > > > > > > > > > > > > > > > | 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | } { do_execsql_test 1.2.$tn { SELECT word, matchlen FROM t1 WHERE word MATCH $word ORDER BY score, word LIMIT 5 } $res } # Tests of the next_char function. # do_test 1.10 { db eval { CREATE TABLE vocab(w TEXT PRIMARY KEY); INSERT INTO vocab SELECT word FROM t1; } } {} do_execsql_test 1.11 { SELECT next_char('re','vocab','w'); } {a} do_execsql_test 1.12 { SELECT next_char('r','vocab','w'); } {ae} do_execsql_test 1.13 { SELECT next_char('','vocab','w'); } {r} do_test 1.14 { catchsql {SELECT next_char('','xyzzy','a')} } {1 {no such table: xyzzy}} do_execsql_test 2.1 { CREATE VIRTUAL TABLE t2 USING spellfix1; INSERT INTO t2 (word, soundslike) VALUES('school', 'skuul'); INSERT INTO t2 (word, soundslike) VALUES('psalm', 'sarm'); SELECT word, matchlen FROM t2 WHERE word MATCH 'sar*' LIMIT 5; } {psalm 4} |
︙ | ︙ |