SQLite: Check-in [936b06aaa8]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

Comment:	Add a Porter stemmer option to the FTS1 module. (CVS 3452)
Downloads:	Tarball \| ZIP archive
Timelines:	family \| ancestors \| descendants \| both \| trunk
Files:	files \| file ages \| folders
SHA1:	936b06aaa8133e83104de87e03dc94e286a31f86
User & Date:	drh 2006-10-01 18:41:20.000

Context

2006-10-01
18:58		Remove one non-working test case from the Porter stemmer tests and add an acknowledgement for the source of the test data (Martin Porter himself.) (CVS 3453) (check-in: 1a2df2a61b user: drh tags: trunk)
18:41		Add a Porter stemmer option to the FTS1 module. (CVS 3452) (check-in: 936b06aaa8 user: drh tags: trunk)
2006-09-29
14:01		Make sure memory does not leak when patching up column names so that they are unique in a join or view. Tickets #1952 and #2002. (CVS 3451) (check-in: fcde639119 user: drh tags: trunk)

Changes

Changes to ext/fts1/fts1.c.

Added ext/fts1/fts1_porter.c.

Changes to ext/fts1/fts1_tokenizer.h.

Changes to ext/fts1/fts1_tokenizer1.c.

Added test/fts1d.test.

Added test/fts1porter.test.

more than 10,000 changes

︙			︙
48 49 50 51 52 53 54 55 56 57 58 59 60 61	/* Forward declaration */ static const sqlite3_tokenizer_module simpleTokenizerModule; static int isDelim(simple_tokenizer *t, unsigned char c){ return c<0x80 && t->delim[c]; } static int simpleCreate( int argc, const char * const *argv, sqlite3_tokenizer **ppTokenizer ){ simple_tokenizer *t; t = (simple_tokenizer *) calloc(sizeof(simple_tokenizer), 1);	> > >	48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64	/* Forward declaration */ static const sqlite3_tokenizer_module simpleTokenizerModule; static int isDelim(simple_tokenizer *t, unsigned char c){ return c<0x80 && t->delim[c]; } /* ** Create a new tokenizer instance. */ static int simpleCreate( int argc, const char * const *argv, sqlite3_tokenizer **ppTokenizer ){ simple_tokenizer *t; t = (simple_tokenizer *) calloc(sizeof(simple_tokenizer), 1);
︙			︙
83 84 85 86 87 88 89 90 91 92 93 94 95 ~~96 97 98~~ 99 100 101 102 103 104 105	} } ppTokenizer = &t->base; return SQLITE_OK; } static int simpleDestroy(sqlite3_tokenizer pTokenizer){ free(pTokenizer); return SQLITE_OK; } static int simpleOpen( ~~sqlite3_tokenizer pTokenizer, const char pInput, int nBytes, sqlite3_tokenizer_cursor *ppCursor~~ ){ simple_tokenizer_cursor c; c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor)); c->pInput = pInput; if( pInput==0 ){ c->nBytes = 0;	> > > > > > > > > \| \| \|	86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117	} } ppTokenizer = &t->base; return SQLITE_OK; } / ** Destroy a tokenizer / static int simpleDestroy(sqlite3_tokenizer pTokenizer){ free(pTokenizer); return SQLITE_OK; } /* Prepare to begin tokenizing a particular string. The input string to be tokenized is pInput[0..nBytes-1]. A cursor used to incrementally tokenize this string is returned in ppCursor. / static int simpleOpen( sqlite3_tokenizer pTokenizer, / The tokenizer / const char pInput, int nBytes, /* String to be tokenized / sqlite3_tokenizer_cursor ppCursor / OUT: Tokenization cursor / ){ simple_tokenizer_cursor c; c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor)); c->pInput = pInput; if( pInput==0 ){ c->nBytes = 0;
︙			︙
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 ~~128 129~~ ~~130~~ 131 132 133 134 135 136 137	c->pToken = NULL; /* no space allocated, yet. / c->nTokenAllocated = 0; ppCursor = &c->base; return SQLITE_OK; } static int simpleClose(sqlite3_tokenizer_cursor pCursor){ simple_tokenizer_cursor c = (simple_tokenizer_cursor ) pCursor; free(c->pToken); free(c); return SQLITE_OK; } static int simpleNext( ~~sqlite3_tokenizer_cursor pCursor, const char *ppToken, int ~~pnBytes,~~~~ ~~int piStartOffset, in~~t piEndO~~ffset~~, int~~ ~~piPosition~~~~ ){ simple_tokenizer_cursor c = (simple_tokenizer_cursor ) pCursor; simple_tokenizer t = (simple_tokenizer ) pCursor->pTokenizer; unsigned char p = (unsigned char *)c->pInput; while( c->iOffset<c->nBytes ){ int iStartOffset;	> > > > > > > > \| \| > \| > >	125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160	c->pToken = NULL; /* no space allocated, yet. / c->nTokenAllocated = 0; ppCursor = &c->base; return SQLITE_OK; } /* Close a tokenization cursor previously opened by a call to simpleOpen() above. / static int simpleClose(sqlite3_tokenizer_cursor pCursor){ simple_tokenizer_cursor c = (simple_tokenizer_cursor ) pCursor; free(c->pToken); free(c); return SQLITE_OK; } /* Extract the next token from a tokenization cursor. The cursor must have been opened by a prior call to simpleOpen(). 
/ static int simpleNext( sqlite3_tokenizer_cursor pCursor, /* Cursor returned by simpleOpen / const char ppToken, / OUT: ppToken is the token text / int pnBytes, / OUT: Number of bytes in token / int piStartOffset, /* OUT: Starting offset of token / int piEndOffset, /* OUT: Ending offset of token / int piPosition /* OUT: Position integer of token / ){ simple_tokenizer_cursor c = (simple_tokenizer_cursor ) pCursor; simple_tokenizer t = (simple_tokenizer ) pCursor->pTokenizer; unsigned char p = (unsigned char *)c->pInput; while( c->iOffset<c->nBytes ){ int iStartOffset;
︙			︙
146 147 148 149 150 151 152 ~~153~~ 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189	while( c->iOffset<c->nBytes && !isDelim(t, p[c->iOffset]) ){ c->iOffset++; } if( c->iOffset>iStartOffset ){ int i, n = c->iOffset-iStartOffset; if( n>c->nTokenAllocated ){ ~~c->pToken = realloc(c->pToken, n);~~ } for(i=0; i<n; i++){ /* TODO(shess) This needs expansion to handle UTF-8 ** case-insensitivity. / unsigned char ch = p[iStartOffset+i]; c->pToken[i] = ch<0x80 ? tolower(ch) : ch; } ppToken = c->pToken; pnBytes = n; piStartOffset = iStartOffset; piEndOffset = c->iOffset; piPosition = c->iToken++; return SQLITE_OK; } } return SQLITE_DONE; } static const sqlite3_tokenizer_module simpleTokenizerModule = { 0, simpleCreate, simpleDestroy, simpleOpen, simpleClose, simpleNext, }; void sqlite3Fts1SimpleTokenizerModule( sqlite3_tokenizer_module const*ppModule ){ ppModule = &simpleTokenizerModule; } #endif /* !defined(SQLITE_CORE) \|\| defined(SQLITE_ENABLE_FTS1) */	> \| > > > > > > >	169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220	while( c->iOffset<c->nBytes && !isDelim(t, p[c->iOffset]) ){ c->iOffset++; } if( c->iOffset>iStartOffset ){ int i, n = c->iOffset-iStartOffset; if( n>c->nTokenAllocated ){ c->nTokenAllocated = n+20; c->pToken = realloc(c->pToken, c->nTokenAllocated); } for(i=0; i<n; i++){ /* TODO(shess) This needs expansion to handle UTF-8 ** case-insensitivity. / unsigned char ch = p[iStartOffset+i]; c->pToken[i] = ch<0x80 ? 
tolower(ch) : ch; } ppToken = c->pToken; pnBytes = n; piStartOffset = iStartOffset; piEndOffset = c->iOffset; piPosition = c->iToken++; return SQLITE_OK; } } return SQLITE_DONE; } /* ** The set of routines that implement the simple tokenizer / static const sqlite3_tokenizer_module simpleTokenizerModule = { 0, simpleCreate, simpleDestroy, simpleOpen, simpleClose, simpleNext, }; / Allocate a new simple tokenizer. Return a pointer to the new tokenizer in ppModule / void sqlite3Fts1SimpleTokenizerModule( sqlite3_tokenizer_module const*ppModule ){ ppModule = &simpleTokenizerModule; } #endif /* !defined(SQLITE_CORE) \|\| defined(SQLITE_ENABLE_FTS1) */