SQLite

Check-in [fbcf2d75cd]

Overview
Comment: Add a test that calls fts2_tokenizer() with an argument set via C code. (CVS 4118)
SHA1: fbcf2d75cd2b88d175c122477aa483f0771870e5
User & Date: danielk1977 2007-06-25 12:05:40.000
Context
2007-06-25 12:49  Allow the use of MySQL-style quoting in the FTS modules. Ticket #2446. (CVS 4119) (check-in: 3be2a6d1c3 user: drh tags: trunk)
2007-06-25 12:05  Add a test that calls fts2_tokenizer() with an argument set via C code. (CVS 4118) (check-in: fbcf2d75cd user: danielk1977 tags: trunk)
2007-06-25 11:24  Add some tests for the fts2 icu tokenizer. (CVS 4117) (check-in: b79ced3e0a user: danielk1977 tags: trunk)
Changes
Changes to ext/fts2/README.tokenizers.
@@ -43,16 +43,16 @@
 
   Where <tokenizer-name> is a string identifying the tokenizer and
   <sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
   structure encoded as an SQL blob. If the second argument is present,
   it is registered as tokenizer <tokenizer-name> and a copy of it
   returned. If only one argument is passed, a pointer to the tokenizer
   implementation currently registered as <tokenizer-name> is returned,
-  encoded as a blob. Or, if no such tokenizer exists, an SQL NULL value
-  is returned.
+  encoded as a blob. Or, if no such tokenizer exists, an SQL exception
+  (error) is raised.
 
   SECURITY: If the fts2 extension is used in an environment where potentially
     malicious users may execute arbitrary SQL (i.e. gears), they should be
     prevented from invoking the fts2_tokenizer() function, possibly using the
     authorisation callback.
 
   See "Sample code" below for an example of calling the fts2_tokenizer()
@@ -82,43 +82,53 @@
   fts2_tokenizer.h).
 
 4. Sample code.
 
   The following two code samples illustrate the way C code should invoke
   the fts2_tokenizer() scalar function:
 
-  int registerTokenizer(sqlite3 *db, char *zName, sqlite3_tokenizer_module *p){
-    sqlite3_stmt *pStmt;
-    const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
-
-    rc = sqlite3_prepare_v2(db, zSql, &pStmt);
-    if( rc!=SQLITE_OK ){
-      return rc;
-    }
-
-    sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
-    sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
-    sqlite3_step(pStmt);
-
-    return sqlite3_finalize(pStmt);
-  }
-
-  int queryTokenizer(sqlite3 *db, char *zName, sqlite3_tokenizer_module **pp){
-    sqlite3_stmt *pStmt;
-    const char zSql[] = "SELECT fts2_tokenizer(?)";
-
-    *pp = 0;
-    rc = sqlite3_prepare_v2(db, zSql, &pStmt);
-    if( rc!=SQLITE_OK ){
-      return rc;
-    }
-
-    sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
-    if( SQLITE_ROW==sqlite3_step(pStmt) ){
-      if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
-        memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
-      }
-    }
-
-    return sqlite3_finalize(pStmt);
-  }
+      int registerTokenizer(
+        sqlite3 *db,
+        char *zName,
+        const sqlite3_tokenizer_module *p
+      ){
+        int rc;
+        sqlite3_stmt *pStmt;
+        const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
+
+        rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+        if( rc!=SQLITE_OK ){
+          return rc;
+        }
+
+        sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+        sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
+        sqlite3_step(pStmt);
+
+        return sqlite3_finalize(pStmt);
+      }
+
+      int queryTokenizer(
+        sqlite3 *db,
+        char *zName,
+        const sqlite3_tokenizer_module **pp
+      ){
+        int rc;
+        sqlite3_stmt *pStmt;
+        const char zSql[] = "SELECT fts2_tokenizer(?)";
+
+        *pp = 0;
+        rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+        if( rc!=SQLITE_OK ){
+          return rc;
+        }
+
+        sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+        if( SQLITE_ROW==sqlite3_step(pStmt) ){
+          if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
+            memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
+          }
+        }
+
+        return sqlite3_finalize(pStmt);
+      }
 
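
As a usage illustration only (not part of this check-in; the wrapper name
aliasSimpleTokenizer is hypothetical), the two helpers above can be combined to
fetch the built-in "simple" tokenizer and register it again under a second name,
which is essentially the round trip the new fts2_tokenizer_internal_test() function
in fts2_tokenizer.c performs. The sqlite3_tokenizer_module type comes from
fts2_tokenizer.h, and the fts2 extension must be compiled into the application.

  #include "sqlite3.h"
  #include "fts2_tokenizer.h"

  /* Prototypes for the two helpers from the sample code above. */
  int registerTokenizer(sqlite3*, char*, const sqlite3_tokenizer_module*);
  int queryTokenizer(sqlite3*, char*, const sqlite3_tokenizer_module**);

  /* Register the module currently known as "simple" under the alias
  ** "simple2" on connection db. Returns an SQLite error code. */
  static int aliasSimpleTokenizer(sqlite3 *db){
    int rc;
    const sqlite3_tokenizer_module *p = 0;

    rc = queryTokenizer(db, "simple", &p);      /* look up the module pointer */
    if( rc!=SQLITE_OK ){
      return rc;                                /* e.g. no such tokenizer */
    }
    return registerTokenizer(db, "simple2", p); /* register it under a new name */
  }
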

Changes to ext/fts2/fts2_tokenizer.c.
@@ -69,14 +69,15 @@
 
   sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
 }
 
 #ifdef SQLITE_TEST
 
 #include <tcl.h>
+#include <string.h>
 
 /*
 ** Implementation of a special SQL scalar function for testing tokenizers 
 ** designed to be used in concert with the Tcl testing framework. This
 ** function must be called with two arguments:
 **
 **   SELECT <function-name>(<key-name>, <input-string>);
@@ -183,14 +184,113 @@
   if( zErr ){
     sqlite3_result_error(context, zErr, -1);
   }else{
     sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
   }
   Tcl_DecrRefCount(pRet);
 }
+
+static
+int registerTokenizer(
+  sqlite3 *db, 
+  char *zName, 
+  const sqlite3_tokenizer_module *p
+){
+  int rc;
+  sqlite3_stmt *pStmt;
+  const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
+
+  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+  sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
+  sqlite3_step(pStmt);
+
+  return sqlite3_finalize(pStmt);
+}
+
+static
+int queryTokenizer(
+  sqlite3 *db, 
+  char *zName,  
+  const sqlite3_tokenizer_module **pp
+){
+  int rc;
+  sqlite3_stmt *pStmt;
+  const char zSql[] = "SELECT fts2_tokenizer(?)";
+
+  *pp = 0;
+  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+  if( SQLITE_ROW==sqlite3_step(pStmt) ){
+    if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
+      memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
+    }
+  }
+
+  return sqlite3_finalize(pStmt);
+}
+
+void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
+
+/*
+** Implementation of the scalar function fts2_tokenizer_internal_test().
+** This function is used for testing only, it is not included in the
+** build unless SQLITE_TEST is defined.
+**
+** The purpose of this is to test that the fts2_tokenizer() function
+** can be used as designed by the C-code in the queryTokenizer and
+** registerTokenizer() functions above. These two functions are repeated
+** in the README.tokenizer file as an example, so it is important to
+** test them.
+**
+** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
+** function with no arguments. An assert() will fail if a problem is
+** detected. i.e.:
+**
+**     SELECT fts2_tokenizer_internal_test();
+**
+*/
+static void intTestFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  int rc;
+  const sqlite3_tokenizer_module *p1;
+  const sqlite3_tokenizer_module *p2;
+  sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
+
+  /* Test the query function */
+  sqlite3Fts2SimpleTokenizerModule(&p1);
+  rc = queryTokenizer(db, "simple", &p2);
+  assert( rc==SQLITE_OK );
+  assert( p1==p2 );
+  rc = queryTokenizer(db, "nosuchtokenizer", &p2);
+  assert( rc==SQLITE_ERROR );
+  assert( p2==0 );
+  assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
+
+  /* Test the storage function */
+  rc = registerTokenizer(db, "nosuchtokenizer", p1);
+  assert( rc==SQLITE_OK );
+  rc = queryTokenizer(db, "nosuchtokenizer", &p2);
+  assert( rc==SQLITE_OK );
+  assert( p2==p1 );
+
+  sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
+}
+
 #endif
 
 /*
 ** Set up SQL objects in database db used to access the contents of
 ** the hash table pointed to by argument pHash. The hash table must
 ** been initialised to use string keys, and to take a private copy 
 ** of the key when a value is inserted. i.e. by a call similar to:
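
The comment above shows the test being driven from SQL. As an illustration only
(the wrapper name runFts2TokenizerSelfTest is hypothetical and not part of this
check-in), the same statement can be issued from C with sqlite3_exec(); the build
must define SQLITE_TEST so that fts2_tokenizer_internal_test() is registered.

  #include "sqlite3.h"

  /* Evaluate the internal test function once. Failures inside the test
  ** trip assert() statements rather than returning an error code. */
  static int runFts2TokenizerSelfTest(sqlite3 *db){
    char *zErrMsg = 0;
    int rc = sqlite3_exec(db, "SELECT fts2_tokenizer_internal_test()",
                          0, 0, &zErrMsg);
    sqlite3_free(zErrMsg);   /* non-NULL only when rc!=SQLITE_OK */
    return rc;
  }
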
@@ -209,29 +309,35 @@
 int sqlite3Fts2InitHashTable(
   sqlite3 *db, 
   fts2Hash *pHash, 
   const char *zName
 ){
   int rc;
   void *p = (void *)pHash;
+  void *pdb = (void *)db;
   const int any = SQLITE_ANY;
   char *zTest = 0;
+  char *zTest2 = 0;
 
 #ifdef SQLITE_TEST
   zTest = sqlite3_mprintf("%s_test", zName);
-  if( !zTest ){
-    return SQLITE_NOMEM;
+  zTest2 = sqlite3_mprintf("%s_internal_test", zName);
+  if( !zTest || !zTest2 ){
+    rc = SQLITE_NOMEM;
   }
 #endif
 
-  if( (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
+  if( rc!=SQLITE_OK
+   || (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
    || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
 #ifdef SQLITE_TEST
    || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
    || (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0))
+   || (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0))
 #endif
   );
 
   sqlite3_free(zTest);
+  sqlite3_free(zTest2);
   return rc;
 }
 

Changes to test/fts2token.test.
@@ -8,15 +8,15 @@
 #    May you share freely, never taking more than you give.
 #
 #*************************************************************************
 # This file implements regression tests for SQLite library. The focus 
 # of this script is testing the pluggable tokeniser feature of the 
 # FTS2 module.
 #
-# $Id: fts2token.test,v 1.2 2007/06/25 11:24:39 danielk1977 Exp $
+# $Id: fts2token.test,v 1.3 2007/06/25 12:05:40 danielk1977 Exp $
 #
 
 set testdir [file dirname $argv0]
 source $testdir/tester.tcl
 
 # If SQLITE_ENABLE_FTS2 is defined, omit this file.
 ifcapable !fts2 {
@@ -162,9 +162,13 @@
   append output "2 then then "
   append output "3 [string tolower $longtoken] $longtoken"
 
   do_icu_test fts2token-4.6 MiddleOfTheOcean  $input $output
   do_icu_test fts2token-4.7 th_TH  $input $output
   do_icu_test fts2token-4.8 en_US  $input $output
 }
 
+do_test fts2token-internal {
+  execsql { SELECT fts2_tokenizer_internal_test() }
+} {ok}
+
 finish_test