/ Check-in [22491e7b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix problems with combining content= and languageid= in a single fts4 table.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts4-languageid
Files: files | file ages | folders
SHA1: 22491e7bc38aee43819b888e04241cb6a6ef73a3
User & Date: dan 2012-03-02 19:53:02
Context
2012-03-03
18:46
Add the xLanguageid method to sqlite3_fts3_tokenizer versions 1 and greater. Closed-Leaf check-in: f8e9c445 user: dan tags: fts4-languageid
2012-03-02
19:53
Fix problems with combining content= and languageid= in a single fts4 table. check-in: 22491e7b user: dan tags: fts4-languageid
16:18
Add test for FTS 'rebuild' command. check-in: 181bc357 user: dan tags: fts4-languageid
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

796
797
798
799
800
801
802
803


804
805
806
807
808
809



810
811
812
813
814
815
816
....
1211
1212
1213
1214
1215
1216
1217
1218
1219












1220
1221
1222
1223
1224
1225
1226
....
3041
3042
3043
3044
3045
3046
3047


3048
3049
3050
3051
3052
3053
3054
    }else{
      zFree = zFunction = fts3QuoteId(zFunc);
    }
    fts3Appendf(pRc, &zRet, "docid");
    for(i=0; i<p->nColumn; i++){
      fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
    }
    if( p->zLanguageid ) fts3Appendf(pRc, &zRet, ",langid");


    sqlite3_free(zFree);
  }else{
    fts3Appendf(pRc, &zRet, "rowid");
    for(i=0; i<p->nColumn; i++){
      fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
    }



  }
  fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", 
      p->zDb,
      (p->zContentTbl ? p->zContentTbl : p->zName),
      (p->zContentTbl ? "" : "_content")
  );
  return zRet;
................................................................................
    sqlite3_free(zUncompress); 
    zCompress = 0;
    zUncompress = 0;
    if( nCol==0 ){
      sqlite3_free((void*)aCol); 
      aCol = 0;
      rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);
    }
    assert( rc!=SQLITE_OK || nCol>0 );












  }
  if( rc!=SQLITE_OK ) goto fts3_init_out;

  if( nCol==0 ){
    assert( nString==0 );
    aCol[0] = "content";
    nString = 8;
................................................................................
    ** alias for "rowid", use the xRowid() method to obtain the value.
    */
    sqlite3_result_int64(pCtx, pCsr->iPrevId);
  }else if( iCol==p->nColumn ){
    /* The extra column whose name is the same as the table.
    ** Return a blob which is a pointer to the cursor.  */
    sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);


  }else{
    /* The requested column is either a user column (one that contains 
    ** indexed data), or the language-id column.  */
    rc = fts3CursorSeek(0, pCsr);

    if( rc==SQLITE_OK ){
      if( iCol==p->nColumn+2 ){







|
>
>






>
>
>







 







|
|
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>







796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
....
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
....
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
    }else{
      zFree = zFunction = fts3QuoteId(zFunc);
    }
    fts3Appendf(pRc, &zRet, "docid");
    for(i=0; i<p->nColumn; i++){
      fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
    }
    if( p->zLanguageid ){
      fts3Appendf(pRc, &zRet, ", x.%Q", "langid");
    }
    sqlite3_free(zFree);
  }else{
    fts3Appendf(pRc, &zRet, "rowid");
    for(i=0; i<p->nColumn; i++){
      fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
    }
    if( p->zLanguageid ){
      fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid);
    }
  }
  fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", 
      p->zDb,
      (p->zContentTbl ? p->zContentTbl : p->zName),
      (p->zContentTbl ? "" : "_content")
  );
  return zRet;
................................................................................
    sqlite3_free(zUncompress); 
    zCompress = 0;
    zUncompress = 0;
    if( nCol==0 ){
      sqlite3_free((void*)aCol); 
      aCol = 0;
      rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);

      /* If a languageid= option was specified, remove the language id
      ** column from the aCol[] array. */ 
      if( rc==SQLITE_OK && zLanguageid ){
        int j;
        for(j=0; j<nCol; j++){
          if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){
            memmove(&aCol[j], &aCol[j+1], (nCol-j) * sizeof(aCol[0]));
            nCol--;
            break;
          }
        }
      }
    }
  }
  if( rc!=SQLITE_OK ) goto fts3_init_out;

  if( nCol==0 ){
    assert( nString==0 );
    aCol[0] = "content";
    nString = 8;
................................................................................
    ** alias for "rowid", use the xRowid() method to obtain the value.
    */
    sqlite3_result_int64(pCtx, pCsr->iPrevId);
  }else if( iCol==p->nColumn ){
    /* The extra column whose name is the same as the table.
    ** Return a blob which is a pointer to the cursor.  */
    sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
  }else if( iCol==p->nColumn+2 && pCsr->pExpr ){
    sqlite3_result_int64(pCtx, pCsr->iLangid);
  }else{
    /* The requested column is either a user column (one that contains 
    ** indexed data), or the language-id column.  */
    rc = fts3CursorSeek(0, pCsr);

    if( rc==SQLITE_OK ){
      if( iCol==p->nColumn+2 ){

Changes to test/fts4langid.test.

33
34
35
36
37
38
39




40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
...
147
148
149
150
151
152
153





154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
...
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290


















































291
292
293
#   2.1.* - Test that FTS queries only ever return rows associated with
#           the requested language.
#
#   2.2.* - Same as 2.1.*, after an 'optimize' command.
#
#   2.3.* - Same as 2.1.*, after a 'rebuild' command.
#




#   3.* - Test that if one is provided, the tokenizer xLanguageid method
#         is called to configure the tokenizer before tokenizing query
#         or document text.
#
#   4.* - Test the fts4aux table when the associated FTS4 table contains
#         multiple languages.
#
#   5.* - Tests with content= tables. Both where there is a real 
#         underlying content table and where there is not.
#


# Create fts4 table t1 with user columns a and b, passing the
# languageid=lang_id option.
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id);
}

do_execsql_test 1.2 {
  SELECT sql FROM sqlite_master WHERE name = 't1_content';
................................................................................
    lappend y [lindex $ywords [expr ($i / 1000) % 10]]
    lappend y [lindex $ywords [expr ($i / 100)  % 10]]
    lappend y [lindex $ywords [expr ($i / 10)   % 10]]
    lappend y [lindex $ywords [expr ($i / 1)   % 10]]

    $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) }
  }





}

# Store LANGID in the global variable rowid_list_langid, which [rowid_list]
# reads to decide which language's rows to scan. Returns LANGID.
proc rowid_list_set_langid {langid} {
  global rowid_list_langid
  set rowid_list_langid $langid
}
# Return the docids, in ascending order, of the rows of t2 whose "l" column
# equals the language id saved by [rowid_list_set_langid] and whose x or y
# column glob-matches *PATTERN*.
proc rowid_list {pattern} {
  set langid $::rowid_list_langid
  set glob "*$pattern*"
  set matches {}
  db eval {SELECT docid, x, y FROM t2 WHERE l = $langid ORDER BY docid ASC} {
    if {[string match $glob $x] || [string match $glob $y]} {
      lappend matches $docid
    }
  }
  return $matches
}

proc or_merge_list {list1 list2} {
  set res [list]
................................................................................
}

# Test 2.0: call reset_db, then populate the database handle "db" with
# build_multilingual_db_1. The script is expected to return an empty result.
do_test 2.0 { 
  reset_db
  build_multilingual_db_1 db
} {}

# For each language id from 0 to 9: select that language with
# [rowid_list_set_langid], evaluate RES_SCRIPT to obtain the expected docid
# list, run the MATCH query QUERY against t2 restricted to that language,
# and register test 2.TN.LANGID comparing the two lists.
proc do_test_2 {tn query res_script} {
  for {set langid 0} {$langid < 10} {incr langid} {
    rowid_list_set_langid $langid
    set expected [eval $res_script]

    set sql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
    set got [execsql $sql]
    do_test 2.$tn.$langid [list set {} $got] $expected
  }
}

# Run some queries against the freshly built table. Four query shapes are
# used: a single term, a quoted phrase, three unquoted terms (expected
# result built with and_merge_lists from the per-term lists) and an OR of
# two phrases (expected result built with or_merge_lists). do_test_2
# prefixes each test name with "2." and appends the language id.
do_test_2 1.1  {delta}          { rowid_list delta }
do_test_2 1.2  {"zero one two"} { rowid_list "zero one two" }
do_test_2 1.3  {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_2 1.4  {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

# Now try the same tests as above, but after running the 'optimize'
# command on the FTS table. Test 2.2 also expects t2_segdir to hold
# 9 rows once the optimize has run.
#
do_execsql_test 2.2 {
  INSERT INTO t2(t2) VALUES('optimize');
  SELECT count(*) FROM t2_segdir;
} {9}
do_test_2 2.1 {delta}          { rowid_list delta }
do_test_2 2.2 {"zero one two"} { rowid_list "zero one two" }
do_test_2 2.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_2 2.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

# And rebuild: start again from a newly built database, issue the
# 'rebuild' command on t2, then repeat the same four queries.
#
do_test 2.3 { 
  reset_db
  build_multilingual_db_1 db
  execsql { INSERT INTO t2(t2) VALUES('rebuild') }
} {}
do_test_2 3.1 {delta}          { rowid_list delta }
do_test_2 3.2 {"zero one two"} { rowid_list "zero one two" }
do_test_2 3.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_2 3.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}



















































finish_test








>
>
>
>
|



|


<
<
<
<







 







>
>
>
>
>








|

|







 







|







|




|
|
|


|










|
|
|


|










|
|
|


|


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50




51
52
53
54
55
56
57
...
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
...
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
#   2.1.* - Test that FTS queries only ever return rows associated with
#           the requested language.
#
#   2.2.* - Same as 2.1.*, after an 'optimize' command.
#
#   2.3.* - Same as 2.1.*, after a 'rebuild' command.
#
#   3.* - Tests with content= tables. Both where there is a real 
#         underlying content table and where there is not.
#
#
#   4.* - Test that if one is provided, the tokenizer xLanguageid method
#         is called to configure the tokenizer before tokenizing query
#         or document text.
#
#   5.* - Test the fts4aux table when the associated FTS4 table contains
#         multiple languages.
#





# Create fts4 table t1 with user columns a and b, passing the
# languageid=lang_id option.
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id);
}

do_execsql_test 1.2 {
  SELECT sql FROM sqlite_master WHERE name = 't1_content';
................................................................................
    lappend y [lindex $ywords [expr ($i / 1000) % 10]]
    lappend y [lindex $ywords [expr ($i / 100)  % 10]]
    lappend y [lindex $ywords [expr ($i / 10)   % 10]]
    lappend y [lindex $ywords [expr ($i / 1)   % 10]]

    $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) }
  }

  $db eval {
    CREATE TABLE data(x, y, l);
    INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2;
  }
}

# Store LANGID in the global variable rowid_list_langid, which [rowid_list]
# reads to decide which language's rows to scan. Returns LANGID.
proc rowid_list_set_langid {langid} {
  global rowid_list_langid
  set rowid_list_langid $langid
}
# Return the rowids, in ascending order, of the rows of table "data" whose
# "l" column equals the language id saved by [rowid_list_set_langid] and
# whose x or y column glob-matches *PATTERN*.
proc rowid_list {pattern} {
  set langid $::rowid_list_langid
  set glob "*$pattern*"
  set matches {}
  db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} {
    if {[string match $glob $x] || [string match $glob $y]} {
      lappend matches $rowid
    }
  }
  return $matches
}

proc or_merge_list {list1 list2} {
  set res [list]
................................................................................
}

# Test 2.0: call reset_db, then populate the database handle "db" with
# build_multilingual_db_1. The script is expected to return an empty result.
do_test 2.0 { 
  reset_db
  build_multilingual_db_1 db
} {}

# For each language id from 0 to 9: select that language with
# [rowid_list_set_langid], evaluate RES_SCRIPT to obtain the expected docid
# list, run the MATCH query QUERY against t2 restricted to that language,
# and register test TN.LANGID comparing the two lists.
proc do_test_query1 {tn query res_script} {
  for {set langid 0} {$langid < 10} {incr langid} {
    rowid_list_set_langid $langid
    set expected [eval $res_script]

    set sql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
    set got [execsql $sql]
    do_test $tn.$langid [list set {} $got] $expected
  }
}

# Run some queries against the freshly built table. Four query shapes are
# used: a single term, a quoted phrase, three unquoted terms (expected
# result built with and_merge_lists from the per-term lists) and an OR of
# two phrases (expected result built with or_merge_lists). do_test_query1
# appends the language id to each test name.
do_test_query1 2.1.1  {delta}          { rowid_list delta }
do_test_query1 2.1.2  {"zero one two"} { rowid_list "zero one two" }
do_test_query1 2.1.3  {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 2.1.4  {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

# Now try the same tests as above, but after running the 'optimize'
# command on the FTS table. Test 2.2 also expects t2_segdir to hold
# 9 rows once the optimize has run.
#
do_execsql_test 2.2 {
  INSERT INTO t2(t2) VALUES('optimize');
  SELECT count(*) FROM t2_segdir;
} {9}
do_test_query1 2.2.1 {delta}          { rowid_list delta }
do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 2.2.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 2.2.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

# And rebuild: start again from a newly built database, issue the
# 'rebuild' command on t2, then repeat the same four queries.
#
do_test 2.3 { 
  reset_db
  build_multilingual_db_1 db
  execsql { INSERT INTO t2(t2) VALUES('rebuild') }
} {}
do_test_query1 2.3.1 {delta}          { rowid_list delta }
do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 2.3.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 2.3.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

#-------------------------------------------------------------------------
# Test cases 3.*
#
# Combine the content= and languageid= options in one fts4 table.
#
# 3.0: rebuild the multilingual database, copy every row of t2 (docid, l,
# x, y) into the ordinary table t3_data, then drop the FTS table t2.
#
do_test 3.0 {
  reset_db
  build_multilingual_db_1 db
  execsql {
    CREATE TABLE t3_data(l, x, y);
    INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2;
    DROP TABLE t2;
  }
} {}

# 3.1: recreate t2 with content=t3_data and languageid=l, declaring no
# columns of its own, and issue the 'rebuild' command on it.
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l);
  INSERT INTO t2(t2) VALUES('rebuild');
}

# 3.1.*: the same four query shapes used by the 2.* tests.
do_test_query1 3.1.1 {delta}          { rowid_list delta }
do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 3.1.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 3.1.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

# 3.2.1: recreate t2 with explicit columns x and y and a content= option
# naming "nosuchtable", a table that is not created in the visible part
# of this script.
do_execsql_test 3.2.1 {
  DROP TABLE t2;
  CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable);
}

# 3.2.2: insert the rows directly into t2, taking them from t3_data.
do_execsql_test 3.2.2 {
  INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data;
}

# 3.2.3: t3_data is no longer needed once its rows have been copied.
do_execsql_test 3.2.3 {
  DROP TABLE t3_data;
}

# 3.3.*: repeat the four query shapes with t3_data gone.
do_test_query1 3.3.1 {delta}          { rowid_list delta }
do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 3.3.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 3.3.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}



finish_test