Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Begin adding fts5 tests involving synonyms and detail=none/col tables. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5-offsets |
Files: | files | file ages | folders |
SHA1: |
b3e6f15ec2d9a834e2c80b91ffd70975 |
User & Date: | dan 2016-01-08 17:21:18.901 |
Context
2016-01-11
| ||
17:30 | Fix bugs in fts5 synonym processing for detail=col and other modes. (check-in: 0e3c545423 user: dan tags: fts5-offsets) | |
2016-01-08
| ||
17:21 | Begin adding fts5 tests involving synonyms and detail=none/col tables. (check-in: b3e6f15ec2 user: dan tags: fts5-offsets) | |
07:53 | Fix fts5vocab.test so that it works with detail=none tables. (check-in: d9135cc723 user: dan tags: fts5-offsets) | |
Changes
Changes to ext/fts5/test/fts5_common.tcl.
︙ | ︙ | |||
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 | # <phrase number> . <column number> . <token offset> # # Options: # # -near N (NEAR distance. Default 10) # -col C (List of column indexes to match against) # -pc VARNAME (variable in caller frame to use for phrase numbering) # proc nearset {aCol args} { set O(-near) 10 set O(-col) {} set O(-pc) "" set nOpt [lsearch -exact $args --] if {$nOpt<0} { error "no -- option" } foreach {k v} [lrange $args 0 [expr $nOpt-1]] { if {[info exists O($k)]==0} { error "unrecognized option $k" } set O($k) $v } if {$O(-pc) == ""} { set counter 0 } else { upvar $O(-pc) counter } | > > > > > > > > > | < < > | | < > > > > | > > > > > > > > > > > > > > > > > > | | < < | > | | | | < | < < | < | < < | < < | | 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 | # <phrase number> . <column number> . <token offset> # # Options: # # -near N (NEAR distance. Default 10) # -col C (List of column indexes to match against) # -pc VARNAME (variable in caller frame to use for phrase numbering) # -dict VARNAME (array in caller frame to use for synonyms) # proc nearset {aCol args} { # Process the command line options. # set O(-near) 10 set O(-col) {} set O(-pc) "" set O(-dict) "" set nOpt [lsearch -exact $args --] if {$nOpt<0} { error "no -- option" } # Set $lPhrase to be a list of phrases. $nPhrase its length. 
set lPhrase [lrange $args [expr $nOpt+1] end] set nPhrase [llength $lPhrase] foreach {k v} [lrange $args 0 [expr $nOpt-1]] { if {[info exists O($k)]==0} { error "unrecognized option $k" } set O($k) $v } if {$O(-pc) == ""} { set counter 0 } else { upvar $O(-pc) counter } if {$O(-dict)!=""} { upvar $O(-dict) aDict } for {set j 0} {$j < [llength $aCol]} {incr j} { for {set i 0} {$i < $nPhrase} {incr i} { set A($j,$i) [list] } } # Loop through each column of the current row. for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { # If there is a column filter, test whether this column is excluded. If # so, skip to the next iteration of this loop. Otherwise, set zCol to the # column value and nToken to the number of tokens that comprise it. if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue set zCol [lindex $aCol $iCol] set nToken [llength $zCol] # Each iteration of the following loop searches a substring of the # column value for phrase matches. The last token of the substring # is token $iLast of the column value. The first token is: # # iFirst = ($iLast - $O(-near) - 1) # # where $sz is the length of the phrase being searched for. A phrase # counts as matching the substring if its first token lies on or before # $iLast and its last token on or after $iFirst. # # For example, if the query is "NEAR(a+b c, 2)" and the column value: # # "x x x x A B x x C x" # 0 1 2 3 4 5 6 7 8 9" # # when (iLast==8 && iFirst=5) the range will contain both phrases and # so both instances can be added to the output poslists. # set iLast [expr $O(-near) >= $nToken ? 
$nToken - 1 : $O(-near)] for { } {$iLast < $nToken} {incr iLast} { catch { array unset B } for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { set p [lindex $lPhrase $iPhrase] set nPm1 [expr {[llength $p] - 1}] set iFirst [expr $iLast - $O(-near) - [llength $p]] for {set i $iFirst} {$i <= $iLast} {incr i} { set lCand [lrange $zCol $i [expr $i+$nPm1]] set bMatch 1 foreach tok $p term $lCand { if {[nearset_match aDict $tok $term]==0} { set bMatch 0 ; break } } if {$bMatch} { lappend B($iPhrase) $i } } if {![info exists B($iPhrase)]} break } if {$iPhrase==$nPhrase} { for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)] set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)] } |
︙ | ︙ | |||
289 290 291 292 293 294 295 296 297 298 299 300 301 302 | } incr counter } #puts "$aCol -> $res" sort_poslist $res } #------------------------------------------------------------------------- # Usage: # # sort_poslist LIST # # Sort a position list of the type returned by command [nearset] | > > > > > > > > > > > > | 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 | } incr counter } #puts "$aCol -> $res" sort_poslist $res } proc nearset_match {aDictVar tok term} { if {[string match $tok $term]} { return 1 } upvar $aDictVar aDict if {[info exists aDict($tok)]} { foreach s $aDict($tok) { if {[string match $s $term]} { return 1 } } } return 0; } #------------------------------------------------------------------------- # Usage: # # sort_poslist LIST # # Sort a position list of the type returned by command [nearset] |
︙ | ︙ | |||
401 402 403 404 405 406 407 | lappend res $cand } set res [lsort -command fts5_collist_elem_compare -unique $res] return $res } # Comparison function used by fts5_poslist2collist to sort collist entries. | < | > > > > > > | | > > > > > > > > | < < < < | < < > > > | < < < < < < < > > > | | 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 | lappend res $cand } set res [lsort -command fts5_collist_elem_compare -unique $res] return $res } # Comparison function used by fts5_poslist2collist to sort collist entries. proc fts5_collist_elem_compare {a b} { foreach {a1 a2} [split $a .] {} foreach {b1 b2} [split $b .] {} if {$a1==$b1} { return [expr $a2 - $b2] } return [expr $a1 - $b1] } #-------------------------------------------------------------------------- # Construct and return a tcl list equivalent to that returned by the SQL # query executed against database handle [db]: # # SELECT # rowid, # fts5_test_poslist($tbl), # fts5_test_collist($tbl) # FROM $tbl('$expr') # ORDER BY rowid $order; # proc fts5_query_data {expr tbl {order ASC} {aDictVar ""}} { # Figure out the set of columns in the FTS5 table. This routine does # not handle tables with UNINDEXED columns, but if it did, it would # have to be here. 
db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) } set d "" if {$aDictVar != ""} { upvar $aDictVar aDict set d aDict } set cols "" foreach e $lCols { append cols ", '$e'" } set tclexpr [db one [subst -novar { SELECT fts5_expr_tcl( $expr, 'nearset $cols -dict $d -pc ::pc' [set cols] ) }]] set res [list] db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x { set cols [list] foreach col $lCols { lappend cols $x($col) } set ::pc 0 set rowdata [eval $tclexpr] if {$rowdata != ""} { lappend res $x(rowid) $rowdata [fts5_poslist2collist $rowdata] } } set res } #------------------------------------------------------------------------- # Similar to [fts5_query_data], but omit the collist field. # proc fts5_poslist_data {expr tbl {order ASC} {aDictVar ""}} { set res [list] if {$aDictVar!=""} { upvar $aDictVar aDict set dict aDict } else { set dict "" } foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] { lappend res $rowid $poslist } set res } #------------------------------------------------------------------------- # # This command will only work inside a [foreach_detail_mode] block. It tests # whether or not expression $expr run on FTS5 table $tbl is supported by # the current mode. If so, 1 is returned. If not, 0. # # detail=full (all queries supported) # detail=col (all but phrase queries and NEAR queries) # detail=none (all but phrase queries, NEAR queries, and column filters) # proc fts5_expr_ok {expr tbl} { if {![detail_is_full]} { set nearset "nearset_rc" if {[detail_is_col]} { set nearset "nearset_rf" } set ::expr_not_ok 0 |
︙ | ︙ | |||
500 501 502 503 504 505 506 507 | }]] eval $tclexpr if {$::expr_not_ok} { return 0 } } return 1 } | > > > > > > > > > > > > > > > > > > | 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 | }]] eval $tclexpr if {$::expr_not_ok} { return 0 } } return 1 } # Helper for [fts5_expr_ok] proc nearset_rf {aCol args} { set idx [lsearch -exact $args --] if {$idx != [llength $args]-2 || [llength [lindex $args end]]!=1} { set ::expr_not_ok 1 } list } # Helper for [fts5_expr_ok] proc nearset_rc {aCol args} { nearset_rf $aCol {*}$args if {[lsearch $args -col]>=0} { set ::expr_not_ok 1 } list } |
Added ext/fts5/test/fts5synonym2.test.
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on custom tokenizers that support synonyms.
#

source [file join [file dirname [info script]] fts5_common.tcl]

# Use a prefix that matches this file's name (fts5synonym2.test) so that
# test case names reported by this script are attributable to it.
set testprefix fts5synonym2

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Code for a simple Tcl tokenizer that supports synonyms at query time.
#
# Each element of the list iterated over below is a group of mutually
# synonymous tokens. For every token $s of a group, array entry ::syn($s)
# is set to the list of all *other* tokens in the same group. For example:
#
#     ::syn(one) -> {1 i}
#     ::syn(1)   -> {one i}
#
# Note that SYNDICT is only the (scalar) loop variable holding the current
# group. The synonym dictionary itself is the array ::syn.
#
foreach SYNDICT {
  {zero  0}
  {one   1 i}
  {two   2 ii}
  {three 3 iii}
  {four  4 iv}
  {five  5 v}
  {six   6 vi}
  {seven 7 vii}
  {eight 8 viii}
  {nine  9 ix}
} {
  foreach s $SYNDICT {
    set o [list]
    foreach x $SYNDICT {if {$x!=$s} {lappend o $x}}
    set ::syn($s) $o
  }
}

# Tokenizer callback. Emit one token for each {word start end} triple
# returned by [fts5_tokenize_split]. When tokenizing query text
# ($tflags=="query"), also emit each synonym of the word recorded in ::syn
# using the "-colo" option and the same start/end offsets as the original
# token (presumably "-colo" marks the token as colocated - confirm against
# the sqlite3_fts5_token implementation).
#
# Words that have no entry in ::syn are emitted without synonyms rather
# than raising an error.
#
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags == "query" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd
      }
    }
  }
}

# Tokenizer constructor. Ignores its arguments and returns the name of the
# tokenize callback.
#
proc tcl_create {args} {
  return "tcl_tokenize"
}

#
# End of tokenizer code.
#-------------------------------------------------------------------------

foreach_detail_mode $testprefix {
  sqlite3_fts5_create_tokenizer db tcl tcl_create
  fts5_aux_test_functions db

  do_execsql_test 1.0 {
    CREATE VIRTUAL TABLE ss USING fts5(a, b, tokenize=tcl, detail=%DETAIL%);
    INSERT INTO ss VALUES('5 5 five seven 3 seven i', '2 1 5 0 two 1 i');
    INSERT INTO ss VALUES('six ix iii 7 i vii iii', 'one seven nine 4 9 1 vi');
    INSERT INTO ss VALUES('6 viii i five six zero seven', '5 v iii iv iv 3');
    INSERT INTO ss VALUES('9 ii six 8 1 6', 'six 4 iv iv 7');
    INSERT INTO ss VALUES('1 5 4 eight ii iv iii', 'nine 2 eight ix v vii');
    INSERT INTO ss VALUES('one 7 seven six 2 two', '1 2 four 7 4 3 4');
    INSERT INTO ss VALUES('eight iv 4 nine vii six 1', '5 6 v one zero 4');
    INSERT INTO ss VALUES('v 9 8 iii 4', '9 4 seven two vi vii');
    INSERT INTO ss VALUES('3 ix two 9 0 nine i', 'five ii nine two viii i five');
    INSERT INTO ss VALUES('six iii 9 two eight 2', 'nine i nine vii nine');
    INSERT INTO ss VALUES('6 three zero seven vii five', '8 vii ix 0 7 seven');
    INSERT INTO ss VALUES('8 vii 8 7 3 4', 'eight iii four viii nine iv three');
    INSERT INTO ss VALUES('4 v 7 two 0 one 8', 'vii 1 two five i zero 9');
    INSERT INTO ss VALUES('3 ii vii vi eight', '8 4 ix one three eight');
    INSERT INTO ss VALUES('iv eight seven 6 9 seven', 'one vi two five seven');
    INSERT INTO ss VALUES('i i 5 i v vii eight', '2 seven i 2 2 four');
    INSERT INTO ss VALUES('0 i iii nine 3 ix five', '0 eight iv 0 six 2');
    INSERT INTO ss VALUES('iv vii three 3 9 one 8', '2 ii 6 eight ii six six');
    INSERT INTO ss VALUES('eight one two nine six', '8 9 3 viii vi');
    INSERT INTO ss VALUES('one 0 four ii eight one 3', 'iii eight vi vi vi');
    INSERT INTO ss VALUES('4 0 eight 0 0', '1 four one vii seven ii');
    INSERT INTO ss VALUES('1 zero nine 2 2', 'viii iv two vi nine v iii');
    INSERT INTO ss VALUES('5 five viii four four vi', '8 five 7 vii 6 4');
    INSERT INTO ss VALUES('7 ix four 8 vii', 'nine three nine ii ix vii');
    INSERT INTO ss VALUES('nine iv v i 0 v', 'two iv vii six i ix 4');
    INSERT INTO ss VALUES('one v v one viii 3 8', '2 1 3 five iii');
    INSERT INTO ss VALUES('six ii 5 nine 4 viii seven', 'eight i ix ix 7 four');
    INSERT INTO ss VALUES('9 ii two seven three 7 0', 'six viii seven 7 five');
    INSERT INTO ss VALUES('five two 4 viii nine', '9 7 nine zero 1 two one');
    INSERT INTO ss VALUES('viii 8 iii i ii 8 3', '4 2 7 v 8 8');
    INSERT INTO ss VALUES('four vii 4 iii zero 0 vii', '3 viii iii zero 9 i');
    INSERT INTO ss VALUES('0 seven v five i five v', 'one 4 2 ix 9');
    INSERT INTO ss VALUES('two 5 two two ix 4 1', '3 nine ii v nine 3 five');
    INSERT INTO ss VALUES('five 5 7 4 6 vii', 'three 2 ix 2 8 6');
    INSERT INTO ss VALUES('six iii vi iv seven eight', '8 six 7 0 4');
    INSERT INTO ss VALUES('vi vi iv 3 0 one one', '9 6 eight ix iv');
    INSERT INTO ss VALUES('7 2 2 iii 0', '0 0 seven 1 nine');
    INSERT INTO ss VALUES('8 6 iv six ii', 'iv 6 3 4 ii five');
    INSERT INTO ss VALUES('0 two two seven ii', 'vii ix four 4 zero vi vi');
    INSERT INTO ss VALUES('2 one eight 8 9 7', 'vi 3 0 3 vii');
    INSERT INTO ss VALUES('iii ii ix iv three', 'vi i 6 1 two');
    INSERT INTO ss VALUES('eight four nine 8 seven', 'one three i nine iii one');
    INSERT INTO ss VALUES('iii seven five ix 8', 'ii 7 seven 0 four ii');
    INSERT INTO ss VALUES('four 0 1 5 two', 'iii 9 5 ii ii 2 4');
    INSERT INTO ss VALUES('iii nine four vi 8 five six', 'i i ii seven vi vii');
    INSERT INTO ss VALUES('eight vii eight six 3', 'i vii 1 six 9 vii');
    INSERT INTO ss VALUES('9 0 viii viii five', 'i 1 viii ix 3 4');
    INSERT INTO ss VALUES('three nine 5 nine viii four zero', 'ii i 1 5 2 viii');
    INSERT INTO ss VALUES('5 vii three 9 four', 'three five one 7 2 eight one');
  }

  # For each {test-number expression} pair, compare the results of running
  # the query against the FTS5 table with the results computed in Tcl by
  # [fts5_query_data]. Expressions that [fts5_expr_ok] reports as not
  # supported by the current detail mode are recorded as OMITTED.
  foreach {tn expr} {
    1 "eight"
  } {
    if {[fts5_expr_ok $expr ss]==0} {
      do_test 1.$tn.OMITTED { list } [list]
      continue
    }

    # Pass the name of the synonym array (::syn) so that the Tcl reference
    # implementation applies the same synonyms as the tokenizer does.
    set res [fts5_query_data $expr ss ASC ::syn]
    do_execsql_test 1.$tn.[llength $res].asc {
      SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr)
    } $res
  }
}

finish_test