Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Add tests for the fts4 unicode61 tokenchars and separators options. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | 9ce6f40dfb54b35cecba3cc9c1ec0d11 |
User & Date: | dan 2013-09-13 12:10:09.872 |
Context
2013-09-13
| ||
16:36 | Change the PRAGMA parser to use a binary search for the pragma name. Also: Minor performance enhancement to sqlite3DbFree() and to the token dequoter. (check-in: 870c030b4e user: drh tags: trunk) | |
12:10 | Add tests for the fts4 unicode61 tokenchars and separators options. (check-in: 9ce6f40dfb user: dan tags: trunk) | |
2013-09-12
| ||
23:12 | Fix typo in a macro name: "GlogUpperToLower" should be "GlobUpperToLower" (check-in: 73634ca463 user: drh tags: trunk) | |
Changes
Changes to test/fts4unicode.test.
︙ | ︙ | |||
433 434 435 436 437 438 439 440 441 | } do_execsql_test 8.2.2 { SELECT rowid FROM t4 WHERE t4 MATCH 'o'; } {1 3} do_execsql_test 8.2.3 { SELECT rowid FROM t4 WHERE t4 MATCH 'a'; } {2 4} finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 | } do_execsql_test 8.2.2 { SELECT rowid FROM t4 WHERE t4 MATCH 'o'; } {1 3} do_execsql_test 8.2.3 { SELECT rowid FROM t4 WHERE t4 MATCH 'a'; } {2 4} #------------------------------------------------------------------------- # foreach {tn sql} { 1 { CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]); CREATE VIRTUAL TABLE t6 USING fts4( tokenize=unicode61 [tokenchars=="] "tokenchars=[]"); CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]); } 2 { CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= ."); CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]"); CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4"); } 3 { CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .'); CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]'); CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4'); } 4 { CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`); CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`); CREATE VIRTUAL TABLE t7 USING 
fts4(tokenize=unicode61 `separators=x\xC4`); } } { do_execsql_test 9.$tn.0 { DROP TABLE IF EXISTS t5; DROP TABLE IF EXISTS t5aux; DROP TABLE IF EXISTS t6; DROP TABLE IF EXISTS t6aux; DROP TABLE IF EXISTS t7; DROP TABLE IF EXISTS t7aux; } do_execsql_test 9.$tn.1 $sql do_execsql_test 9.$tn.2 { CREATE VIRTUAL TABLE t5aux USING fts4aux(t5); INSERT INTO t5 VALUES('one two three/four.five.six'); SELECT * FROM t5aux; } { four.five.six * 1 1 four.five.six 0 1 1 {one two three} * 1 1 {one two three} 0 1 1 } do_execsql_test 9.$tn.3 { CREATE VIRTUAL TABLE t6aux USING fts4aux(t6); INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta'); SELECT * FROM t6aux; } { {alpha=beta"gamma} * 1 1 {alpha=beta"gamma} 0 1 1 {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1 } do_execsql_test 9.$tn.4 { CREATE VIRTUAL TABLE t7aux USING fts4aux(t7); INSERT INTO t7 VALUES('alephxbeth\xC4gimel'); SELECT * FROM t7aux; } { aleph * 1 1 aleph 0 1 1 beth * 1 1 beth 0 1 1 gimel * 1 1 gimel 0 1 1 } } # Check that multiple options are handled correctly. # do_execsql_test 10.1 { DROP TABLE IF EXISTS t1; CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61 "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy" "separators=a" "separators=a" "tokenchars=a" "tokenchars=a" ); INSERT INTO t1 VALUES('oneatwoxthreeyfour'); INSERT INTO t1 VALUES('a.single=word'); CREATE VIRTUAL TABLE t1aux USING fts4aux(t1); SELECT * FROM t1aux; } { .single=word * 1 1 .single=word 0 1 1 four * 1 1 four 0 1 1 one * 1 1 one 0 1 1 three * 1 1 three 0 1 1 two * 1 1 two 0 1 1 } # Test that case folding happens after tokenization, not before. 
# do_execsql_test 10.2 { DROP TABLE IF EXISTS t2; CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB"); INSERT INTO t2 VALUES('oneatwoBthree'); INSERT INTO t2 VALUES('onebtwoAthree'); CREATE VIRTUAL TABLE t2aux USING fts4aux(t2); SELECT * FROM t2aux; } { one * 1 1 one 0 1 1 onebtwoathree * 1 1 onebtwoathree 0 1 1 three * 1 1 three 0 1 1 two * 1 1 two 0 1 1 } finish_test |