Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | Enable prefix-search in query-parsing and snippet generation. If the character immediately after the end of a term is '*', that term is marked for prefix matching. Modify term comparison in snippetOffsetsOfColumn() to respect isPrefix. fts2n.test runs prefix searching through some obvious test cases. (CVS 3893) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | 7c4c65924035d9f260f6b64eb92c5c6c |
User & Date: | shess 2007-05-01 18:25:53.000 |
Context
2007-05-02
| ||
01:34 | Begin adding the zeroblob API to support incremental blob i/o. (CVS 3894) (check-in: 7a01836dde user: drh tags: trunk) | |
2007-05-01
| ||
18:25 | Enable prefix-search in query-parsing and snippet generation. If the character immediately after the end of a term is '*', that term is marked for prefix matching. Modify term comparison in snippetOffsetsOfColumn() to respect isPrefix. fts2n.test runs prefix searching through some obvious test cases. (CVS 3893) (check-in: 7c4c659240 user: shess tags: trunk) | |
17:49 | First approximation of incremental blob IO API. (CVS 3892) (check-in: c444836e7b user: danielk1977 tags: trunk) | |
Changes
Changes to ext/fts2/fts2.c.
︙ | ︙ | |||
3052 3053 3054 3055 3056 3057 3058 | iRotorBegin[iRotor&FTS2_ROTOR_MASK] = iBegin; iRotorLen[iRotor&FTS2_ROTOR_MASK] = iEnd-iBegin; match = 0; for(i=0; i<nTerm; i++){ int iCol; iCol = aTerm[i].iColumn; if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue; | | > > | | 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 | iRotorBegin[iRotor&FTS2_ROTOR_MASK] = iBegin; iRotorLen[iRotor&FTS2_ROTOR_MASK] = iEnd-iBegin; match = 0; for(i=0; i<nTerm; i++){ int iCol; iCol = aTerm[i].iColumn; if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue; if( aTerm[i].nTerm>nToken ) continue; if( !aTerm[i].isPrefix && aTerm[i].nTerm<nToken ) continue; assert( aTerm[i].nTerm<=nToken ); if( memcmp(aTerm[i].pTerm, zToken, aTerm[i].nTerm) ) continue; if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue; match |= 1<<i; if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){ for(j=aTerm[i].iPhrase-1; j>=0; j--){ int k = (iRotor-j) & FTS2_ROTOR_MASK; snippetAppendMatch(pSnippet, iColumn, i-j, iRotorBegin[k], iRotorLen[k]); |
︙ | ︙ | |||
3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 | pQuery->nextIsOr = 1; continue; } queryAdd(pQuery, pToken, nToken); if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ pQuery->pTerms[pQuery->nTerms-1].isNot = 1; } pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; if( inPhrase ){ nTerm++; } } if( inPhrase && pQuery->nTerms>firstIndex ){ | > > > | 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 | pQuery->nextIsOr = 1; continue; } queryAdd(pQuery, pToken, nToken); if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ pQuery->pTerms[pQuery->nTerms-1].isNot = 1; } if( iEnd<nSegment && pSegment[iEnd]=='*' ){ pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; } pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; if( inPhrase ){ nTerm++; } } if( inPhrase && pQuery->nTerms>firstIndex ){ |
︙ | ︙ |
Added test/fts2n.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | # 2007 April 26 # # The author disclaims copyright to this source code. # #************************************************************************* # This file implements tests for prefix-searching in the fts2 # component of the SQLite library. # # $Id: fts2n.test,v 1.1 2007/05/01 18:25:53 shess Exp $ # set testdir [file dirname $argv0] source $testdir/tester.tcl # If SQLITE_ENABLE_FTS2 is defined, omit this file. ifcapable !fts2 { finish_test return } # A large string to prime the pump with. set text { Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam sed turpis posuere placerat. Curabitur et lorem in lorem porttitor aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra at, felis. Vestibulum magna. 
Sed magna dolor, vestibulum rhoncus, ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse potenti. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu, suscipit nec, consequat quis, risus. } db eval { CREATE VIRTUAL TABLE t1 USING fts2(c); INSERT INTO t1(rowid, c) VALUES(1, $text); INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row'); } # Exact match do_test fts2n-1.1 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'" } {1} # And a prefix do_test fts2n-1.2 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'" } {1} # Prefix includes exact match do_test fts2n-1.3 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'" } {1} # Make certain everything isn't considered a prefix! do_test fts2n-1.4 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore'" } {} # Prefix across multiple rows. do_test fts2n-1.5 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'" } {1 2} # Likewise, with multiple hits in one document. do_test fts2n-1.6 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'l*'" } {1 2} # Prefix which should only hit one document. do_test fts2n-1.7 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'" } {2} # * not at end is dropped. do_test fts2n-1.8 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'" } {} # Stand-alone * is dropped. do_test fts2n-1.9 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '*'" } {} # Phrase-query prefix. do_test fts2n-1.10 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r*\"'" } {2} do_test fts2n-1.11 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r\"'" } {} # Phrase query with multiple prefix matches. do_test fts2n-1.12 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l*\"'" } {1 2} # Phrase query with multiple prefix matches. 
do_test fts2n-1.13 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l* row\"'" } {2} # Test across updates (and, by implication, deletes). # Version of text without "lorem". regsub -all {[Ll]orem} $text '' ntext db eval { CREATE VIRTUAL TABLE t2 USING fts2(c); INSERT INTO t2(rowid, c) VALUES(1, $text); INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row'); UPDATE t2 SET c = $ntext WHERE rowid = 1; } # Can't see lorem as an exact match. do_test fts2n-2.1 { execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'" } {} # Can't see a prefix of lorem, either. do_test fts2n-2.2 { execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'" } {} # Can see lovely in the other document. do_test fts2n-2.3 { execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'" } {2} # Can still see other hits. do_test fts2n-2.4 { execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'l*'" } {1 2} # Prefix which should only hit one document. do_test fts2n-2.5 { execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'" } {2} # Test with a segment which will have multiple levels in the tree. # Build a big document with lots of unique terms. set bigtext $text foreach c {a b c d e} { regsub -all {[A-Za-z]+} $bigtext "&$c" t append bigtext $t } # Populate a table with many copies of the big document, so that we # can test the number of hits found. Populate $ret with the expected # hit counts for each row. offsets() returns 4 elements for every # hit. We'll have 6 hits for row 1, 1 for row 2, and 6*(2^5)==192 for # $bigtext. set ret {6 1} db eval { BEGIN; CREATE VIRTUAL TABLE t3 USING fts2(c); INSERT INTO t3(rowid, c) VALUES(1, $text); INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row'); } for {set i 0} {$i<100} {incr i} { db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)} lappend ret 192 } db eval {COMMIT;} # Test that we get the expected number of hits. 
do_test fts2n-3.1 { set t {} db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} { set l [llength $o] lappend t [expr {$l/4}] } set t } $ret # TODO(shess) It would be useful to test a couple edge cases, but I # don't know if we have the precision to manage it from here at this # time. Prefix hits can cross leaves, which the code above _should_ # hit by virtue of size. There are two variations on this. If the # tree is 2 levels high, the code will find the leaf-node extent # directly, but if it's higher, the code will have to follow two # separate interior branches down the tree. Both should be tested. finish_test |