Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix some documentation issues in fts5. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
512e1bdb4093b59d1494dfc63391476e |
User & Date: | dan 2015-01-10 20:34:27.199 |
Context
2015-01-12
| ||
17:58 | Optimize the unicode61 tokenizer so that it handles ascii text faster. Make it the default tokenizer. Change the name of the simple tokenizer to "ascii". (check-in: f22dbccad9 user: dan tags: fts5) | |
2015-01-10
| ||
20:34 | Fix some documentation issues in fts5. (check-in: 512e1bdb40 user: dan tags: fts5) | |
2015-01-07
| ||
19:33 | Add the fts5 'optimize' command. (check-in: e749be563d user: dan tags: fts5) | |
Changes
Changes to ext/fts5/extract_api_docs.tcl.
︙ | ︙ | |||
13 14 15 16 17 18 19 20 21 22 23 24 | # This script extracts the documentation for the API used by fts5 auxiliary # functions from header file fts5.h. It outputs html text on stdout that # is included in the documentation on the web. # set ::fts5_docs_output "" if {[info commands hd_putsnl]==""} { proc output {text} { puts $text } } else { proc output {text} { | > | > > | | 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | # This script extracts the documentation for the API used by fts5 auxiliary # functions from header file fts5.h. It outputs html text on stdout that # is included in the documentation on the web. # set ::fts5_docs_output "" if {[info commands hd_putsnl]==""} { if {[llength $argv]>0} { set ::extract_api_docs_mode [lindex $argv 0] } proc output {text} { puts $text } } else { proc output {text} { append ::fts5_docs_output "$text\n" } } if {[info exists ::extract_api_docs_mode]==0} {set ::extract_api_docs_mode api} set input_file [file join [file dir [info script]] fts5.h] set fd [open $input_file] set data [read $fd] close $fd # Argument $data is the entire text of the fts5.h file. This function # extracts the definition of the Fts5ExtensionApi structure from it and # returns a key/value list of structure member names and definitions. i.e. # # iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ... # proc get_struct_members {data} { # Extract the structure definition from the fts5.h file. regexp "struct Fts5ExtensionApi {(.*?)};" $data -> defn # Remove all comments from the structure definition regsub -all {/[*].*?[*]/} $defn {} defn2 set res [list] foreach member [split $defn2 {;}] { |
︙ | ︙ | |||
91 92 93 94 95 96 97 | if {$current_doc != ""} { lappend res $current_header $current_doc } set res } | < < < < < < < < | < < < < | < < < | < | < < > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | | | | | | | | | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 | if {$current_doc != ""} { lappend res $current_header $current_doc } set res } proc get_tokenizer_docs {data} { regexp {(xCreate:.*?)[*]/} $data -> docs set res "<dl>\n" foreach line [split [string trim $docs] "\n"] { regexp {[*][*](.*)} $line -> line if {[regexp {^ ?x.*:} $line]} { append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n" continue } if {[string trim $line] == ""} { append res "<p>\n" } else { append res "$line\n" } } append res "</dl>\n" set res } proc get_api_docs {data} { # Initialize global array M as a map from Fts5StructureApi member name # to member definition. i.e. # # iVersion -> {int iVersion} # xUserData -> {void *(*xUserData)(Fts5Context*)} # ... # array set M [get_struct_members $data] # Initialize global list D as a map from section name to documentation # text. Most (all?) section names are structure member names. 
# set D [get_struct_docs $data [array names M]] foreach {sub docs} $D { if {[info exists M($sub)]} { set hdr $M($sub) set link " id=$sub" } else { set link "" } output "<hr color=#eeeee style=\"margin:1em 8.4ex 0 8.4ex;\"$link>" set style "padding-left:6ex;font-size:1.4em;display:block" output "<h style=\"$style\"><pre>$hdr</pre></h>" set mode "" set bEmpty 1 foreach line [split [string trim $docs] "\n"] { if {[string trim $line]==""} { if {$mode != ""} {output "</$mode>"} set mode "" } elseif {$mode == ""} { if {[regexp {^ } $line]} { set mode codeblock } else { set mode p } output "<$mode>" } output $line } if {$mode != ""} {output "</$mode>"} } } proc get_fts5_struct {data start end} { set res "" set bOut 0 foreach line [split $data "\n"] { if {$bOut==0} { if {[regexp $start $line]} { set bOut 1 } } if {$bOut} { append res "$line\n" } if {$bOut} { if {[regexp $end $line]} { set bOut 0 } } } set map [list /* <i>/* */ */</i>] string map $map $res } proc main {data} { switch $::extract_api_docs_mode { fts5_api { output [get_fts5_struct $data "typedef struct fts5_api" "^\};"] } fts5_tokenizer { output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"] } fts5_extension { output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"] } Fts5ExtensionApi { set struct [get_fts5_struct $data "^struct Fts5ExtensionApi" "^.;"] set map [list] foreach {k v} [get_struct_members $data] { if {[string match x* $k]==0} continue lappend map $k "<a href=#$k>$k</a>" } output [string map $map $struct] } api { get_api_docs $data } tokenizer_api { output [get_tokenizer_docs $data] } default { } } } main $data set ::fts5_docs_output |
Changes to ext/fts5/fts5.h.
︙ | ︙ | |||
96 97 98 99 100 101 102 | ** xTokenize: ** Tokenize text using the tokenizer belonging to the FTS5 table. ** ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): ** This API function is used to query the FTS table for phrase iPhrase ** of the current query. Specifically, a query equivalent to: ** | | | 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | ** xTokenize: ** Tokenize text using the tokenizer belonging to the FTS5 table. ** ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): ** This API function is used to query the FTS table for phrase iPhrase ** of the current query. Specifically, a query equivalent to: ** ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid DESC ** ** with $p set to a phrase equivalent to the phrase iPhrase of the ** current query is executed. For each row visited, the callback function ** passed as the fourth argument is invoked. The context and API objects ** passed to the callback function may be used to access the properties of ** each matched row. Invoking Api.xUserData() returns a copy of the pointer ** passed as the third argument to pUserData. |
︙ | ︙ | |||
208 209 210 211 212 213 214 | ** ** xCreate: ** This function is used to allocate and initialize a tokenizer instance. ** A tokenizer instance is required to actually tokenize text. ** ** The first argument passed to this function is a copy of the (void*) ** pointer provided by the application when the fts5_tokenizer object | > | | | | | 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 | ** ** xCreate: ** This function is used to allocate and initialize a tokenizer instance. ** A tokenizer instance is required to actually tokenize text. ** ** The first argument passed to this function is a copy of the (void*) ** pointer provided by the application when the fts5_tokenizer object ** was registered with FTS5 (the third argument to xCreateTokenizer()). ** The second and third arguments are an array of nul-terminated strings ** containing the tokenizer arguments, if any, specified following the ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used ** to create the FTS5 table. ** ** The final argument is an output variable. If successful, (*ppOut) ** should be set to point to the new tokenizer handle and SQLITE_OK ** returned. If an error occurs, some value other than SQLITE_OK should ** be returned. In this case, fts5 assumes that the final value of *ppOut ** is undefined. ** |
︙ | ︙ | |||
236 237 238 239 240 241 242 | ** ** For each token in the input string, the supplied callback xToken() must ** be invoked. The first argument to it should be a copy of the pointer ** passed as the second argument to xTokenize(). The next two arguments ** are a pointer to a buffer containing the token text, and the size of ** the token in bytes. The 4th and 5th arguments are the byte offsets of ** the first byte of and first byte immediately following the text from | | < < | | < | | 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 | ** ** For each token in the input string, the supplied callback xToken() must ** be invoked. The first argument to it should be a copy of the pointer ** passed as the second argument to xTokenize(). The next two arguments ** are a pointer to a buffer containing the token text, and the size of ** the token in bytes. The 4th and 5th arguments are the byte offsets of ** the first byte of and first byte immediately following the text from ** which the token is derived within the input. ** ** FTS5 assumes the xToken() callback is invoked for each token in the ** order that they occur within the input text. ** ** If an xToken() callback returns any value other than SQLITE_OK, then ** the tokenization should be abandoned and the xTokenize() method should ** immediately return a copy of the xToken() return value. Or, if the ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, ** if an error occurs with the xTokenize() implementation itself, it ** may abandon the tokenization and return any error code other than ** SQLITE_OK or SQLITE_DONE. ** */ typedef struct Fts5Tokenizer Fts5Tokenizer; typedef struct fts5_tokenizer fts5_tokenizer; struct fts5_tokenizer { int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); void (*xDelete)(Fts5Tokenizer*); int (*xTokenize)(Fts5Tokenizer*, void *pCtx, const char *pText, int nText, int (*xToken)( |
︙ | ︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
699 700 701 702 703 704 705 | ** ** SQLITE_OK is returned if no error occurs, or an SQLite error code ** otherwise. It is not considered an error code if an iterator reaches ** EOF. */ static int fts5ExprNearNextMatch( Fts5Expr *pExpr, /* Expression that pNear is a part of */ | | > > > | 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 | ** ** SQLITE_OK is returned if no error occurs, or an SQLite error code ** otherwise. It is not considered an error code if an iterator reaches ** EOF. */ static int fts5ExprNearNextMatch( Fts5Expr *pExpr, /* Expression that pNear is a part of */ Fts5ExprNode *pNode, /* The "NEAR" node (FTS5_STRING) */ int bFromValid, i64 iFrom ){ int rc = SQLITE_OK; Fts5ExprNearset *pNear = pNode->pNear; while( 1 ){ int i; /* Advance the iterators until they all point to the same rowid */ rc = fts5ExprNearNextRowidMatch(pExpr, pNode, bFromValid, iFrom); if( pNode->bEof || rc!=SQLITE_OK ) break; /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( pPhrase->nTerm>1 || pNear->iCol>=0 ){ int bMatch = 0; rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ |
︙ | ︙ |
Added ext/fts5/test/fts5near.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | # 2014 Jan 08 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focused on the NEAR operator. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5near proc do_near_test {tn doc near res} { uplevel [list do_execsql_test $tn " DELETE FROM t1; INSERT INTO t1 VALUES('$doc'); SELECT count(*) FROM t1 WHERE t1 MATCH '$near'; " $res] } execsql { CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'simple tokenchars .') } do_near_test 1.1 ". . a . . . b . ." { NEAR(a b, 5) } 1 do_near_test 1.2 ". . a . . . b . ." { NEAR(a b, 4) } 1 do_near_test 1.3 ". . a . . . b . ." { NEAR(a b, 3) } 1 do_near_test 1.4 ". . a . . . b . ." { NEAR(a b, 2) } 0 do_near_test 1.5 ". . a . . . b . ." { NEAR(b a, 5) } 1 do_near_test 1.6 ". . a . . . b . ." { NEAR(b a, 4) } 1 do_near_test 1.7 ". . a . . . b . ." { NEAR(b a, 3) } 1 do_near_test 1.8 ". . a . . . b . ." { NEAR(b a, 2) } 0 do_near_test 1.9 ". a b . . . c . ." { NEAR("a b" c, 3) } 1 do_near_test 1.10 ". a b . . . c . ." { NEAR("a b" c, 2) } 0 do_near_test 1.11 ". a b . . . c . ." { NEAR(c "a b", 3) } 1 do_near_test 1.12 ". a b . . . c . ." { NEAR(c "a b", 2) } 0 do_near_test 1.13 ". a b . . . c d ." { NEAR(a+b c+d, 3) } 1 do_near_test 1.14 ". a b . . . c d ." { NEAR(a+b c+d, 2) } 0 do_near_test 1.15 ". a b . . . c d ." { NEAR(c+d a+b, 3) } 1 do_near_test 1.16 ". a b . . . c d ." 
{ NEAR(c+d a+b, 2) } 0 do_near_test 1.17 ". a b . . . c d ." { NEAR(a b c d, 5) } 1 do_near_test 1.18 ". a b . . . c d ." { NEAR(a b c d, 4) } 0 do_near_test 1.19 ". a b . . . c d ." { NEAR(a+b c d, 4) } 1 do_near_test 1.20 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 5) } 1 do_near_test 1.21 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 4) } 0 do_near_test 1.22 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 5) } 1 do_near_test 1.23 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 4) } 0 do_near_test 1.24 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 5) } 1 do_near_test 1.25 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 4) } 0 finish_test |