Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix some documentation issues in fts5. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
512e1bdb4093b59d1494dfc63391476e |
User & Date: | dan 2015-01-10 20:34:27.199 |
Context
2015-01-12
| ||
17:58 | Optimize the unicode61 tokenizer so that it handles ascii text faster. Make it the default tokenizer. Change the name of the simple tokenizer to "ascii". (check-in: f22dbccad9 user: dan tags: fts5) | |
2015-01-10
| ||
20:34 | Fix some documentation issues in fts5. (check-in: 512e1bdb40 user: dan tags: fts5) | |
2015-01-07
| ||
19:33 | Add the fts5 'optimize' command. (check-in: e749be563d user: dan tags: fts5) | |
Changes
Changes to ext/fts5/extract_api_docs.tcl.
︙ | ︙ | |||
13 14 15 16 17 18 19 20 21 22 23 24 | # This script extracts the documentation for the API used by fts5 auxiliary # functions from header file fts5.h. It outputs html text on stdout that # is included in the documentation on the web. # set ::fts5_docs_output "" if {[info commands hd_putsnl]==""} { proc output {text} { puts $text } } else { proc output {text} { | > | > > | | 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | # This script extracts the documentation for the API used by fts5 auxiliary # functions from header file fts5.h. It outputs html text on stdout that # is included in the documentation on the web. # set ::fts5_docs_output "" if {[info commands hd_putsnl]==""} { if {[llength $argv]>0} { set ::extract_api_docs_mode [lindex $argv 0] } proc output {text} { puts $text } } else { proc output {text} { append ::fts5_docs_output "$text\n" } } if {[info exists ::extract_api_docs_mode]==0} {set ::extract_api_docs_mode api} set input_file [file join [file dir [info script]] fts5.h] set fd [open $input_file] set data [read $fd] close $fd # Argument $data is the entire text of the fts5.h file. This function # extracts the definition of the Fts5ExtensionApi structure from it and # returns a key/value list of structure member names and definitions. i.e. # # iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ... # proc get_struct_members {data} { # Extract the structure definition from the fts5.h file. regexp "struct Fts5ExtensionApi {(.*?)};" $data -> defn # Remove all comments from the structure definition regsub -all {/[*].*?[*]/} $defn {} defn2 set res [list] foreach member [split $defn2 {;}] { |
︙ | ︙ | |||
91 92 93 94 95 96 97 | if {$current_doc != ""} { lappend res $current_header $current_doc } set res } | < < < < < < < < | < < < < | < < < | < | < < > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | | | | | | | | | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > || if {$current_doc != ""} { lappend res $current_header $current_doc } set res } proc get_tokenizer_docs {data} { regexp {(xCreate:.*?)[*]/} $data -> docs set res "<dl>\n" foreach line [split [string trim $docs] "\n"] { regexp {[*][*](.*)} $line -> line if {[regexp {^ ?x.*:} $line]} { append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n" continue } if {[string trim $line] == ""} { append res "<p>\n" } else { append res "$line\n" } } append res "</dl>\n" set res } proc get_api_docs {data} { # Initialize global array M as a map from Fts5StructureApi member name # to member definition. i.e. # # iVersion -> {int iVersion} # xUserData -> {void *(*xUserData)(Fts5Context*)} # ... # array set M [get_struct_members $data] # Initialize global list D as a map from section name to documentation # text. Most (all?) section names are structure member names. 
# set D [get_struct_docs $data [array names M]] foreach {sub docs} $D { if {[info exists M($sub)]} { set hdr $M($sub) set link " id=$sub" } else { set link "" } output "<hr color=#eeeee style=\"margin:1em 8.4ex 0 8.4ex;\"$link>" set style "padding-left:6ex;font-size:1.4em;display:block" output "<h style=\"$style\"><pre>$hdr</pre></h>" set mode "" set bEmpty 1 foreach line [split [string trim $docs] "\n"] { if {[string trim $line]==""} { if {$mode != ""} {output "</$mode>"} set mode "" } elseif {$mode == ""} { if {[regexp {^ } $line]} { set mode codeblock } else { set mode p } output "<$mode>" } output $line } if {$mode != ""} {output "</$mode>"} } } proc get_fts5_struct {data start end} { set res "" set bOut 0 foreach line [split $data "\n"] { if {$bOut==0} { if {[regexp $start $line]} { set bOut 1 } } if {$bOut} { append res "$line\n" } if {$bOut} { if {[regexp $end $line]} { set bOut 0 } } } set map [list /* <i>/* */ */</i>] string map $map $res } proc main {data} { switch $::extract_api_docs_mode { fts5_api { output [get_fts5_struct $data "typedef struct fts5_api" "^\};"] } fts5_tokenizer { output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"] } fts5_extension { output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"] } Fts5ExtensionApi { set struct [get_fts5_struct $data "^struct Fts5ExtensionApi" "^.;"] set map [list] foreach {k v} [get_struct_members $data] { if {[string match x* $k]==0} continue lappend map $k "<a href=#$k>$k</a>" } output [string map $map $struct] } api { get_api_docs $data } tokenizer_api { output [get_tokenizer_docs $data] } default { } } } main $data set ::fts5_docs_output |
Changes to ext/fts5/fts5.h.
︙ | ︙ | |||
96 97 98 99 100 101 102 | ** xTokenize: ** Tokenize text using the tokenizer belonging to the FTS5 table. ** ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): ** This API function is used to query the FTS table for phrase iPhrase ** of the current query. Specifically, a query equivalent to: ** | | | 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | ** xTokenize: ** Tokenize text using the tokenizer belonging to the FTS5 table. ** ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): ** This API function is used to query the FTS table for phrase iPhrase ** of the current query. Specifically, a query equivalent to: ** ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid DESC ** ** with $p set to a phrase equivalent to the phrase iPhrase of the ** current query is executed. For each row visited, the callback function ** passed as the fourth argument is invoked. The context and API objects ** passed to the callback function may be used to access the properties of ** each matched row. Invoking Api.xUserData() returns a copy of the pointer ** passed as the third argument to pUserData. |
︙ | ︙ | |||
208 209 210 211 212 213 214 | ** ** xCreate: ** This function is used to allocate and initialize a tokenizer instance. ** A tokenizer instance is required to actually tokenize text. ** ** The first argument passed to this function is a copy of the (void*) ** pointer provided by the application when the fts5_tokenizer object | > | | | | | 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 | ** ** xCreate: ** This function is used to allocate and initialize a tokenizer instance. ** A tokenizer instance is required to actually tokenize text. ** ** The first argument passed to this function is a copy of the (void*) ** pointer provided by the application when the fts5_tokenizer object ** was registered with FTS5 (the third argument to xCreateTokenizer()). ** The second and third arguments are an array of nul-terminated strings ** containing the tokenizer arguments, if any, specified following the ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used ** to create the FTS5 table. ** ** The final argument is an output variable. If successful, (*ppOut) ** should be set to point to the new tokenizer handle and SQLITE_OK ** returned. If an error occurs, some value other than SQLITE_OK should ** be returned. In this case, fts5 assumes that the final value of *ppOut ** is undefined. ** |
︙ | ︙ | |||
236 237 238 239 240 241 242 | ** ** For each token in the input string, the supplied callback xToken() must ** be invoked. The first argument to it should be a copy of the pointer ** passed as the second argument to xTokenize(). The next two arguments ** are a pointer to a buffer containing the token text, and the size of ** the token in bytes. The 4th and 5th arguments are the byte offsets of ** the first byte of and first byte immediately following the text from | | < < | | < | | 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 | ** ** For each token in the input string, the supplied callback xToken() must ** be invoked. The first argument to it should be a copy of the pointer ** passed as the second argument to xTokenize(). The next two arguments ** are a pointer to a buffer containing the token text, and the size of ** the token in bytes. The 4th and 5th arguments are the byte offsets of ** the first byte of and first byte immediately following the text from ** which the token is derived within the input. ** ** FTS5 assumes the xToken() callback is invoked for each token in the ** order that they occur within the input text. ** ** If an xToken() callback returns any value other than SQLITE_OK, then ** the tokenization should be abandoned and the xTokenize() method should ** immediately return a copy of the xToken() return value. Or, if the ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, ** if an error occurs with the xTokenize() implementation itself, it ** may abandon the tokenization and return any error code other than ** SQLITE_OK or SQLITE_DONE. ** */ typedef struct Fts5Tokenizer Fts5Tokenizer; typedef struct fts5_tokenizer fts5_tokenizer; struct fts5_tokenizer { int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); void (*xDelete)(Fts5Tokenizer*); int (*xTokenize)(Fts5Tokenizer*, void *pCtx, const char *pText, int nText, int (*xToken)( |
︙ | ︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
699 700 701 702 703 704 705 | ** ** SQLITE_OK is returned if no error occurs, or an SQLite error code ** otherwise. It is not considered an error if an iterator reaches ** EOF. */ static int fts5ExprNearNextMatch( Fts5Expr *pExpr, /* Expression that pNear is a part of */ | | > > > | 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 | ** ** SQLITE_OK is returned if no error occurs, or an SQLite error code ** otherwise. It is not considered an error if an iterator reaches ** EOF. */ static int fts5ExprNearNextMatch( Fts5Expr *pExpr, /* Expression that pNear is a part of */ Fts5ExprNode *pNode, /* The "NEAR" node (FTS5_STRING) */ int bFromValid, i64 iFrom ){ int rc = SQLITE_OK; Fts5ExprNearset *pNear = pNode->pNear; while( 1 ){ int i; /* Advance the iterators until they all point to the same rowid */ rc = fts5ExprNearNextRowidMatch(pExpr, pNode, bFromValid, iFrom); if( pNode->bEof || rc!=SQLITE_OK ) break; /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( pPhrase->nTerm>1 || pNear->iCol>=0 ){ int bMatch = 0; rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ |
︙ | ︙ |
Added ext/fts5/test/fts5near.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | # 2014 Jan 08 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focused on the NEAR operator. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5near proc do_near_test {tn doc near res} { uplevel [list do_execsql_test $tn " DELETE FROM t1; INSERT INTO t1 VALUES('$doc'); SELECT count(*) FROM t1 WHERE t1 MATCH '$near'; " $res] } execsql { CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'simple tokenchars .') } do_near_test 1.1 ". . a . . . b . ." { NEAR(a b, 5) } 1 do_near_test 1.2 ". . a . . . b . ." { NEAR(a b, 4) } 1 do_near_test 1.3 ". . a . . . b . ." { NEAR(a b, 3) } 1 do_near_test 1.4 ". . a . . . b . ." { NEAR(a b, 2) } 0 do_near_test 1.5 ". . a . . . b . ." { NEAR(b a, 5) } 1 do_near_test 1.6 ". . a . . . b . ." { NEAR(b a, 4) } 1 do_near_test 1.7 ". . a . . . b . ." { NEAR(b a, 3) } 1 do_near_test 1.8 ". . a . . . b . ." { NEAR(b a, 2) } 0 do_near_test 1.9 ". a b . . . c . ." { NEAR("a b" c, 3) } 1 do_near_test 1.10 ". a b . . . c . ." { NEAR("a b" c, 2) } 0 do_near_test 1.11 ". a b . . . c . ." { NEAR(c "a b", 3) } 1 do_near_test 1.12 ". a b . . . c . ." { NEAR(c "a b", 2) } 0 do_near_test 1.13 ". a b . . . c d ." { NEAR(a+b c+d, 3) } 1 do_near_test 1.14 ". a b . . . c d ." { NEAR(a+b c+d, 2) } 0 do_near_test 1.15 ". a b . . . c d ." { NEAR(c+d a+b, 3) } 1 do_near_test 1.16 ". a b . . . c d ." 
{ NEAR(c+d a+b, 2) } 0 do_near_test 1.17 ". a b . . . c d ." { NEAR(a b c d, 5) } 1 do_near_test 1.18 ". a b . . . c d ." { NEAR(a b c d, 4) } 0 do_near_test 1.19 ". a b . . . c d ." { NEAR(a+b c d, 4) } 1 do_near_test 1.20 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 5) } 1 do_near_test 1.21 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 4) } 0 do_near_test 1.22 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 5) } 1 do_near_test 1.23 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 4) } 0 do_near_test 1.24 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 5) } 1 do_near_test 1.25 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 4) } 0 finish_test |