Documentation Source Text

Check-in [519916c18a]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add fts5.html.
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 519916c18a0b7a5db704035a4aa083da0dcc68ac
User & Date: dan 2015-03-04 20:19:09
Context
2015-03-04
20:56
Clarification of the "INSERT OR" and "REPLACE" forms of the INSERT statement. check-in: 58895ad201 user: drh tags: trunk
20:19
Add fts5.html. check-in: 519916c18a user: dan tags: trunk
16:59
Merge typo fix from the 3.8.8 branch. check-in: 9621e94943 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Added images/fts5_formula.odf.

cannot compute difference between binary files

Added images/fts5_formula1.png.

cannot compute difference between binary files

Added images/fts5_formula2.png.

cannot compute difference between binary files

Added images/fts5_formula3.png.

cannot compute difference between binary files

Changes to pages/fancyformat.tcl.

   195    195         }
   196    196       }
   197    197     }
   198    198     foreach line [split $txt "\n"] {
   199    199       set line [string range $line $nMinSpace end]
   200    200       append out "$line\n"
   201    201     }
   202         -  append out "</table></div>"
          202  +  append out "</pre></table></div>"
   203    203     return $out
   204    204   }
   205    205   
   206    206   proc fancyformat_document {zTitle lReqfile zBody} {
   207    207     unset -nocomplain ::ffreq
   208    208     unset -nocomplain ::ffreq_children
   209    209     foreach f $lReqfile {
................................................................................
   378    378   
   379    379   proc addtoc {zDoc} {
   380    380     # If the extension with the [parsehtml] command has not been loaded,
   381    381     # load it now.
   382    382     #
   383    383     if {[info commands parsehtml] == ""} { load ./parsehtml.so }
   384    384   
          385  +
          386  +  # Handle any <tclscript> blocks.
          387  +  #
          388  +  while { [regexp -nocase {<tclscript>(.*?)</tclscript>} $zDoc -> script] } {
          389  +    set sub [eval $script]
          390  +    set sub [string map {& {\&}} $sub]
          391  +    set zDoc [regsub -nocase {<tclscript>.*?</tclscript>} $zDoc $sub]
          392  +  }
          393  +
   385    394     # These variables are all used to store state between invocations of
   386    395     # the [parsehtml] callback used to do preprocessing.
   387    396     #
   388    397     set ::Addtoc(heading:1) 0
   389    398     set ::Addtoc(heading:2) 0
   390    399     set ::Addtoc(heading:3) 0
   391    400     set ::Addtoc(heading:4) 0

Added pages/fts5.in.

            1  +
            2  +<tcl>hd_keywords *fts5 FTS5</tcl>
            3  +<title>SQLite FTS5 Extension</title>
            4  +
            5  +<table_of_contents>
            6  +
            7  +<h2 style="margin-left:1.0em" notoc> Overview</h2>
            8  +
            9  +<h1>CREATE TABLE Arguments</h1>
           10  +
           11  +<p>Each argument specified as part of a "CREATE VIRTUAL TABLE ... USING fts5 
           12  +..." statement is either a column name or a configuration option. A column
           13  +name consists of a single FTS5 bareword or a single string literal quoted
           14  +in any manner acceptable to SQLite. A configuration option consists of an
           15  +FTS5 bareword - the option name - followed by an "=" character, followed by
           16  +the option value. The option value is specified using either a single FTS5 
           17  +bareword or a string literal, again quoted in any manner acceptable to the
           18  +SQLite core. Anything else is a syntax error.
           19  +
           20  +<h2>Column Names</h2>
           21  +
           22  +<p>It is an error to attempt to name an fts5 table column "rowid" or "rank". 
           23  +This is not supported.
           24  +
           25  +<h2>Configuration Options</h2>
           26  +
           27  +<p> A configuration option consists of an FTS5 bareword - the option name -
           28  +followed by an "=" character, followed by either an FTS5 bareword or a
           29  +string literal. For example:
           30  +
           31  +<codeblock>
           32  +  CREATE VIRTUAL TABLE mail USING fts5(sender, title, body, tokenize = 'porter ascii');
           33  +</codeblock>
           34  +
           35  +<p> There are currently the following configuration options:
           36  +
           37  +<ul>
           38  +  <li> The "tokenize" option, used to configure a [FTS5 tokenizers | custom tokenizer].
           39  +  <li> The "prefix" option, used to add [FTS5 prefix indexes | prefix indexes]
           40  +       to an FTS5 table.
           41  +  <li> The "content" option, used to make the FTS5 table an 
           42  +       [FTS5 content option | external content or contentless table].
           43  +  <li> The "content_rowid" option, used to set the rowid field of an 
           44  +  [FTS5 external content tables | external content table].
           45  +</ul>
           46  +
           47  +<h1>Full-text Query Syntax</h1>
           48  +
           49  +<p>
           50  +The following block contains a summary of the FTS query syntax in BNF form.
           51  +A detailed explanation follows.
           52  +
           53  +<codeblock>
           54  +&lt;phrase&gt;    := string &#91;*]
           55  +&lt;phrase&gt;    := &lt;phrase&gt; + &lt;phrase&gt;
           56  +&lt;neargroup&gt; := NEAR ( &lt;phrase&gt; &lt;phrase&gt; ... &#91;, N] )
           57  +&lt;query&gt;     := &#91;colname :] &lt;phrase&gt;
           58  +&lt;query&gt;     := &#91;colname :] &lt;neargroup&gt;
           59  +&lt;query&gt;     := ( &lt;query&gt; )
           60  +&lt;query&gt;     := &lt;query&gt; AND &lt;query&gt;
           61  +&lt;query&gt;     := &lt;query&gt; OR &lt;query&gt;
           62  +&lt;query&gt;     := &lt;query&gt; NOT &lt;query&gt;
           63  +</codeblock>
           64  +
           65  +<p>
           66  +Within an FTS expression a <b>string</b> may be specified in one of two ways:
           67  +
           68  +<ul>
           69  +  <li> <p>By enclosing it in double quotes ("). Within a string, any embedded
           70  +       double quote characters may be escaped SQL-style - by adding a second
           71  +       double-quote character.
           72  +
           73  +  <li> <p>As a bareword that includes no whitespace or reserved characters, 
           74  +       and is not "AND", "OR" or "NOT" (case sensitive). Reserved characters
           75  +       are: 
           76  +<pre>
           77  +    : ~ ! @ # $ % ^ & * ( ) + , =
           78  +</pre>
           79  +       In other words, the top row of a regular US keyboard, the plus sign,
           80  +       comma and colon characters. Strings that include any of these 
           81  +       characters must be quoted.
           82  +</ul>
           83  +
           84  +<p>
           85  +FTS queries are made up of <b>phrases</b>. A phrase is an ordered list of 
           86  +one or more tokens. A string is transformed into a phrase by passing it to
           87  +the FTS table tokenizer. Two phrases can be concatenated into a single 
           88  +large phrase using the "+" operator. For example, assuming the tokenizer
           89  +module being used tokenizes the input "one.two.three" to three separate
           90  +tokens, the following four queries all specify the same phrase:
           91  +
           92  +<codeblock>
           93  +  ... MATCH '"one two three"'
           94  +  ... MATCH 'one + two + three'
           95  +  ... MATCH '"one two" + three'
           96  +  ... MATCH 'one.two.three'
           97  +</codeblock>
           98  +
           99  +<p>
          100  +A phrase matches a document if the document contains at least one sub-sequence
          101  +of tokens that matches the sequence of tokens that make up the phrase.
          102  +
          103  +<p>
          104  +If a "*" character follows a string within an FTS expression, then the final
          105  +token extracted from the string is marked as a <b>prefix token</b>. As you
          106  +might expect, a prefix token matches any document token of which it is a 
          107  +prefix. For example, the first two queries in the following block will match
          108  +any document that contains the token "one" immediately followed by the token
          109  +"two" and then any token that begins with "thr".
          110  +
          111  +<codeblock>
          112  +  ... MATCH '"one two thr" * '
          113  +  ... MATCH 'one + two + thr*'
          114  +  ... MATCH '"one two thr*"'      <b>-- May not work as expected!</b>
          115  +</codeblock>
          116  +
          117  +<p>The final query in the block above may not work as expected. Because the
          118  +"*" character is inside the double-quotes, it will be passed to the tokenizer,
          119  +which will likely discard it (or perhaps, depending on the specific tokenizer
          120  +in use, include it as part of the final token) instead of recognizing it as
          121  +a special FTS character.
          122  +
          123  +<p>Two or more phrases may be grouped into a <b>NEAR group</b>. A NEAR group
          124  +is specified by the token "NEAR" (case sensitive) followed by an open
          125  +parenthesis character, followed by two or more whitespace separated phrases, optionally followed by a comma and the numeric parameter <i>N</i>, followed by
          126  +a close parenthesis. For example:
          127  +
          128  +<codeblock>
          129  +  ... MATCH 'NEAR("one two" "three four", 10)'
          130  +  ... MATCH 'NEAR("one two" thr* + four)'
          131  +</codeblock>
          132  +
          133  +<p>If no <i>N</i> parameter is supplied, it defaults to 10. A NEAR group
          134  +matches a document if the document contains at least one clump of tokens that: 
          135  +
          136  +<ol> 
          137  +  <li> contains at least one instance of each phrase, and 
          138  +  <li> for which the number of tokens between the end of the first phrase 
          139  +       and the beginning of the last phrase in the clump is not greater than <i>N</i>.
          140  +</ol>
          141  +
          142  +<p>For example:
          143  +
          144  +<codeblock>
          145  +  CREATE VIRTUAL TABLE f USING fts5(x);
          146  +  INSERT INTO f(rowid, x) VALUES(1, 'A B C D x x x E F x');
          147  +
          148  +  ... MATCH 'NEAR(e d, 4)';                      <i>-- Matches!</i>
          149  +  ... MATCH 'NEAR(e d, 3)';                      <i>-- Matches!</i>
          150  +  ... MATCH 'NEAR(e d, 2)';                      <i>-- Does not match!</i>
          151  +
          152  +  ... MATCH 'NEAR("c d" "e f", 3)';              <i>-- Matches!</i>
          153  +  ... MATCH 'NEAR("c"   "e f", 3)';              <i>-- Does not match!</i>
          154  +
          155  +  ... MATCH 'NEAR(a d e, 6)';                    <i>-- Matches!</i>
          156  +  ... MATCH 'NEAR(a d e, 5)';                    <i>-- Does not match!</i>
          157  +
          158  +  ... MATCH 'NEAR("a b c d" "b c" "e f", 4)';    <i>-- Matches!</i>
          159  +  ... MATCH 'NEAR("a b c d" "b c" "e f", 3)';    <i>-- Does not match!</i>
          160  +
          161  +</codeblock>
          162  +
          163  +
          164  +<p>
          165  +A single phrase or NEAR group may be restricted to matching text within a
          166  +specified column of the FTS table by prefixing it with the column name 
          167  +followed by a colon character. Column names may be specified using either
          168  +of the two forms described for strings above. Unlike strings that are part
          169  +of phrases, column names are not passed to the tokenizer module. Column 
          170  +names are case-insensitive in the usual way for SQLite column names -
          171  +upper/lower case equivalence is understood for ASCII-range characters only.
          172  +
          173  +<codeblock>
          174  +  ... MATCH 'colname : NEAR("one two" "three four", 10)'
          175  +  ... MATCH '"colname" : one + two + three'
          176  +</codeblock>
          177  +
          178  +<p>
          179  +Phrases and NEAR groups may be arranged into expressions using <b>boolean
          180  +operators</b>. In order of precedence, from highest to lowest, the operators 
          181  +are:
          182  +
          183  +<table striped=1>
          184  +  <tr><th>Operator <th>Function
          185  +  <tr><td><code>&lt;query1&gt; AND &lt;query2&gt;</code> 
          186  +      <td>Matches if both query1 and query2 match.
          187  +
          188  +  <tr><td><code>&lt;query1&gt; OR &lt;query2&gt;</code> 
          189  +      <td>Matches if either query1 or query2 match.
          190  +
          191  +  <tr><td><code>&lt;query1&gt; NOT &lt;query2&gt;</code> 
          192  +      <td>Matches if query1 matches and query2 does not match.
          193  +
          194  +</table>
          195  +
          196  +<p>
          197  +Parentheses may be used to group expressions in order to modify operator
          198  +precedence in the usual ways. For example:
          199  +
          200  +<codeblock>
          201  +  <i>-- Matches documents that contain at least one instance of either "one"</i>
          202  +  <i>-- or "two", but do not contain any instances of token "three".</i>
          203  +  ... MATCH 'one OR two NOT three'
          204  +
          205  +  <i>-- Match all documents that contain the token "two" but not "three", or</i>
          206  +  <i>-- contain the token "one".</i>
          207  +  ... MATCH 'one OR (two NOT three)'
          208  +</codeblock>
          209  +
          210  +<p>
          211  +Phrases and NEAR groups may also be connected by <b>implicit AND operators</b>.
          212  +For simplicity, these are not shown in the BNF grammar above. Essentially, any
          213  +sequence of phrases or NEAR groups (including those restricted to matching
          214  +specified columns) separated only by whitespace are handled as if there were an
          215  +implicit AND operator between each pair of phrases or NEAR groups. Implicit
          216  +AND operators are never inserted after or before an expression enclosed in
          217  +parentheses. For example:
          218  +
          219  +<codeblock>
          220  +  ... MATCH 'one two three'         <i>-- 'one AND two AND three'</i>
          221  +  ... MATCH 'three "one two"'       <i>-- 'three AND "one two"'</i>
          222  +  ... MATCH 'NEAR(one two) three'   <i>-- 'NEAR(one two) AND three'</i>
          223  +  ... MATCH 'one OR two three'      <i>-- 'one OR two AND three'</i>
          224  +
          225  +  ... MATCH '(one OR two) three'    <i>-- Syntax error!</i>
          226  +  ... MATCH 'func(one two)'         <i>-- Syntax error!</i>
          227  +</codeblock>
          228  +
          229  +<h1 tags="FTS5 prefix indexes">Prefix Indexes</h1>
          230  +
          231  +<p> By default, FTS5 maintains a single index recording the location of each
          232  +token instance within the document set. This means that querying for complete
          233  +tokens is fast, as it requires a single lookup, but querying for a prefix 
          234  +token can be slow, as it requires a range scan. For example, to query for
          235  +the prefix token "abc*" requires a range scan of all tokens greater than
          236  +or equal to "abc" and less than "abd".
          237  +
          238  +<p> A prefix index is a separate index that records the location of all
          239  +instances of prefix tokens of a certain length in characters. It is used to
          240  +speed up queries for prefix tokens. For example, optimizing a query for prefix
          241  +token "abc*" requires a prefix index of three-character prefixes.
          242  +
          243  +<p> To add prefix indexes to an FTS5 table, the "prefix" option is set to
          244  +either a single positive integer or a text value containing a white-space
          245  +separated list of one or more positive integer values. A prefix index is
          246  +created for each integer specified. If more than one "prefix" option is
          247  +specified as part of a single CREATE VIRTUAL TABLE statement, all apply.
          248  +
          249  +<codeblock>
          250  +  <i>-- Two ways to create an FTS5 table that maintains prefix indexes for
          251  +  -- two and three character prefix tokens.</i>
          252  +  CREATE VIRTUAL TABLE ft USING fts5(a, b, prefix='2 3');
          253  +  CREATE VIRTUAL TABLE ft USING fts5(a, b, prefix=2, prefix=3);
          254  +</codeblock>
          255  +
          256  +<h1 tags="FTS5 tokenizers">Tokenizers</h1>
          257  +
          258  +<p> The CREATE VIRTUAL TABLE "tokenize" option is used to configure the
          259  +specific tokenizer used by the FTS5 table. The option argument must be either
          260  +an FTS5 bareword, or an SQL text literal. The text of the argument is itself
          261  +treated as a white-space separated series of one or more FTS5 barewords or SQL text
          262  +literals. The first of these is the name of the tokenizer to use. The second
          263  +and subsequent list elements, if they exist, are arguments passed to the
          264  +tokenizer implementation.
          265  +
          266  +<p> Unlike option values and column names, SQL text literals intended as
          267  +tokenizers must be quoted using single quote characters. For example:
          268  +
          269  +<codeblock>
          270  +  <i>-- The following are all equivalent</i>
          271  +  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'porter ascii');
          272  +  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = "porter ascii");
          273  +  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = "'porter' 'ascii'");
          274  +  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = '''porter'' ''ascii''');
          275  +
          276  +  <i>-- But this will fail:</i>
          277  +  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = '"porter" "ascii"');
          278  +
          279  +  <i>-- This will fail too:</i>
          280  +  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'porter' 'ascii');
          281  +</codeblock>
          282  +
          283  +
          284  +<p>
          285  +FTS5 features three built-in tokenizer modules, described in subsequent
          286  +sections:
          287  +
          288  +<ul>
          289  +  <li> The <b>unicode61</b> tokenizer, based on the Unicode 6.1 standard. This
          290  +       is the default.
          291  +
          292  +  <li> The <b>ascii</b> tokenizer, which assumes all characters outside of
          293  +  the ASCII codepoint range (0-127) are to be treated as token characters.
          294  +
          295  +  <li> The <b>porter</b> tokenizer, which implements the 
          296  +<a href=http://tartarus.org/martin/PorterStemmer/>porter stemming algorithm</a>.
          297  +</ul>
          298  +
          299  +<p> It is also possible to create custom tokenizers for FTS5. The API for doing so is [custom tokenizers | described here].
          300  +
          301  +<h2>Unicode61 Tokenizer</h2>
          302  +
          303  +<p> The unicode tokenizer classifies all unicode characters as either 
          304  +"separator" or "token" characters. By default all space and punctuation
          305  +characters, as defined by Unicode 6.1, are considered separators, and all 
          306  +other characters as token characters. Each contiguous run of one or more 
          307  +token characters is considered to be a token. The tokenizer is case-insensitive
          308  +according to the rules defined by Unicode 6.1.
          309  +
          310  +<p> By default, diacritics are removed from all Latin script characters. This
          311  +means, for example, that "A", "a", "&#192;", "&#224;", "&#194;" and "&#226;"
          312  +are all considered to be equivalent.
          313  +
          314  +<p> Any arguments following "unicode61" in the token specification are treated
          315  +as a list of alternating option names and values. Unicode61 supports the
          316  +following options:
          317  +
          318  +<table striped=1>
          319  +  <tr><th> Option <th> Usage
          320  +  <tr><td> remove_diacritics
          321  +  <td>This option should be set to "0" or "1". If it is set (the default),
          322  +  diacritics are removed from all Latin script characters as described above.
          323  +  If it is clear, they are not. 
          324  +
          325  +  <tr><td> tokenchars
          326  +  <td> This option is used to specify additional unicode characters that 
          327  +  should be considered token characters, even if they are white-space or
          328  +  punctuation characters according to Unicode 6.1. All characters in the
          329  +  string that this option is set to are considered token characters.
          330  +
          331  +  <tr><td> separators
          332  +  <td> This option is used to specify additional unicode characters that 
          333  +  should be considered as separator characters, even if they are token
          334  +  characters according to Unicode 6.1. All characters in the string that 
          335  +  this option is set to are considered separators.
          336  +</table>
          337  +
          338  +<p> For example:
          339  +
          340  +<codeblock>
          341  +  <i>-- Create an FTS5 table that does not remove diacritics from Latin
          342  +  -- script characters, and that considers hyphens and underscore characters
          343  +  -- to be part of tokens. </i>
          344  +  CREATE VIRTUAL TABLE ft USING fts5(a, b, 
          345  +      tokenize = "unicode61 remove_diacritics 0 tokenchars '-_'"
          346  +  );
          347  +</codeblock>
          348  +
          349  +<h2>Ascii Tokenizer</h2>
          350  +
          351  +<p> The Ascii tokenizer is similar to the Unicode61 tokenizer, except that:
          352  +
          353  +<ul>
          354  +  <li> All non-ASCII characters (those with codepoints greater than 127) are
          355  +  always considered token characters. If any non-ASCII characters are specified
          356  +  as part of the separators option, they are ignored.  
          357  +
          358  +  <li> Case-folding is only performed for ASCII characters. So while "A" and
          359  +  "a" are considered to be equivalent, "&#195;" and "&#227;" are distinct.
          360  +
          361  +  <li> The remove_diacritics option is not supported.
          362  +</ul>
          363  +
          364  +<p> For example:
          365  +
          366  +<codeblock>
          367  +  <i>-- Create an FTS5 table that uses the ascii tokenizer, but does not
          368  +  -- consider numeric characters to be part of tokens.</i>
          369  +  CREATE VIRTUAL TABLE ft USING fts5(a, b, 
          370  +      tokenize = "ascii separators '0123456789'"
          371  +  );
          372  +</codeblock>
          373  +
          374  +<h2>Porter Tokenizer</h2>
          375  +
          376  +<p> The porter tokenizer is a wrapper tokenizer. It takes the output of some
          377  +other tokenizer and applies the 
          378  +<a href=http://tartarus.org/martin/PorterStemmer/>porter stemming algorithm</a>
          379  +to each token before it returns it to FTS5. This allows search terms like
          380  +"correction" to match similar words such as "corrected" or "correcting". The
          381  +porter stemmer algorithm is designed for use with English language terms 
          382  +only - using it with other languages may or may not improve search utility.
          383  +
          384  +<p> By default, the porter tokenizer operates as a wrapper around the default
          385  +tokenizer (unicode61). Or, if one or more extra arguments are added to the
          386  +"tokenize" option following "porter", they are treated as a specification for
          387  +the underlying tokenizer that the porter stemmer uses. For example:
          388  +
          389  +<codeblock>
          390  +  <i>-- Two ways to create an FTS5 table that uses the porter tokenizer to
          391  +  -- stem the output of the default tokenizer (unicode61). </i>
          392  +  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = porter); 
          393  +  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'porter unicode61');
          394  +
          395  +  <i>-- A porter tokenizer used to stem the output of the unicode61 tokenizer,
          396  +  -- with diacritics removed before stemming.</i>
          397  +  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'porter unicode61 remove_diacritics 1');
          398  +</codeblock>
          399  +
          400  +<h1 tags="FTS5 content option">External Content and Contentless Tables</h1>
          401  +
          402  +<p>
          403  +Normally, when a row is inserted into an FTS5 table, as well as the various
          404  +full-text index entries and other data, a copy of the row is stored in a private
          405  +table managed by the FTS5 module. When column values are requested from the
          406  +FTS5 table by the user or by an auxiliary function implementation, they are
          407  +read from this private table. The "content" option may be used to create an
          408  +FTS5 table that stores only FTS full-text index entries. Because the column
          409  +values themselves are usually much larger than the associated full-text index
          410  +entries, this can save significant database space.
          411  +
          412  +<p>
          413  +There are two ways to use the "content" option:
          414  +<ul>
          415  +  <li> By setting it to an empty string to create a contentless FTS5 table. In
          416  +       this case FTS5 assumes that the original column values are unavailable
          417  +       to it when processing queries. Full-text queries and some auxiliary
          418  +       functions can still be used, but no column values apart from the rowid
          419  +       may be read from the table.
          420  +
          421  +  <li> By setting it to the name of a database object (table, virtual table or
          422  +       view) that may be queried by FTS5 at any time to retrieve the column
          423  +       values. This is known as an "external content" table. In this case all
          424  +       FTS5 functionality may be used, but it is the responsibility of the user
          425  +       to ensure that the contents of the full-text index are consistent with
          426  +       the named database object. If they are not, query results may be
          427  +       unpredictable.  
          428  +</ul>
          429  +
          430  +<h2 tags="FTS5 contentless tables">Contentless Tables</h2>
          431  +
          432  +<p> A contentless FTS5 table is created by setting the "content" option to
          433  +an empty string. For example:
          434  +
          435  +<codeblock>
          436  +  CREATE VIRTUAL TABLE f1 USING fts5(a, b, c, content='');
          437  +</codeblock>
          438  +
          439  +<p> Contentless FTS5 tables do not support UPDATE or DELETE statements, or
          440  +INSERT statements that do not supply a non-NULL value for the rowid field.
          441  +Rows may be deleted from a contentless table using an [FTS5 delete command].
          442  +
          443  +<p> Attempting to read any column value except the rowid from a contentless
          444  +FTS5 table returns an SQL NULL value.
          445  +
          446  +<h2 tags="FTS5 external content tables">External Content Tables</h2>
          447  +
          448  +<p> An external content FTS5 table is created by setting the content 
          449  +option to the name of a table, virtual table or view (hereafter the "content
          450  +table") within the same database. Whenever column values are required by
          451  +FTS5, it queries the content table as follows, with the rowid of the row
          452  +for which values are required bound to the SQL variable:
          453  +
          454  +<codeblock>
          455  +  SELECT * FROM &lt;content&gt; WHERE &lt;content_rowid&gt; = ?;
          456  +</codeblock>
          457  +
          458  +<p> In the above, &lt;content&gt; is replaced by the name of the content table.
          459  +By default, &lt;content_rowid&gt; is replaced by the literal text "rowid". Or,
          460  +if the "content_rowid" option is set within the CREATE VIRTUAL TABLE statement,
          461  +by the value of that option.
          462  +
          463  +<p> The "*" in the above query must expand to a set of columns consisting of
          464  +the &lt;content_rowid&gt; column followed by each indexed column, in the same
          465  +order as they are present in the external content fts5 table.
          466  +
          467  +<p> The content table may also be queried as follows:
          468  +
          469  +<codeblock>
          470  +  SELECT * FROM &lt;content&gt; ORDER BY &lt;content_rowid&gt; ASC;
          471  +  SELECT * FROM &lt;content&gt; ORDER BY &lt;content_rowid&gt; DESC;
          472  +</codeblock>
          473  +
          474  +<p> It is still the responsibility of the user to ensure that the contents of
          475  +an external content FTS5 table are kept up to date with the content table. 
          476  +One way to do this is with triggers. For example:
          477  +
          478  +<codeblock>
          479  +  <i>-- Create a table. And an external content fts5 table to index it.</i>
          480  +  CREATE TABLE tbl(a INTEGER PRIMARY KEY, b, c);
          481  +  CREATE VIRTUAL TABLE fts_idx USING fts5(b, c, content='tbl', content_rowid='a');
          482  +
          483  +  <i>-- Triggers to keep the FTS index up to date.</i>
          484  +  CREATE TRIGGER tbl_ai AFTER INSERT ON tbl BEGIN
          485  +    INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c);
          486  +  END;
          487  +  CREATE TRIGGER tbl_ad AFTER DELETE ON tbl BEGIN
          488  +    INSERT INTO fts_idx(fts_idx, rowid, b, c) VALUES('delete', old.a, old.b, old.c);
          489  +  END;
          490  +  CREATE TRIGGER tbl_au AFTER UPDATE ON tbl BEGIN
          491  +    INSERT INTO fts_idx(fts_idx, rowid, b, c) VALUES('delete', old.a, old.b, old.c);
          492  +    INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c);
          493  +  END;
          494  +</codeblock>
          495  +
          496  +
          497  +<h1> Auxiliary Functions </h1>
          498  +
          499  +<h2>Built-in Auxiliary Functions</h2>
          500  +
          501  +<h3>The bm25() function</h3>
          502  +
          503  +<p> The built-in auxiliary function bm25() returns a real value indicating
          504  +how well the current row matches the full-text query. The better the match,
          505  +the larger the value returned. A query such as the following may be used
          506  +to return matches in order from best to worst match:
          507  +
          508  +<codeblock>
          509  +  SELECT * FROM fts WHERE fts MATCH ? ORDER BY bm25(fts) DESC
          510  +</codeblock>
          511  +
          512  +<p> In order to calculate a document's score, the full-text query is separated
          513  +    into its component phrases. The bm25 score for document <i>D</i> and 
          514  +    query <i>Q</i> is then calculated as follows:
          515  +
          516  +<p> <img src="images/fts5_formula1.png" style="width:55ex;margin-left:5ex">
          517  +
          518  +<p> In the above, <i>nPhrase</i> is the number of phrases in the query.
          519  +    <i>|D|</i> is the number of tokens in the current document, and
          520  +    <i>avgdl</i> is the average number of tokens in all documents within the
          521  +    FTS5 table.  <i>k<sub>1</sub></i> and <i>b</i> are both constants,
          522  +    hard-coded at 1.2 and 0.75 respectively.
          523  +
          524  +<p> <i>IDF(q<sub>i</sub>)</i> is the inverse-document-frequency of query 
          525  +    phrase <i>i</i>. It is calculated as follows, where <i>N</i> is the total
          526  +    number of rows in the FTS5 table and <i>n(q<sub>i</sub>)</i> is the total
          527  +    number of rows that contain at least one instance of phrase <i>i</i>:
          528  +
          529  +<p> <img src="images/fts5_formula2.png" style="width:55ex;margin-left:5ex">
          530  +
          531  +<p> Finally, <i>f(q<sub>i</sub>,D)</i> is the phrase frequency of phrase 
          532  +<i>i</i>. By default, this is simply the number of occurrences of the phrase
          533  +within the current row. However, by passing extra real value arguments to 
          534  +the bm25() SQL function, each column of the table may be assigned a different
          535  +weight and the phrase frequency calculated as follows:
          536  +
          537  +<p> <img src="images/fts5_formula3.png" style="width:55ex;margin-left:5ex">
          538  +
          539  +<p> where <i>w<sub>c</sub></i> is the weight assigned to column <i>c</i> and
          540  +<i>n(q<sub>i</sub>,c)</i> is the number of occurrences of phrase <i>i</i> in
          541  +column <i>c</i> of the current row. The first argument passed to bm25()
          542  +following the table name is the weight assigned to the leftmost column of
          543  +the FTS5 table. The second is the weight assigned to the second leftmost
          544  +column, and so on. If there are not enough arguments for all table columns,
          545  +remaining columns are assigned a weight of 1.0. If there are too many 
          546  +trailing arguments, the extras are ignored. For example:
          547  +
          548  +<codeblock>
          549  +  <i>-- Assuming the following schema:</i>
          550  +  CREATE VIRTUAL TABLE email USING fts5(sender, title, body);
          551  +
          552  +  <i>-- Return results in bm25 order, with each phrase hit in the "sender"</i>
          553  +  <i>-- column considered the equal of 10 hits in the "body" column, and</i>
          554  +  <i>-- each hit in the "title" column considered as valuable as 5 hits in</i>
          555  +  <i>-- the "body" column.</i>
          556  +  SELECT * FROM email WHERE email MATCH ? ORDER BY bm25(email, 10.0, 5.0) DESC;
          557  +</codeblock>
          558  +
          559  +<p>Refer to Wikipedia for 
          560  +<a href="http://en.wikipedia.org/wiki/Okapi_BM25">more information regarding
          561  +BM25</a> and its variants.
          562  +
          563  +<h3>The highlight() function</h3>
          564  +
          565  +<p> The highlight() function returns a copy of the text from a specified 
          566  +column of the current row with extra markup text inserted to mark the start 
          567  +and end of phrase matches. 
          568  +
          569  +<p>The highlight() function must be invoked with exactly three arguments
          570  +following the table name, to be interpreted as follows:
          571  +
          572  +<ol>
          573  +  <li> An integer indicating the index of the FTS table column to read the 
          574  +       text from. Columns are numbered from left to right starting at zero.
          575  +
          576  +  <li> The text to insert before each phrase match.
          577  +
          578  +  <li> The text to insert after each phrase match.
          579  +</ol>
          580  +
          581  +<p>For example:
          582  +
          583  +<codeblock>
          584  +  <i>-- Return a copy of the text from the leftmost column of the current</i>
          585  +  <i>-- row, with phrase matches marked using html "b" tags.</i>
          586  +  SELECT highlight(fts, 0, '&lt;b&gt;', '&lt;/b&gt;') FROM fts WHERE fts MATCH ?
          587  +</codeblock>
          588  +
          589  +<p>In cases where two or more phrase instances overlap (share one or more
          590  +tokens in common), a single open and close marker is inserted for each set
          591  +of overlapping phrases. For example:
          592  +
          593  +<codeblock>
          594  +  <i>-- Assuming this:</i>
          595  +  CREATE VIRTUAL TABLE ft USING fts5(a);
          596  +  INSERT INTO ft VALUES('a b c x c d e');
          597  +  INSERT INTO ft VALUES('a b c c d e');
          598  +  INSERT INTO ft VALUES('a b c d e');
          599  +
          600  +  <i>-- The following SELECT statement returns these three rows:</i>
          601  +  <i>--   '&#91;a b c&#93; x &#91;c d e&#93;'</i>
          602  +  <i>--   '&#91;a b c&#93; &#91;c d e&#93;'</i>
          603  +  <i>--   '&#91;a b c d e&#93;'</i>
          604  +  SELECT highlight(ft, 0, '&#91;', '&#93;') FROM ft WHERE ft MATCH 'a+b+c AND c+d+e';
          605  +</codeblock>
          606  +
          607  +<h3>The snippet() function</h3>
          608  +
          609  +<p>The snippet() function is similar to highlight(), except that instead of
          610  +returning entire column values, it automatically selects and extracts a
          611  +short fragment of document text to process and return. The snippet() function
          612  +must be passed five parameters following the table name argument:
          613  +
          614  +<ol>
          615  +  <li> An integer indicating the index of the FTS table column to select
          616  +       the returned text from. Columns are numbered from left to right 
          617  +       starting at zero. A negative value indicates that the column should
          618  +       be automatically selected.
          619  +
          620  +  <li> The text to insert before each phrase match within the returned text.
          621  +
          622  +  <li> The text to insert after each phrase match within the returned text.
          623  +
          624  +  <li> The text to add to the start or end of the selected text to indicate
          625  +       that the returned text does not occur at the start or end of its column,
          626  +       respectively.
          627  +
          628  +  <li> The maximum number of tokens in the returned text. This must be greater
          629  +       than zero and equal to or less than 64. 
          630  +</ol>
          631  +
          632  +<h2 tags="auxiliary function mapping">Sorting by Auxiliary Function Results</h2>
          633  +
          634  +<p> All FTS5 tables feature a special hidden column named "rank". If the
          635  +current query is not a full-text query (i.e. if it does not include a MATCH
          636  +operator), the value of the "rank" column is always NULL. Otherwise, in a
          637  +full-text query, column rank contains by default the same value as would be
          638  +returned by executing the bm25() auxiliary function with no trailing 
          639  +arguments.
          640  +
          641  +<p> The difference between reading from the rank column and using the bm25()
          642  +function directly within the query is only significant when sorting by the
          643  +returned value. In this case, using "rank" is faster than using bm25().
          644  +
          645  +<codeblock>
          646  +  <i>-- The following queries are logically equivalent. But the second may</i>
          647  +  <i>-- be faster, particularly if the caller abandons the query before</i>
          648  +  <i>-- all rows have been returned (or if the queries were modified to </i>
          649  +  <i>-- include LIMIT clauses).</i>
          650  +  SELECT * FROM fts WHERE fts MATCH ? ORDER BY bm25(fts) DESC;
          651  +  SELECT * FROM fts WHERE fts MATCH ? ORDER BY rank DESC;
          652  +</codeblock>
          653  +
          654  +<p> Instead of using bm25() with no trailing arguments, the specific auxiliary
          655  +function mapped to the rank column may be configured either on a per-query
          656  +basis, or by setting a different persistent default for the FTS table.
          657  +
          658  +<p> In order to change the mapping of the rank column for a single query, 
          659  +a term similar to the following is added to the WHERE clause of a query:
          660  +
          661  +<codeblock>
          662  +  rank MATCH 'auxiliary-function-name(arg1, arg2, ...)'
          663  +</codeblock>
          664  +
          665  +<p> The right-hand-side of the MATCH clause must be a constant expression that
          666  +evaluates to a string consisting of the auxiliary function to invoke, followed
          667  +by zero or more comma-separated arguments within parentheses. Arguments must
          668  +be SQL literals. For example:
          669  +
          670  +<codeblock>
          671  +  <i>-- The following queries are logically equivalent. But the second may</i>
          672  +  <i>-- be faster. See above. </i>
          673  +  SELECT * FROM fts WHERE fts MATCH ? ORDER BY bm25(fts, 10.0, 5.0) DESC;
          674  +  SELECT * FROM fts WHERE fts MATCH ? AND rank MATCH 'bm25(10.0, 5.0)' ORDER BY rank DESC;
          675  +</codeblock>
          676  +
          677  +<p> The default mapping of the rank column for a table may be modified 
          678  +using the [FTS5 rank configuration option].
          679  +
          680  +<h1>Special INSERT Commands</h1>
          681  +
          682  +<h2 tags="FTS5 automerge option">The 'automerge' Configuration Option</h2>
          683  +
          684  +<p>
          685  +  Instead of using a single data structure on disk to store the full-text
          686  +  index, FTS5 uses a series of b-trees. Each time a new transaction is
          687  +  committed, a new b-tree containing the contents of the committed transaction
          688  +  is written into the database file. When the full-text index is queried, each
          689  +  b-tree must be queried individually and the results merged before being
          690  +  returned to the user.
          691  +
          692  +<p>
          693  +  In order to prevent the number of b-trees in the database from becoming too
          694  +  large (slowing down queries), smaller b-trees are periodically merged into
          695  +  single larger b-trees containing the same data. By default, this happens
          696  +  automatically within INSERT, UPDATE or DELETE statements that modify the
          697  +  full-text index. The 'automerge' parameter determines how many smaller
          698  +  b-trees are merged together at a time. Setting it to a small value can
          699  +  speed up queries (as they have to query and merge the results from fewer 
          700  +  b-trees), but can also slow down writing to the database (as each INSERT,
          701  +  UPDATE or DELETE statement has to do more work as part of the automatic
          702  +  merging process).
          703  +
          704  +<p>
          705  +  Each of the b-trees that make up the full-text index is assigned to a "level"
          706  +  based on its size. Level-0 b-trees are the smallest, as they contain the
          707  +  contents of a single transaction. Higher level b-trees are the result of
          708  +  merging two or more level-0 b-trees together and so they are larger. FTS5
          709  +  begins to merge b-trees together once there exist <i>M</i> or more b-trees 
          710  +  with the same level, where <i>M</i> is the value of the 'automerge' 
          711  +  parameter.
          712  +
          713  +<p>
          714  +  The maximum allowed value for the 'automerge' parameter is 16. The default
          715  +  value is 4. Setting the 'automerge' parameter to 0 disables the automatic 
          716  +  incremental merging of b-trees altogether.
          717  +
          718  +<codeblock>
          719  +  INSERT INTO ft(ft, rank) VALUES('automerge', 8);
          720  +</codeblock>
          721  +
          722  +<h2>The 'crisismerge' Configuration Option</h2>
          723  +
          724  +<p>The 'crisismerge' option is similar to 'automerge', in that it determines
          725  +how and how often the component b-trees that make up the full-text index are
          726  +merged together. Once there exist <i>C</i> or more b-trees on a single level
          727  +within the full-text index, where <i>C</i> is the value of the 'crisismerge'
          728  +option, all b-trees on the level are immediately merged into a single b-tree.
          729  +
          730  +<p>The difference between this option and the 'automerge' option is that when
          731  +the 'automerge' limit is reached FTS5 only begins to merge the b-trees
          732  +together. Most of the work is performed as part of subsequent INSERT, 
          733  +UPDATE or DELETE operations. Whereas when the 'crisismerge' limit is reached,
          734  +the offending b-trees are all merged immediately. This means that an INSERT,
          735  +UPDATE or DELETE that triggers a crisis-merge may take a long time to 
          736  +complete.
          737  +
          738  +<p>The default 'crisismerge' value is 16. There is no maximum limit. Attempting
          739  +to set the 'crisismerge' parameter to a value of 0 or 1 is equivalent to
          740  +setting it to the default value (16). It is an error to attempt to set the
          741  +'crisismerge' option to a negative value.
          742  +
          743  +<codeblock>
          744  +  INSERT INTO ft(ft, rank) VALUES('crisismerge', 16);
          745  +</codeblock>
          746  +
          747  +<h2 tags="FTS5 delete command">The 'delete' Command</h2>
          748  +
          749  +<p> This command is only available with [FTS5 external content tables |
          750  +external content] and [FTS5 contentless tables | contentless] tables. It
          751  +is used to delete the index entries associated with a single row from the
          752  +full-text index. This command and the [FTS5 delete-all command | delete-all]
          753  +command are the only ways to remove entries from the full-text index of a
          754  +contentless table.
          755  +
          756  +<p> In order to use this command to delete a row, the text value 'delete' 
          757  +must be inserted into the special column with the same name as the table.
          758  +The rowid of the row to delete is inserted into the rowid column. The
          759  +values inserted into the other columns must match the values currently
          760  +stored in the table. For example:
          761  +
          762  +<codeblock>
          763  +  <i>-- Insert a row with rowid=14 into the fts5 table.</i>
          764  +  INSERT INTO ft(rowid, a, b, c) VALUES(14, $a, $b, $c);
          765  +  
          766  +  <i>-- Remove the same row from the fts5 table.</i>
          767  +  INSERT INTO ft(ft, rowid, a, b, c) VALUES('delete', 14, $a, $b, $c);
          768  +</codeblock>
          769  +
          770  +<p> If the values "inserted" into the text columns as part of a 'delete'
          771  +command are not the same as those currently stored within the table, the
          772  +results may be unpredictable.
          773  +
          774  +<p><i style=color:red>todo: explain why</i>
          775  +
          776  +<h2 tags="FTS5 delete-all command">The 'delete-all' Command</h2>
          777  +
          778  +<p> This command is only available with [FTS5 external content tables |
          779  +external content] and [FTS5 contentless tables | contentless] tables. It
          780  +deletes all entries from the full-text index.
          781  +
          782  +<codeblock>
          783  +  INSERT INTO ft(ft) VALUES('delete-all');
          784  +</codeblock>
          785  +
          786  +<h2>The 'integrity-check' Command</h2>
          787  +
          788  +<p> This command is used to verify that the full-text index is consistent 
          789  +with the contents of the FTS5 table or [FTS5 external content tables | content 
          790  +table]. It is not available with [FTS5 contentless tables | contentless tables].
          791  +
          792  +<p>The integrity-check command is invoked by inserting the text value
          793  +'integrity-check' into the special column with the same name as the FTS5
          794  +table. For example:
          795  +
          796  +<codeblock>
          797  +  INSERT INTO ft(ft) VALUES('integrity-check');
          798  +</codeblock>
          799  +
          800  +<p>If the full-text index is consistent with the contents of the table, the
          801  +INSERT used to invoke the integrity-check command succeeds. Or, if any
          802  +discrepancy is found, it fails with an [SQLITE_CORRUPT_VTAB] error.
          803  +
          804  +<h2 tags="FTS5 optimize command">The 'optimize' Command</h2>
          805  +
          806  +<p>This command merges all individual b-trees that currently make up the
          807  +full-text index into a single large b-tree structure. This ensures that the
          808  +full-text index consumes the minimum space within the database and is in the
          809  +fastest form to query.
          810  +
          811  +<p>Refer to the documentation for the [FTS5 automerge option] for more details
          812  +regarding the relationship between the full-text index and its component
          813  +b-trees.
          814  +
          815  +<codeblock>
          816  +  INSERT INTO ft(ft) VALUES('optimize');
          817  +</codeblock>
          818  +
          819  +<h2>The 'pgsz' Configuration Option</h2>
          820  +
          821  +<p> This command is used to set the persistent "pgsz" option.
          822  +
          823  +<p> The full-text index maintained by FTS5 is stored as a series of fixed-size
          824  +blobs in a database table. It is not strictly necessary for all blobs that make
          825  +up a full-text index to be the same size. The pgsz option determines the size
          826  +of all blobs created by subsequent index writers. The default value is 1000.
          827  +
          828  +<codeblock>
          829  +  INSERT INTO ft(ft, rank) VALUES('pgsz', 4072);
          830  +</codeblock>
          831  +
          832  +<h2 tags="FTS5 rank configuration option">The 'rank' Configuration Option</h2>
          833  +
          834  +<p> This command is used to set the persistent "rank" option.
          835  +
          836  +<p> The rank option is used to change the default auxiliary function mapping
          837  +for the rank column. The option should be set to a text value in the same
          838  +format as described for [auxiliary function mapping | "rank MATCH ?"] terms 
          839  +above. For example:
          840  +
          841  +<codeblock>
          842  +  INSERT INTO ft(ft, rank) VALUES('rank', 'bm25(10.0, 5.0)');
          843  +</codeblock>
          844  +
          845  +<h2 tags="FTS5 rebuild command">The 'rebuild' Command</h2>
          846  +
          847  +<p> This command first deletes the entire full-text index, then rebuilds it
          848  +based on the contents of the table or [FTS5 external content tables | content
          849  +table].  It is not available with [FTS5 contentless tables | contentless
          850  +tables].
          851  +
          852  +<codeblock>
          853  +  INSERT INTO ft(ft) VALUES('rebuild');
          854  +</codeblock>
          855  +
          856  +
          857  +<h1>Extending FTS5</h1>
          858  +
          859  +<p>FTS5 features APIs allowing it to be extended by:
          860  +
          861  +<ul>
          862  +  <li> Adding new auxiliary functions implemented in C, and
          863  +  <li> Adding new tokenizers, also implemented in C.
          864  +</ul>
          865  +
          866  +<p> The built-in tokenizers and auxiliary functions described in this
          867  +document are all implemented using the publicly available API described
          868  +below.
          869  +
          870  +<p> Before a new auxiliary function or tokenizer implementation may be 
          871  +registered with FTS5, an application must obtain a pointer to the "fts5_api"
          872  +structure. There is one fts5_api structure for each database connection with
          873  +which the FTS5 extension is registered. To obtain the pointer, the application
          874  +invokes the SQL user-defined function fts5(), which returns a blob value
          875  +containing the pointer to the fts5_api structure for the connection. The
          876  +following example code demonstrates the technique:
          877  +
          878  +<codeblock>
          879  +  <i>/*
          880  +  ** Return a pointer to the fts5_api object for database connection db.
          881  +  ** If an error occurs, return NULL and leave an error in the database 
          882  +  ** handle (accessible using sqlite3_errcode()/sqlite3_errmsg()).
          883  +  */</i>
          884  +  fts5_api *fts5_api_from_db(sqlite3 *db){
          885  +    fts5_api *pRet = 0;
          886  +    sqlite3_stmt *pStmt = 0;
          887  +
          888  +    if( SQLITE_OK==sqlite3_prepare(db, "SELECT fts5()", -1, &pStmt, 0)
          889  +     && SQLITE_ROW==sqlite3_step(pStmt) 
          890  +     && sizeof(pRet)==sqlite3_column_bytes(pStmt, 0)
          891  +    ){
          892  +      memcpy(&pRet, sqlite3_column_blob(pStmt, 0), sizeof(pRet));
          893  +    }
          894  +    sqlite3_finalize(pStmt);
          895  +    return pRet;
          896  +  }
          897  +</codeblock>
          898  +
          899  +<p> The fts5_api structure is defined as follows. It exposes three methods, 
          900  +one each for registering new auxiliary functions and tokenizers, and one for
          901  +retrieving an existing tokenizer. The latter is intended to facilitate the
          902  +implementation of "tokenizer wrappers" similar to the built-in
          903  +porter tokenizer.
          904  +
          905  +<codeblock>
          906  +<tclscript>
          907  +  set res ""
          908  +  set ::extract_api_docs_mode fts5_api
          909  +  catch { set res [source [file join $::SRC ext/fts5/extract_api_docs.tcl]] }
          910  +  set res
          911  +</tclscript>
          912  +</codeblock>
          913  +
          914  +<p> To invoke a method of the fts5_api object, the fts5_api pointer itself
          915  +should be passed as the method's first argument followed by the other,
          916  +method-specific, arguments. For example:
          917  +
          918  +<codeblock>
          919  +    rc = pFts5Api->xCreateTokenizer(pFts5Api, ... other args ...);
          920  +</codeblock>
          921  +
          922  +<p> The fts5_api structure methods are described individually in the following
          923  +sections.
          924  +
          925  +<h2 tags="custom tokenizers">Custom Tokenizers</h2>
          926  +
          927  +<p> To create a custom tokenizer, an application must implement three
          928  +functions: a tokenizer constructor (xCreate), a destructor (xDelete) and a
          929  +function to do the actual tokenization (xTokenize). The type of each
          930  +function is as for the member variables of the fts5_tokenizer struct:
          931  +
          932  +<codeblock>
          933  +<tclscript>
          934  +  set res ""
          935  +  set ::extract_api_docs_mode fts5_tokenizer
          936  +  catch { set res [source [file join $::SRC ext/fts5/extract_api_docs.tcl]] }
          937  +  set res
          938  +</tclscript>
          939  +</codeblock>
          940  +
          941  +<p> When an FTS5 table uses the custom tokenizer, the FTS5 core calls xCreate()
          942  +once to create a tokenizer, then xTokenize() zero or more times to tokenize
          943  +strings, then xDelete() to free any resources allocated by xCreate(). More
          944  +specifically:
          945  +
          946  +<tclscript>
          947  +  set res ""
          948  +  set ::extract_api_docs_mode tokenizer_api
          949  +  catch { set res [source [file join $::SRC ext/fts5/extract_api_docs.tcl]] }
          950  +  set res
          951  +</tclscript>
          952  +
          953  +<h2>Custom Auxiliary Functions</h2>
          954  +
          955  +<p> Implementing a custom auxiliary function is similar to implementing an
          956  +[application-defined SQL function | scalar SQL function]. The implementation
          957  +should be a C function of type fts5_extension_function, defined as follows:
          958  +
          959  +<codeblock>
          960  +<tclscript>
          961  +  set res ""
          962  +  set ::extract_api_docs_mode fts5_extension
          963  +  catch { set res [source [file join $::SRC ext/fts5/extract_api_docs.tcl]] }
          964  +  set res
          965  +</tclscript>
          966  +</codeblock>
          967  +
          968  +<p> The implementation is registered with the FTS5 module by calling the
          969  +xCreateFunction() method of the fts5_api object. If there is already an
          970  +auxiliary function with the same name, it is replaced by the new function.
          971  +If a non-NULL xDestroy parameter is passed to xCreateFunction(), it is invoked
          972  +with a copy of the pContext pointer passed as the only argument when the
          973  +database handle is closed or when the registered auxiliary function is
          974  +replaced.
          975  +
          976  +<p> The final three arguments passed to the auxiliary function callback are
          977  +similar to the three arguments passed to the implementation of a scalar SQL
          978  +function. All arguments except the first passed to the auxiliary function are
          979  +available to the implementation in the apVal&#91;&#93; array. The
          980  +implementation should return a result or error via the context handle pCtx.
          981  +
          982  +<p> The first argument passed to an auxiliary function callback is a pointer
          983  +to a structure containing methods that may be invoked in order to obtain
          984  +information regarding the current query or row. The second argument is an
          985  +opaque handle that should be passed as the first argument to any such method 
          986  +invocation. For example, the following auxiliary function definition returns
          987  +the total number of tokens in all columns of the current row:
          988  +
          989  +<codeblock>
          990  +<i>/*
          991  +** Implementation of an auxiliary function that returns the number
          992  +** of tokens in the current row (including all columns).
          993  +*/</i>
          994  +static void column_size_imp(
          995  +  const Fts5ExtensionApi *pApi,
          996  +  Fts5Context *pFts,
          997  +  sqlite3_context *pCtx,
          998  +  int nVal,
          999  +  sqlite3_value **apVal
         1000  +){
         1001  +  int rc;
         1002  +  int nToken;
         1003  +  rc = pApi->xColumnSize(pFts, -1, &nToken);
         1004  +  if( rc==SQLITE_OK ){
         1005  +    sqlite3_result_int(pCtx, nToken);
         1006  +  }else{
         1007  +    sqlite3_result_error_code(pCtx, rc);
         1008  +  }
         1009  +}
         1010  +</codeblock>
         1011  +
         1012  +<p>The following section describes the API offered to auxiliary function
         1013  +implementations in detail. Further examples may be found in the "fts5_aux.c"
         1014  +file of the source code.
         1015  +
         1016  +<h3>Custom Auxiliary Functions API Reference</h3>
         1017  +
         1018  +<codeblock>
         1019  +<tclscript>
         1020  +  set res ""
         1021  +  set ::extract_api_docs_mode Fts5ExtensionApi
         1022  +  catch { set res [source [file join $::SRC ext/fts5/extract_api_docs.tcl]] }
         1023  +  set res
         1024  +</tclscript>
         1025  +</codeblock>
         1026  +
         1027  +<tclscript>
         1028  +  set res ""
         1029  +  unset -nocomplain ::extract_api_docs_mode 
         1030  +  catch { set res [source [file join $::SRC ext/fts5/extract_api_docs.tcl]] }
         1031  +  set res
         1032  +</tclscript>
         1033  +
         1034  +

Changes to wrap.tcl.

   456    456       .fancy th, .fancy td {padding: 0.2em 1ex; vertical-align:top}
   457    457       .fancy #toc a        { color: darkblue ; text-decoration: none }
   458    458       .fancy .todo         { color: #AA3333 ; font-style : italic }
   459    459       .fancy .todo:before  { content: 'TODO:' }
   460    460       .fancy p.todo        { border: solid #AA3333 1px; padding: 1ex }
   461    461       .fancy img { display:block; }
   462    462       .fancy :link:hover, .fancy :visited:hover { background: wheat }
   463         -    .fancy p,.fancy ul,.fancy ol { margin: 1em 5ex }
          463  +    .fancy p,.fancy ul,.fancy ol,.fancy dl { margin: 1em 5ex }
   464    464       .fancy li p { margin: 1em 0 }
   465    465       /* End of "fancyformat" specific rules. */
   466    466   
   467    467       </style>
   468    468     }
   469    469     puts $fd {</head>}
   470    470     if {[file exists DRAFT]} {