Documentation Source Text

Check-in [cc51dec17e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Further updates to search database and script.
Timelines: family | ancestors | descendants | both | experimental
Files: files | file ages | folders
SHA1: cc51dec17e8d87bcb2d99bcfccec396ed9e7c26b
User & Date: dan 2016-08-24 19:26:13
Context
2016-08-25
12:18
Add <fancy_format> or <table_of_contents> markup to a few more documents. To ensure that there are enough anchors in the longer documents for the search script to use. check-in: 066e5931ce user: dan tags: experimental
2016-08-24
19:26
Further updates to search database and script. check-in: cc51dec17e user: dan tags: experimental
2016-08-20
17:22
Add "jump to" links to relevant sections of large documents in search results. check-in: 9d49a78f9b user: dan tags: experimental
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to document_header.tcl.

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
        <a href="${path}copyright.html">License</a>
        <a href="${path}support.html">Support</a>
        <a href="http://www.hwaci.com/sw/sqlite/prosupport.html">Purchase</a>
      </div>
  }]

  if {$search==""} {
    set initval   "Search SQLite Docs..."
    set initstyle {font-style:italic;color:#044a64}
  } else {
    set initval   $search
    set initstyle {font-style:normal;color:black}
  }

  append ret [subst -nocommands {
    <script>
      gMsg = "Search SQLite Docs..."
      function entersearch() {
        var q = document.getElementById("q");
        if( q.value == gMsg ) { q.value = "" }
        q.style.color = "black"
        q.style.fontStyle = "normal"
      }
      function leavesearch() {







|








|







139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
        <a href="${path}copyright.html">License</a>
        <a href="${path}support.html">Support</a>
        <a href="http://www.hwaci.com/sw/sqlite/prosupport.html">Purchase</a>
      </div>
  }]

  if {$search==""} {
    set initval   "Search with FTS5..."
    set initstyle {font-style:italic;color:#044a64}
  } else {
    set initval   $search
    set initstyle {font-style:normal;color:black}
  }

  append ret [subst -nocommands {
    <script>
      gMsg = "Search with FTS5..."
      function entersearch() {
        var q = document.getElementById("q");
        if( q.value == gMsg ) { q.value = "" }
        q.style.color = "black"
        q.style.fontStyle = "normal"
      }
      function leavesearch() {

Changes to main.mk.

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
...
136
137
138
139
140
141
142
143
144




145
146


147







148
149
150
151
152
153
154
155
156
157
private:	base evidence private_evidence matrix doc

fast:	base doc

tclsh:	$(TCLSQLITE3C)
	$(CC) -g -o tclsh -DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS5 -DTCLSH=1 -DSQLITE_TCLMD5 $(TCLINC) $(TCLSQLITE3C) $(TCLFLAGS)

tclsqlite3.search:	$(TCLSQLITE3C) $(DOC)/search/searchc.c
	$(CC) -g -o tclsqlite3.search -I. -DSQLITE_THREADSAFE=0 -DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS5 $(TCLINC) $(DOC)/search/searchc.c $(TCLSQLITE3C) $(TCLFLAGS)

sqlite3.h:	tclsh $(SRC)/src/sqlite.h.in $(SRC)/manifest.uuid $(SRC)/VERSION
	./tclsh $(SRC)/tool/mksqlite3h.tcl $(SRC) | \
	sed 's/^SQLITE_API //' >sqlite3.h

# Generate the directory into which generated documentation files will
# be written.
................................................................................
# Generate the traceability matrix
#
matrix:	
	rm -rf doc/matrix/images
	cp -r doc/images doc/matrix
	./tclsh $(DOC)/matrix.tcl

# Build the fts3 database used by the search script
#




parsehtml.so: $(DOC)/search/parsehtml.c
	gcc -g -shared -fPIC $(TCLINC) -I. -I$(SRC)/ext/fts3 $(DOC)/search/parsehtml.c $(TCLSTUBFLAGS) -o parsehtml.so










searchdb: parsehtml.so tclsh
	./tclsh $(DOC)/search/buildsearchdb.tcl
	cp $(DOC)/document_header.tcl doc/document_header.tcl
	cp $(DOC)/search/search.tcl doc/search
	chmod +x doc/search

always:	

clean:	
	rm -rf tclsh doc sqlite3.h







|
<







 







<
|
>
>
>
>
|
|
>
>

>
>
>
>
>
>
>
|
|








40
41
42
43
44
45
46
47

48
49
50
51
52
53
54
...
135
136
137
138
139
140
141

142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
private:	base evidence private_evidence matrix doc

fast:	base doc

tclsh:	$(TCLSQLITE3C)
	$(CC) -g -o tclsh -DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS5 -DTCLSH=1 -DSQLITE_TCLMD5 $(TCLINC) $(TCLSQLITE3C) $(TCLFLAGS)




sqlite3.h:	tclsh $(SRC)/src/sqlite.h.in $(SRC)/manifest.uuid $(SRC)/VERSION
	./tclsh $(SRC)/tool/mksqlite3h.tcl $(SRC) | \
	sed 's/^SQLITE_API //' >sqlite3.h

# Generate the directory into which generated documentation files will
# be written.
................................................................................
# Generate the traceability matrix
#
matrix:	
	rm -rf doc/matrix/images
	cp -r doc/images doc/matrix
	./tclsh $(DOC)/matrix.tcl



#-------------------------------------------------------------------------

# Source files for the [tclsqlite3.search] executable. 
#
SSRC = $(DOC)/search/searchc.c \
	    $(DOC)/search/parsehtml.c \
	    $(DOC)/search/fts5ext.c \
	    $(TCLSQLITE3C)

# Flags to build [tclsqlite3.search] with.
#
SFLAGS = -DSQLITE_THREADSAFE=0 -DSQLITE_ENABLE_FTS5

tclsqlite3.search: $(SSRC)
	$(CC) -static -O2 -o $@ -I. $(SFLAGS) $(SSRC) $(STATICTCLFLAGS)

searchdb: tclsqlite3.search
	./tclsqlite3.search $(DOC)/search/buildsearchdb.tcl
	cp $(DOC)/document_header.tcl doc/document_header.tcl
	cp $(DOC)/search/search.tcl doc/search
	chmod +x doc/search

always:	

clean:	
	rm -rf tclsh doc sqlite3.h

Changes to pages/amalgamation.in.

1
2
3


4
5
6
7
8
9
10
11
..
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
..
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
...
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
<title>The SQLite Amalgamation</title>
<tcl>hd_keywords {amalgamation} {the amalgamation}</tcl>



<h2>1.0 The SQLite Amalgamation</h2>

<p>The SQLite library consists of 102 files of C code
(as of [Version 3.9.0]) in the core with 32 additional files that
implement the [FTS3], [FTS5], [RTREE], [dbstat|DBSTAT], [json1|JSON1], and
[RBU] extensions.
Most of these are "source" files in the sense that they are stored 
in the [https://www.sqlite.org/src | SQLite version control system]
................................................................................
of between 5 and 10% when we use the amalgamation to compile 
SQLite rather than individual source files.  The downside of this
is that the additional optimizations often take the form of 
function inlining which tends to make the size of the resulting
binary image larger.</p>

<tcl>hd_fragment amal32k {split amalgamation}</tcl>
<h2>2.0 The Split Amalgamation</h2>

<p>Developers sometimes experience trouble debugging the
185,000-line-long amalgamation source file because some debuggers
are only able to handle source code line numbers less than 32,768.
The amalgamation source code runs fine.  One just cannot single-step
through it in a debugger.

................................................................................
<p>Applications using the split amalgamation simply compile against
"sqlite3-all.c" instead of "sqlite3.c".  The two files work exactly
the same.  But with "sqlite3-all.c", no single source file contains more
than 32,767 lines of code, and so it is more convenient to use some
debuggers.  The downside of the split amalgamation is that it consists
of 6 C source code files instead of just 1.

<h2>3.0 Download Copies Of The Amalgamation</h2>

<p>The amalgamation and
the sqlite3.h header file are available on
the <a href="download.html">download page</a> as a file 
named sqlite-amalgamation-X.zip
where the X is replaced by the appropriate version number.</p>

<tcl>hd_fragment amalgbuild</tcl>
<h2>4.0 Building The Amalgamation From Canonical Source Code</h2>

<p>To build the amalgamation (either the full amalgamation or the
split amalgamation), first
[get the canonical source code] from one of the three servers.
Then, on both unix-like systems and on Windows systems that have the
free [http://mingw.org/wiki/msys|MinGW] development environment
installed, the amalgamation can be built using the
................................................................................
<blockquote><pre>
nmake /f makefile.msc sqlite3.c
</pre></blockquote>

<p>In both cases, the split amalgamation can be obtained by
substituting "sqlite3-all.c" for "sqlite3.c" as the make target.

<h3>4.1 Dependencies</h3>

<p>The build process makes extensive use of the 
[http://www.tcl-lang.org/|Tcl] scripting language.  You will need to have a
copy of TCL installed in order for the make targets above to work.
Easy-to-use installers can be obtained from [http://www.tcl-lang.org/].
Many unix workstations have Tcl installed by default.

<h3>4.2 See Also</h3>

<p>Additional notes on compiling SQLite can be found on the
[How To Compile SQLite] page.



>
>
|







 







|







 







|








|







 







|







|



1
2
3
4
5
6
7
8
9
10
11
12
13
..
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
..
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
...
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
<title>The SQLite Amalgamation</title>
<tcl>hd_keywords {amalgamation} {the amalgamation}</tcl>

<fancy_format>

<h1>The SQLite Amalgamation</h1>

<p>The SQLite library consists of 102 files of C code
(as of [Version 3.9.0]) in the core with 32 additional files that
implement the [FTS3], [FTS5], [RTREE], [dbstat|DBSTAT], [json1|JSON1], and
[RBU] extensions.
Most of these are "source" files in the sense that they are stored 
in the [https://www.sqlite.org/src | SQLite version control system]
................................................................................
of between 5 and 10% when we use the amalgamation to compile 
SQLite rather than individual source files.  The downside of this
is that the additional optimizations often take the form of 
function inlining which tends to make the size of the resulting
binary image larger.</p>

<tcl>hd_fragment amal32k {split amalgamation}</tcl>
<h1>The Split Amalgamation</h1>

<p>Developers sometimes experience trouble debugging the
185,000-line-long amalgamation source file because some debuggers
are only able to handle source code line numbers less than 32,768.
The amalgamation source code runs fine.  One just cannot single-step
through it in a debugger.

................................................................................
<p>Applications using the split amalgamation simply compile against
"sqlite3-all.c" instead of "sqlite3.c".  The two files work exactly
the same.  But with "sqlite3-all.c", no single source file contains more
than 32,767 lines of code, and so it is more convenient to use some
debuggers.  The downside of the split amalgamation is that it consists
of 6 C source code files instead of just 1.

<h1>Download Copies Of The Amalgamation</h1>

<p>The amalgamation and
the sqlite3.h header file are available on
the <a href="download.html">download page</a> as a file 
named sqlite-amalgamation-X.zip
where the X is replaced by the appropriate version number.</p>

<tcl>hd_fragment amalgbuild</tcl>
<h1>Building The Amalgamation From Canonical Source Code</h1>

<p>To build the amalgamation (either the full amalgamation or the
split amalgamation), first
[get the canonical source code] from one of the three servers.
Then, on both unix-like systems and on Windows systems that have the
free [http://mingw.org/wiki/msys|MinGW] development environment
installed, the amalgamation can be built using the
................................................................................
<blockquote><pre>
nmake /f makefile.msc sqlite3.c
</pre></blockquote>

<p>In both cases, the split amalgamation can be obtained by
substituting "sqlite3-all.c" for "sqlite3.c" as the make target.

<h2>Dependencies</h2>

<p>The build process makes extensive use of the 
[http://www.tcl-lang.org/|Tcl] scripting language.  You will need to have a
copy of TCL installed in order for the make targets above to work.
Easy-to-use installers can be obtained from [http://www.tcl-lang.org/].
Many unix workstations have Tcl installed by default.

<h2>See Also</h2>

<p>Additional notes on compiling SQLite can be found on the
[How To Compile SQLite] page.

Changes to pages/autoinc.in.

1
2
3
4
5
6
7
8
9
10
11
12
..
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
..
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
<title>SQLite Autoincrement</title>
<tcl>hd_keywords AUTOINCREMENT</tcl>
<h1 align=center>Autoincrement In SQLite</h1>

<h2>Summary</h2>

<ol type="1">
<li><p>
  The AUTOINCREMENT keyword imposes extra CPU, memory, disk space,
  and disk I/O overhead and should be avoided if not strictly needed.
  It is usually not needed.
<li><p>
................................................................................
  If the AUTOINCREMENT keyword appears after INTEGER PRIMARY KEY, that
  changes the automatic ROWID assignment algorithm to prevent
  the reuse of ROWIDs over the lifetime of the database.  In other words,
  the purpose of AUTOINCREMENT is to prevent the reuse of ROWIDs from
  previously deleted rows.
</ol>

<h2>Background</h2>

<p>
^In SQLite, table rows normally have a 64-bit signed integer [ROWID]
which is unique among all rows in the same table.
([WITHOUT ROWID] tables are the exception.)
</p>

................................................................................
delete the entry in the table with the largest ROWID. 
^If you ever delete rows or if you ever create a row with the maximum possible
ROWID, then ROWIDs from previously deleted rows might be reused when creating
new rows and newly created ROWIDs might not be in strictly ascending order.
</p>


<h2>The AUTOINCREMENT Keyword</h2>

<p>
^If a column has the type INTEGER PRIMARY KEY AUTOINCREMENT then a slightly
different ROWID selection algorithm is used.  
^The ROWID chosen for the new row is at least one larger than the largest ROWID
that has ever before existed in that same table.  ^If the table has never
before contained any data, then a ROWID of 1 is used.  ^If the table


|

|







 







|







 







|







1
2
3
4
5
6
7
8
9
10
11
12
..
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
..
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
<title>SQLite Autoincrement</title>
<tcl>hd_keywords AUTOINCREMENT</tcl>
<fancy_format>

<h1>Summary</h1>

<ol type="1">
<li><p>
  The AUTOINCREMENT keyword imposes extra CPU, memory, disk space,
  and disk I/O overhead and should be avoided if not strictly needed.
  It is usually not needed.
<li><p>
................................................................................
  If the AUTOINCREMENT keyword appears after INTEGER PRIMARY KEY, that
  changes the automatic ROWID assignment algorithm to prevent
  the reuse of ROWIDs over the lifetime of the database.  In other words,
  the purpose of AUTOINCREMENT is to prevent the reuse of ROWIDs from
  previously deleted rows.
</ol>

<h1>Background</h1>

<p>
^In SQLite, table rows normally have a 64-bit signed integer [ROWID]
which is unique among all rows in the same table.
([WITHOUT ROWID] tables are the exception.)
</p>

................................................................................
delete the entry in the table with the largest ROWID. 
^If you ever delete rows or if you ever create a row with the maximum possible
ROWID, then ROWIDs from previously deleted rows might be reused when creating
new rows and newly created ROWIDs might not be in strictly ascending order.
</p>


<h1>The AUTOINCREMENT Keyword</h1>

<p>
^If a column has the type INTEGER PRIMARY KEY AUTOINCREMENT then a slightly
different ROWID selection algorithm is used.  
^The ROWID chosen for the new row is at least one larger than the largest ROWID
that has ever before existed in that same table.  ^If the table has never
before contained any data, then a ROWID of 1 is used.  ^If the table

Changes to pages/partialindex.in.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
..
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
..
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
...
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
...
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
<title>Partial Indexes</title>
<tcl>
hd_keywords {partial index} {partial indexes} {partial indices}
</tcl>
<h1 align="center">Partial Indexes</h1>

<h2>1.0 Introduction</h2>

<p>
A partial index is an index over a subset of the rows of a table.
</p>

<p>
^In ordinary indexes, there is exactly one entry in the index for every
................................................................................
row in the table.  ^In partial indexes, only some subset of the rows in the
table have corresponding index entries.  ^For example, a partial index might
omit entries for which the column being indexed is NULL.  When used 
judiciously, partial indexes can result in smaller database files and
improvements in both query and write performance.
</p>

<h2>2.0 Creating Partial Indexes</h2>

<p>
^Create a partial index by adding a WHERE clause to the end of an 
ordinary [CREATE INDEX] statement.
</p>

<tcl>RecursiveBubbleDiagram create-index-stmt</tcl>
................................................................................
</blockquote>)^

<p>^The query above will use the po_parent index to help find the answer,
since the po_parent index contains entries for all rows of interest.
Note that since po_parent is smaller than a full index, the query will
likely run faster too.</p>

<h3>2.1 Unique Partial Indexes</h3>

<p>^A partial index definition may include the UNIQUE keyword.  ^If it
does, then SQLite requires every entry <em>in the index</em> to be unique.
This provides a mechanism for enforcing uniqueness across some subset of
the rows in a table.</p>

<p>For example, suppose you have a database of the members of a large
................................................................................
^(<p>Coincidentally, that same index is useful for locating the team leader
of a particular team:</p>

<blockquote>
SELECT person_id FROM person WHERE is_team_leader AND team_id=?1;
</blockquote>)^

<h2>3.0 Queries Using Partial Indexes</h2>

<p>Let X be the expression in the WHERE clause of a partial
index, and let W be the WHERE clause of a query that uses the
table that is indexed.  Then, the query is permitted to use 
the partial index if W&#x21d2;X, where the &#x21d2; operator
(usually pronounced "implies") is the logic operator 
equivalent to "X or not W".
................................................................................

<p>These two rules describe how the query planner for SQLite works as of
this writing (2013-08-01).  And the rules above will always be honored.
However, future versions of SQLite might incorporate a better theorem prover
that can find other cases where W&#x21d2;X is true and thus may
find more instances where partial indexes are useful.</p>

<h2>4.0 Supported Versions</h2>

<p>
Partial indexes have been supported in SQLite since version 3.8.0.
</p>

<p>Database files that contain partial indices are not readable or writable
by versions of SQLite prior to 3.8.0.  However, a database file created
by SQLite 3.8.0 is still readable and writable by prior versions as long
as its schema contains no partial indexes.  A database that is unreadable
by legacy versions of SQLite can be made readable simply by running
[DROP INDEX] on the partial indexes.</p>




|

|







 







|







 







|







 







|







 







|











1
2
3
4
5
6
7
8
9
10
11
12
13
14
..
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
..
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
...
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
...
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
<title>Partial Indexes</title>
<tcl>
hd_keywords {partial index} {partial indexes} {partial indices}
</tcl>
<fancy_format>

<h1>Introduction</h1>

<p>
A partial index is an index over a subset of the rows of a table.
</p>

<p>
^In ordinary indexes, there is exactly one entry in the index for every
................................................................................
row in the table.  ^In partial indexes, only some subset of the rows in the
table have corresponding index entries.  ^For example, a partial index might
omit entries for which the column being indexed is NULL.  When used 
judiciously, partial indexes can result in smaller database files and
improvements in both query and write performance.
</p>

<h1>Creating Partial Indexes</h1>

<p>
^Create a partial index by adding a WHERE clause to the end of an 
ordinary [CREATE INDEX] statement.
</p>

<tcl>RecursiveBubbleDiagram create-index-stmt</tcl>
................................................................................
</blockquote>)^

<p>^The query above will use the po_parent index to help find the answer,
since the po_parent index contains entries for all rows of interest.
Note that since po_parent is smaller than a full index, the query will
likely run faster too.</p>

<h2>Unique Partial Indexes</h2>

<p>^A partial index definition may include the UNIQUE keyword.  ^If it
does, then SQLite requires every entry <em>in the index</em> to be unique.
This provides a mechanism for enforcing uniqueness across some subset of
the rows in a table.</p>

<p>For example, suppose you have a database of the members of a large
................................................................................
^(<p>Coincidentally, that same index is useful for locating the team leader
of a particular team:</p>

<blockquote>
SELECT person_id FROM person WHERE is_team_leader AND team_id=?1;
</blockquote>)^

<h1>Queries Using Partial Indexes</h1>

<p>Let X be the expression in the WHERE clause of a partial
index, and let W be the WHERE clause of a query that uses the
table that is indexed.  Then, the query is permitted to use 
the partial index if W&#x21d2;X, where the &#x21d2; operator
(usually pronounced "implies") is the logic operator 
equivalent to "X or not W".
................................................................................

<p>These two rules describe how the query planner for SQLite works as of
this writing (2013-08-01).  And the rules above will always be honored.
However, future versions of SQLite might incorporate a better theorem prover
that can find other cases where W&#x21d2;X is true and thus may
find more instances where partial indexes are useful.</p>

<h1>Supported Versions</h1>

<p>
Partial indexes have been supported in SQLite since version 3.8.0.
</p>

<p>Database files that contain partial indices are not readable or writable
by versions of SQLite prior to 3.8.0.  However, a database file created
by SQLite 3.8.0 is still readable and writable by prior versions as long
as its schema contains no partial indexes.  A database that is unreadable
by legacy versions of SQLite can be made readable simply by running
[DROP INDEX] on the partial indexes.</p>

Changes to search/buildsearchdb.tcl.

1
2
3


4
5
6
7
8
9
10
...
353
354
355
356
357
358
359

360
361
362
363
364
365
366
...
375
376
377
378
379
380
381
382
383
384
385
386


387
388
389
390
391
392
393


load ./parsehtml.so


source [file join [file dirname [info script]] hdom.tcl]

set ::G(rowid) 1

# Return a list of relative paths to documents that should be included 
# in the index.
proc document_list {type} {
................................................................................
    set lSection [extract_sections_from_dom $dom generic_filterscript]
  }

  set i [expr $rowid*1000]
  foreach section $lSection {
    incr i
    foreach { tag hdr text } $section {}

    set url "${doc}#${tag}"
    insert_entry -rowid $i -url $url -title1 $title -title2 $hdr -content $text
  }
}

proc rebuild_database {} {

................................................................................
        apis,                               -- C APIs 
        keywords,                           -- Keywords
        title1,                             -- Document title
        title2,                             -- Heading title, if any
        content,                            -- Document text

        url UNINDEXED,                      -- Indexed URL
        tokenize='porter unicode61 tokenchars _' -- Tokenizer definition
      );

      DROP TABLE IF EXISTS weight;
      CREATE TABLE weight(id INTEGER PRIMARY KEY, percent FLOAT);


    }

    foreach doc [document_list lang] {
      puts "Indexing $doc..."
      lang_document_import $doc
    }



|
>
>







 







>







 







|




>
>







1
2
3
4
5
6
7
8
9
10
11
12
...
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
...
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398


#load ./parsehtml.so
#load ./tokenize.so

source [file join [file dirname [info script]] hdom.tcl]

set ::G(rowid) 1

# Return a list of relative paths to documents that should be included 
# in the index.
proc document_list {type} {
................................................................................
    set lSection [extract_sections_from_dom $dom generic_filterscript]
  }

  set i [expr $rowid*1000]
  foreach section $lSection {
    incr i
    foreach { tag hdr text } $section {}
    if {[string trim $text]==""} continue
    set url "${doc}#${tag}"
    insert_entry -rowid $i -url $url -title1 $title -title2 $hdr -content $text
  }
}

proc rebuild_database {} {

................................................................................
        apis,                               -- C APIs 
        keywords,                           -- Keywords
        title1,                             -- Document title
        title2,                             -- Heading title, if any
        content,                            -- Document text

        url UNINDEXED,                      -- Indexed URL
        tokenize='stoken unicode61 tokenchars _' -- Tokenizer definition
      );

      DROP TABLE IF EXISTS weight;
      CREATE TABLE weight(id INTEGER PRIMARY KEY, percent FLOAT);

      INSERT INTO page(page, rank) VALUES('rank', 'bm25(20.0,20.0,10.0,10.0)');
    }

    foreach doc [document_list lang] {
      puts "Indexing $doc..."
      lang_document_import $doc
    }

Added search/fts5ext.c.





































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178

/*
** This file contains the implementation of a custom FTS5 tokenizer. This
** tokenizer implements the following special features:
**
**   * For all tokens that match the pattern "SQLITE_XXX" (case sensitive),
**     "XXX" is added as a synonym for SQLITE_XXX.
**
**   * For all tokens that match the pattern "sqlite3_xxx" (case sensitive),
**     "xxx" is added as a synonym for sqlite3_xxx.
*/

#include <sqlite3.h>
#include <tcl.h>
#include <string.h>


/*************************************************************************
** This is generic code copied from the FTS5 documentation.
**
** Return a pointer to the fts5_api pointer for database connection db.
** If an error occurs, return NULL and leave an error in the database 
** handle (accessible using sqlite3_errcode()/errmsg()).
*/
fts5_api *fts5_api_from_db(sqlite3 *db){
  fts5_api *pRet = 0;
  sqlite3_stmt *pStmt = 0;
  /* Evaluate "SELECT fts5()".  If the FTS5 extension is present, the first
  ** row contains a blob exactly sizeof(fts5_api*) bytes long that holds the
  ** fts5_api pointer; copy it out via memcpy to avoid alignment issues.
  ** NOTE(review): this is the pre-3.20 idiom.  SQLite 3.20+ replaced it with
  ** "SELECT fts5(?1)" plus sqlite3_bind_pointer() -- confirm the SQLite
  ** version in use before reusing this code elsewhere. */
  if( SQLITE_OK==sqlite3_prepare(db, "SELECT fts5()", -1, &pStmt, 0)
   && SQLITE_ROW==sqlite3_step(pStmt) 
   && sizeof(pRet)==sqlite3_column_bytes(pStmt, 0)
  ){
    memcpy(&pRet, sqlite3_column_blob(pStmt, 0), sizeof(pRet));
  }
  /* sqlite3_finalize() is a harmless no-op if pStmt is NULL (prepare failed) */
  sqlite3_finalize(pStmt);
  return pRet;
}
/************************************************************************/


typedef struct STokenizer STokenizer;
typedef struct STokenCtx STokenCtx;

/*
** Tokenizer type. Casts to Fts5Tokenizer.
**
** The "stoken" tokenizer is a thin wrapper around the built-in porter
** tokenizer: it delegates all real tokenization work to porter and only
** injects synonym tokens in the tokenize callback.
*/
struct STokenizer {
  fts5_tokenizer porter;      /* Method table of the wrapped porter tokenizer */
  Fts5Tokenizer *pPorter;     /* Porter tokenizer instance (owned; freed in xDelete) */
};

/*
** Context passed through underlying tokenizer to wrapper callback.
**
** Carries the caller's original callback and context so that
** stokenTokenizeCb() can forward each token (plus any synonyms) to them.
*/
struct STokenCtx {
  void *pCtx;                                       /* Caller's first xToken argument */
  int (*xToken)(void*, int, const char*, int, int, int);  /* Caller's token callback */
};

/*
** Implementation of the xCreate method for the "stoken" tokenizer.
**
** pCtx is the fts5_api pointer registered in register_tokenizer().  The
** azArg/nArg tokenizer arguments are passed through unchanged to the
** wrapped porter tokenizer.  On success, *ppOut is set to the new
** tokenizer instance and SQLITE_OK is returned; on failure *ppOut is set
** to NULL and an SQLite error code is returned.
*/
static int stokenCreate(
  void *pCtx, 
  const char **azArg, int nArg, 
  Fts5Tokenizer **ppOut
){
  fts5_api *pApi = (fts5_api*)pCtx;
  STokenizer *p;
  void *pPorterCtx;
  int rc;

  /* Allocate the Fts5Tokenizer object for this tokenizer. */
  p = sqlite3_malloc(sizeof(STokenizer));
  if( p ){
    memset(p, 0, sizeof(STokenizer));
  }else{
    return SQLITE_NOMEM;
  }

  /* Locate and allocate the porter tokenizer */
  rc = pApi->xFindTokenizer(pApi, "porter", &pPorterCtx, &p->porter);
  if( rc==SQLITE_OK ){
    rc = p->porter.xCreate(pPorterCtx, azArg, nArg, &p->pPorter);
  }

  /* Return the new tokenizer to the caller.  On error, free the wrapper
  ** here; the porter instance was only created if rc==SQLITE_OK, so there
  ** is nothing else to release. */
  if( rc!=SQLITE_OK ){
    sqlite3_free(p);
    p = 0;
  }
  *ppOut = (Fts5Tokenizer*)p;
  return rc;
}

/*
** Implementation of the xDelete method for the "stoken" tokenizer.
** Releases the wrapped porter tokenizer instance first, then the
** STokenizer wrapper allocated by stokenCreate().
*/
static void stokenDelete(Fts5Tokenizer *pTokenizer){
  STokenizer *pTok = (STokenizer*)pTokenizer;
  pTok->porter.xDelete(pTok->pPorter);
  sqlite3_free(pTok);
}

/*
** Token callback used by stokenTokenize() when tokenizing document text.
**
** Each token produced by the wrapped porter tokenizer is first forwarded
** unchanged to the caller's callback.  Then, if the token begins with
** "sqlite_" or "sqlite3_", the remainder of the token is emitted as a
** colocated synonym (FTS5_TOKEN_COLOCATED), so that e.g. a query for
** "open" matches documents containing "sqlite3_open".
**
** NOTE(review): the file header describes the patterns as "SQLITE_XXX" /
** "sqlite3_xxx" (case sensitive), while the comparisons here are
** lowercase only -- presumably because the underlying porter/unicode61
** tokenizer has already case-folded the token text; confirm.
*/
static int stokenTokenizeCb(
  void *pCtx,         /* Copy of 2nd argument to xTokenize() */
  int tflags,         /* Mask of FTS5_TOKEN_* flags */
  const char *pToken, /* Pointer to buffer containing token */
  int nToken,         /* Size of token in bytes */
  int iStart,         /* Byte offset of token within input text */
  int iEnd            /* Byte offset of end of token within input text */
){
  STokenCtx *p = (STokenCtx*)pCtx;
  /* Forward the token itself (tflags deliberately passed as 0) */
  int rc = p->xToken(p->pCtx, 0, pToken, nToken, iStart, iEnd);
  if( rc==SQLITE_OK && nToken>7 && 0==memcmp("sqlite_", pToken, 7) ){
    rc = p->xToken(
        p->pCtx, FTS5_TOKEN_COLOCATED, pToken+7, nToken-7, iStart, iEnd);
  }

  if( rc==SQLITE_OK && nToken>8 && 0==memcmp("sqlite3_", pToken, 8) ){
    rc = p->xToken(
        p->pCtx, FTS5_TOKEN_COLOCATED, pToken+8, nToken-8, iStart, iEnd);
  }

  return rc;
}

/*
** xTokenize method for the "stoken" tokenizer.
**
** Delegate all tokenization to the wrapped porter tokenizer. For document
** tokenization (FTS5_TOKENIZE_DOCUMENT) the stokenTokenizeCb wrapper is
** interposed so extra colocated tokens are added to the index; for all
** other modes (queries, aux functions) calls pass straight through.
*/
static int stokenTokenize(
  Fts5Tokenizer *pTokenizer, 
  void *pCtx,
  int flags,            /* Mask of FTS5_TOKENIZE_* flags */
  const char *pText, int nText, 
  int (*xToken)(
    void *pCtx,         /* Copy of 2nd argument to xTokenize() */
    int tflags,         /* Mask of FTS5_TOKEN_* flags */
    const char *pToken, /* Pointer to buffer containing token */
    int nToken,         /* Size of token in bytes */
    int iStart,         /* Byte offset of token within input text */
    int iEnd            /* Byte offset of end of token within input text */
  )
){
  STokenizer *pTok = (STokenizer*)pTokenizer;

  if( flags!=FTS5_TOKENIZE_DOCUMENT ){
    /* Query/aux tokenization: no synthetic tokens are required. */
    return pTok->porter.xTokenize(pTok->pPorter, pCtx, flags, pText, nText, xToken);
  }else{
    /* Document tokenization: route tokens through the wrapper callback. */
    STokenCtx ctx;
    ctx.pCtx = pCtx;
    ctx.xToken = xToken;
    return pTok->porter.xTokenize(
        pTok->pPorter, (void*)&ctx, flags, pText, nText, stokenTokenizeCb
    );
  }
}

/*
** Auto-extension entry point. Register the "stoken" tokenizer with the
** FTS5 module of database connection db. Return SQLITE_OK on success.
** On failure, set *pzErr to an English error message (allocated via
** sqlite3_mprintf) and return an SQLite error code.
*/
static int register_tokenizer(sqlite3 *db, char **pzErr, void *p){
  fts5_tokenizer tok;
  fts5_api *pApi = fts5_api_from_db(db);

  if( !pApi ){
    *pzErr = sqlite3_mprintf("fts5_api_from_db: %s", sqlite3_errmsg(db));
    return SQLITE_ERROR;
  }

  tok.xCreate = stokenCreate;
  tok.xDelete = stokenDelete;
  tok.xTokenize = stokenTokenize;

  /* The fts5_api pointer doubles as the tokenizer context for xCreate. */
  return pApi->xCreateTokenizer(pApi, "stoken", (void*)pApi, &tok, 0);
}

/*
** Tcl package initialization. Arrange for register_tokenizer() to run
** automatically against every new database connection, so the "stoken"
** tokenizer is always available.
*/
int Fts5ext_Init(Tcl_Interp *interp){
#ifdef USE_TCL_STUBS
  if( 0==Tcl_InitStubs(interp, "8.4", 0) ) return TCL_ERROR;
#endif
  sqlite3_auto_extension((void (*)(void))register_tokenizer);
  return TCL_OK;
}

Changes to search/hdom.tcl.

38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#
#    $node attr ?-default VALUE? ATTR
#
#    $node search PATTERN
#


load ./parsehtml.so

#-------------------------------------------------------------------------
# Throw an exception if the expression passed as the only argument does
# not evaluate to true.
#
proc assert {condition} {
  uplevel [list if "! ($condition)" [list error "assert failed: $condition"]]







|







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#
#    $node attr ?-default VALUE? ATTR
#
#    $node search PATTERN
#


catch { load ./parsehtml.so }

#-------------------------------------------------------------------------
# Throw an exception if the expression passed as the only argument does
# not evaluate to true.
#
proc assert {condition} {
  uplevel [list if "! ($condition)" [list error "assert failed: $condition"]]

Changes to search/search.tcl.

1
2
3
4
5
6
7
8
...
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
...
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233

234
235
236
237
238
239
240

241
242
243
244
245
246
247
#!/home/dan/bin/tclsqlite3

source [file dirname [info script]]/document_header.tcl

# Decode an HTTP %-encoded string
#
proc percent_decode {str} {
    # rewrite "+" back to space
................................................................................
  #regsub -all {[^-/"A-Za-z0-9]} $::A(q) { } ::A(q)

  # Count the '"' characters in $::A(q). If there is an odd number of
  # occurences, add a " to the end of the query so that fts5 can parse
  # it without error.
  if {[regexp -all \x22 $::A(q)] % 2} { append ::A(q) \x22 }

  db one { INSERT INTO page(page, rank) VALUES('rank', 'bm25(20.0, 10.0)') }

  # Set iStart to the index of the first result to display. Results are
  # indexed starting at zero from most to least relevant.
  #
  set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}]

  # Grab a list of rowid results.
  #
................................................................................
      set link $data($childid,url)
      set hdr $data($childid,s_title2)

      if {$hdr==""} {
        set s_content ""
      } else {
        set s_content [subst {
          <b><a style=color:#044a64 href=$link>$hdr</a></b>:
        }]
      }

      append s_content " $data($childid,s_content)"
    }

    append ret [subst -nocommands {<tr>
      <td valign=top style="line-height:150%">
        <div style="white-space:wrap;font-size:larger" class=nounderline>
          <a href="$url">$s_title1</a>

        </div>
          <div style="margin-left: 10ex; font:larger monospace">$s_apis</div>
        <div style="ffont-size:small;margin-left: 2ex">
          <div class=nounderline> $s_content </div>
          <div style="margin-left:1em; margin-bottom:1em">
            <a href="$url">$url</a>
          </div>

        </div>
      </td>
    }]
  }
  append ret { </table> }


|







 







<
<







 







|









|
>


<
<
|
<
<
>







1
2
3
4
5
6
7
8
...
129
130
131
132
133
134
135


136
137
138
139
140
141
142
...
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234


235


236
237
238
239
240
241
242
243
#!/usr/bin/tclsqlite3.search

source [file dirname [info script]]/document_header.tcl

# Decode an HTTP %-encoded string
#
proc percent_decode {str} {
    # rewrite "+" back to space
................................................................................
  #regsub -all {[^-/"A-Za-z0-9]} $::A(q) { } ::A(q)

  # Count the '"' characters in $::A(q). If there is an odd number of
  # occurences, add a " to the end of the query so that fts5 can parse
  # it without error.
  if {[regexp -all \x22 $::A(q)] % 2} { append ::A(q) \x22 }



  # Set iStart to the index of the first result to display. Results are
  # indexed starting at zero from most to least relevant.
  #
  set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}]

  # Grab a list of rowid results.
  #
................................................................................
      set link $data($childid,url)
      set hdr $data($childid,s_title2)

      if {$hdr==""} {
        set s_content ""
      } else {
        set s_content [subst {
          <b><a style=color:#044a64 href=$link>$hdr</a></b>
        }]
      }

      append s_content " $data($childid,s_content)"
    }

    append ret [subst -nocommands {<tr>
      <td valign=top style="line-height:150%">
        <div style="white-space:wrap;font-size:larger" class=nounderline>
          <a href="$url">$s_title1</a> 
          <div style="float:right;font-size:smaller;color:#BBB">($url)</div>
        </div>
          <div style="margin-left: 10ex; font:larger monospace">$s_apis</div>


        <div style="margin-left: 4ex; margin-bottom:1.5em">


           $s_content 
        </div>
      </td>
    }]
  }
  append ret { </table> }


Changes to search/searchc.c.

1

2

3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


33
34
35
36
37
38







39
40
41
42
43
44
45
46


#include <sqlite3.h>


/*
*/

/*
** Return a pointer to the fts5_api pointer for database connection db.
** If an error occurs, return NULL and leave an error in the database 
** handle (accessible using sqlite3_errcode()/errmsg()).
*/
fts5_api *fts5_api_from_db(sqlite3 *db){
  fts5_api *pRet = 0;
  sqlite3_stmt *pStmt = 0;

  /* "SELECT fts5()" yields the fts5_api pointer encoded as a blob; the
  ** size check guards against reading a value of the wrong width.
  ** NOTE(review): SQLite 3.20+ documents sqlite3_bind_pointer() as the
  ** preferred way to obtain this pointer — confirm target version. */
  if( SQLITE_OK==sqlite3_prepare(db, "SELECT fts5()", -1, &pStmt, 0)
      && SQLITE_ROW==sqlite3_step(pStmt) 
      && sizeof(pRet)==sqlite3_column_bytes(pStmt, 0)
    ){
    memcpy(&pRet, sqlite3_column_blob(pStmt, 0), sizeof(pRet));
  }
  /* Finalize even on failure; pStmt may be NULL, which is a no-op. */
  sqlite3_finalize(pStmt);
  return pRet;
}

int Sqlite3_Init(Tcl_Interp *interp);

static int register_search_extensions(sqlite3 *db, char **pzErr, void *p){
  fts5_api *pApi = fts5_api_from_db(db);

  return SQLITE_OK;
}



static int AppInit(Tcl_Interp *interp) {
  int rc;
  rc = Sqlite3_Init(interp);
  if( rc!=TCL_OK ) return rc;
  sqlite3_auto_extension(register_search_extensions);







  return TCL_OK;
}

int main(int argc, char *argv[]) {
  Tcl_Main(argc, argv, AppInit);
  return 0;
}


>

>

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
>
>





<
>
>
>
>
>
>
>








1
2
3
4
5






















6






7
8
9
10
11
12
13

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28


#include <sqlite3.h>
#include <tcl.h>























int Sqlite3_Init(Tcl_Interp*);






int Parsehtml_Init(Tcl_Interp*);
int Fts5ext_Init(Tcl_Interp*);

/* Initialize the Tcl interpreter: load the SQLite Tcl bindings plus the
** parsehtml and fts5 extension packages. Returns TCL_OK or TCL_ERROR. */
static int AppInit(Tcl_Interp *interp) {
  int rc;
  rc = Sqlite3_Init(interp);
  if( rc!=TCL_OK ) return rc;


  rc = Parsehtml_Init(interp);
  if( rc!=TCL_OK ) return rc;

  rc = Fts5ext_Init(interp);
  if( rc!=TCL_OK ) return rc;

  return TCL_OK;
}

int main(int argc, char *argv[]) {
  /* Hand control to the Tcl interpreter; AppInit runs first. */
  Tcl_Main(argc, argv, AppInit);
  return 0;
}