Documentation Source Text

Check-in [9d49a78f9b]

Overview
Comment: Add "jump to" links to relevant sections of large documents in search results.
SHA1: 9d49a78f9b2ca1f7989b2574d401dff468192afc
User & Date: dan 2016-08-20 17:22:57.678
Context
2016-08-24 19:26  Further updates to search database and script. (check-in: cc51dec17e user: dan tags: experimental)
2016-08-20 17:22  Add "jump to" links to relevant sections of large documents in search results. (check-in: 9d49a78f9b user: dan tags: experimental)
2016-08-19 17:02  Fix more minor search problems. (check-in: c345859928 user: dan tags: experimental)
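The diff below implements the comment above in two parts: buildsearchdb.tcl now inserts one extra row into the FTS5 "page" table for each document section that has its own heading id (keyed off the parent document's rowid), and search.tcl folds a matching section row back under its parent page and emits a "jump to" link of the form page.html#fragment. A minimal sketch of the rowid scheme, assuming the hypothetical helper names section_rowid and parent_of (neither appears in the check-in):

# Minimal sketch (not part of the check-in) of the rowid scheme used for
# the new "jump to" links. Whole documents get small rowids; the sections
# of document N are stored under rowids N*1000+1, N*1000+2, ..., so a
# matching section row can be folded back under its parent page.
proc section_rowid {parentRowid i} {
  expr {$parentRowid * 1000 + $i}
}
proc parent_of {rowid} {
  # rowids above 1000 belong to section entries; integer division
  # recovers the parent document's rowid
  expr {$rowid > 1000 ? $rowid / 1000 : $rowid}
}

puts [section_rowid 7 3]    ;# => 7003, third section of the page with rowid 7
puts [parent_of 7003]       ;# => 7
puts [parent_of 7]          ;# => 7, a whole-document row maps to itself

Keeping section rows in the same FTS5 table means a single MATCH query ranks whole pages and individual sections together; the 1000-per-page spacing is simply a cheap way to recover the parent page without a join, which mirrors how search.tcl partitions its query results below.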
Changes
Changes to main.mk.
Old:
private:	base evidence private_evidence matrix doc

fast:	base doc

tclsh:	$(TCLSQLITE3C)
	$(CC) -g -o tclsh -DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS5 -DTCLSH=1 -DSQLITE_TCLMD5 $(TCLINC) $(TCLSQLITE3C) $(TCLFLAGS)

tclsqlite3.fts3:	$(TCLSQLITE3C) $(DOC)/search/searchc.c
	$(CC) -static -O2 -o tclsqlite3.fts3 -I. -DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS5 $(TCLINC) $(DOC)/search/searchc.c $(TCLSQLITE3C) $(TCLFLAGS)

sqlite3.h:	tclsh $(SRC)/src/sqlite.h.in $(SRC)/manifest.uuid $(SRC)/VERSION
	./tclsh $(SRC)/tool/mksqlite3h.tcl $(SRC) | \
	sed 's/^SQLITE_API //' >sqlite3.h

# Generate the directory into which generated documentation files will
# be written.
New:
private:	base evidence private_evidence matrix doc

fast:	base doc

tclsh:	$(TCLSQLITE3C)
	$(CC) -g -o tclsh -DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS5 -DTCLSH=1 -DSQLITE_TCLMD5 $(TCLINC) $(TCLSQLITE3C) $(TCLFLAGS)

tclsqlite3.search:	$(TCLSQLITE3C) $(DOC)/search/searchc.c
	$(CC) -g -o tclsqlite3.search -I. -DSQLITE_THREADSAFE=0 -DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS5 $(TCLINC) $(DOC)/search/searchc.c $(TCLSQLITE3C) $(TCLFLAGS)

sqlite3.h:	tclsh $(SRC)/src/sqlite.h.in $(SRC)/manifest.uuid $(SRC)/VERSION
	./tclsh $(SRC)/tool/mksqlite3h.tcl $(SRC) | \
	sed 's/^SQLITE_API //' >sqlite3.h

# Generate the directory into which generated documentation files will
# be written.
Changes to pages/foreignkeys.in.
Old:
<tcl>hd_keywords {foreign key constraints} {foreign key constraint} {FOREIGN KEY constraints}</tcl>

<title>SQLite Foreign Key Support</title>
<table_of_contents>

<h2 style="margin-left:1.0em" notoc> Overview</h2>

<p>This document describes the support for SQL foreign key constraints
   introduced in SQLite version 3.6.19. 

<p>The first section introduces the 
   concept of an SQL foreign key by example and defines the terminology 
   used for the remainder of the document. Section 2 describes the steps 
New:
<tcl>hd_keywords {foreign key constraints} {foreign key constraint} {FOREIGN KEY constraints}</tcl>

<title>SQLite Foreign Key Support</title>
<table_of_contents>

<h2 style="margin-left:1.0em" notoc id=overview> Overview</h2>

<p>This document describes the support for SQL foreign key constraints
   introduced in SQLite version 3.6.19. 

<p>The first section introduces the 
   concept of an SQL foreign key by example and defines the terminology 
   used for the remainder of the document. Section 2 describes the steps 
Changes to search/buildsearchdb.tcl.
Old:
load ./parsehtml.so
source [file join [file dirname [info script]] hdom.tcl]

# Return a list of relative paths to documents that should be included 
# in the index.
proc document_list {type} {
  set lFiles [list]
  switch -- $type {
    lang {
      foreach f [glob lang_*.html] { lappend lFiles $f }
    }

    c3ref {
      foreach f [glob c3ref/*.html] { lappend lFiles $f }
    }

    generic {
      set nosearch(doc_keyword_crossref.html) 1
      set nosearch(doc_backlink_crossref.html) 1
      set nosearch(doc_pagelink_crossref.html) 1
      set nosearch(doc_target_crossref.html) 1
      set nosearch(doclist.html) 1
      set nosearch(keyword_index.html) 1
      set nosearch(requirements.html) 1
      set nosearch(sitemap.html) 1
      set nosearch(fileio.html) 1
      set nosearch(btreemodule.html) 1
      set nosearch(capi3ref.html) 1
      set nosearch(changes.html) 1
      set nosearch(fileformat2.html) 1
      set nosearch(index.html) 1
      set nosearch(docs.html) 1

      foreach f [glob *.html] { 
        if {[string match lang_* $f]==0 && [info exists nosearch($f)]==0} {
          lappend lFiles $f 
        }
      }

      # "current.html" is a duplicate of the most recent release.
      set nosearch(releaselog/current.html) 1
      foreach f [glob releaselog/*.html] { 
        if {[info exists nosearch($f)]==0} { lappend lFiles $f }
      }
    }

    default {
      error "document_list: unknown file type $type"
    }
  }
  return $lFiles
}

proc readfile {zFile} {
  set fd [open $zFile]
  set ret [read $fd]
  close $fd
  return $ret
}

proc insert_entry {url apis title content} {
  set content [string trim $content]
  db eval {
    INSERT INTO page VALUES($apis, $title, $content, $url);
  }
}

# Extract a document title from DOM object $dom passed as the first
# argument. If no <title> node can be found in the DOM, use $fallback
# as the title.
#
proc extract_title {dom fallback} {
  set title_node [lindex [[$dom root] search title] 0]
  if {$title_node==""} {
    set title $fallback
  } else {
    set title [$title_node text]
  }

  set title
}

proc c3ref_document_apis {dom} {
  set blacklist(sqlite3_int64) 1

  set res [list]
  foreach N [[$dom root] search blockquote] {
    set text [$N text]
    while {[regexp {(sqlite3[0-9a-z_]*) *\((.*)} $text -> api text]} {
      if {[info exists blacklist($api)]==0} {
        lappend res "${api}()"
      }
    }

    set pattern {typedef +struct +(sqlite3[0-9a-z_]*)(.*)}
    while {[regexp $pattern $text -> api text]} {
      if {[info exists blacklist($api)]==0} {
        lappend res "struct ${api}"
      }
    }

    set pattern {#define +(SQLITE_[0-9A-Z_]*)(.*)}
    while {[regexp $pattern $text -> api text]} {
      if {[info exists blacklist($api)]==0} {
        lappend res "${api}"
      }
    }
  }

  set res [lsort -uniq $res]

  return [join $res ", "]
}

proc c3ref_filterscript {N} {
  for {set P [$N parent]} {$P!=""} {set P [$P parent]} {
    if {[$P attr -default "" class]=="nosearch"} { return 0 }
    if {[$P tag]=="blockquote" } { return 0 }
New:
load ./parsehtml.so
source [file join [file dirname [info script]] hdom.tcl]

set ::G(rowid) 1

# Return a list of relative paths to documents that should be included 
# in the index.
proc document_list {type} {
  global weight
  set lFiles [list]
  switch -- $type {
    lang {
      foreach f [glob lang_*.html] { lappend lFiles $f }
    }

    c3ref {
      set blacklist(objlist.html) 1
      set blacklist(constlist.html) 1
      set blacklist(funclist.html) 1

      lappend lFiles c3ref/free.html
      lappend lFiles c3ref/mprintf.html
      lappend lFiles c3ref/io_methods.html

      foreach f [glob c3ref/*.html] { 
        if {[info exists blacklist([file tail $f])]} continue
        if {[lsearch $lFiles $f]<0} { lappend lFiles $f }
      }
    }

    generic {
      set nosearch(doc_keyword_crossref.html) 1
      set nosearch(doc_backlink_crossref.html) 1
      set nosearch(doc_pagelink_crossref.html) 1
      set nosearch(doc_target_crossref.html) 1
      set nosearch(doclist.html) 1
      set nosearch(keyword_index.html) 1
      set nosearch(requirements.html) 1
      set nosearch(sitemap.html) 1
      set nosearch(fileio.html) 1
      set nosearch(btreemodule.html) 1
      set nosearch(capi3ref.html) 1
      set nosearch(changes.html) 1
      set nosearch(fileformat2.html) 1
      set nosearch(index.html) 1
      set nosearch(docs.html) 1

      set weight(chronology.html) 25

      foreach f [glob *.html] { 
        if {[string match lang_* $f]==0 && [info exists nosearch($f)]==0} {
          lappend lFiles $f 
        }
      }

      # "current.html" is a duplicate of the most recent release. Don't
      # index it at all.
      set nosearch(releaselog/current.html) 1


      # As of version 3.7.16, sub-release changelogs duplicated the entries
      # from the major release. This block does the following:
      #
      #   * sets the weight of a changelog containing superseded content
      #     to 25%
      #   * sets the weights of other changelogs to 50%.
      #
      foreach f [glob releaselog/*.html] { 
        set tail [file tail $f]
        set ::weight($f) 50
        if {[regexp {^(3_8_[0-9]*).*} $tail -> prefix]
         || [regexp {^(3_7_16).*} $tail -> prefix]
         || [regexp {^(3_9_).*} $tail -> prefix]
         || [regexp {^(3_[1-9][0-9]).*} $tail -> prefix]
        } {
          set f1 [lindex [lsort -decreasing [glob releaselog/$prefix*.html]] 0]
          if {$f!=$f1} { set ::weight($f) 25 }
        } 
      }

      foreach f [glob releaselog/*.html] { 
        if {[info exists nosearch($f)]==0} { 
          lappend lFiles $f 
        }
      }
    }

    default {
      error "document_list: unknown file type $type"
    }
  }
  return $lFiles
}

proc readfile {zFile} {
  set fd [open $zFile]
  set ret [read $fd]
  close $fd
  return $ret
}

# Insert a new entry into the main "page" table of the search database.
# Values are determined by switches passed to this function:
#
#   -apis      List of APIs
#   -rowid     Rowid to use
#   -title1    Document title
#   -title2    Heading title (or NULL)
#   -content   Document content
#   -url       URL of this document
#
# Return the rowid of the row just inserted into the table.
# 
proc insert_entry {args} {
  global G
  if {[llength $args] % 2} { error "Bad arguments passed to insert_entry (1)" }

  set switches {
    -apis -title1 -title2 -content -url -keywords -rowid
  }
  set V(content) ""

  foreach {k v} $args {
    set idx [lsearch -all $switches $k*] 
    if {[llength $idx]!=1} { error "Bad switch passed to insert_entry: $k" }
    set V([string range [lindex $switches $idx] 1 end]) $v
  }
  
  set V(content) [string trim $V(content)]
  if {[info exists V(rowid)]==0} {
    set V(rowid) [incr G(rowid)];
  }

  db eval {
    INSERT INTO page(rowid, apis, keywords, title1, title2, content, url) 
    VALUES($V(rowid),
        $V(apis), $V(keywords), $V(title1), $V(title2), $V(content), $V(url)
    );
  }

  return [db last_insert_rowid]
}

# Extract a document title from DOM object $dom passed as the first
# argument. If no <title> node can be found in the DOM, use $fallback
# as the title.
#
proc extract_title {dom fallback} {
  set title_node [lindex [[$dom root] search title] 0]
  if {$title_node==""} {
    set title $fallback
  } else {
    set title [$title_node text]
  }

  set title
}

proc c3ref_document_apis {dom} {
  global c3ref_blacklist

  set res [list]
  foreach N [[$dom root] search blockquote] {
    set text [$N text]
    while {[regexp {(sqlite3[0-9a-z_]*) *\((.*)} $text -> api text]} {
      if {[info exists c3ref_blacklist($api)]==0} {
        lappend res "${api}()"
        set c3ref_blacklist($api) 1
      }
    }

    set text [$N text]
    set pattern {struct +(sqlite3[0-9a-z_]*)(.*)}
    while {[regexp $pattern $text -> api text]} {
      if {[info exists c3ref_blacklist($api)]==0} {
        lappend res "struct ${api}"
        set c3ref_blacklist($api) 1
      }
    }

    set text [$N text]
    set pattern {#define +(SQLITE_[0-9A-Z_]*)(.*)}
    while {[regexp $pattern $text -> api text]} {
      if {[info exists c3ref_blacklist($api)]==0} {
        lappend res "${api}"
        set c3ref_blacklist($api) 1
      }
    }
  }

  return [join $res ", "]
}

proc c3ref_filterscript {N} {
  for {set P [$N parent]} {$P!=""} {set P [$P parent]} {
    if {[$P attr -default "" class]=="nosearch"} { return 0 }
    if {[$P tag]=="blockquote" } { return 0 }
Old:
    if {[$N tag]==""} {
      if {[eval $filterscript $N]} { append text [$N text] }
    }
  }
  return $text
}

proc lang_document_import {doc} {
  set dom [::hdom::parse [readfile $doc]]

  # Find the <title> tag and extract the title.
  set title [extract_title $dom $doc]

  # Extract the entire document text.
  set text [extract_text_from_dom $dom lang_filterscript]

  # Insert into the database.
  insert_entry $doc {} $title $text

  $dom destroy
}

proc c3ref_document_import {doc} {
  set dom [::hdom::parse [readfile $doc]]

  # Find the <title> tag and extract the title.
  set title [extract_title $dom $doc]
  set title "C API: $title"

  set text [extract_text_from_dom $dom c3ref_filterscript]
  set apis [c3ref_document_apis $dom]

  # Insert into the database.
  insert_entry $doc $apis $title $text
}

proc generic_document_import {doc} {
  set dom [::hdom::parse [readfile $doc]]

  # Find the <title> tag and extract the title.
  set title [extract_title $dom $doc]

  set text [extract_text_from_dom $dom generic_filterscript]

  # Insert into the database.
  insert_entry $doc {} $title $text
}

proc rebuild_database {} {

  db transaction {
    # Create the database schema. If the schema already exists, then those
    # tables that contain document data are dropped and recreated by this
    # proc. The 'config' table is left untouched.
    #
    db eval {
      CREATE TABLE IF NOT EXISTS config(item TEXT, value TEXT);
      DROP TABLE IF EXISTS page;
      CREATE VIRTUAL TABLE page USING fts5(
        apis,                               -- C APIs 
        title,                              -- Title (or first heading)
        content,                            -- Complete document text
        url UNINDEXED,                      -- Indexed URL
        tokenize='porter unicode61 tokenchars _' -- Built-in porter tokenizer
      );
    }

    foreach doc [document_list lang] {
      puts "Indexing $doc..."
      lang_document_import $doc
    }

    foreach doc [document_list c3ref] {
      puts "Indexing $doc..."
      c3ref_document_import $doc
    }

    foreach doc [document_list generic] { 
      puts "Indexing $doc..."
      generic_document_import $doc 
    }

    db eval { INSERT INTO page(page) VALUES('optimize') }

  }

  db eval VACUUM
}

cd doc
sqlite3 db search.db
rebuild_database
New:
    if {[$N tag]==""} {
      if {[eval $filterscript $N]} { append text [$N text] }
    }
  }
  return $text
}

# This is a specialized command for extracting sections from the
# pragma.html document.
#
proc extract_sections_from_pragma_dom {dom} {
  set body [lindex [[$dom root] search body] 0]
  set res [list]

  # Find the section titles.
  foreach T [[$dom root] search table] {
    foreach A [$T search a] {
      if {[string match #pragma_* [$A attr -default "" href]]} {
        set name [$A attr href]
        lappend title($name) [$A text]
      }
    }
  }

  set url     ""
  set hdr     ""
  set content ""

  $body foreach_descendent N {
    set tag  [$N tag]
    if {$tag=="a" && [string match "pragma_*" [$N attr -default "" name]]} {
      lappend res [list $url $hdr $content]

      set name [$N attr name]
      set url "$name"
      set hdr "PRAGMA [join $title(#$name) ,]"
      set content ""
    }

    if {$tag=="" && [generic_filterscript $N]} {
      append content [$N text]
    }
  }
  lappend res [list $url $hdr $content]

  set res
}

proc extract_sections_from_dom {dom filterscript} {
  set body [lindex [[$dom root] search body] 0]

  set h(h) 1
  set h(h1) 1
  set h(h2) 1
  set h(h3) 1

  set res [list]

  $body foreach_descendent N {
    set tag [$N tag]

    if {[info exists h($tag)]} {
      set id [$N attr -default "" id]
      if {$id != ""} {
        if {[info exists H]} {
          lappend res [list [$H attr id] [$H text] $content]
        }
        set H $N
        set content ""
      }
    }

    if {[info exists H] && $tag==""} {
      for {set P [$N parent]} {$P!=""} {set P [$P parent]} {
        if {$P==$H} break
      }
      if {$P==""} { append content [$N text] }
    }
  }

  return $res
}

proc lang_document_import {doc} {
  set dom [::hdom::parse [readfile $doc]]

  # Find the <title> tag and extract the title.
  set title [extract_title $dom $doc]

  # Extract the entire document text.
  set text [extract_text_from_dom $dom lang_filterscript]

  # Insert into the database.
  insert_entry -url $doc -title1 $title -content $text

  $dom destroy
}

proc c3ref_document_import {doc} {
  set dom [::hdom::parse [readfile $doc]]
  
  # Find the <title> tag and extract the title.
  set title [extract_title $dom $doc]
  set title "C API: $title"

  set text [extract_text_from_dom $dom c3ref_filterscript]
  set apis [c3ref_document_apis $dom]

  # Insert into the database.
  insert_entry -url $doc -apis $apis -title1 $title -content $text
}

proc generic_document_import {doc} {
  set dom [::hdom::parse [readfile $doc]]
  
  # Find the <title> tag and extract the title.
  set title [extract_title $dom $doc]

  # Extract the document text
  set text [extract_text_from_dom $dom generic_filterscript]

  # Insert into the database.
  set rowid [insert_entry -url $doc -title1 $title -content $text]

  if {$doc=="pragma.html"} {
    set lSection [extract_sections_from_pragma_dom $dom]
  } else {
    set lSection [extract_sections_from_dom $dom generic_filterscript]
  }

  set i [expr $rowid*1000]
  foreach section $lSection {
    incr i
    foreach { tag hdr text } $section {}
    set url "${doc}#${tag}"
    insert_entry -rowid $i -url $url -title1 $title -title2 $hdr -content $text
  }
}

proc rebuild_database {} {

  db transaction {
    # Create the database schema. If the schema already exists, then those
    # tables that contain document data are dropped and recreated by this
    # proc. The 'config' table is left untouched.
    #
    db eval {

      DROP TABLE IF EXISTS page;
      CREATE VIRTUAL TABLE page USING fts5(
        apis,                               -- C APIs 
        keywords,                           -- Keywords
        title1,                             -- Document title
        title2,                             -- Heading title, if any
        content,                            -- Document text

        url UNINDEXED,                      -- Indexed URL
        tokenize='porter unicode61 tokenchars _' -- Tokenizer definition
      );

      DROP TABLE IF EXISTS weight;
      CREATE TABLE weight(id INTEGER PRIMARY KEY, percent FLOAT);
    }

    foreach doc [document_list lang] {
      puts "Indexing $doc..."
      lang_document_import $doc
    }

    foreach doc [document_list c3ref] {
      puts "Indexing $doc..."
      c3ref_document_import $doc
    }

    foreach doc [document_list generic] { 
      puts "Indexing $doc..."
      generic_document_import $doc 
    }

    db eval { INSERT INTO page(page) VALUES('optimize') }

    foreach f [array names ::weight] {
      set w $::weight($f)
      db eval {SELECT rowid FROM page WHERE url=$f} {
        db eval { INSERT INTO weight VALUES($rowid, $w); }
      }
    }
  }

  db eval VACUUM
}

cd doc
sqlite3 db search.db
rebuild_database
Changes to search/search.tcl.
Old:
    }
  } else {
    error "Unrecognized method: $env(REQUEST_METHOD)"
  }
}


#=========================================================================
# Redirect the web-browser to URL $url. This command does not return.
#
proc cgi_redirect {url} {
  set server $::env(SERVER_NAME)
  set path [file dirname $::env(REQUEST_URI)]
  if {[string range $path end end]!="/"} {
    append path /
  }

  puts "Status: 302 Redirect"
  puts "Location: http://${server}${path}${url}"
  puts "Content-Length: 0"
  puts ""
  exit
}

#=========================================================================
# The argument contains a key value list. The values in the list are
# transformed to an HTTP query key value list. For example:
#
#   % cgi_encode_args {s "search string" t "search \"type\""}
#   s=search+string&t=search+%22type%22
#
New:
    }
  } else {
    error "Unrecognized method: $env(REQUEST_METHOD)"
  }
}


#=========================================================================
# The argument contains a key value list. The values in the list are
# transformed to an HTTP query key value list. For example:
#
#   % cgi_encode_args {s "search string" t "search \"type\""}
#   s=search+string&t=search+%22type%22
#
Old:

    lappend reslist "$key=$value"
  }
  join $reslist &
}

proc htmlize {str} { string map {< &lt; > &gt;} $str }
proc attrize {str} { string map {< &lt; > &gt; \x22 \x5c\x22} $str }

#=========================================================================

proc cgi_env_dump {} {

  set ret "<h1>Arguments</h1><table>"
  foreach {key value} [array get ::A] {
    append ret "<tr><td>[htmlize $key]<td>[htmlize $value]"
  }
  append ret "</table>"

  append ret "<h1>Environment</h1><table>"
  foreach {key value} [array get ::env] {
    append ret "<tr><td>[htmlize $key]<td>[htmlize $value]"
  }
  append ret "</table>"
  return $ret
}

proc searchform {} {
  return {}
  set initial "Enter search term:"
  catch { set initial $::A(q) }
  return [subst {
    <table style="margin: 1em auto"> <tr><td>Search SQLite docs for:<td>
      <form name=f method=GET action=search>
        <input name=q type=text width=35 value="[attrize $initial]"></input>
        <input name=s type=submit value="Search"></input>
        <input name=s type=submit value="Lucky"></input>
      </form>
    </table>
    <script> 
      document.forms.f.q.focus()
      document.forms.f.q.select()
    </script>
  }]
}

proc footer {} {
  return {
    <hr>
    <table align=right>
    <td>
      <i>Powered by <a href="http://www.sqlite.org/fts5.html">FTS5</a>.</i>
    </table>
New:

    lappend reslist "$key=$value"
  }
  join $reslist &
}

proc htmlize {str} { string map {< &lt; > &gt;} $str }
proc attrize {str} { string map {< &lt; > &gt; \x22 &quot;} $str }

#=========================================================================

proc cgi_env_dump {} {

  set ret "<h1>Arguments</h1><table>"
  foreach {key value} [array get ::A] {
    append ret "<tr><td>[htmlize $key]<td>[htmlize $value]"
  }
  append ret "</table>"

  append ret "<h1>Environment</h1><table>"
  foreach {key value} [array get ::env] {
    append ret "<tr><td>[htmlize $key]<td>[htmlize $value]"
  }
  append ret "</table>"
  return $ret
}


proc footer {} {
  return {
    <hr>
    <table align=right>
    <td>
      <i>Powered by <a href="http://www.sqlite.org/fts5.html">FTS5</a>.</i>
    </table>
Old:
#   45.02 ms
#
proc ttime {script} {
  set t [lindex [time [list uplevel $script]] 0]
  if {$t>1000000} { return [format "%.2f s" [expr {$t/1000000.0}]] }
  return [format "%.2f ms" [expr {$t/1000.0}]]
}

proc rank {matchinfo args} {
  return 10.0
  binary scan $matchinfo i* I

  set nPhrase [lindex $I 0]
  set nCol [lindex $I 1]

  set G [lrange $I 2 [expr {1+$nCol*$nPhrase}]]
  set L [lrange $I [expr {2+$nCol*$nPhrase}] end]

  foreach a $args { lappend log [expr {log10(100+$a)}] }

  set score 0.0
  set i 0
  foreach l $L g $G {
    if {$l > 0} {
      set div [lindex $log [expr $i%3]]
      set div 1.0
      set score [expr {$score + (double($l) / double($g)) / $div}]
    }
    incr i
  }

  return $score
}
proc erank {matchinfo args} {
  eval rank [list $matchinfo] $args
}


proc searchresults {} {
  if {![info exists ::A(q)]} return ""
  #set ::A(q) [string map {' ''} $A(q)]
  #regsub -all {[^-/"A-Za-z0-9]} $::A(q) { } ::A(q)

  # Count the '"' characters in $::A(q). If there is an odd number of
  # occurrences, add a " to the end of the query so that fts3 can parse
  # it without error.
  if {[regexp -all \x22 $::A(q)] % 2} { append ::A(q) \x22 }

  set ::TITLE "Results for: \"[htmlize $::A(q)]\""

  # Set nRes to the total number of documents that the user's query matches.
  #
  set rc [catch {
    set nRes [db one { SELECT count(*) FROM page WHERE page MATCH $::A(q) }]
  }]
  if {$rc} {
    set ::A(q) "\"$::A(q)\""
    set nRes [db one { SELECT count(*) FROM page WHERE page MATCH $::A(q) }]
  }

  db one { INSERT INTO page(page, rank) VALUES('rank', 'bm25(20.0, 10.0)') }

  # If the user has clicked the "Lucky" button and the query returns one or
  # more results, redirect the browser to the highest ranked result. If the
  # query returns zero results, fall through and display the "No results"
  # page as if the user had clicked "Search".
  #
  if {[info exists ::A(s)] && $::A(s) == "Lucky"} {
    set url [db one {
      SELECT url FROM page, pagedata 
      WHERE page MATCH $::A(q) AND page.docid = pagedata.docid
      ORDER BY rank(matchinfo(page), nk, nt, nc) DESC
    }]
    if {$url != ""} { cgi_redirect $url }
  }

  # If nRes is 0, then the user's query returned zero results. Return a short 
  # message to that effect.
  #
  if {$nRes == 0} {
    return [subst { No results for: <b>[htmlize $::A(q)]</b> }]
  }
  set score 0
  catch {set score $::A(score)}

  # Set iStart to the index of the first result to display. Results are
  # indexed starting at zero from most to least relevant.
  #
  set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}]

  # HTML markup used to highlight keywords within FTS3 generated snippets.
  #
  #set open {<span style="font-weight:xbold; color:navy">}
  set open {<span style="background-color:#b3e6cc">}
  set open {<span style="background-color:#c6ecd9">}
  set open {<span style="background-color:#d9f2e6">}
  set close {</span>}
  set ellipsis {<b>&nbsp;...&nbsp;</b>}

  set ret [subst {
    <table border=0>
    <p>Search results 
       [expr $iStart+1]..[expr {($nRes < $iStart+10) ? $nRes : $iStart+10}] 
       of $nRes for: <b>[htmlize $::A(q)]</b>
  }]
  db eval {
    SELECT 
      COALESCE(NULLIF(title,''), 'No Title.') AS title,
      snippet(page, 0, $open, $close, $ellipsis, 6) AS snippet1,
      snippet(page, 1, $open, $close, '', 40) AS snippet2,
      snippet(page, 2, $open, $close, $ellipsis, 40) AS snippet3,
      url, rank
    FROM page($::A(q)) ORDER BY rank
    LIMIT 10 OFFSET $iStart;
  } {
    #if {$snippet1!=""} { set snippet1 "($snippet1)" }
    append ret [subst -nocommands {<tr>
      <td valign=top style="line-height:150%">
        <div style="white-space:wrap;font-size:larger" class=nounderline>
          <xi><a href="$url">$snippet2</a> </i>
        </div>
          <div style="margin-left: 10ex; font:larger monospace">$snippet1</div>
        <div style="ffont-size:small;margin-left: 2ex">
          <div> $snippet3 </div>
          <div style="margin-left:1em; margin-bottom:1em"><a href="$url">$url</a></div>
        </div>
      </td>

    }]
  }
  append ret { </table> }


  # If the query returned more than 10 results, add up to 10 links to 
  # each set of 10 results (first link to results 1-10, second to 11-20, 
New:
#   45.02 ms
#
proc ttime {script} {
  set t [lindex [time [list uplevel $script]] 0]
  if {$t>1000000} { return [format "%.2f s" [expr {$t/1000000.0}]] }
  return [format "%.2f ms" [expr {$t/1000.0}]]
}

proc searchresults {} {
  if {![info exists ::A(q)]} return ""
  #set ::A(q) [string map {' ''} $A(q)]
  #regsub -all {[^-/"A-Za-z0-9]} $::A(q) { } ::A(q)

  # Count the '"' characters in $::A(q). If there is an odd number of
  # occurrences, add a " to the end of the query so that fts5 can parse
  # it without error.
  if {[regexp -all \x22 $::A(q)] % 2} { append ::A(q) \x22 }

  db one { INSERT INTO page(page, rank) VALUES('rank', 'bm25(20.0, 10.0)') }

  # Set iStart to the index of the first result to display. Results are
  # indexed starting at zero from most to least relevant.
  #
  set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}]

  # Grab a list of rowid results.
  #
  set q {
    SELECT rowid FROM page WHERE page MATCH $::A(q) 
    ORDER BY rank * COALESCE(
      (SELECT percent FROM weight WHERE id=page.rowid), 100
    );
  }
  if {[catch { set lRowid [db eval $q] }]} {
    set x ""
    foreach word [split $::A(q) " "] {
      append x " \"[string map [list "\"" "\"\""] $word]\""
    }
    set ::A(q) [string trim $x]
    set lRowid [db eval $q]
  }

  set lRes [list]
  foreach rowid $lRowid {
    if {$rowid > 1000} {
      set parent [expr $rowid / 1000]
      lappend subsections($parent) $rowid
    } else {
      lappend lRes $rowid
    }
  }

  set nRes [llength $lRes]
  set lRes [lrange $lRes $iStart [expr $iStart+9]]

  # If there are no results, return a message to that effect.
  #
  if {[llength $lRes] == 0} {
    return [subst { No results for: <b>[htmlize $::A(q)]</b> }]
  }
  
  # HTML markup used to highlight keywords within FTS5 generated snippets.
  #
  set open {<span style="background-color:#d9f2e6">}
  set close {</span>}
  set ellipsis {<b>&nbsp;...&nbsp;</b>}

  # Grab the required data
  #
  db eval [string map [list %LIST% [join $lRowid ,]] {
    SELECT 
      rowid AS parentid, 
      snippet(page, 0, $open, $close, $ellipsis, 6)  AS s_apis,
      snippet(page, 2, $open, $close, '', 40)        AS s_title1,
      snippet(page, 3, $open, $close, $ellipsis, 40) AS s_title2,
      snippet(page, 4, $open, $close, $ellipsis, 40) AS s_content,
      url, rank
    FROM page($::A(q))
    WHERE rowid IN (%LIST%)
  }] X {
    foreach k [array names X] { set data($X(parentid),$k) [set X($k)] }
  }

  set ret [subst {
    <table border=0>
    <p>Search results 
       [expr $iStart+1]..[expr {($nRes < $iStart+10) ? $nRes : $iStart+10}] 
       of $nRes for: <b>[htmlize $::A(q)]</b>
  }]

  foreach rowid $lRes {


    foreach a {parentid s_apis s_title1 s_content url rank} {
      set $a $data($rowid,$a)
    }

    if {[info exists subsections($parentid)]} {
      set childid [lindex $subsections($parentid) 0]
      set link $data($childid,url)
      set hdr $data($childid,s_title2)


      if {$hdr==""} {
        set s_content ""
      } else {
        set s_content [subst {
          <b><a style=color:#044a64 href=$link>$hdr</a></b>:
        }]
      }

      append s_content " $data($childid,s_content)"
    }

    append ret [subst -nocommands {<tr>
      <td valign=top style="line-height:150%">
        <div style="white-space:wrap;font-size:larger" class=nounderline>
          <a href="$url">$s_title1</a>
        </div>
          <div style="margin-left: 10ex; font:larger monospace">$s_apis</div>
        <div style="ffont-size:small;margin-left: 2ex">
          <div class=nounderline> $s_content </div>
          <div style="margin-left:1em; margin-bottom:1em">
            <a href="$url">$url</a>
          </div>
        </div>
      </td>

    }]
  }
  append ret { </table> }


  # If the query returned more than 10 results, add up to 10 links to 
  # each set of 10 results (first link to results 1-10, second to 11-20, 
Old:

  db transaction {
    set t [ttime { 
      if {[catch searchresults srchout]} {
        set A(q) [string tolower $A(q)]
        set srchout [searchresults]
      }
      set doc "[searchform] $srchout [footer]" 
    }]
  }
  append doc "<p>Page generated in $t."
  return $doc

  # return [cgi_env_dump]
}
New:

  db transaction {
    set t [ttime { 
      if {[catch searchresults srchout]} {
        set A(q) [string tolower $A(q)]
        set srchout [searchresults]
      }
      set doc "$srchout [footer]" 
    }]
  }
  append doc "<p>Page generated in $t."
  return $doc

  # return [cgi_env_dump]
}
Old:
  set env(QUERY_STRING) rebuild=1
  set ::HEADER ""

  #set env(QUERY_STRING) {q="one+two+three+four"+eleven}
  set env(QUERY_STRING) {q=windows}
  set ::HEADER ""
}



if {0==[catch main res]} {
  set title "Search SQLite Documentation"
  if {[info exists ::A(q)]} {
    set initsearch [attrize $::A(q)]
    append title " - [htmlize $::A(q)]"
  } else {

New:
  set env(QUERY_STRING) rebuild=1
  set ::HEADER ""

  #set env(QUERY_STRING) {q="one+two+three+four"+eleven}
  set env(QUERY_STRING) {q=windows}
  set ::HEADER ""
}



if {0==[catch main res]} {
  set title "Search SQLite Documentation"
  if {[info exists ::A(q)]} {
    set initsearch [attrize $::A(q)]
    append title " - [htmlize $::A(q)]"
  } else {
Changes to search/searchc.c.
Old:
/*
** There are also two SQL user functions registered:
**
**   rank()
**   erank()
**
** rank() interprets the return value of the FTS3 matchinfo() function and
** returns a score for the match (a real number). The higher the score, the
** more relevant the document is considered. This is used to order query
** results when the user searches the database. The rank() function takes
** (nCol+1) arguments, where nCol is the number of columns in the FTS3
** table. The first argument is the return value of matchinfo(). The
** second argument is the number of tokens in column 0 of the current FTS3 
** table row. The third argument is the number of tokens in column 1, and
** so on.
**
** Function erank() is called in exactly the same way as rank(). Instead
** of returning a score, it returns an HTML formatted table containing
** data that may be used to understand how the score for the current row
** was calculated.
*/

#include <tcl.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#include <math.h>

#include "sqlite3.h"

typedef unsigned int u32;
typedef unsigned char u8;
typedef sqlite3_uint64 u64;


/*
** Implementation of search result ranking function.
*/
static void rankfunc(sqlite3_context *pCtx, int nVal, sqlite3_value **apVal){
  u32 *aMatchinfo;
  double score = 0.0;
  int iCol;
  int iPhrase;
  int nCol;
  int nPhrase;

  int isExplain = sqlite3_user_data(pCtx);
  char *zExplain = 0;

  if( nVal==0 ) goto wna;
  aMatchinfo = (u32 *)sqlite3_value_blob(apVal[0]);
  nPhrase = aMatchinfo[0];
  nCol = aMatchinfo[1];
  if( nVal!=nCol+1 ) goto wna;

  if( isExplain ) zExplain = sqlite3_mprintf("<table width=100%%>");

  for(iCol=0; iCol<nCol; iCol++){
    int nToken = sqlite3_value_int(apVal[iCol+1]);
    double colscore = 0.0;
    if( isExplain ){
      zExplain = sqlite3_mprintf("%z<tr><td>%d.<td>( ", zExplain, iCol);
    }
    for(iPhrase=0; iPhrase<nPhrase; iPhrase++){
      u32 nGlobal = aMatchinfo[2 + iPhrase*nCol + iCol];
      u32 nHit = aMatchinfo[2 + nPhrase*nCol + iPhrase*nCol + iCol];

      if( nHit ) colscore += (double)nHit / (double)nGlobal;
      if( isExplain ){
        const char *zDiv = (iPhrase==0 ? "" : "+ ");
        zExplain = sqlite3_mprintf("%z%s%d/%d ", zExplain, zDiv, nHit, nGlobal);
      }
    }
    colscore = colscore / (log(100+nToken)/log(10)); 
    score += colscore;
    if( isExplain ){
      zExplain = sqlite3_mprintf(
          "%z) / log(100+%d)<td> = %.4f", zExplain, nToken, colscore);
    }
  }

  if( isExplain ){
    sqlite3_result_text(pCtx, sqlite3_mprintf(
        "%z<tr><td><td width=100%%><td>= <b>%.4f</b></table>", zExplain, score
    ), -1, sqlite3_free);
  }else{
    sqlite3_result_double(pCtx, score);
  }
  return;

 wna:
  sqlite3_result_error(pCtx,"wrong number of arguments to function rank()",-1);
}

int Sqlite3_Init(Tcl_Interp *interp);

static int initDb(sqlite3 *db, char **pzErr, void *p){

  sqlite3_create_function(db, "rank",-1, SQLITE_UTF8, 0, rankfunc,0,0);
  sqlite3_create_function(db, "erank", -1, SQLITE_UTF8, (void*)1, rankfunc,0,0);
}

static int AppInit(Tcl_Interp *interp) {
  int rc;
  rc = Sqlite3_Init(interp);
  if( rc!=TCL_OK ) return rc;
  sqlite3_auto_extension(initDb);
  return TCL_OK;
}

int main(int argc, char *argv[]) {
  Tcl_Main(argc, argv, AppInit);
  return 0;
}

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<

<
|
<
<
<
>


<
>
>
>

|
<
|
<
<
<
<
|
<
<

<
<
<
<
<
|
<
|
<
<
<
<
<
<
<
<
<
|
<
|
<
<
<
<
<
<
<
|
<
|
<
<
<
<
<
|
<
<
<
|
<
<
<




|
>
|
|






|







1






















2




3

4



5
6
7

8
9
10
11
12

13




14


15





16

17









18

19







20

21





22



23



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45























#include <sqlite3.h>

/*
*/

/*
** Return a pointer to the fts5_api pointer for database connection db.
** If an error occurs, return NULL and leave an error in the database 
** handle (accessible using sqlite3_errcode()/errmsg()).
*/
fts5_api *fts5_api_from_db(sqlite3 *db){
  fts5_api *pRet = 0;
  sqlite3_stmt *pStmt = 0;

  if( SQLITE_OK==sqlite3_prepare(db, "SELECT fts5()", -1, &pStmt, 0)
      && SQLITE_ROW==sqlite3_step(pStmt) 
      && sizeof(pRet)==sqlite3_column_bytes(pStmt, 0)
    ){
    memcpy(&pRet, sqlite3_column_blob(pStmt, 0), sizeof(pRet));
  }
  sqlite3_finalize(pStmt);
  return pRet;
}

int Sqlite3_Init(Tcl_Interp *interp);

static int register_search_extensions(sqlite3 *db, char **pzErr, void *p){
  fts5_api *pApi = fts5_api_from_db(db);

  return SQLITE_OK;
}

static int AppInit(Tcl_Interp *interp) {
  int rc;
  rc = Sqlite3_Init(interp);
  if( rc!=TCL_OK ) return rc;
  sqlite3_auto_extension(register_search_extensions);
  return TCL_OK;
}

int main(int argc, char *argv[]) {
  Tcl_Main(argc, argv, AppInit);
  return 0;
}