Artifact ad8c745d21163991438ea2fc30e379a91ff9115a:
- Executable file
search/search.tcl
— part of check-in
[cc51dec17e]
at
2016-08-24 19:26:13
on branch experimental
— Further updates to search database and script.
(user:
dan
size: 9129)
#!/usr/bin/tclsqlite3.search
source [file dirname [info script]]/document_header.tcl
# Decode an HTTP %-encoded (application/x-www-form-urlencoded) string.
#
# "+" becomes a space and every "%XX" escape becomes the byte 0xXX. The
# resulting byte sequence is then interpreted as UTF-8, so escaped
# characters of any encoded length (1 to 4 bytes) decode correctly. A "%"
# that is not followed by two hex digits is left untouched.
#
proc percent_decode {str} {
  # Work on the UTF-8 byte form so that literal (unescaped) non-ASCII
  # characters survive the final UTF-8 decode alongside the escaped ones.
  set bytes [encoding convertto utf-8 $str]

  # Rewrite "+" back to space, and protect \ and [ and ] from the [subst]
  # below by quoting them with '\'.
  set bytes [string map [list + { } "\\" "\\\\" \[ \\\[ \] \\\]] $bytes]

  # Turn each %XX escape into the backslash sequence for that single byte
  # and let [subst] expand it. Variable substitution is disabled and the
  # brackets were quoted above, so nothing from the input is evaluated.
  regsub -all -- {%([0-9A-Fa-f][0-9A-Fa-f])} $bytes {\\u00\1} bytes
  set bytes [subst -novariables $bytes]

  # Interpret the bytes as UTF-8. If the decoder rejects malformed input,
  # return the raw bytes instead of failing the whole request.
  if {[catch {encoding convertfrom utf-8 $bytes} decoded]} {
    return $bytes
  }
  return $decoded
}
#=========================================================================
# Parse the arguments passed to this invocation of the CGI program (via
# either the GET or the POST method) and store them, %-decoded, in the
# global array A. For example, if the CGI is invoked via a GET request on
# the URI:
#
#   http://www.sqlite.org/search?query=fts3+table&results=10
#
# then afterwards [array get A] contains:
#
#   {query {fts3 table} results 10}
#
# GET arguments are read from env(QUERY_STRING); POST arguments are read
# from stdin, env(CONTENT_LENGTH) characters long. Any other request
# method raises an error.
#
proc cgi_parse_args {} {
  global env A

  # Fetch the raw, still-encoded argument string for the request method.
  switch -exact -- $env(REQUEST_METHOD) {
    GET     { set raw $env(QUERY_STRING) }
    POST    { set raw [read stdin $env(CONTENT_LENGTH)] }
    default { error "Unrecognized method: $env(REQUEST_METHOD)" }
  }

  # Each "name=value" pair becomes one element of A.
  foreach pair [split $raw &] {
    if {[regexp {([a-z0-9]*)=(.*)} $pair -> name encoded]} {
      set A($name) [percent_decode $encoded]
    }
  }
}
#=========================================================================
# The argument contains a key value list. The values in the list are
# transformed to an HTTP query key value list. For example:
#
#   % cgi_encode_args {s "search string" t "search \"type\""}
#   s=search+string&t=search+%22type%22
#
# Each value is converted to UTF-8 and every byte other than the RFC 3986
# unreserved characters (A-Z a-z 0-9 - . _ ~) is written as %XX, except
# that a space is written as "+". Keys are emitted unchanged.
#
proc cgi_encode_args {list} {
  set reslist [list]
  foreach {key value} $list {
    set encoded ""
    # Encode byte by byte so that non-ASCII characters become valid
    # multi-byte %-escapes instead of being emitted raw.
    foreach ch [split [encoding convertto utf-8 $value] ""] {
      if {[regexp {^[A-Za-z0-9._~-]$} $ch]} {
        append encoded $ch
      } elseif {$ch eq " "} {
        append encoded +
      } else {
        append encoded [format %%%02X [scan $ch %c]]
      }
    }
    lappend reslist "$key=$encoded"
  }
  join $reslist &
}
# Escape $str for use as HTML element content: ampersand, less-than and
# greater-than are replaced by their named character entities.
#
# The ampersand that starts each entity is spelled \u0026 (expanded when
# [string map] parses the list) so that a tool which HTML-unescapes this
# source cannot silently turn the map into a no-op.
proc htmlize {str} {
  string map {
    \u0026 \u0026amp;
    <      \u0026lt;
    >      \u0026gt;
  } $str
}
# Escape $str for use inside a double-quoted HTML attribute value:
# ampersand, less-than, greater-than and the double quote are replaced by
# their named character entities.
#
# The ampersand that starts each entity is spelled \u0026 (expanded when
# [string map] parses the list) so that a tool which HTML-unescapes this
# source cannot corrupt the map.
proc attrize {str} {
  string map {
    \u0026 \u0026amp;
    <      \u0026lt;
    >      \u0026gt;
    \x22   \u0026quot;
  } $str
}
#=========================================================================
# Debugging aid. Returns an HTML fragment with two tables: one listing
# every element of the global array A (the parsed CGI arguments) and one
# listing every element of the global env array. Names and values are
# passed through [htmlize].
#
proc cgi_env_dump {} {
  set html ""
  foreach {heading arrayName} {Arguments ::A Environment ::env} {
    append html "<h1>$heading</h1><table>"
    foreach {name val} [array get $arrayName] {
      append html "<tr><td>[htmlize $name]<td>[htmlize $val]"
    }
    append html "</table>"
  }
  return $html
}
# Returns the fixed HTML fragment appended below the search results: a
# horizontal rule followed by a right-aligned "Powered by FTS5" link.
# The literal is kept flush-left so the returned text is unchanged.
proc footer {} {
  set html {
<hr>
<table align=right>
<td>
<i>Powered by <a href="http://www.sqlite.org/fts5.html">FTS5</a>.</i>
</table>
}
  return $html
}
#-------------------------------------------------------------------------
# A variant of the builtin Tcl [time] command that runs the supplied
# script exactly once, in the caller's context. Rather than a string like
# "xxx microseconds per iteration" it returns the elapsed time formatted
# as "x.yy ms", or as "x.yy s" when more than one second was spent.
# For example:
#
#   % ttime {after 1500}
#   1.50 s
#   % ttime {after 45}
#   45.02 ms
#
proc ttime {script} {
  # [time] reports "N microseconds per iteration"; keep just the number.
  set usec [lindex [time [list uplevel $script]] 0]
  if {$usec <= 1000000} {
    return [format "%.2f ms" [expr {$usec/1000.0}]]
  }
  return [format "%.2f s" [expr {$usec/1000000.0}]]
}
# Run the full-text query in ::A(q) against the "page" FTS5 table through
# the [db] database command and return the results as an HTML fragment.
#
# Inputs (global array A):
#   q   The search expression. If absent, an empty string is returned.
#       May be rewritten in place (a closing quote appended, or every
#       word quoted) so that FTS5 can parse it.
#   i   Optional zero-based index of the 10-result page to display.
#       Anything that is not a non-negative decimal integer is treated
#       as 0.
#
# Rowids greater than 1000 are treated as subsections of the page whose
# rowid is (rowid / 1000); they are not listed on their own, but the first
# matching subsection supplies the content snippet shown for its page.
#
# An error from the database (for example an FTS5 syntax error that
# survives the quoting fallback) propagates to the caller.
#
proc searchresults {} {
  if {![info exists ::A(q)]} { return "" }
  #set ::A(q) [string map {' ''} $A(q)]
  #regsub -all {[^-/"A-Za-z0-9]} $::A(q) { } ::A(q)

  # Count the '"' characters in $::A(q). If there is an odd number of
  # occurrences, add a " to the end of the query so that fts5 can parse
  # it without error.
  if {[regexp -all \x22 $::A(q)] % 2} { append ::A(q) \x22 }

  # Set iStart to the index of the first result to display. Results are
  # indexed starting at zero from most to least relevant. The page number
  # comes straight from the request, so accept only plain decimal digits;
  # [scan %d] avoids reading a leading zero as octal.
  #
  set iPage 0
  if {[info exists ::A(i)] && [regexp {^[0-9]{1,9}$} $::A(i)]} {
    scan $::A(i) %d iPage
  }
  set iStart [expr {$iPage*10}]

  # Grab a list of rowid results. $::A(q) is referenced from inside the
  # braced SQL text, so it is handed to the database as a variable rather
  # than being pasted into the statement.
  #
  set q {
    SELECT rowid FROM page WHERE page MATCH $::A(q)
    ORDER BY rank * COALESCE(
      (SELECT percent FROM weight WHERE id=page.rowid), 100
    );
  }
  if {[catch { set lRowid [db eval $q] }]} {
    # The query failed: retry with every word individually double-quoted
    # (embedded quotes doubled). A second failure is not caught here.
    set x ""
    foreach word [split $::A(q) " "] {
      append x " \"[string map [list "\"" "\"\""] $word]\""
    }
    set ::A(q) [string trim $x]
    set lRowid [db eval $q]
  }

  # Separate top-level pages from subsections (rowid > 1000).
  set lRes [list]
  foreach rowid $lRowid {
    if {$rowid > 1000} {
      set parent [expr {$rowid / 1000}]
      lappend subsections($parent) $rowid
    } else {
      lappend lRes $rowid
    }
  }

  set nRes [llength $lRes]
  set lRes [lrange $lRes $iStart [expr {$iStart+9}]]

  # If there are no results, return a message to that effect.
  #
  if {[llength $lRes] == 0} {
    return [subst { No results for: <b>[htmlize $::A(q)]</b> }]
  }

  # HTML markup used to highlight keywords within FTS5 generated snippets.
  #
  set open {<span style="background-color:#d9f2e6">}
  set close {</span>}
  set ellipsis {<b> ... </b>}

  # Only the rows on this page, plus the first matching subsection of each,
  # are rendered below, so only those rows need snippets computed.
  set lFetch $lRes
  foreach rowid $lRes {
    if {[info exists subsections($rowid)]} {
      lappend lFetch [lindex $subsections($rowid) 0]
    }
  }

  # Grab the required data. %LIST% is replaced by rowids that came from
  # the database itself; everything else is passed as a variable.
  #
  db eval [string map [list %LIST% [join $lFetch ,]] {
    SELECT
      rowid AS parentid,
      snippet(page, 0, $open, $close, $ellipsis, 6)  AS s_apis,
      snippet(page, 2, $open, $close, '', 40)        AS s_title1,
      snippet(page, 3, $open, $close, $ellipsis, 40) AS s_title2,
      snippet(page, 4, $open, $close, $ellipsis, 40) AS s_content,
      url, rank
    FROM page($::A(q))
    WHERE rowid IN (%LIST%)
  }] X {
    foreach k [array names X] { set data($X(parentid),$k) [set X($k)] }
  }

  set ret [subst {
    <table border=0>
    <p>Search results
    [expr {$iStart+1}]..[expr {($nRes < $iStart+10) ? $nRes : $iStart+10}]
    of $nRes for: <b>[htmlize $::A(q)]</b>
  }]

  foreach rowid $lRes {
    foreach a {parentid s_apis s_title1 s_content url rank} {
      set $a $data($rowid,$a)
    }

    # If a subsection of this page matched, show its heading (when it has
    # a title snippet) and its content snippet instead of the page's own.
    if {[info exists subsections($parentid)]} {
      set childid [lindex $subsections($parentid) 0]
      set link $data($childid,url)
      set hdr $data($childid,s_title2)

      if {$hdr==""} {
        set s_content ""
      } else {
        set s_content [subst {
          <b><a style=color:#044a64 href=$link>$hdr</a></b>
        }]
      }

      append s_content " $data($childid,s_content)"
    }

    append ret [subst -nocommands {<tr>
      <td valign=top style="line-height:150%">
        <div style="white-space:wrap;font-size:larger" class=nounderline>
          <a href="$url">$s_title1</a>
          <div style="float:right;font-size:smaller;color:#BBB">($url)</div>
        </div>
        <div style="margin-left: 10ex; font:larger monospace">$s_apis</div>
        <div style="margin-left: 4ex; margin-bottom:1.5em">
          $s_content
        </div>
      </td>
    }]
  }
  append ret { </table> }

  # If the query returned more than 10 results, add up to 10 links to
  # each set of 10 results (first link to results 1-10, second to 11-20,
  # third to 21-30, as required). s(1) styles the link for the page
  # currently shown, s(0) all the others.
  #
  if {$nRes>10} {
    set s(0) {border: solid #044a64 1px ; padding: 1ex ; margin: 1ex}
    set s(1) "$s(0);background:#044a64;color:white"
    append ret <center><p>
    for {set i 0} {$i < 10 && ($i*10)<$nRes} {incr i} {
      append ret [subst {
        <a style="$s([expr {($iStart/10)==$i}])"
           href="search?[cgi_encode_args [list q $::A(q) i $i]]">[expr {$i+1}]</a>
      }]
    }
    append ret </center>
  }

  return $ret
}
# Open search.db as the [db] command, parse the CGI arguments into ::A and
# return the HTML body of the response: the search results, the footer and
# a note giving the time spent generating them.
#
# The search runs inside a single database transaction. If the first
# attempt raises an error, the query is lower-cased and tried once more;
# an error from the second attempt propagates to the caller.
#
proc main {} {
  sqlite3 db search.db
  cgi_parse_args

  db transaction {
    # [ttime] evaluates its script in this proc's context, so "page" set
    # inside it is still visible after the transaction.
    set elapsed [ttime {
      if {[catch {searchresults} html]} {
        set ::A(q) [string tolower $::A(q)]
        set html [searchresults]
      }
      set page "$html [footer]"
    }]
  }

  append page "<p>Page generated in $elapsed."
  return $page

  # return [cgi_env_dump]
}
#=========================================================================
# Script entry point: run [main], wrap its result in the document header
# and write the complete CGI response to stdout.
#
# NOTE(review): document_header.tcl is also sourced at the top of this
# file; this second load looks redundant - confirm before removing.
source [file dirname [info script]]/document_header.tcl

# When no REQUEST_METHOD is set (e.g. when run from a shell), fake a GET
# request for a sample query so the script can be exercised by hand.
if {![info exists env(REQUEST_METHOD)]} {
  set env(REQUEST_METHOD) GET
  #set env(QUERY_STRING) {q="one+two+three+four"+eleven}
  set env(QUERY_STRING) {q=windows}
  set ::HEADER ""
}

if {0==[catch main res]} {
  set title "Search SQLite Documentation"
  if {[info exists ::A(q)]} {
    set initsearch [attrize $::A(q)]
    append title " - [htmlize $::A(q)]"
  } else {
    set initsearch {}
  }
  set document [document_header $title "" $initsearch]
  append document $res
} else {
  # The message and stack trace can echo text taken from the request, so
  # escape them before embedding them in the HTML error page.
  set document "<pre>"
  append document "Error: [htmlize $res]\n\n"
  append document [htmlize $::errorInfo]
  append document "</pre>"
}

# Content-Length must count bytes as written, not characters: measure the
# document in the encoding stdout will use. If that encoding name cannot
# be used with [encoding convertto], fall back to the character count.
if {[catch {encoding convertto [fconfigure stdout -encoding] $document} payload]} {
  set payload $document
}

puts "Content-type: text/html"
puts "Content-Length: [string length $payload]"
puts ""
# No trailing newline, so the body is exactly as long as declared.
puts -nonewline $document
flush stdout
close stdout
exit