Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| SHA1 Hash: | db6671c0ab897f4d0082404d2469bdba5ff22cc7 |
|---|---|
| Date: | 2013-01-14 19:22:34 |
| User: | dan |
| Comment: | Add scripts allowing src4 to be used as the search engine for sqlite.org. |
Tags And Properties
- branch=trunk inherited from [b2e03e19ab]
- sym-trunk inherited from [b2e03e19ab]
Changes
Added search/buildsearchdb4.tcl
> 1 > 2 load ./parsehtml.so > 3 > 4 #========================================================================= > 5 # Return a list of relative paths to documents that should be included > 6 # in the index. > 7 # > 8 proc document_list {} { > 9 set files [list] > 10 foreach f [glob *.html c3ref/*.html releaselog/*.html] { > 11 if {![string match *crossref* $f] > 12 && ![string match fileio.html $f] > 13 && ![string match capi3ref.html $f] > 14 && ![string match changes.html $f] > 15 && ![string match btreemodule.html $f] > 16 } { lappend files $f } > 17 } > 18 return $files > 19 } > 20 > 21 #========================================================================= > 22 # Read and return the contents of text file $zFile. > 23 # > 24 proc readfile {zFile} { > 25 set fd [open $zFile] > 26 set ret [read $fd] > 27 close $fd > 28 return $ret > 29 } > 30 > 31 #========================================================================= > 32 # [parsehtml] callback used for parsing keywords.html... > 33 # > 34 proc keywordparse_callback {tag details} { > 35 global K P > 36 switch -- [string tolower $tag] { > 37 "" { > 38 if {[info exists K(hyperlink)]} { > 39 append K($K(hyperlink)) $details > 40 } > 41 } > 42 "a" { > 43 array set D $details > 44 if {[info exists D(href)]} { set K(hyperlink) $D(href) } > 45 } > 46 "/a" { > 47 unset -nocomplain P(hyperlink) > 48 } > 49 } > 50 } > 51 > 52 #========================================================================= > 53 # This function is used as the callback when parsing ordinary documents > 54 # (not the keywords document). > 55 # > 56 # Rules for extracting fragment "titles". A fragment title consists of > 57 # all text that follows the tag that opens the fragment until either: > 58 # > 59 # 1. 80 characters have been parsed, or > 60 # 2. 8 characters have been parsed and one of the following is > 61 # encountered: > 62 # a) A block element opening or closing tag, or > 63 # b) A <br> element, or > 64 # c) A "." character. > 65 # > 66 # 3. 8 characters have been parsed and a <br> tag or "." character is > 67 # encountered > 68 # > 69 proc docparse_callback {tag details} { > 70 global P > 71 set tag [string tolower $tag] > 72 switch -glob -- $tag { > 73 "" { > 74 append P(text) " $details" > 75 if {$P(isTitle)} { append P(title) $details } > 76 if {[llength $P(fragments)]} { > 77 append P(ftext) " $details" > 78 } > 79 } > 80 > 81 "title" { set P(isTitle) 1 } > 82 "/title" { set P(isTitle) 0 } > 83 > 84 "a" { > 85 array set D $details > 86 if {[info exists D(name)]} { > 87 if {[llength $P(fragments)]} { > 88 lappend P(fragments) $P(ftitle) $P(ftext) > 89 } > 90 lappend P(fragments) $D(name) > 91 set P(ftext) "" > 92 set P(ftitle) "" > 93 catch { unset P(ftitleclose) } > 94 } > 95 } > 96 "h*" { > 97 array set D $details > 98 if {[info exists D(id)]} { > 99 if {[llength $P(fragments)]} { > 100 lappend P(fragments) $P(ftitle) $P(ftext) > 101 } > 102 lappend P(fragments) $D(id) > 103 set P(ftext) "" > 104 set P(ftitle) "" > 105 } > 106 } > 107 > 108 div { > 109 array set D $details > 110 if {[info exists D(class)] && $D(class) == "startsearch"} { > 111 set P(text) "" > 112 } > 113 } > 114 } > 115 > 116 set ftext [string trim $P(ftext) " \v\n"] > 117 if {[string length $ftext]>4 && $P(ftitle) == ""} { > 118 set blocktags [list \ > 119 br td /td th /th p /p \ > 120 h1 h2 h3 h4 h5 h /h1 /h2 /h3 /h4 /h5 /h > 121 ] > 122 if {[lsearch $blocktags $tag]>=0} { > 123 set P(ftitle) $ftext > 124 set P(ftext) "" > 125 } elseif {[string length $ftext]>80} { > 126 set idx [string last " " [string range $ftext 0 79]] > 127 if {$idx<0} { set idx 80 } > 128 set P(ftitle) [string range $ftext 0 [expr $idx-1]] > 129 set P(ftext) [string range $ftext $idx end] > 130 } > 131 } > 132 } > 133 > 134 proc findlinks_callback {tag details} { > 135 global P > 136 set doc $P(doc) > 137 > 138 set tag [string tolower $tag] > 139 switch -glob -- $tag { > 140 a { > 141 array set D $details > 142 if {[info exists D(href)]} { > 143 if { [string range $D(href) 0 0]=="#" } { > 144 set url "${doc}$D(href)" > 145 } else { > 146 set url "$D(href)" > 147 } > 148 > 149 set P(url) $url > 150 set P(link) "" > 151 } > 152 } > 153 /a { > 154 if {$P(url)!=""} { > 155 db eval { UPDATE pagedata SET links = links || ' ' || $P(link) WHERE url > 156 } > 157 set P(url) "" > 158 set P(link) "" > 159 } > 160 > 161 "" { > 162 append P(link) " $details" > 163 } > 164 } > 165 } > 166 > 167 proc trim {a} { > 168 set L [split $a] > 169 return [lsort -uniq $L] > 170 } > 171 > 172 #========================================================================= > 173 # Build the database. > 174 # > 175 proc rebuild_database {} { > 176 > 177 db transaction { > 178 db eval { > 179 DROP TABLE IF EXISTS pagedata; > 180 CREATE TABLE pagedata( > 181 url TEXT PRIMARY KEY, -- Relative URL for this document > 182 links, -- Text of all links to this URI > 183 title, -- Document or fragment title > 184 content -- Document or fragment content > 185 ); > 186 } > 187 > 188 # Scan the file-system for HTML documents. Add each document found to > 189 # the page and pagedata tables. > 190 foreach file [document_list] { > 191 set zHtml [readfile $file] > 192 > 193 array unset ::P > 194 set ::P(text) "" ;# The full document text > 195 set ::P(isTitle) 0 ;# True while parsing contents of <title> > 196 set ::P(fragments) [list] ;# List of document fragments parsed > 197 set ::P(ftext) "" ;# Text of current document fragment > 198 > 199 parsehtml $zHtml docparse_callback > 200 if {[info exists ::P(ftitle)]} { > 201 lappend ::P(fragments) $::P(ftitle) $::P(ftext) > 202 } > 203 > 204 set keyword "" > 205 catch { set keyword $::K($file) } > 206 if {![info exists ::P(title)]} {set ::P(title) "No Title"} > 207 db eval { REPLACE INTO pagedata VALUES($file, '', $::P(title), $::P(text)) > 208 > 209 foreach {name title text} $::P(fragments) { > 210 set url "$file#$name" > 211 puts $url > 212 db eval { REPLACE INTO pagedata VALUES($url, '', $title, $text) } > 213 } > 214 } > 215 > 216 foreach file [document_list] { > 217 set zHtml [readfile $file] > 218 > 219 array unset ::P > 220 set ::P(url) "" > 221 set ::P(doc) $file > 222 parsehtml $zHtml findlinks_callback > 223 } > 224 > 225 db func trim trim > 226 #db eval { UPDATE pagedata SET links = trim(links) } > 227 db eval { CREATE INDEX ft ON pagedata USING fts5() } > 228 } > 229 } > 230 > 231 sqlite4 db search4.db > 232 rebuild_database > 233
Added search/search4.tcl
> 1 #!/usr/bin/tclsqlite4 > 2 > 3 #========================================================================= > 4 # Decode an HTTP %-encoded string > 5 # > 6 proc percent_decode {str} { > 7 # rewrite "+" back to space > 8 # protect \ and [ and ] by quoting with '\' > 9 set str [string map [list + { } "\\" "\\\\" \[ \\\[ \] \\\]] $str] > 10 > 11 # prepare to process all %-escapes > 12 regsub -all -- {%([A-Fa-f][A-Fa-f0-9])%([A-Fa-f89][A-Fa-f0-9])} \ > 13 $str {[encoding convertfrom utf-8 [binary decode hex \1\2]]} str > 14 regsub -all -- {%([0-7][A-Fa-f0-9])} $str {\\u00\1} str > 15 > 16 # process %-escapes > 17 return [subst -novar $str] > 18 } > 19 > 20 #========================================================================= > 21 # This proc is called to parse the arguments passed to this invocation of > 22 # the CGI program (via either the GET or POST method). It returns a > 23 # key/value list containing the arguments suitable for passing to [array > 24 # set]. For example, if the CGI is invoked via a GET request on the URI: > 25 # > 26 # http://www.sqlite.org/search?query=fts3+table&results=10 > 27 # > 28 # then the returned list value is: > 29 # > 30 # {query {fts3 table} results 10} > 31 # > 32 proc cgi_parse_args {} { > 33 global env A > 34 > 35 if {$env(REQUEST_METHOD) == "GET"} { > 36 foreach q [split $env(QUERY_STRING) &] { > 37 if {[regexp {([a-z0-9]*)=(.*)} $q all var value]} { > 38 set A($var) [percent_decode $value] > 39 } > 40 } > 41 } elseif {$env(REQUEST_METHOD) == "POST"} { > 42 set qstring [read stdin $env(CONTENT_LENGTH)] > 43 foreach q [split $qstring &] { > 44 if {[regexp {([a-z0-9]*)=(.*)} $q all var value]} { > 45 set A($var) [percent_decode $value] > 46 } > 47 } > 48 } else { > 49 error "Unrecognized method: $env(REQUEST_METHOD)" > 50 } > 51 } > 52 > 53 > 54 #========================================================================= > 55 # Redirect the web-browser to URL $url. This command does not return. > 56 # > 57 proc cgi_redirect {url} { > 58 set server $::env(SERVER_NAME) > 59 set path [file dirname $::env(REQUEST_URI)] > 60 if {[string range $path end end]!="/"} { > 61 append path / > 62 } > 63 > 64 puts "Status: 302 Redirect" > 65 puts "Location: http://${server}${path}${url}" > 66 puts "Content-Length: 0" > 67 puts "" > 68 exit > 69 } > 70 > 71 #========================================================================= > 72 # The argument contains a key value list. The values in the list are > 73 # transformed to an HTTP query key value list. For example: > 74 # > 75 # % cgi_encode_args {s "search string" t "search \"type\""} > 76 # s=search+string&t=search+%22type%22 > 77 # > 78 proc cgi_encode_args {list} { > 79 set reslist [list] > 80 foreach {key value} $list { > 81 set value [string map { > 82 \x20 + \x21 %21 \x2A %2A \x22 %22 \x27 %27 \x28 %28 \x29 %29 \x3B %3B > 83 \x3A %3A \x40 %40 \x26 %26 \x3D %3D \x2B %2B \x24 %24 \x2C %2C \x2F %2F > 84 \x3F %3F \x25 %25 \x23 %23 \x5B %5B \x5D %5D > 85 } $value] > 86 > 87 lappend reslist "$key=$value" > 88 } > 89 join $reslist & > 90 } > 91 > 92 proc htmlize {str} { string map {< < > >} $str } > 93 proc attrize {str} { string map {< < > > \x22 \x5c\x22} $str } > 94 > 95 #========================================================================= > 96 > 97 proc cgi_env_dump {} { > 98 > 99 set ret "<h1>Arguments</h1><table>" > 100 foreach {key value} [array get ::A] { > 101 append ret "<tr><td>[htmlize $key]<td>[htmlize $value]" > 102 } > 103 append ret "</table>" > 104 > 105 append ret "<h1>Environment</h1><table>" > 106 foreach {key value} [array get ::env] { > 107 append ret "<tr><td>[htmlize $key]<td>[htmlize $value]" > 108 } > 109 append ret "</table>" > 110 return $ret > 111 } > 112 > 113 proc searchform {} { > 114 return {} > 115 set initial "Enter search term:" > 116 catch { set initial $::A(q) } > 117 return [subst { > 118 <table style="margin: 1em auto"> <tr><td>Search SQLite docs for:<td> > 119 <form name=f method=GET action=search4> > 120 <input name=q type=text width=35 value="[attrize $initial]"></input> > 121 <input name=s type=submit value="Search"></input> > 122 <input name=s type=submit value="Lucky"></input> > 123 </form> > 124 </table> > 125 <script> > 126 document.forms.f.q.focus() > 127 document.forms.f.q.select() > 128 </script> > 129 }] > 130 } > 131 > 132 proc footer {} { > 133 return { > 134 <hr> > 135 <table align=right> > 136 <td> > 137 <i>Powered by <a href="http://www.sqlite.org/src4">FTS5</a>.</i> > 138 </table> > 139 } > 140 } > 141 > 142 > 143 #------------------------------------------------------------------------- > 144 # This command is similar to the builtin Tcl [time] command, except that > 145 # it only ever runs the supplied script once. Also, instead of returning > 146 # a string like "xxx microseconds per iteration", it returns "x.yy ms" or > 147 # "x.yy s", depending on the magnitude of the time spent running the > 148 # command. For example: > 149 # > 150 # % ttime {after 1500} > 151 # 1.50 s > 152 # % ttime {after 45} > 153 # 45.02 ms > 154 # > 155 proc ttime {script} { > 156 set t [lindex [time [list uplevel $script]] 0] > 157 if {$t>1000000} { return [format "%.2f s" [expr {$t/1000000.0}]] } > 158 return [format "%.2f ms" [expr {$t/1000.0}]] > 159 } > 160 > 161 proc rank {matchinfo args} { > 162 binary scan $matchinfo i* I > 163 > 164 set nPhrase [lindex $I 0] > 165 set nCol [lindex $I 1] > 166 > 167 set G [lrange $I 2 [expr {1+$nCol*$nPhrase}]] > 168 set L [lrange $I [expr {2+$nCol*$nPhrase}] end] > 169 > 170 foreach a $args { lappend log [expr {log10(100+$a)}] } > 171 > 172 set score 0.0 > 173 set i 0 > 174 foreach l $L g $G { > 175 if {$l > 0} { > 176 set div [lindex $log [expr $i%3]] > 177 set score [expr {$score + (double($l) / double($g)) / $div}] > 178 } > 179 incr i > 180 } > 181 > 182 return $score > 183 } > 184 proc erank {matchinfo args} { > 185 eval rank [list $matchinfo] $args > 186 } > 187 > 188 > 189 proc searchresults {} { > 190 if {![info exists ::A(q)]} return "" > 191 #set ::A(q) [string map {' ''} $A(q)] > 192 #regsub -all {[^-/"A-Za-z0-9]} $::A(q) { } ::A(q) > 193 > 194 # Count the '"' characters in $::A(q). If there is an odd number of > 195 # occurences, add a " to the end of the query so that fts3 can parse > 196 # it without error. > 197 if {[regexp -all \x22 $::A(q)] % 2} { append ::A(q) \x22 } > 198 > 199 set ::TITLE "Results for: \"[htmlize $::A(q)]\"" > 200 > 201 #db func rank rank > 202 #db func erank erank > 203 > 204 set score 0 > 205 catch {set score $::A(score)} > 206 > 207 # Set nRes to the total number of documents that the users query matches. > 208 # If nRes is 0, then the users query returned zero results. Return a short > 209 # message to that effect. > 210 # > 211 set nRes [db one { SELECT count(*) FROM pagedata WHERE pagedata MATCH $::A(q) > 212 if {$nRes == 0} { > 213 return [subst { No results for: <b>[htmlize $::A(q)]</b> }] > 214 } > 215 > 216 # Set iStart to the index of the first result to display. Results are > 217 # indexed starting at zero from most to least relevant. > 218 # > 219 set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}] > 220 > 221 # HTML markup used to highlight keywords within FTS3 generated snippets. > 222 # > 223 set open {<span style="font-weight:bold; color:navy">} > 224 set close {</span>} > 225 set ellipsis {<b> ... </b>} > 226 > 227 set ret [subst { > 228 <table border=0> > 229 <p>Search results > 230 [expr $iStart+1]..[expr {($nRes < $iStart+10) ? $nRes : $iStart+10}] > 231 of $nRes for: <b>[htmlize $::A(q)]</b> > 232 }] > 233 > 234 set open {<span style="font-weight:bold; color:navy">} > 235 set close {</span>} > 236 set ellipsis {<b> ... </b>} > 237 > 238 if {0==[info exists ::A(e)]} { > 239 set sqlquery { > 240 SELECT url, title, > 241 snippet(pagedata, $open, $close, $ellipsis, 3, 40) AS snippet, > 242 '' AS report > 243 FROM pagedata WHERE pagedata MATCH $::A(q) > 244 ORDER BY rankc(pagedata, 1.0, 5.0, 10.0, 1.0) DESC > 245 LIMIT 10 OFFSET $iStart > 246 } > 247 } else { > 248 set sqlquery { > 249 SELECT url, title, > 250 snippet(pagedata, $open, $close, $ellipsis, 3, 40) AS snippet, > 251 erankc(pagedata, 1.0, 5.0, 10.0, 1.0) AS report > 252 FROM pagedata WHERE pagedata MATCH $::A(q) > 253 ORDER BY rankc(pagedata, 1.0, 5.0, 10.0, 1.0) DESC > 254 LIMIT 10 OFFSET $iStart > 255 } > 256 } > 257 > 258 set resnum $iStart > 259 db eval $sqlquery { > 260 incr resnum > 261 > 262 append ret [subst -nocommands {<tr> > 263 <td valign=top>${resnum}.</td> > 264 <td valign=top> > 265 <div style="white-space:wrap"> > 266 <a href="$url">$title</a> > 267 </div> > 268 <div style="font-size:small;margin-left: 2ex"> > 269 <div style="width:80ex"> $snippet </div> > 270 <div style="margin-bottom:1em"><a href="$url">$url</a></div> > 271 </div> > 272 </td> > 273 > 274 <td width=100%> > 275 <td valign=top style="font-size:70%;white-space:nowrap;color:darkgreen"> $ > 276 }] > 277 } > 278 append ret { </table> } > 279 > 280 > 281 # If the query returned more than 10 results, add up to 10 links to > 282 # each set of 10 results (first link to results 1-10, second to 11-20, > 283 # third to 21-30, as required). > 284 # > 285 if {$nRes>10} { > 286 set s(0) {border: solid #044a64 1px ; padding: 1ex ; margin: 1ex} > 287 set s(1) "$s(0);background:#044a64;color:white" > 288 append ret <center><p> > 289 for {set i 0} {$i < 10 && ($i*10)<$nRes} {incr i} { > 290 append ret [subst { > 291 <a style="$s([expr {($iStart/10)==$i}])" > 292 href="search4?[cgi_encode_args [list q $::A(q) i $i]]">[expr $i+1]</a > 293 }] > 294 } > 295 append ret </center> > 296 } > 297 > 298 return $ret > 299 } > 300 > 301 proc main {} { > 302 global A > 303 sqlite4 db search4.db > 304 cgi_parse_args > 305 > 306 db transaction { > 307 set t [ttime { set doc "[searchform] [searchresults] [footer]" }] > 308 } > 309 append doc "<p>Page generated in $t." > 310 return $doc > 311 > 312 # return [cgi_env_dump] > 313 } > 314 > 315 #========================================================================= > 316 > 317 set ::HEADER { > 318 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" > 319 "http://www.w3.org/TR/html4/strict.dtd"> > 320 <html><head> > 321 <title>$TITLE</title> > 322 <style type="text/css"> > 323 body { > 324 margin: auto; > 325 font-family: Verdana, sans-serif; > 326 padding: 8px 1%; > 327 } > 328 > 329 a { color: #044a64 } > 330 a:visited { color: #734559 } > 331 > 332 .logo { position:absolute; margin:3px; } > 333 .tagline { > 334 float:right; > 335 text-align:right; > 336 font-style:italic; > 337 width:300px; > 338 margin:12px; > 339 margin-top:58px; > 340 } > 341 .menubar { > 342 clear: both; > 343 border-radius: 8px; > 344 background: #044a64; > 345 padding: 0px; > 346 margin: 0px; > 347 cell-spacing: 0px; > 348 } > 349 .toolbar { > 350 text-align: center; > 351 line-height: 1.6em; > 352 margin: 0; > 353 padding: 0px 8px; > 354 } > 355 .toolbar a { color: white; > 356 text-decoration: none; padding: 6px > 357 12px; } > 358 .toolbar a:visited { color: white; } > 359 .toolbar a:hover { color: #044a64; > 360 background: white; } > 361 > 362 .content { margin: 5%; } > 363 .content dt { font-weight:bold; } > 364 .content dd { margin-bottom: 25px; margin-left:20%; } > 365 .content ul { padding:0px; padding-left: 15px; margin:0px; } > 366 </style> > 367 <meta http-equiv="content-type" content="text/html; charset=UTF-8"> > 368 > 369 </head> > 370 <body> > 371 <div><!-- container div to satisfy validator --> > 372 > 373 <a href="index.html"> > 374 <img class="logo" src="images/sqlite370_banner.gif" alt="SQLite Logo" border=" > 375 <div><!-- IE hack to prevent disappearing logo--></div> > 376 <div class="tagline">Small. Fast. Reliable.<br>Choose any three.</div> > 377 > 378 <table width=100% class="menubar"><tr><td> > 379 <table width=100% style="padding:0;margin:0;cell-spacing:0"><tr> > 380 <td width=100%> > 381 <div class="toolbar"> > 382 <a href="about.html">About</a> > 383 <a href="sitemap.html">Sitemap</a> > 384 <a href="docs.html">Documentation</a> > 385 <a href="download.html">Download</a> > 386 <a href="copyright.html">License</a> > 387 <a href="news.html">News</a> > 388 <a href="support.html">Support</a> > 389 </div> > 390 <td> > 391 <div style="padding:0 1em 0px 0;white-space:nowrap"> > 392 <form name=f method="GET" action="search4"> > 393 <input id=q name=q type=text value="" > 394 onfocus="entersearch()" onblur="leavesearch()" style="width:24ex;padding: > 395 <input type=submit value="Go" style="border:solid white 1px;background-col > 396 </form> > 397 </div> > 398 </table> > 399 </div></div></div></div> > 400 </td></tr></table> > 401 > 402 <script> > 403 gMsg = "Search SQLite Docs..." > 404 function entersearch() { > 405 var q = document.getElementById("q"); > 406 if( q.value == gMsg ) { q.value = "" } > 407 q.style.color = "black" > 408 q.style.fontStyle = "normal" > 409 } > 410 function leavesearch() { > 411 var q = document.getElementById("q"); > 412 if( q.value == "" ) { > 413 q.value = gMsg > 414 q.style.color = "#044a64" > 415 q.style.fontStyle = "italic" > 416 } > 417 } > 418 function initsearch() { > 419 var q = document.getElementById("q"); > 420 q.value = "" > 421 q.value = $::INITSEARCH > 422 q.style.color = "black" > 423 q.style.fontStyle = "normal" > 424 } > 425 window.onload = initsearch > 426 </script> > 427 } > 428 > 429 if {![info exists env(REQUEST_METHOD)]} { > 430 set env(REQUEST_METHOD) GET > 431 set env(QUERY_STRING) rebuild=1 > 432 set ::HEADER "" > 433 > 434 set env(QUERY_STRING) {q=cache+size} > 435 set ::HEADER "" > 436 } > 437 > 438 > 439 set TITLE "Search SQLite Documentation (fts5)" > 440 > 441 if {0==[catch main res]} { > 442 if {[info exists ::A(q)]} { > 443 set ::INITSEARCH \"[attrize $::A(q)]\" > 444 } else { > 445 set ::INITSEARCH \"\" > 446 } > 447 set document [subst -nocommands $::HEADER] > 448 append document $res > 449 } else { > 450 set document "<pre>" > 451 append document "Error: $res\n\n" > 452 append document $::errorInfo > 453 append document "</pre>" > 454 } > 455 > 456 puts "Content-type: text/html" > 457 puts "Content-Length: [string length $document]" > 458 puts "" > 459 puts $document > 460 puts "" > 461 flush stdout > 462 close stdout > 463 > 464 exit