Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Use Wapp to implement the search function. Search can now be run from the command-line using "./tclsh.docsrc doc/search". |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | search-refactor |
Files: | files | file ages | folders |
SHA3-256: |
33c56bee814568abd863f684f92fdafe |
User & Date: | drh 2018-02-04 21:41:59.435 |
Context
2018-02-04
| ||
23:30 | Add the search.d/admin script. (check-in: 685371e833 user: drh tags: search-refactor) | |
21:41 | Use Wapp to implement the search function. Search can now be run from the command-line using "./tclsh.docsrc doc/search". (check-in: 33c56bee81 user: drh tags: search-refactor) | |
2018-02-01
| ||
14:44 | Merge fixes from the release branch. (check-in: 5279eb6057 user: drh tags: trunk) | |
Changes
Changes to main.mk.
︙ | ︙ | |||
153 154 155 156 157 158 159 | # Flags to build [tclsqlite3.search] with. # SFLAGS = $(TCLINC) -DSQLITE_THREADSAFE=0 -DSQLITE_ENABLE_FTS5 -DSQLITE_TCLMD5 -DTCLSH -Dmain=xmain $(TCLSH): $(SSRC) $(CC) -O2 -o $@ -I. $(SFLAGS) $(SSRC) $(TCLFLAGS) | > > > > | | < < < | | | 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 | # Flags to build [tclsqlite3.search] with. # SFLAGS = $(TCLINC) -DSQLITE_THREADSAFE=0 -DSQLITE_ENABLE_FTS5 -DSQLITE_TCLMD5 -DTCLSH -Dmain=xmain $(TCLSH): $(SSRC) $(CC) -O2 -o $@ -I. $(SFLAGS) $(SSRC) $(TCLFLAGS) doc/search: $(TCLSH) $(DOC)/search/mkscript.tcl $(DOC)/search/search.tcl.in $(DOC)/search/wapp.tcl $(DOC)/document_header.tcl ./$(TCLSH) $(DOC)/search/mkscript.tcl $(DOC)/search/search.tcl.in >doc/search chmod +x doc/search searchdb: $(TCLSH) doc/search mkdir -p doc/search.d/ ./$(TCLSH) $(DOC)/search/buildsearchdb.tcl # cp $(DOC)/search/search.tcl doc/search.d/admin # chmod +x doc/search.d/admin fts5ext.so: $(DOC)/search/fts5ext.c gcc -shared -fPIC -I. -DSQLITE_EXT \ $(DOC)/search/fts5ext.c -o fts5ext.so # Build the "docapp" application by adding an appropriate SQLAR # repository onto the end of the "sqltclsh" application. |
︙ | ︙ |
Deleted search/buildsearchdb4.tcl.
|
| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < |
Added search/mkscript.tcl.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | #!/usr/bin/tclsh # # Use this script to build the TCL scripts that implement the search # functions on the SQLite website. # # Usage example: # # tclsh mkscript.tcl search.tcl.in >search.tcl # # The input to this script is a template script file, named something # like "search.tcl.in". This script reads the template line-by-line and # applies some minor transformations: # # INCLUDE filename Lines match this pattern are replaced # by the complete text of filename. This # is used (for example) to insert the # complete text of wapp.tcl in the appropriate # place CGI scripts # # DOCHEADER title path Lines matching this pattern invoke the # document_header function contained in the # ../document_header.tcl file to generate # header text for the document, and then # insert that header text in place of the # line # # Other than these transformations, the input is copied through into # the output. # if {[llength $argv]!=1} { puts stderr "Usage: $argv0 TEMPLATE >OUTPUT" exit 1 } set infile [lindex $argv 0] set ROOT [file dir [file dir [file normalize $argv0]]] set HOME [file dir [file normalize $infile]] set in [open $infile rb] while {1} { set line [gets $in] if {[eof $in]} break if {[regexp {^INCLUDE (.*)} $line all path]} { set in2 [open $HOME/$path rb] puts [read $in2] close $in2 continue } if {[regexp {^DOCHEAD } $line] && [llength $line]==3} { source $ROOT/document_header.tcl puts [document_header [lindex $line 1] [lindex $line 2]] continue } puts $line } close $in |
Deleted search/search.tcl.
|
| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < |
Added search/search.tcl.in.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 | #!/usr/bin/tclsh.docsrc #### Import of wapp.tcl INCLUDE wapp.tcl #### End of wapp.tcl # Generate all header content for the output document # proc search_header {} { wapp-trim { DOCHEAD {Search SQLite Documentation} {} } } #------------------------------------------------------------------------- # Add an entry to the log database for the current query. Which # returns $nRes results. # proc search_add_log_entry {nRes} { if {[wapp-param-exists donotlog]} return sqlite3 db2 [file dir [wapp-param SCRIPT_FILENAME]]/search.d/searchlog.db db2 timeout 10000 set ip [wapp-param REMOTE_ADDR] set query [wapp-param q] db2 eval { PRAGMA synchronous=OFF; PRAGMA journal_mode=OFF; BEGIN; CREATE TABLE IF NOT EXISTS log( ip, -- IP query was made from query, -- Fts5 query string nres, -- Number of results timestamp DEFAULT CURRENT_TIMESTAMP ); INSERT INTO log(ip, query, nres) VALUES($ip, $query, $nRes); COMMIT; } db2 close } #------------------------------------------------------------------------- # This command is similar to the builtin Tcl [time] command, except that # it only ever runs the supplied script once. Also, instead of returning # a string like "xxx microseconds per iteration", it returns "x.yy ms" or # "x.yy s", depending on the magnitude of the time spent running the # command. For example: # # % ttime {after 1500} # 1.50 s # % ttime {after 45} # 45.02 ms # proc ttime {script} { set t [lindex [time [list uplevel $script]] 0] if {$t>1000000} { return [format "%.2f s" [expr {$t/1000000.0}]] } return [format "%.2f ms" [expr {$t/1000.0}]] } #----------------------------------------------------------------------- # Do a search of the change log # proc searchchanges {} { set q [wapp-param q] if {$q==""} {return {}} set open {<span style="background-color:#d9f2e6">} set close {</span>} set query { SELECT url, version, idx, highlight(change, 3, $open, $close) AS text FROM change($q) ORDER BY rowid ASC } wapp-trim { <p>Change log entries mentioning: <b>%html($q)</b> <table border=0> } set s2 "style=\"margin-top:0\"" set s1 "style=\"font-size:larger; text-align:left\" class=nounderline" set prev "" db eval $query { if {$prev!=$version} { wapp-trim { <tr> <td %unsafe($s1) valign=top> <a href='%url($url)'>%html($version)</a> <td> <ul %unsafe($s2)> } set prev $version } wapp-subst {<li value=%html($idx)> (%html($idx)) %unsafe($text)\n} } wapp-trim { </table> <center><p>You can also see the <a href=changes.html>entire changelog as a single page</a> if you wish.</center> } } #----------------------------------------------------------------------- # Do a search over all documentation other than the change log # proc searchresults {} { set q [wapp-param q] if {$q==""} {return ""} # Count the '"' characters in $::A(q). If there is an odd number of # occurences, add a " to the end of the query so that fts5 can parse # it without error. if {[regexp -all \x22 $q] % 2} { append q \x22 } # Set iStart to the index of the first result to display. Results are # indexed starting at zero from most to least relevant. # set iStart 0 catch {set iStart [expr {[wapp-param i 0]*10}]} # Grab a list of rowid results. # set sql { SELECT rowid FROM page WHERE page MATCH $q ORDER BY srank(page) DESC, rank * COALESCE( (SELECT percent FROM weight WHERE id=page.rowid), 100 ); } if {[catch { set lRowid [db eval $sql] }]} { set x "" foreach word [split $q " "] { append x " \"[string map [list "\"" "\"\""] $word]\"" } set q [string trim $x] set lRowid [db eval $sql] } set lRes [list] foreach rowid $lRowid { if {$rowid > 1000} { set parent [expr $rowid / 1000] lappend subsections($parent) $rowid } else { lappend lRes $rowid } } set nRes [llength $lRes] set lRes [lrange $lRes $iStart [expr $iStart+9]] # Add an entry to the log database. # search_add_log_entry $nRes # If there are no results, return a message to that effect. # if {[llength $lRes] == 0} { wapp-subst {<p>No Results for: <b>%html($q)</b>\n} } # HTML markup used to highlight keywords within FTS5 generated snippets. # set open {<span style="background-color:#d9f2e6">} set close {</span>} set ellipsis {<b> ... </b>} # Grab the required data # db eval [string map [list %LIST% [join $lRowid ,]] { SELECT rowid AS parentid, snippet(page, 0, $open, $close, $ellipsis, 6) AS s_apis, snippet(page, 2, $open, $close, '', 40) AS s_title1, snippet(page, 3, $open, $close, $ellipsis, 40) AS s_title2, snippet(page, 4, $open, $close, $ellipsis, 40) AS s_content, url, rank FROM page($q) WHERE rowid IN (%LIST%) }] X { foreach k [array names X] { set data($X(parentid),$k) [set X($k)] } } set i1 [expr {$iStart+1}] set i2 [expr {($nRes < $iStart+10) ? $nRes : $iStart+10}] wapp-trim { <table border=0> <p>Search results %html($i1)..%html($i2) of %html($nRes) for: <b>%html($q)</b> } foreach rowid $lRes { foreach a {parentid s_apis s_title1 s_content url rank} { set $a $data($rowid,$a) } if {[info exists subsections($parentid)]} { set childid [lindex $subsections($parentid) 0] set link $data($childid,url) set hdr $data($childid,s_title2) if {$hdr==""} { set s_content "" } else { set s_content [subst { <b><a style=color:#044a64 href=$link>$hdr</a></b> }] } append s_content " $data($childid,s_content)" } wapp-trim {<tr> <td valign=top style="line-height:150%"> <div style="white-space:wrap;font-size:larger" class=nounderline> <a href="%url($url)">%unsafe($s_title1)</a> <div style="float:right;font-size:smaller;color:#BBB">(%url($url))</div> </div> <div style="margin-left: 10ex; font:larger monospace">%unsafe($s_apis)</div> <div style="margin-left: 4ex; margin-bottom:1.5em"> %unsafe($s_content) </div> </td> } } wapp-subst {</table>\n} # If the query returned more than 10 results, add up to 10 links to # each set of 10 results (first link to results 1-10, second to 11-20, # third to 21-30, as required). # if {$nRes>10} { set s(0) {border:solid #044a64 1px;padding:1ex;margin:1ex;line-height:300%;} set s(1) "$s(0);background:#044a64;color:white" wapp-subst {<center><p>\n} for {set i 0} {$i < 10 && ($i*10)<$nRes} {incr i} { set style $s([expr {($iStart/10)==$i}]) wapp-trim { <a style="%html($style)" href="search?q=%qp($q)&i=%qp($i)">%html([expr $i+1])</a> } } wapp-subst {</center>\n} } } # This is the main entry point into the search result page generator # proc wapp-default {} { wapp-content-security-policy off wapp-allow-xorigin-params if {[wapp-param-exists env]} { search_header wapp-trim { <h1>Environment Dump For Debugging</h1> <pre>%html([wapp-debug-env])</pre> } return } # When running using the built-in webserver in Wapp (in other words, # when not running as CGI) any filename that contains a "." loads # directly from the filesystem. if {[string match *//127.0.0.1:* [wapp-param BASE_URL]] && [string match *.* [wapp-param PATH_INFO]] } { set altfile [file dir [wapp-param SCRIPT_FILENAME]][wapp-param PATH_INFO] set fd [open $altfile rb] fconfigure $fd -translation binary wapp-unsafe [read $fd] close $fd switch -glob -- $altfile { *.html { wapp-mimetype text/html } *.css { wapp-mimetype text/css } *.gif { wapp-mimetype image/gif } } } search_header sqlite3 db [file dir [wapp-param SCRIPT_FILENAME]]/search.d/search.db set searchType [wapp-param s d] if {$searchType=="c"} { set cmd searchchanges } else { set cmd searchresults } db transaction { set t [ttime {$cmd}] } wapp-trim { <center> <p>Page generated by <a href='fts5.html'>FTS5</a> in about %html($t). </center> <script> window.addEventListener('load', function() { var w = document.getElementById("searchmenu"); w.style.display = "block"; w = document.getElementById("searchtype"); w.value = "%string($searchType)" setTimeout(function(){ var s = document.getElementById("searchbox"); s.value = "%string([wapp-param q])" s.focus(); s.select(); }, 30); }); </script> } } wapp-start $argv |
Deleted search/search4.tcl.
|
| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < |
Added search/wapp.tcl.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 | # Copyright (c) 2017 D. Richard Hipp # # This program is free software; you can redistribute it and/or # modify it under the terms of the Simplified BSD License (also # known as the "2-Clause License" or "FreeBSD License".) # # This program is distributed in the hope that it will be useful, # but without any warranty; without even the implied warranty of # merchantability or fitness for a particular purpose. # #--------------------------------------------------------------------------- # # Design rules: # # (1) All identifiers in the global namespace begin with "wapp" # # (2) Indentifiers intended for internal use only begin with "wappInt" # # Add text to the end of the HTTP reply. No interpretation or transformation # of the text is performs. The argument should be enclosed within {...} # proc wapp {txt} { global wapp dict append wapp .reply $txt } # Add text to the page under construction. Do no escaping on the text. # # Though "unsafe" in general, there are uses for this kind of thing. # For example, if you want to return the complete, unmodified content of # a file: # # set fd [open content.html rb] # wapp-unsafe [read $fd] # close $fd # # You could do the same thing using ordinary "wapp" instead of "wapp-unsafe". # The difference is that wapp-safety-check will complain about the misuse # of "wapp", but it assumes that the person who write "wapp-unsafe" understands # the risks. # # Though occasionally necessary, the use of this interface should be minimized. # proc wapp-unsafe {txt} { global wapp dict append wapp .reply $txt } # Add text to the end of the reply under construction. The following # substitutions are made: # # %html(...) Escape text for inclusion in HTML # %url(...) Escape text for use as a URL # %qp(...) Escape text for use as a URI query parameter # %string(...) Escape text for use within a JSON string # %unsafe(...) No transformations of the text # # The %unsafe substitution should be avoided whenever possible, obviously. # In addition to the substitutions above, the text also does backslash # escapes. # proc wapp-subst {txt} { global wapp regsub -all {%(html|url|qp|string|unsafe)\(([^)]+)\)} $txt \ {[wappInt-enc-\1 "\2"]} txt dict append wapp .reply [uplevel 1 [list subst -novariables $txt]] } # There must be a wappInt-enc-NAME routine for each possible substitution # in wapp-subst. Thus there are routines for "html", "url", "qp", and "unsafe". # # wappInt-enc-html Escape text so that it is safe to use in the # body of an HTML document. # # wappInt-enc-url Escape text so that it is safe to pass as an # argument to href= and src= attributes in HTML. # # wappInt-enc-qp Escape text so that it is safe to use as the # value of a query parameter in a URL or in # post data or in a cookie. # # wappInt-enc-string Escape ", ', and \ for using inside of a # javascript string literal. # # wappInt-enc-unsafe Perform no encoding at all. Unsafe. # proc wappInt-enc-html {txt} { return [string map {& & < < > >} $txt] } proc wappInt-enc-unsafe {txt} { return $txt } proc wappInt-enc-url {s} { if {[regsub -all {[^-{}@~?=#_.:/a-zA-Z0-9]} $s {[wappInt-%HHchar {&}]} s]} { set s [subst -novar -noback $s] } if {[regsub -all {[{}]} $s {[wappInt-%HHchar \\&]} s]} { set s [subst -novar -noback $s] } return $s } proc wappInt-enc-qp {s} { if {[regsub -all {[^-{}_.a-zA-Z0-9]} $s {[wappInt-%HHchar {&}]} s]} { set s [subst -novar -noback $s] } if {[regsub -all {[{}]} $s {[wappInt-%HHchar \\&]} s]} { set s [subst -novar -noback $s] } return $s } proc wappInt-enc-string {s} { return [string map {\\ \\\\ \" \\\" ' \\'} $s] } # Works like wapp-subst, but also removes whitespace from the beginning # of lines. # proc wapp-trim {txt} { global wapp regsub -all {\n\s+} [string trim $txt] \n txt regsub -all {%(html|url|qp|string|unsafe)\(([^)]+)\)} $txt \ {[wappInt-enc-\1 "\2"]} txt dict append wapp .reply [uplevel 1 [list subst -novariables $txt]] } # This is a helper routine for wappInt-enc-url and wappInt-enc-qp. It returns # an appropriate %HH encoding for the single character c. If c is a unicode # character, then this routine might return multiple bytes: %HH%HH%HH # proc wappInt-%HHchar {c} { if {$c==" "} {return +} return [regsub -all .. [binary encode hex [encoding convertto utf-8 $c]] {%&}] } # Undo the www-url-encoded format. # # HT: This code stolen from ncgi.tcl # proc wappInt-decode-url {str} { set str [string map [list + { } "\\" "\\\\" \[ \\\[ \] \\\]] $str] regsub -all -- \ {%([Ee][A-Fa-f0-9])%([89ABab][A-Fa-f0-9])%([89ABab][A-Fa-f0-9])} \ $str {[encoding convertfrom utf-8 [binary decode hex \1\2\3]]} str regsub -all -- \ {%([CDcd][A-Fa-f0-9])%([89ABab][A-Fa-f0-9])} \ $str {[encoding convertfrom utf-8 [binary decode hex \1\2]]} str regsub -all -- {%([0-7][A-Fa-f0-9])} $str {\\u00\1} str return [subst -novar $str] } # Reset the document back to an empty string. # proc wapp-reset {} { global wapp dict set wapp .reply {} } # Change the mime-type of the result document. # proc wapp-mimetype {x} { global wapp dict set wapp .mimetype $x } # Change the reply code. # proc wapp-reply-code {x} { global wapp dict set wapp .reply-code $x } # Set a cookie # proc wapp-set-cookie {name value} { global wapp dict lappend wapp .new-cookies $name $value } # Unset a cookie # proc wapp-clear-cookie {name} { wapp-set-cookie $name {} } # Add extra entries to the reply header # proc wapp-reply-extra {name value} { global wapp dict lappend wapp .reply-extra $name $value } # Specifies how the web-page under construction should be cached. # The argument should be one of: # # no-cache # max-age=N (for some integer number of seconds, N) # private,max-age=N # proc wapp-cache-control {x} { wapp-reply-extra Cache-Control $x } # Redirect to a different web page # proc wapp-redirect {uri} { wapp-reply-code {307 Redirect} wapp-reply-extra Location $uri } # Return the value of a wapp parameter # proc wapp-param {name {dflt {}}} { global wapp if {![dict exists $wapp $name]} {return $dflt} return [dict get $wapp $name] } # Return true if a and only if the wapp parameter $name exists # proc wapp-param-exists {name} { global wapp return [dict exists $wapp $name] } # Set the value of a wapp parameter # proc wapp-set-param {name value} { global wapp dict set wapp $name $value } # Return all parameter names that match the GLOB pattern, or all # names if the GLOB pattern is omitted. # proc wapp-param-list {{glob {*}}} { global wapp return [dict keys $wapp $glob] } # By default, Wapp does not decode query parameters and POST parameters # for cross-origin requests. This is a security restriction, designed to # help prevent cross-site request forgery (CSRF) attacks. # # As a consequence of this restriction, URLs for sites generated by Wapp # that contain query parameters will not work as URLs found in other # websites. You cannot create a link from a second website into a Wapp # website if the link contains query planner, by default. # # Of course, it is sometimes desirable to allow query parameters on external # links. For URLs for which this is safe, the application should invoke # wapp-allow-xorigin-params. This procedure tells Wapp that it is safe to # go ahead and decode the query parameters even for cross-site requests. # # In other words, for Wapp security is the default setting. Individual pages # need to actively disable the cross-site request security if those pages # are safe for cross-site access. # proc wapp-allow-xorigin-params {} { global wapp if {![dict exists $wapp .qp] && ![dict get $wapp SAME_ORIGIN]} { wappInt-decode-query-params } } # Set the content-security-policy. # # The default content-security-policy is very strict: "default-src 'self'" # The default policy prohibits the use of in-line javascript or CSS. # # Provide an alternative CSP as the argument. Or use "off" to disable # the CSP completely. # proc wapp-content-security-policy {val} { global wapp if {$val=="off"} { dict unset wapp .csp } else { dict set wapp .csp $val } } # Examine the bodys of all procedures in this program looking for # unsafe calls to various Wapp interfaces. Return a text string # containing warnings. Return an empty string if all is ok. # # This routine is advisory only. It misses some constructs that are # dangerous and flags others that are safe. # proc wapp-safety-check {} { set res {} foreach p [info procs] { set ln 0 foreach x [split [info body $p] \n] { incr ln if {[regexp {^[ \t]*wapp[ \t]+([^\n]+)} $x all tail] && [string index $tail 0]!="\173" && [regexp {[[$]} $tail] } { append res "$p:$ln: unsafe \"wapp\" call: \"[string trim $x]\"\n" } if {[regexp {^[ \t]*wapp-(subst|trim)[ \t]+[^\173]} $x all cx]} { append res "$p:$ln: unsafe \"wapp-$cx\" call: \"[string trim $x]\"\n" } } } return $res } # Return a string that descripts the current environment. Applications # might find this useful for debugging. # proc wapp-debug-env {} { global wapp set out {} foreach var [lsort [dict keys $wapp]] { if {[string index $var 0]=="."} continue append out "$var = [list [dict get $wapp $var]]\n" } return $out } # Start up the wapp framework. Parameters are a list passed as the # single argument. # # -server $PORT Listen for HTTP requests on this TCP port $PORT # # -scgi $PORT Listen for SCGI requests on TCP port $PORT # # -cgi Handle a single CGI request # # With no arguments, the behavior is called "auto". In "auto" mode, # if the GATEWAY_INTERFACE environment variable indicates CGI, then run # as CGI. Otherwise, start an HTTP server bound to the loopback address # only, on an arbitrary TCP port, and automatically launch a web browser # on that TCP port. # # Additional options: # # -trace "puts" each request URL as it is handled, for # debugging # # -lint Run wapp-safety-check on the application instead # of running the application itself # # -Dvar=value Set TCL global variable "var" to "value" # # proc wapp-start {arglist} { global env set mode auto set port 0 set n [llength $arglist] for {set i 0} {$i<$n} {incr i} { set term [lindex $arglist $i] if {[string match --* $term]} {set term [string range $term 1 end]} switch -glob -- $term { -server { incr i; set mode "server" set port [lindex $arglist $i] } -scgi { incr i; set mode "scgi" set port [lindex $arglist $i] } -cgi { set mode "cgi" } -trace { proc wappInt-trace {} { set q [wapp-param QUERY_STRING] set uri [wapp-param BASE_URL][wapp-param PATH_INFO] if {$q!=""} {append uri ?$q} puts $uri } } -lint { set res [wapp-safety-check] if {$res!=""} { puts "Potential problems in this code:" puts $res exit 1 } else { exit } } -D*=* { if {[regexp {^.D([^=]+)=(.*)$} $term all var val]} { set ::$var $val } } default { error "unknown option: $term" } } } if {($mode=="auto" && [info exists env(GATEWAY_INTERFACE)] && $env(GATEWAY_INTERFACE)=="CGI/1.0") || $mode=="cgi" } { wappInt-handle-cgi-request return } if {$mode=="scgi"} { wappInt-start-listener $port 1 0 1 } elseif {$mode=="server"} { wappInt-start-listener $port 0 0 0 } else { wappInt-start-listener $port 1 1 0 } vwait ::forever } # Tracing function for each HTTP request. This is overridden by wapp-start # if tracing is enabled. # proc wappInt-trace {} {} # Start up a listening socket. Arrange to invoke wappInt-new-connection # for each inbound HTTP connection. # # localonly - If true, listen on 127.0.0.1 only # # browser - If true, launch a web browser pointing to the new server # proc wappInt-start-listener {port localonly browser scgi} { if {$scgi} { set type SCGI set server [list wappInt-new-connection wappInt-scgi-readable] } else { set type HTTP set server [list wappInt-new-connection wappInt-http-readable] } if {$localonly} { set x [socket -server $server -myaddr 127.0.0.1 $port] } else { set x [socket -server $server $port] } set coninfo [chan configure $x -sockname] set port [lindex $coninfo 2] if {$browser} { wappInt-start-browser http://127.0.0.1:$port/ } else { puts "Listening for $type requests on TCP port $port" } } # Start a web-browser and point it at $URL # proc wappInt-start-browser {url} { global tcl_platform if {$tcl_platform(platform)=="windows"} { exec cmd /c start $url & } elseif {$tcl_platform(os)=="Darwin"} { exec open $url & } elseif {[catch {exec xdg-open $url}]} { exec firefox $url & } } # Accept a new inbound HTTP request # proc wappInt-new-connection {callback chan ip port} { upvar #0 wappInt-$chan W set W [dict create REMOTE_ADDR $ip REMOTE_PORT $port .header {}] fconfigure $chan -blocking 0 -translation binary fileevent $chan readable [list $callback $chan] } # Close an input channel # proc wappInt-close-channel {chan} { if {$chan=="stdout"} { # This happens after completing a CGI request exit 0 } else { unset ::wappInt-$chan close $chan } } # Process new text received on an inbound HTTP request # proc wappInt-http-readable {chan} { if {[catch [list wappInt-http-readable-unsafe $chan] msg]} { puts stderr "$msg\n$::errorInfo" wappInt-close-channel $chan } } proc wappInt-http-readable-unsafe {chan} { upvar #0 wappInt-$chan W wapp wapp if {![dict exists $W .toread]} { # If the .toread key is not set, that means we are still reading # the header set line [string trimright [gets $chan]] set n [string length $line] if {$n>0} { if {[dict get $W .header]=="" || [regexp {^\s+} $line]} { dict append W .header $line } else { dict append W .header \n$line } if {[string length [dict get $W .header]]>100000} { error "HTTP request header too big - possible DOS attack" } } elseif {$n==0} { # We have reached the blank line that terminates the header. global argv0 dict set W SCRIPT_FILENAME $argv0 dict set W DOCUMENT_ROOT [file dir $argv0] if {[wappInt-parse-header $chan]} { catch {close $chan} return } set len 0 if {[dict exists $W CONTENT_LENGTH]} { set len [dict get $W CONTENT_LENGTH] } if {$len>0} { # Still need to read the query content dict set W .toread $len } else { # There is no query content, so handle the request immediately set wapp $W wappInt-handle-request $chan 0 } } } else { # If .toread is set, that means we are reading the query content. # Continue reading until .toread reaches zero. set got [read $chan [dict get $W .toread]] dict append W CONTENT $got dict set W .toread [expr {[dict get $W .toread]-[string length $got]}] if {[dict get $W .toread]<=0} { # Handle the request as soon as all the query content is received set wapp $W wappInt-handle-request $chan 0 } } } # Decode the HTTP request header. # # This routine is always running inside of a [catch], so if # any problems arise, simply raise an error. # proc wappInt-parse-header {chan} { upvar #0 wappInt-$chan W set hdr [split [dict get $W .header] \n] if {$hdr==""} {return 1} set req [lindex $hdr 0] dict set W REQUEST_METHOD [set method [lindex $req 0]] if {[lsearch {GET HEAD POST} $method]<0} { error "unsupported request method: \"[dict get $W REQUEST_METHOD]\"" } set uri [lindex $req 1] set split_uri [split $uri ?] set uri0 [lindex $split_uri 0] if {![regexp {^/[-.a-z0-9_/]*$} $uri0]} { error "invalid request uri: \"$uri0\"" } dict set W REQUEST_URI $uri0 dict set W PATH_INFO $uri0 set uri1 [lindex $split_uri 1] dict set W QUERY_STRING $uri1 set n [llength $hdr] for {set i 1} {$i<$n} {incr i} { set x [lindex $hdr $i] if {![regexp {^(.+): +(.*)$} $x all name value]} { error "invalid header line: \"$x\"" } set name [string toupper $name] switch -- $name { REFERER {set name HTTP_REFERER} USER-AGENT {set name HTTP_USER_AGENT} CONTENT-LENGTH {set name CONTENT_LENGTH} CONTENT-TYPE {set name CONTENT_TYPE} HOST {set name HTTP_HOST} COOKIE {set name HTTP_COOKIE} ACCEPT-ENCODING {set name HTTP_ACCEPT_ENCODING} default {set name .hdr:$name} } dict set W $name $value } return 0 } # Decode the QUERY_STRING parameters from a GET request or the # application/x-www-form-urlencoded CONTENT from a POST request. # # This routine sets the ".qp" element of the ::wapp dict as a signal # that query parameters have already been decoded. # proc wappInt-decode-query-params {} { global wapp dict set wapp .qp 1 if {[dict exists $wapp QUERY_STRING]} { foreach qterm [split [dict get $wapp QUERY_STRING] &] { set qsplit [split $qterm =] set nm [lindex $qsplit 0] if {[regexp {^[a-z][a-z0-9]*$} $nm]} { dict set wapp $nm [wappInt-decode-url [lindex $qsplit 1]] } } } if {[dict exists $wapp CONTENT_TYPE] && [dict get $wapp CONTENT_TYPE]=="application/x-www-form-urlencoded" && [dict exists $wapp CONTENT] } { foreach qterm [split [string trim [dict get $wapp CONTENT]] &] { set qsplit [split $qterm =] set nm [lindex $qsplit 0] if {[regexp {^[a-z][-a-z0-9_]*$} $nm]} { dict set wapp $nm [wappInt-decode-url [lindex $qsplit 1]] } } } # To-Do: Perhaps add support for multipart/form-data decoding. # Alternatively, perhaps multipart/form-data decoding can be done # by application code using a separate helper function, like # "wapp_decode_multipart_formdata" or somesuch. } # Invoke application-supplied methods to generate a reply to # a single HTTP request. # # This routine always runs within [catch], so handle exceptions by # invoking [error]. # proc wappInt-handle-request {chan useCgi} { global wapp dict set wapp .reply {} dict set wapp .mimetype {text/html; charset=utf-8} dict set wapp .reply-code {200 Ok} dict set wapp .csp {default-src 'self'} # Set up additional CGI environment values # if {![dict exists $wapp HTTP_HOST]} { dict set wapp BASE_URL {} } elseif {[dict exists $wapp HTTPS]} { dict set wapp BASE_URL https://[dict get $wapp HTTP_HOST] } else { dict set wapp BASE_URL http://[dict get $wapp HTTP_HOST] } if {![dict exists $wapp REQUEST_URI]} { dict set wapp REQUEST_URI / } elseif {[regsub {\?.*} [dict get $wapp REQUEST_URI] {} newR]} { # Some servers (ex: nginx) append the query parameters to REQUEST_URI. # These need to be stripped off dict set wapp REQUEST_URI $newR } if {[dict exists $wapp SCRIPT_NAME]} { dict append wapp BASE_URL [dict get $wapp SCRIPT_NAME] } else { dict set wapp SCRIPT_NAME {} } if {![dict exists $wapp PATH_INFO]} { # If PATH_INFO is missing (ex: nginx) the construct it set URI [dict get $wapp REQUEST_URI] set skip [string length [dict get $wapp SCRIPT_NAME]] dict set wapp PATH_INFO [string range $URI $skip end] } if {[regexp {^/([^/]+)(.*)$} [dict get $wapp PATH_INFO] all head tail]} { dict set wapp PATH_HEAD $head dict set wapp PATH_TAIL [string trimleft $tail /] } else { dict set wapp PATH_INFO {} dict set wapp PATH_HEAD {} dict set wapp PATH_TAIL {} } dict set wapp SELF_URL [dict get $wapp BASE_URL]/[dict get $wapp PATH_HEAD] # Parse query parameters from the query string, the cookies, and # POST data # if {[dict exists $wapp HTTP_COOKIE]} { foreach qterm [split [dict get $wapp HTTP_COOKIE] {;}] { set qsplit [split [string trim $qterm] =] set nm [lindex $qsplit 0] if {[regexp {^[a-z][-a-z0-9_]*$} $nm]} { dict set wapp $nm [wappInt-decode-url [lindex $qsplit 1]] } } } if {[dict exists $wapp HTTP_REFERER] && [string match [dict get $wapp BASE_URL]/* [dict get $wapp HTTP_REFERER]] } { set same_origin 1 } else { set same_origin 0 } dict set wapp SAME_ORIGIN $same_origin if {$same_origin} { wappInt-decode-query-params } # Invoke the application-defined handler procedure for this page # request. If an error occurs while running that procedure, generate # an HTTP reply that contains the error message. # wapp-before-dispatch-hook wappInt-trace set mname [dict get $wapp PATH_HEAD] if {[catch { if {$mname!="" && [llength [info proc wapp-page-$mname]]>0} { wapp-page-$mname } else { wapp-default } } msg]} { wapp-reset wapp-reply-code "500 Internal Server Error" wapp-mimetype text/html wapp-trim { <h1>Wapp Application Error</h1> <pre>%html($::errorInfo)</pre> } dict unset wapp .new-cookies } # Transmit the HTTP reply # if {$chan=="stdout"} { puts $chan "Status: [dict get $wapp .reply-code]\r" } else { puts $chan "HTTP/1.1 [dict get $wapp .reply-code]\r" puts $chan "Server: wapp\r" puts $chan "Connection: close\r" } if {[dict exists $wapp .reply-extra]} { foreach {name value} [dict get $wapp .reply-extra] { puts $chan "$name: $value\r" } } if {[dict exists $wapp .csp]} { puts $chan "Content-Security-Policy: [dict get $wapp .csp]\r" } set mimetype [dict get $wapp .mimetype] puts $chan "Content-Type: $mimetype\r" if {[dict exists $wapp .new-cookies]} { foreach {nm val} [dict get $wapp .new-cookies] { if {[regexp {^[a-z][-a-z0-9_]*$} $nm]} { if {$val==""} { puts $chan "Set-Cookie: $nm=; HttpOnly; Path=/; Max-Age=1\r" } else { set val [wappInt-enc-url $val] puts $chan "Set-Cookie: $nm=$val; HttpOnly; Path=/\r" } } } } if {[string match text/* $mimetype]} { set reply [encoding convertto utf-8 [dict get $wapp .reply]] if {[regexp {\ygzip\y} [wapp-param HTTP_ACCEPT_ENCODING]]} { catch { set x [zlib gzip $reply] set reply $x puts $chan "Content-Encoding: gzip\r" } } } else { set reply [dict get $wapp .reply] } puts $chan "Content-Length: [string length $reply]\r" puts $chan \r puts $chan $reply flush $chan wappInt-close-channel $chan } # This routine runs just prior to request-handler dispatch. The # default implementation is a no-op, but applications can override # to do additional transformations or checks. # proc wapp-before-dispatch-hook {} {return} # Process a single CGI request # proc wappInt-handle-cgi-request {} { global wapp env foreach key { CONTENT_LENGTH CONTENT_TYPE DOCUMENT_ROOT HTTP_ACCEPT_ENCODING HTTP_COOKIE HTTP_HOST HTTP_REFERER HTTP_USER_AGENT HTTPS PATH_INFO QUERY_STRING REMOTE_ADDR REQUEST_METHOD REQUEST_URI REMOTE_USER SCRIPT_FILENAME SCRIPT_NAME SERVER_NAME SERVER_PORT SERVER_PROTOCOL } { if {[info exists env($key)]} { dict set wapp $key $env($key) } } set len 0 if {[dict exists $wapp CONTENT_LENGTH]} { set len [dict get $wapp CONTENT_LENGTH] } if {$len>0} { fconfigure stdin -translation binary dict set wapp CONTENT [read stdin $len] } fconfigure stdout -translation binary wappInt-handle-request stdout 1 } # Process new text received on an inbound SCGI request # proc wappInt-scgi-readable {chan} { if {[catch [list wappInt-scgi-readable-unsafe $chan] msg]} { puts stderr "$msg\n$::errorInfo" wappInt-close-channel $chan } } proc wappInt-scgi-readable-unsafe {chan} { upvar #0 wappInt-$chan W wapp wapp if {![dict exists $W .toread]} { # If the .toread key is not set, that means we are still reading # the header. # # An SGI header is short. This implementation assumes the entire # header is available all at once. # set req [read $chan 15] set n [string length $req] scan $req %d:%s len hdr incr len [string length "$len:,"] append hdr [read $chan [expr {$len-15}]] foreach {nm val} [split $hdr \000] { if {$nm==","} break dict set W $nm $val } set len 0 if {[dict exists $W CONTENT_LENGTH]} { set len [dict get $W CONTENT_LENGTH] } if {$len>0} { # Still need to read the query content dict set W .toread $len } else { # There is no query content, so handle the request immediately set wapp $W wappInt-handle-request $chan 0 } } else { # If .toread is set, that means we are reading the query content. # Continue reading until .toread reaches zero. set got [read $chan [dict get $W .toread]] dict append W CONTENT $got dict set W .toread [expr {[dict get $W .toread]-[string length $got]}] if {[dict get $W .toread]<=0} { # Handle the request as soon as all the query content is received set wapp $W wappInt-handle-request $chan 0 } } } # Call this version 1.0 package provide wapp 1.0 |