Index: main.mk ================================================================== --- main.mk +++ main.mk @@ -155,19 +155,24 @@ SFLAGS = $(TCLINC) -DSQLITE_THREADSAFE=0 -DSQLITE_ENABLE_FTS5 -DSQLITE_TCLMD5 -DTCLSH -Dmain=xmain $(TCLSH): $(SSRC) $(CC) -O2 -o $@ -I. $(SFLAGS) $(SSRC) $(TCLFLAGS) -searchdb: $(TCLSH) - mkdir -p doc/search.d/ - ./$(TCLSH) $(DOC)/search/buildsearchdb.tcl - cp $(DOC)/document_header.tcl doc/document_header.tcl - cp $(DOC)/document_header.tcl doc/search.d/document_header.tcl - cp $(DOC)/search/search.tcl doc/search +doc/search: $(TCLSH) $(DOC)/search/mkscript.tcl $(DOC)/search/search.tcl.in $(DOC)/search/wapp.tcl $(DOC)/document_header.tcl + ./$(TCLSH) $(DOC)/search/mkscript.tcl $(DOC)/search/search.tcl.in >doc/search chmod +x doc/search - cp $(DOC)/search/search.tcl doc/search.d/admin + +doc/search.d/admin: $(TCLSH) $(DOC)/search/mkscript.tcl $(DOC)/search/admin.tcl.in $(DOC)/search/wapp.tcl $(DOC)/document_header.tcl + mkdir -p doc/search.d/ + ./$(TCLSH) $(DOC)/search/mkscript.tcl $(DOC)/search/admin.tcl.in >doc/search.d/admin chmod +x doc/search.d/admin + +searchdb: $(TCLSH) doc/search doc/search.d/admin + ./$(TCLSH) $(DOC)/search/buildsearchdb.tcl + +# cp $(DOC)/search/search.tcl doc/search.d/admin +# chmod +x doc/search.d/admin fts5ext.so: $(DOC)/search/fts5ext.c gcc -shared -fPIC -I. -DSQLITE_EXT \ $(DOC)/search/fts5ext.c -o fts5ext.so Index: pages/about.in ================================================================== --- pages/about.in +++ pages/about.in @@ -54,19 +54,15 @@

SQLite is a compact library. With all features enabled, the [library size] can be less than 500KiB, depending on the target platform and compiler optimization settings. (64-bit code is larger. And some compiler optimizations such as aggressive function inlining and loop unrolling can cause the -object code to be much larger.) If optional features are omitted, the -size of the SQLite library can be reduced below 300KiB. SQLite can also -be made to run in minimal stack space (4KiB) and -very little heap (100KiB), making SQLite a popular database engine -choice on memory constrained gadgets such as cellphones, PDAs, and MP3 players. +object code to be much larger.) There is a tradeoff between memory usage and speed. SQLite generally runs faster the more memory you give it. Nevertheless, performance is usually quite good even -in low-memory environments. Depending on how it is used, SQLite be +in low-memory environments. Depending on how it is used, SQLite can be [faster than the filesystem|faster than direct filesystem I/O].

SQLite is very carefully tested prior to every release and has a reputation for being very reliable. ADDED pages/assert.in Index: pages/assert.in ================================================================== --- /dev/null +++ pages/assert.in @@ -0,0 +1,129 @@ +The Use Of assert() In SQLite + + + +

Work In Progress - Do Not Link!

+

+This document is a work-in-progress and is not yet ready for release. +Please do not link to it, yet. +

+ +

Assert() And Other Assert()-like Macros In SQLite

+ +

+The assert(X) macro is +[https://en.wikipedia.org/wiki/Assert.h|part of standard C], in the the +<assert.h> header file. +SQLite adds three other assert()-like macros named NEVER(X), ALWAYS(X), +and testcase(X). + +

+ +

+SQLite version 3.22.0 ([dateof:3.22.0]) contains 5290 assert() macros, +839 testcase() macros, 88 ALWAYS() macros, and 63 NEVER() macros. + +

Philosophy of assert()

+ +

In SQLite, the presence of assert(X) means that the developers have +a proof that X is always true. Readers can depend upon X being true to +help them reason about the code. An assert(X) is a strong statement +about the truth of X. There is no doubt. + +

The ALWAYS(X) and NEVER(X) macros are a weaker statement about the +truth of X. The presence of ALWAYS(X) or NEVER(X) means that the developers +believe X is always or never true, but there is no proof, or the proof +is complex and error-prone, or the proof depends on other aspects +of the system that seem likely to change. + +

In other systems, developers sometimes use assert(X) in a way that is +similar to the use of ALWAYS(X) in SQLite. Developers will add an +assert(X) as a +[https://blog.regehr.org/archives/1576|tacit acknowledgement that they +do not fully believe that X is always true]. +We believe that this use of assert(X) is wrong and violates the intent +and purpose of having assert(X) available in C in the first place. +An assert(X) should not be seen as a safety-net or top-rope used to +guard against mistakes. Nor is assert(X) appropriate for defense-in-depth. +An ALWAYS(X) macro, or something similar, should be used in those cases +because ALWAYS(X) is followed by code to actually deal with the problem. + +

The [https://golang.org|Go programming language] omits assert(). +The Go developers +[https://golang.org/doc/faq#assertions|recognize this is contentious]. +Disallowing assert() is essentially telling developers that they are +not allowed to expression invariants. It is as if the developers of +Go do not want coders to prove that the software is correct. +The SQLite developers believe that the lack of assert() disqualifies +Go as a language for serious development work. + +

Different Behaviors According To Build Type

+ +

Three separate builds are used to validate the SQLite software. +A functionality testing build is used to validate the source code. +A coverage testing build is used to validate the test suite, to confirm +that the test suite provides 100% MC/DC. The release build is used +to validate the machine code. +All tests must give the same answer in all three +builds. See the [testing|"How SQLite Is Tested"] document for more detail. + +

The assert() macros behave differently according to how SQLite is built. + +

+ + + + + + +
Functionality TestingCoverage TestingRelease
assert(X) +abort() if X is false +no-op +no-op +
ALWAYS(X) +abort() if X is false +always true +pass through the value X +
NEVER(X) +abort() if X is true +always false +pass through the value X +
testcase(X) +no-op +do some harmless work if X is true +no-op +
+
+ +

The default behavior of assert(X) in standard C is that it is enabled +for release builds. We find this acceptable in general. However, the +SQLite code base has many assert() statements in performance-sensitive +areas of the code. Leaving assert(X) turned causes SQLite to run about +three times slower. Also, SQLite strives to provide 100% MC/DC in an +as-delivered configuration, which is obviously impossible if assert(X) +statements are enabled. For these reasons, assert(X) is a no-op for +release builds in SQLite. ADDED pages/codeofconduct.in Index: pages/codeofconduct.in ================================================================== --- /dev/null +++ pages/codeofconduct.in @@ -0,0 +1,124 @@ +Code Of Conduct + + + +

Overview

+ +

Having been encouraged by clients to adopt a written +code of conduct, the SQLite developers elected to govern their +interactions with each other, with their clients, +and with the larger SQLite user community in +accordance with the "instruments of good works" from chapter 4 of +[https://en.wikipedia.org/wiki/Rule_of_Saint_Benedict|The Rule of St. Benedict]. +This code of conduct has proven its mettle in thousands of diverse +communities for over 1,500 years, and has served as a baseline for many +civil law codes since the time of Charlemagne. + +

+This rule is strict, and none are able to comply perfectly. +Grace is readily granted for minor transgressions. +All are encouraged to follow this rule closely, as in so +doing they may expect to live happier, healthier, and more +productive lives. The entire rule is good and wholesome, and +yet we make no enforcement of the more introspective aspects. + +

+Everyone is free to use the SQLite source code, object code, +and/or documentation regardless of their opinion of and adherence +to this rule. +SQLite has been and continues to be completely free to everyone, +without precondition. + +

+However, those who wish to participate in the SQLite community, +either by commenting on the public mailing lists or by contributing +patches or suggestions or in any other way, +are expected to present themselves in a manner +that honors the overarching spirit of the rule, even if they +disagree with specific details. +Polite and professional discussion is always welcomed, from anyone. + +

The Rule

+ +
    +
  1. First of all, love the Lord God with your whole heart, + your whole soul, and your whole strength. +
  2. Then, love your neighbor as yourself. +
  3. Do not murder. +
  4. Do not commit adultery. +
  5. Do not steal. +
  6. Do not covet. +
  7. Do not bear false witness. +
  8. Honor all. +
  9. Do not do to another what you would not have done to yourself. +
  10. Deny oneself in order to follow Christ. +
  11. Chastise the body. +
  12. Do not become attached to pleasures. +
  13. Love fasting. +
  14. Relieve the poor. +
  15. Clothe the naked. +
  16. Visit the sick. +
  17. Bury the dead. +
  18. Be a help in times of trouble. +
  19. Console the sorrowing. +
  20. Be a stranger to the world's ways. +
  21. Prefer nothing more than the love of Christ. +
  22. Do not give way to anger. +
  23. Do not nurse a grudge. +
  24. Do not entertain deceit in your heart. +
  25. Do not give a false peace. +
  26. Do not forsake charity. +
  27. Do not swear, for fear of perjuring yourself. +
  28. Utter only truth from heart and mouth. +
  29. Do not return evil for evil. +
  30. Do no wrong to anyone, and bear patiently wrongs done to yourself. +
  31. Love your enemies. +
  32. Do not curse those who curse you, but rather bless them. +
  33. Bear persecution for justice's sake. +
  34. Be not proud. +
  35. Be not addicted to wine. +
  36. Be not a great eater. +
  37. Be not drowsy. +
  38. Be not lazy. +
  39. Be not a grumbler. +
  40. Be not a detractor. +
  41. Put your hope in God. +
  42. Attribute to God, and not to self, whatever good you see in yourself. +
  43. Recognize always that evil is your own doing, + and to impute it to yourself. +
  44. Fear the Day of Judgment. +
  45. Be in dread of hell. +
  46. Desire eternal life with all the passion of the spirit. +
  47. Keep death daily before your eyes. +
  48. Keep constant guard over the actions of your life. +
  49. Know for certain that God sees you everywhere. +
  50. When wrongful thoughts come into your heart, dash them against + Christ immediately. +
  51. Disclose wrongful thoughts to your spiritual mentor. +
  52. Guard your tongue against evil and depraved speech. +
  53. Do not love much talking. +
  54. Speak no useless words or words that move to laughter. +
  55. Do not love much or boisterous laughter. +
  56. Listen willingly to holy reading. +
  57. Devote yourself frequently to prayer. +
  58. Daily in your prayers, with tears and sighs, confess your + past sins to God, and amend them for the future. +
  59. Fulfill not the desires of the flesh; hate your own will. +
  60. Obey in all things the commands of those whom God has placed + in authority over you even though they (which God forbid) should + act otherwise, mindful of the Lord's precept, "Do what they say, + but not what they do." +
  61. Do not wish to be called holy before one is holy; but first to be + holy, that you may be truly so called. +
  62. Fulfill God's commandments daily in your deeds. +
  63. Love chastity. +
  64. Hate no one. +
  65. Be not jealous, nor harbor envy. +
  66. Do not love quarreling. +
  67. Shun arrogance. +
  68. Respect your seniors. +
  69. Love your juniors. +
  70. Pray for your enemies in the love of Christ. +
  71. Make peace with your adversary before the sun sets. +
  72. Never despair of God's mercy. +
Index: pages/copyright.in ================================================================== --- pages/copyright.in +++ pages/copyright.in @@ -43,27 +43,56 @@ author, and all of those authors have public domain dedications on file. So the SQLite code base is clean and is uncontaminated with licensed code from other projects.

+hd_fragment notopencontrib {not open-contributin} +

Open-Source, not Open-Contribution

+ +

+SQLite is open-source, meaning that you can make as many copies of it as +you want and do whatever you want with those copies, without limitation. +But SQLite is not open-contribution. The project does not accept patches. +Only 27 individuals have ever contributed any code to SQLite, and of those +only 16 still have traces in the latest release. +Only 3 developers have contributed +non-comment changes within the previous five years and 96.4% of the latest +release code was written by just two people. +(The statistics in this paragraph were gathered on 2018-02-05.) + +

+Furthermore, all of the code in SQLite is original, having been written +specifically for use by SQLite. No code has been copied from unknown +sources on the internet. + +

+Many people associated "open-source" software with software that has +grown organically through contributions from countless individuals. +And, indeed, there is some open-source software that works that way. +But not SQLite. SQLite uses the the +[https://en.wikipedia.org/wiki/The_Cathedral_and_the_Bazaar|cathedral development philosopy] +not the bazaar approach. All of the SQLite code has been written +by people who are well known to each other. + +hd_fragment warrantyoftitle {Warranty of Title} +

-

Obtaining A License To Use SQLite

+

Warranty of Title

-Even though SQLite is in the public domain and does not require -a license, some users want to obtain a license anyway. Some reasons -for obtaining a license include: +SQLite is in the public domain and does not require a license. +Even so, some organizations want legal proof of their right to use +SQLite. Circumstances where this occurs include the following:

-If you feel like you really need to purchase a license for SQLite, +If any of the above circumstances apply to you, Hwaci, the company that employs all the developers of SQLite, will sell you -one. -All proceeds from the sale of SQLite licenses are used to fund +a Warranty of Title for SQLite. +A Warranty of Title is a legal document that asserts that the claimed +authors of SQLite are the true authors, and that the authors +have the legal right to dedicate the SQLite to the public domain, and +that Hwaci will vigorously defend against challenges to those claims. +All proceeds from the sale of SQLite Warranties of Title are used to fund continuing improvement and support of SQLite.

Contributed Code

In order to keep SQLite completely free and unencumbered by copyright, -all new contributors to the SQLite code base are asked to dedicate -their contributions to the public domain. -If you want to send a patch or enhancement for possible inclusion in the -SQLite source tree, please accompany the patch with the following statement: -

- -
-The author or authors of this code dedicate any and all copyright interest -in this code to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and successors. -We intend this dedication to be an overt act of relinquishment in -perpetuity of all present and future rights to this code under copyright law. -
- -

-We are not able to accept patches or changes to -SQLite that are not accompanied by a statement such as the above. -In addition, if you make -changes or enhancements as an employee, then a simple statement such as the -above is insufficient. You must also send by surface mail a copyright release -signed by a company officer. -A signed original of the copyright release should be mailed to:

- -
-Hwaci
-6200 Maple Cove Lane
-Charlotte, NC 28269
-USA -
- -

-A template copyright release is available -in PDF or -HTML. -You can use this release to make future changes. -

+the project does not accept patches. If you would like to make a +suggested change, and include a patch as a proof-of-concept, that would +be great. However please do not be offended if we rewrite your patch +from scratch. Index: pages/howtocorrupt.in ================================================================== --- pages/howtocorrupt.in +++ pages/howtocorrupt.in @@ -473,11 +473,11 @@

This problem was discovered during internal testing and has never been observed in the wild. The problem was fixed on 2011-01-27 and in version 3.7.5.

-

I/O while obtaining a lock leads to corruption

+

I/O error while obtaining a lock leads to corruption

If the operating system returns an I/O error while attempting to obtain a certain lock on shared memory in [WAL | WAL mode] then SQLite might fail to reset its cache, which could lead to database corruption if subsequent writes are attempted.

@@ -530,10 +530,10 @@ that database at the same time, then the race condition might cause one of those processes to get a false indication that the recovery has already completed, allowing that process to continue using the database file without running recovery first. If that process writes to the file, then the file might go corrupt. This race condition -had apparently existing in all prior versions of SQLite for Windows going +had apparently existed in all prior versions of SQLite for Windows going back to 2004. But the race was very tight. Practically speaking, you need a fast multi-core machine in which you launch two processes to run recovery at the same moment on two separate cores. This defect was on Windows systems only and did not affect the posix OS interface.

Index: pages/optoverview.in ================================================================== --- pages/optoverview.in +++ pages/optoverview.in @@ -1074,11 +1074,11 @@
  • (Obsolete. Query flattening is no longer attempted for aggregate subqueries.)
  • - ^If the subquery is not the right operand of a LEFT JOIN then + ^If the subquery is the right operand of a LEFT JOIN then
    1. the subquery may not be a join, and
    2. the FROM clause of the subquery may not contain a virtual table, and
    3. the outer query may not be an aggregate.
  • Index: pages/pragma.in ================================================================== --- pages/pragma.in +++ pages/pragma.in @@ -1202,14 +1202,10 @@ consistent with or without the extra syncs provided by synchronous=FULL. If durability is not a concern, then synchronous=NORMAL is normally all one needs in WAL mode.

    -

    ^The default setting is usually synchronous=FULL. - The [SQLITE_EXTRA_DURABLE] compile-time option changes the - default to synchronous=EXTRA.

    -

    The TEMP schema always has synchronous=OFF since the content of of TEMP is ephemeral and is not expected to survive a power outage. Attempts to change the synchronous setting for TEMP are silently ignored. Index: pages/th3.in ================================================================== --- pages/th3.in +++ pages/th3.in @@ -312,11 +312,11 @@ 0 failures on 35 th3makes and 171555634 tests in (05:08:31) 3 cores on bella SQLite 3.14.1 2016-08-11 13:08:14 34aed3a318a413fd180604365546c1f530d1c60c

    As can be seen above, a single run -of multitest.tcl invokes th3make dozens or times and takes between 12 and 24 +of multitest.tcl invokes th3make dozens of times and takes between 12 and 24 CPU hours. The middle section of the output shows the arguments to each individual th3make run and the result and elapse time for that th3make. All build products and output for the separate th3make runs are captures in subdirectories for post-test analysis. The two-line summary at the bottom shows the total number of errors and tests @@ -374,12 +374,12 @@

    Mutation testing can be slow, since each test can take up to 5 minutes on a fast workstation, and there are two tests for each branch instructions, and over 20,000 branch instructions. Efforts are made to expedite operation. For example, TH3 is compiled in such a -way that it exists as soon as it finds the first error, and as many -of the mutations are easily detected, many cycles happen in ly +way that it exits as soon as it finds the first error, and as many +of the mutations are easily detected, many cycles happen in only a few seconds. Nevertheless, the mutation-test.tcl script includes command-line options to limit the range of code lines tested so that mutation testing only needs to be performed on blocks of code that have recently changed. Index: pages/vtab.in ================================================================== --- pages/vtab.in +++ pages/vtab.in @@ -134,11 +134,12 @@

    A virtual table is eponymous if its [xCreate] method is the exact same function as the [xConnect] method, or if the [xCreate] method is NULL. The [xCreate] method is called when a virtual table is first created -using the [CREATE VIRTUAL TABLE] statement. The [xConnect] method whenever +using the [CREATE VIRTUAL TABLE] statement. The [xConnect] method +is invoked whenever a database connection attaches to or reparses a schema. When these two methods are the same, that indicates that the virtual table has no persistent state that needs to be created and destroyed. hd_fragment epoonlyvtab {eponymous-only virtual table} Index: pages/whyc.in ================================================================== --- pages/whyc.in +++ pages/whyc.in @@ -49,11 +49,11 @@ libraries written in Java. Thus, SQLite would be unusable on iPhones had it been written in Java.

    Low-Dependency

    -

    Libraries written in C doe not have a huge run-time dependency. +

    Libraries written in C do not have a huge run-time dependency. In its minimum configuration, SQLite requires only the following routines from the standard C library:

    ADDED search/admin.tcl.in Index: search/admin.tcl.in ================================================================== --- /dev/null +++ search/admin.tcl.in @@ -0,0 +1,124 @@ +#!/usr/bin/tclsh.docsrc +#### Import of wapp.tcl +INCLUDE wapp.tcl +#### End of wapp.tcl + +# Generate all header content for the output document +# +proc search_header {} { + wapp-trim { +DOCHEAD {Search SQLite Stats} {../} + } +} + +proc wapp-default {} { + # When running using the built-in webserver in Wapp (in other words, + # when not running as CGI) any filename that contains a "." loads + # directly from the filesystem. + if {[string match *//127.0.0.1:* [wapp-param BASE_URL]] + && [string match *.* [wapp-param PATH_INFO]] + } { + set altfile [file dir [wapp-param SCRIPT_FILENAME]]/../[wapp-param PATH_INFO] + set fd [open $altfile rb] + fconfigure $fd -translation binary + wapp-unsafe [read $fd] + close $fd + switch -glob -- $altfile { + *.html { + wapp-mimetype text/html + } + *.css { + wapp-mimetype text/css + } + *.gif { + wapp-mimetype image/gif + } + } + return + } + wapp-page-admin +} +proc wapp-page-admin {} { + wapp-allow-xorigin-params + wapp-content-security-policy off + if {[wapp-param-exists env]} { + search_header + wapp-trim { +

    Environment

    +
    %html([wapp-debug-env])
    + } + return + } + sqlite3 db2 [file dir [wapp-param SCRIPT_FILENAME]]/searchlog.db + set where "" + set res "" + + set ipfilter "" + set ipaddr [wapp-param ip] + if {$ipaddr!=""} { + set where {WHERE ip = $ipaddr} + set ipfilter $ipaddr + } + + set checked "" + set isUnique [expr {[wapp-param unique 0]+0}] + if {$isUnique} { + set checked "checked" + } + + set limit [wapp-param limit 10] + set s10 "" + set s100 "" + set s1000 "" + if {$limit==10} {set s10 selected} + if {$limit==100} {set s100 selected} + if {$limit==1000} {set s1000 selected} + + search_header + set self [wapp-param PATH_HEAD] + wapp-trim { +
    +
    +
    + Results: + IP: + Unique: + + +
    +
    +
    +
    IP Query Results Timestamp + } + + set i 0 + db2 eval " + SELECT rowid, ip, query, nres, timestamp FROM log $where + ORDER BY rowid DESC + " { + + if {$isUnique} { + if {[info exists seen($query)]} continue + set seen($query) 1 + } + + wapp-trim { +
    %html($rowid) + %html($ip) + %html($query) + %html($nres)%html($timestamp) + } + incr i + if {$i >= $limit} break + } + wapp-subst { element, or -# c) A "." character. -# -# 3. 8 characters have been parsed and a
    tag or "." character is -# encountered -# -proc docparse_callback {tag details} { - global P - set tag [string tolower $tag] - switch -glob -- $tag { - "" { - append P(text) " $details" - if {$P(isTitle)} { append P(title) $details } - if {[llength $P(fragments)]} { - append P(ftext) " $details" - } - } - - "title" { set P(isTitle) 1 } - "/title" { set P(isTitle) 0 } - - "a" { - array set D $details - if {[info exists D(name)]} { - if {[llength $P(fragments)]} { - lappend P(fragments) $P(ftitle) $P(ftext) - } - lappend P(fragments) $D(name) - set P(ftext) "" - set P(ftitle) "" - catch { unset P(ftitleclose) } - } - } - "h*" { - array set D $details - if {[info exists D(id)]} { - if {[llength $P(fragments)]} { - lappend P(fragments) $P(ftitle) $P(ftext) - } - lappend P(fragments) $D(id) - set P(ftext) "" - set P(ftitle) "" - } - } - - div { - array set D $details - if {[info exists D(class)] && $D(class) == "startsearch"} { - set P(text) "" - } - } - } - - set ftext [string trim $P(ftext) " \v\n"] - if {[string length $ftext]>4 && $P(ftitle) == ""} { - set blocktags [list \ - br td /td th /th p /p \ - h1 h2 h3 h4 h5 h /h1 /h2 /h3 /h4 /h5 /h - ] - if {[lsearch $blocktags $tag]>=0} { - set P(ftitle) $ftext - set P(ftext) "" - } elseif {[string length $ftext]>80} { - set idx [string last " " [string range $ftext 0 79]] - if {$idx<0} { set idx 80 } - set P(ftitle) [string range $ftext 0 [expr $idx-1]] - set P(ftext) [string range $ftext $idx end] - } - } -} - -proc findlinks_callback {tag details} { - global P - set doc $P(doc) - - set tag [string tolower $tag] - switch -glob -- $tag { - a { - array set D $details - if {[info exists D(href)]} { - if { [string range $D(href) 0 0]=="#" } { - set url "${doc}$D(href)" - } else { - set url "$D(href)" - } - - set P(url) $url - set P(link) "" - } - } - /a { - if {$P(url)!=""} { - db eval { UPDATE pagedata SET links = links || ' ' || $P(link) WHERE url=$P(url) } - } - set P(url) "" - set P(link) "" - } - - "" { - append P(link) " $details" - } - } -} - -proc trim {a} { - set L [split $a] - return [lsort -uniq $L] -} - -#========================================================================= -# Build the database. -# -proc rebuild_database {} { - - db transaction { - db eval { - DROP TABLE IF EXISTS pagedata; - CREATE TABLE pagedata( - url TEXT PRIMARY KEY, -- Relative URL for this document - links, -- Text of all links to this URI - title, -- Document or fragment title - content -- Document or fragment content - ); - } - - # Scan the file-system for HTML documents. Add each document found to - # the page and pagedata tables. - foreach file [document_list] { - set zHtml [readfile $file] - - array unset ::P - set ::P(text) "" ;# The full document text - set ::P(isTitle) 0 ;# True while parsing contents of - set ::P(fragments) [list] ;# List of document fragments parsed - set ::P(ftext) "" ;# Text of current document fragment - - parsehtml $zHtml docparse_callback - if {[info exists ::P(ftitle)]} { - lappend ::P(fragments) $::P(ftitle) $::P(ftext) - } - - set keyword "" - catch { set keyword $::K($file) } - if {![info exists ::P(title)]} {set ::P(title) "No Title"} - db eval { REPLACE INTO pagedata VALUES($file, '', $::P(title), $::P(text)) } - - foreach {name title text} $::P(fragments) { - set url "$file#$name" - puts $url - db eval { REPLACE INTO pagedata VALUES($url, '', $title, $text) } - } - } - - foreach file [document_list] { - set zHtml [readfile $file] - - array unset ::P - set ::P(url) "" - set ::P(doc) $file - parsehtml $zHtml findlinks_callback - } - - db func trim trim - #db eval { UPDATE pagedata SET links = trim(links) } - db eval { CREATE INDEX ft ON pagedata USING fts5() } - } -} - -sqlite4 db search4.db -rebuild_database - ADDED search/mkscript.tcl Index: search/mkscript.tcl ================================================================== --- /dev/null +++ search/mkscript.tcl @@ -0,0 +1,54 @@ +#!/usr/bin/tclsh +# +# Use this script to build the TCL scripts that implement the search +# functions on the SQLite website. +# +# Usage example: +# +# tclsh mkscript.tcl search.tcl.in >search.tcl +# +# The input to this script is a template script file, named something +# like "search.tcl.in". This script reads the template line-by-line and +# applies some minor transformations: +# +# INCLUDE filename Lines match this pattern are replaced +# by the complete text of filename. This +# is used (for example) to insert the +# complete text of wapp.tcl in the appropriate +# place CGI scripts +# +# DOCHEADER title path Lines matching this pattern invoke the +# document_header function contained in the +# ../document_header.tcl file to generate +# header text for the document, and then +# insert that header text in place of the +# line +# +# Other than these transformations, the input is copied through into +# the output. +# +if {[llength $argv]!=1} { + puts stderr "Usage: $argv0 TEMPLATE >OUTPUT" + exit 1 +} +set infile [lindex $argv 0] +set ROOT [file dir [file dir [file normalize $argv0]]] +set HOME [file dir [file normalize $infile]] +set in [open $infile rb] +while {1} { + set line [gets $in] + if {[eof $in]} break + if {[regexp {^INCLUDE (.*)} $line all path]} { + set in2 [open $HOME/$path rb] + puts [read $in2] + close $in2 + continue + } + if {[regexp {^DOCHEAD } $line] && [llength $line]==3} { + source $ROOT/document_header.tcl + puts [document_header [lindex $line 1] [lindex $line 2]] + continue + } + puts $line +} +close $in DELETED search/search.tcl Index: search/search.tcl ================================================================== --- search/search.tcl +++ /dev/null @@ -1,499 +0,0 @@ -#!/usr/bin/tclsh.docsrc - -source [file dirname [info script]]/document_header.tcl - -# Decode an HTTP %-encoded string -# -proc percent_decode {str} { - # rewrite "+" back to space - # protect \ and [ and ] by quoting with '\' - set str [string map [list + { } "\\" "\\\\" \[ \\\[ \] \\\]] $str] - - # prepare to process all %-escapes - regsub -all -- {%([A-Fa-f][A-Fa-f0-9])%([A-Fa-f89][A-Fa-f0-9])} \ - $str {[encoding convertfrom utf-8 [binary decode hex \1\2]]} str - regsub -all -- {%([0-7][A-Fa-f0-9])} $str {\\u00\1} str - - # process %-escapes - return [subst -novar $str] -} - -#========================================================================= -# This proc is called to parse the arguments passed to this invocation of -# the CGI program (via either the GET or POST method). It returns a -# key/value list containing the arguments suitable for passing to [array -# set]. For example, if the CGI is invoked via a GET request on the URI: -# -# http://www.sqlite.org/search?query=fts3+table&results=10 -# -# then the returned list value is: -# -# {query {fts3 table} results 10} -# -proc cgi_parse_args {} { - global env A - - if {$env(REQUEST_METHOD) == "GET"} { - foreach q [split $env(QUERY_STRING) &] { - if {[regexp {([a-z0-9]*)=(.*)} $q all var value]} { - set A($var) [percent_decode $value] - } - } - } elseif {$env(REQUEST_METHOD) == "POST"} { - set qstring [read stdin $env(CONTENT_LENGTH)] - foreach q [split $qstring &] { - if {[regexp {([a-z0-9]*)=(.*)} $q all var value]} { - set A($var) [percent_decode $value] - } - } - } else { - error "Unrecognized method: $env(REQUEST_METHOD)" - } -} - - -#========================================================================= -# The argument contains a key value list. The values in the list are -# transformed to an HTTP query key value list. For example: -# -# % cgi_encode_args {s "search string" t "search \"type\""} -# s=search+string&t=search+%22type%22 -# -proc cgi_encode_args {list} { - set reslist [list] - foreach {key value} $list { - set value [string map { - \x20 + \x21 %21 \x2A %2A \x22 %22 \x27 %27 \x28 %28 \x29 %29 \x3B %3B - \x3A %3A \x40 %40 \x26 %26 \x3D %3D \x2B %2B \x24 %24 \x2C %2C \x2F %2F - \x3F %3F \x25 %25 \x23 %23 \x5B %5B \x5D %5D - } $value] - - lappend reslist "$key=$value" - } - join $reslist & -} - -proc htmlize {str} { string map {< < > >} $str } -proc attrize {str} { string map {< < > > \x22 "} $str } - -#========================================================================= - -proc cgi_env_dump {} { - - set ret "<h1>Arguments</h1><table>" - foreach {key value} [array get ::A] { - append ret "<tr><td>[htmlize $key]<td>[htmlize $value]" - } - append ret "</table>" - - append ret "<h1>Environment</h1><table>" - foreach {key value} [array get ::env] { - append ret "<tr><td>[htmlize $key]<td>[htmlize $value]" - } - append ret "</table>" - return $ret -} - -#------------------------------------------------------------------------- -# Add an entry to the log database for the current query. Which -# returns $nRes results. -# -proc search_add_log_entry {nRes} { - if {[info exists ::A(donotlog)]} return - - sqlite3 db2 search.d/searchlog.db - db2 timeout 10000 - - set ip $::env(REMOTE_ADDR) - set query $::A(q) - - db2 eval { - PRAGMA synchronous=OFF; - PRAGMA journal_mode=OFF; - BEGIN; - CREATE TABLE IF NOT EXISTS log( - ip, -- IP query was made from - query, -- Fts5 query string - nres, -- Number of results - timestamp DEFAULT CURRENT_TIMESTAMP - ); - INSERT INTO log(ip, query, nres) VALUES($ip, $query, $nRes); - COMMIT; - } - - db2 close -} - -proc sqlize {text} { - return "'[string map [list ' ''] $text]'" -} - -proc admin_list {} { - sqlite3 db2 searchlog.db - - set where "" - set res "" - - set ipfilter "" - if {[info exists ::A(ip)] && $::A(ip)!=""} { - set where "WHERE ip = [sqlize $::A(ip)]" - set ipfilter $::A(ip) - } - - set checked "" - if {[info exists ::A(unique)] && $::A(unique)} { - set checked "checked" - } - - set limit 10 - if {[info exists ::A(limit)]} { - set limit $::A(limit) - } - set s10 "" - set s100 "" - set s1000 "" - if {$limit==10} {set s10 selected} - if {$limit==100} {set s100 selected} - if {$limit==1000} {set s1000 selected} - - append res " - <div style=\"margin:2em\"> - <center> - <form action=admin method=get> - Results: <select name=limit onChange=\"this.form.submit()\"> - <option $s10 value=\"10\">10</option> - <option $s100 value=\"100\">100</option> - <option $s1000 value=\"1000\">1000</option> - </select> - IP: <input type=input name=ip value=\"[attrize $ipfilter]\"> - Unique: <input - type=checkbox name=unique value=1 - $checked - onChange=\"this.form.submit()\" - > - <input type=submit> - </form> - </center> - </div> - " - - set i 0 - append res "<table border=1 cellpadding=10 align=center>\n" - append res "<tr><td><th>IP <th>Query <th> Results <th> Timestamp\n" - db2 eval " - SELECT rowid, ip, query, nres, timestamp FROM log $where - ORDER BY rowid DESC - " { - - if {[info exists ::A(unique)] && $::A(unique)} { - if {[info exists seen($query)]} continue - set seen($query) 1 - } - - set querylink "<a href=\"../search?q=[attrize $query]&donotlog=1\">$query</a>" - set iplink "<a href=\"?admin=1&ip=$ip\">$ip</a>" - - append res " <tr> <td> $rowid <td> $iplink <td> $querylink" - append res " <td> $nres <td> $timestamp\n" - - incr i - if {$i >= $limit} break - } - append res "</table>\n" - - return $res -} - - -#------------------------------------------------------------------------- -# This command is similar to the builtin Tcl [time] command, except that -# it only ever runs the supplied script once. Also, instead of returning -# a string like "xxx microseconds per iteration", it returns "x.yy ms" or -# "x.yy s", depending on the magnitude of the time spent running the -# command. For example: -# -# % ttime {after 1500} -# 1.50 s -# % ttime {after 45} -# 45.02 ms -# -proc ttime {script} { - set t [lindex [time [list uplevel $script]] 0] - if {$t>1000000} { return [format "%.2f s" [expr {$t/1000000.0}]] } - return [format "%.2f ms" [expr {$t/1000.0}]] -} - -proc searchchanges {} { - global A - if {![info exists A(q)]} return "" - - set open {<span style="background-color:#d9f2e6">} - set close {</span>} - set query { - SELECT url, version, idx, highlight(change, 3, $open, $close) AS text - FROM change($A(q)) ORDER BY rowid ASC - } - - set ret [subst { - <p>Change log entries mentioning: <b>[htmlize $::A(q)]</b> - <table border=0> - }] - - set s2 "style=\"margin-top:0\"" - set s1 "style=\"font-size:larger; text-align:left\" class=nounderline" - set prev "" - db eval $query { - if {$prev!=$version} { - append ret [subst { - <tr> <td $s1 valign=top> <a href=$url>$version</a> <td> <ul $s2> - }] - set prev $version - } - append ret [subst { <li value=$idx> ($idx) $text }] - } - - append ret "</table>" - append ret "<center><p>You can also see the <a href=changes.html>entire" - append ret " changelog as a single page</a> if you wish.</center>" - - return $ret -} - -proc searchresults {} { - if {![info exists ::A(q)]} return "" - #set ::A(q) [string map {' ''} $A(q)] - #regsub -all {[^-/"A-Za-z0-9]} $::A(q) { } ::A(q) - - # Count the '"' characters in $::A(q). If there is an odd number of - # occurences, add a " to the end of the query so that fts5 can parse - # it without error. - if {[regexp -all \x22 $::A(q)] % 2} { append ::A(q) \x22 } - - # Set iStart to the index of the first result to display. Results are - # indexed starting at zero from most to least relevant. - # - set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}] - - # Grab a list of rowid results. - # - set q { - SELECT rowid FROM page WHERE page MATCH $::A(q) - ORDER BY srank(page) DESC, - rank * COALESCE( - (SELECT percent FROM weight WHERE id=page.rowid), 100 - ); - } - if {[catch { set lRowid [db eval $q] }]} { - set x "" - foreach word [split $::A(q) " "] { - append x " \"[string map [list "\"" "\"\""] $word]\"" - } - set ::A(q) [string trim $x] - set lRowid [db eval $q] - } - - set lRes [list] - foreach rowid $lRowid { - if {$rowid > 1000} { - set parent [expr $rowid / 1000] - lappend subsections($parent) $rowid - } else { - lappend lRes $rowid - } - } - - set nRes [llength $lRes] - set lRes [lrange $lRes $iStart [expr $iStart+9]] - - # Add an entry to the log database. - # - search_add_log_entry $nRes - - # If there are no results, return a message to that effect. - # - if {[llength $lRes] == 0} { - return [subst { No results for: <b>[htmlize $::A(q)]</b> }] - } - - # HTML markup used to highlight keywords within FTS5 generated snippets. - # - set open {<span style="background-color:#d9f2e6">} - set close {</span>} - set ellipsis {<b> ... </b>} - - # Grab the required data - # - db eval [string map [list %LIST% [join $lRowid ,]] { - SELECT - rowid AS parentid, - snippet(page, 0, $open, $close, $ellipsis, 6) AS s_apis, - snippet(page, 2, $open, $close, '', 40) AS s_title1, - snippet(page, 3, $open, $close, $ellipsis, 40) AS s_title2, - snippet(page, 4, $open, $close, $ellipsis, 40) AS s_content, - url, rank - FROM page($::A(q)) - WHERE rowid IN (%LIST%) - }] X { - foreach k [array names X] { set data($X(parentid),$k) [set X($k)] } - } - - set ret [subst { - <table border=0> - <p>Search results - [expr $iStart+1]..[expr {($nRes < $iStart+10) ? $nRes : $iStart+10}] - of $nRes for: <b>[htmlize $::A(q)]</b> - }] - - foreach rowid $lRes { - - foreach a {parentid s_apis s_title1 s_content url rank} { - set $a $data($rowid,$a) - } - - if {[info exists subsections($parentid)]} { - set childid [lindex $subsections($parentid) 0] - set link $data($childid,url) - set hdr $data($childid,s_title2) - - if {$hdr==""} { - set s_content "" - } else { - set s_content [subst { - <b><a style=color:#044a64 href=$link>$hdr</a></b> - }] - } - - append s_content " $data($childid,s_content)" - } - - append ret [subst -nocommands {<tr> - <td valign=top style="line-height:150%"> - <div style="white-space:wrap;font-size:larger" class=nounderline> - <a href="$url">$s_title1 </a> - <div style="float:right;font-size:smaller;color:#BBB">($url)</div> - </div> - <div style="margin-left: 10ex; font:larger monospace">$s_apis</div> - <div style="margin-left: 4ex; margin-bottom:1.5em"> - $s_content - </div> - </td> - }] - } - append ret { </table> } - - - # If the query returned more than 10 results, add up to 10 links to - # each set of 10 results (first link to results 1-10, second to 11-20, - # third to 21-30, as required). - # - if {$nRes>10} { - set s(0) {border:solid #044a64 1px;padding:1ex;margin:1ex;line-height:300%;} - set s(1) "$s(0);background:#044a64;color:white" - append ret <center><p> - for {set i 0} {$i < 10 && ($i*10)<$nRes} {incr i} { - append ret [subst { - <a style="$s([expr {($iStart/10)==$i}])" - href="search?[cgi_encode_args [list q $::A(q) s $::A(s) i $i]]">[expr $i+1]</a> - }] - } - append ret </center> - } - - return $ret -} - -proc main {} { - global A - cgi_parse_args - - # If "env=1" is specified, dump the environment variables instead - # of running any search. - if {[info exists ::A(env)]} { return [cgi_env_dump] } - - # If "admin=1" is specified, jump to the admin screen. - if {[string match *admin* $::env(REQUEST_URI)]} { - set ::PATH ../ - return [admin_list] - } - - sqlite3 db search.d/search.db - - set cmd searchresults - if {[info exists A(s)] && $A(s)=="c"} { - set cmd searchchanges - } - - db transaction { - set t [ttime { - if {[catch $cmd srchout]} { - set A(q) [string tolower $A(q)] - set srchout [$cmd] - } - set doc $srchout - }] - } - append doc "<center>" - append doc "<p>Page generated by <a href='fts5.html'>FTS5</a> in about $t." - append doc "</center>" - return $doc - - # return [cgi_env_dump] -} - -#========================================================================= - -source [file dirname [info script]]/document_header.tcl - -if {![info exists env(REQUEST_METHOD)]} { - set env(REQUEST_METHOD) GET - set env(QUERY_STRING) rebuild=1 - set ::HEADER "" - - #set env(QUERY_STRING) {q="one+two+three+four"+eleven} - set env(QUERY_STRING) {q=windows} - set ::HEADER "" -} - -set ::PATH "" -if {0==[catch main res]} { - set title "Search SQLite Documentation" - if {[info exists ::A(q)]} { - set initsearch [attrize $::A(q)] - append title " - [htmlize $::A(q)]" - } else { - set initsearch {} - } - set document [document_header $title $::PATH $initsearch] - append document [subst { - <script> - window.addEventListener('load', function() { - var w = document.getElementById("searchmenu"); - w.style.display = "block"; - - document.getElementById("searchtype").value = "$::A(s)" - - setTimeout(function(){ - var s = document.getElementById("searchbox"); - s.focus(); - s.select(); - }, 30); - }); - </script> - }] - append document $res -} else { - set document "<pre>" - append document "Error: $res\n\n" - append document $::errorInfo - append document "</pre>" -} - -puts "Content-type: text/html" -puts "Content-Length: [string length $document]" -puts "" -puts $document -puts "" -flush stdout -close stdout - -exit ADDED search/search.tcl.in Index: search/search.tcl.in ================================================================== --- /dev/null +++ search/search.tcl.in @@ -0,0 +1,309 @@ +#!/usr/bin/tclsh.docsrc +#### Import of wapp.tcl +INCLUDE wapp.tcl +#### End of wapp.tcl + +# Generate all header content for the output document +# +proc search_header {} { + wapp-trim { +DOCHEAD {Search SQLite Documentation} {} + } +} + +#------------------------------------------------------------------------- +# Add an entry to the log database for the current query. Which +# returns $nRes results. +# +proc search_add_log_entry {nRes} { + if {[wapp-param-exists donotlog]} return + sqlite3 db2 [file dir [wapp-param SCRIPT_FILENAME]]/search.d/searchlog.db + db2 timeout 10000 + set ip [wapp-param REMOTE_ADDR] + set query [wapp-param q] + db2 eval { + PRAGMA synchronous=OFF; + PRAGMA journal_mode=OFF; + BEGIN; + CREATE TABLE IF NOT EXISTS log( + ip, -- IP query was made from + query, -- Fts5 query string + nres, -- Number of results + timestamp DEFAULT CURRENT_TIMESTAMP + ); + INSERT INTO log(ip, query, nres) VALUES($ip, $query, $nRes); + COMMIT; + } + db2 close +} + +#------------------------------------------------------------------------- +# This command is similar to the builtin Tcl [time] command, except that +# it only ever runs the supplied script once. Also, instead of returning +# a string like "xxx microseconds per iteration", it returns "x.yy ms" or +# "x.yy s", depending on the magnitude of the time spent running the +# command. For example: +# +# % ttime {after 1500} +# 1.50 s +# % ttime {after 45} +# 45.02 ms +# +proc ttime {script} { + set t [lindex [time [list uplevel $script]] 0] + if {$t>1000000} { return [format "%.2f s" [expr {$t/1000000.0}]] } + return [format "%.2f ms" [expr {$t/1000.0}]] +} + +#----------------------------------------------------------------------- +# Do a search of the change log +# +proc searchchanges {} { + set q [wapp-param q] + if {$q==""} {return {}} + set open {<span style="background-color:#d9f2e6">} + set close {</span>} + set query { + SELECT url, version, idx, highlight(change, 3, $open, $close) AS text + FROM change($q) ORDER BY rowid ASC + } + wapp-trim { + <p>Change log entries mentioning: <b>%html($q)</b> + <table border=0> + } + set s2 "style=\"margin-top:0\"" + set s1 "style=\"font-size:larger; text-align:left\" class=nounderline" + set prev "" + db eval $query { + if {$prev!=$version} { + wapp-trim { + <tr> <td %unsafe($s1) valign=top> <a href='%url($url)'>%html($version)</a> + <td> <ul %unsafe($s2)> + } + set prev $version + } + wapp-subst {<li value=%html($idx)> (%html($idx)) %unsafe($text)\n} + } + wapp-trim { + </table> + <center><p>You can also see the <a href=changes.html>entire + changelog as a single page</a> if you wish.</center> + } +} + +#----------------------------------------------------------------------- +# Do a search over all documentation other than the change log +# +proc searchresults {} { + set q [wapp-param q] + if {$q==""} {return ""} + + # Count the '"' characters in $::A(q). If there is an odd number of + # occurences, add a " to the end of the query so that fts5 can parse + # it without error. + if {[regexp -all \x22 $q] % 2} { append q \x22 } + + # Set iStart to the index of the first result to display. Results are + # indexed starting at zero from most to least relevant. + # + set iStart 0 + catch {set iStart [expr {[wapp-param i 0]*10}]} + + # Grab a list of rowid results. + # + set sql { + SELECT rowid FROM page WHERE page MATCH $q + ORDER BY srank(page) DESC, + rank * COALESCE( + (SELECT percent FROM weight WHERE id=page.rowid), 100 + ); + } + if {[catch { set lRowid [db eval $sql] }]} { + set x "" + foreach word [split $q " "] { + append x " \"[string map [list "\"" "\"\""] $word]\"" + } + set q [string trim $x] + set lRowid [db eval $sql] + } + + set lRes [list] + foreach rowid $lRowid { + if {$rowid > 1000} { + set parent [expr $rowid / 1000] + lappend subsections($parent) $rowid + } else { + lappend lRes $rowid + } + } + + set nRes [llength $lRes] + set lRes [lrange $lRes $iStart [expr $iStart+9]] + + # Add an entry to the log database. + # + search_add_log_entry $nRes + + # If there are no results, return a message to that effect. + # + if {[llength $lRes] == 0} { + wapp-subst {<p>No Results for: <b>%html($q)</b>\n} + } + + # HTML markup used to highlight keywords within FTS5 generated snippets. + # + set open {<span style="background-color:#d9f2e6">} + set close {</span>} + set ellipsis {<b> ... </b>} + + # Grab the required data + # + db eval [string map [list %LIST% [join $lRowid ,]] { + SELECT + rowid AS parentid, + snippet(page, 0, $open, $close, $ellipsis, 6) AS s_apis, + snippet(page, 2, $open, $close, '', 40) AS s_title1, + snippet(page, 3, $open, $close, $ellipsis, 40) AS s_title2, + snippet(page, 4, $open, $close, $ellipsis, 40) AS s_content, + url, rank + FROM page($q) + WHERE rowid IN (%LIST%) + }] X { + foreach k [array names X] { set data($X(parentid),$k) [set X($k)] } + } + + set i1 [expr {$iStart+1}] + set i2 [expr {($nRes < $iStart+10) ? $nRes : $iStart+10}] + wapp-trim { + <table border=0> + <p>Search results %html($i1)..%html($i2) of %html($nRes) for: <b>%html($q)</b> + } + + foreach rowid $lRes { + + foreach a {parentid s_apis s_title1 s_content url rank} { + set $a $data($rowid,$a) + } + + if {[info exists subsections($parentid)]} { + set childid [lindex $subsections($parentid) 0] + set link $data($childid,url) + set hdr $data($childid,s_title2) + + if {$hdr==""} { + set s_content "" + } else { + set s_content [subst { + <b><a style=color:#044a64 href=$link>$hdr</a></b> + }] + } + + append s_content " $data($childid,s_content)" + } + + wapp-trim {<tr> + <td valign=top style="line-height:150%"> + <div style="white-space:wrap;font-size:larger" class=nounderline> + <a href="%url($url)">%unsafe($s_title1)</a> + <div style="float:right;font-size:smaller;color:#BBB">(%url($url))</div> + </div> + <div style="margin-left: 10ex; font:larger monospace">%unsafe($s_apis)</div> + <div style="margin-left: 4ex; margin-bottom:1.5em"> + %unsafe($s_content) + </div> + </td> + } + } + wapp-subst {</table>\n} + + + # If the query returned more than 10 results, add up to 10 links to + # each set of 10 results (first link to results 1-10, second to 11-20, + # third to 21-30, as required). + # + if {$nRes>10} { + set s(0) {border:solid #044a64 1px;padding:1ex;margin:1ex;line-height:300%;} + set s(1) "$s(0);background:#044a64;color:white" + wapp-subst {<center><p>\n} + for {set i 0} {$i < 10 && ($i*10)<$nRes} {incr i} { + set style $s([expr {($iStart/10)==$i}]) + wapp-trim { + <a style="%html($style)" + href="search?q=%qp($q)&i=%qp($i)">%html([expr $i+1])</a> + } + } + wapp-subst {</center>\n} + } +} + +# This is the main entry point into the search result page generator +# +proc wapp-default {} { + wapp-content-security-policy {default-src 'self' 'unsafe-inline'} + wapp-allow-xorigin-params + if {[wapp-param-exists env]} { + search_header + wapp-trim { + <h1>Environment Dump For Debugging</h1> + <pre>%html([wapp-debug-env])</pre> + } + return + } + + # When running using the built-in webserver in Wapp (in other words, + # when not running as CGI) any filename that contains a "." loads + # directly from the filesystem. + if {[string match *//127.0.0.1:* [wapp-param BASE_URL]] + && [string match *.* [wapp-param PATH_INFO]] + } { + set altfile [file dir [wapp-param SCRIPT_FILENAME]][wapp-param PATH_INFO] + set fd [open $altfile rb] + fconfigure $fd -translation binary + wapp-unsafe [read $fd] + close $fd + switch -glob -- $altfile { + *.html { + wapp-mimetype text/html + } + *.css { + wapp-mimetype text/css + } + *.gif { + wapp-mimetype image/gif + } + } + return + } + + search_header + sqlite3 db [file dir [wapp-param SCRIPT_FILENAME]]/search.d/search.db + set searchType [wapp-param s d] + if {$searchType=="c"} { + set cmd searchchanges + } else { + set cmd searchresults + } + db transaction { + set t [ttime {$cmd}] + } + wapp-trim { + <center> + <p>Page generated by <a href='fts5.html'>FTS5</a> in about %html($t). + </center> + <script> + window.addEventListener('load', function() { + var w = document.getElementById("searchmenu"); + w.style.display = "block"; + w = document.getElementById("searchtype"); + w.value = "%string($searchType)" + setTimeout(function(){ + var s = document.getElementById("searchbox"); + s.value = "%string([wapp-param q])" + s.focus(); + s.select(); + }, 30); + }); + </script> + } +} +wapp-start $argv DELETED search/search4.tcl Index: search/search4.tcl ================================================================== --- search/search4.tcl +++ /dev/null @@ -1,464 +0,0 @@ -#!/usr/bin/tclsqlite4 - -#========================================================================= -# Decode an HTTP %-encoded string -# -proc percent_decode {str} { - # rewrite "+" back to space - # protect \ and [ and ] by quoting with '\' - set str [string map [list + { } "\\" "\\\\" \[ \\\[ \] \\\]] $str] - - # prepare to process all %-escapes - regsub -all -- {%([A-Fa-f][A-Fa-f0-9])%([A-Fa-f89][A-Fa-f0-9])} \ - $str {[encoding convertfrom utf-8 [binary decode hex \1\2]]} str - regsub -all -- {%([0-7][A-Fa-f0-9])} $str {\\u00\1} str - - # process %-escapes - return [subst -novar $str] -} - -#========================================================================= -# This proc is called to parse the arguments passed to this invocation of -# the CGI program (via either the GET or POST method). It returns a -# key/value list containing the arguments suitable for passing to [array -# set]. For example, if the CGI is invoked via a GET request on the URI: -# -# http://www.sqlite.org/search?query=fts3+table&results=10 -# -# then the returned list value is: -# -# {query {fts3 table} results 10} -# -proc cgi_parse_args {} { - global env A - - if {$env(REQUEST_METHOD) == "GET"} { - foreach q [split $env(QUERY_STRING) &] { - if {[regexp {([a-z0-9]*)=(.*)} $q all var value]} { - set A($var) [percent_decode $value] - } - } - } elseif {$env(REQUEST_METHOD) == "POST"} { - set qstring [read stdin $env(CONTENT_LENGTH)] - foreach q [split $qstring &] { - if {[regexp {([a-z0-9]*)=(.*)} $q all var value]} { - set A($var) [percent_decode $value] - } - } - } else { - error "Unrecognized method: $env(REQUEST_METHOD)" - } -} - - -#========================================================================= -# Redirect the web-browser to URL $url. This command does not return. -# -proc cgi_redirect {url} { - set server $::env(SERVER_NAME) - set path [file dirname $::env(REQUEST_URI)] - if {[string range $path end end]!="/"} { - append path / - } - - puts "Status: 302 Redirect" - puts "Location: http://${server}${path}${url}" - puts "Content-Length: 0" - puts "" - exit -} - -#========================================================================= -# The argument contains a key value list. The values in the list are -# transformed to an HTTP query key value list. For example: -# -# % cgi_encode_args {s "search string" t "search \"type\""} -# s=search+string&t=search+%22type%22 -# -proc cgi_encode_args {list} { - set reslist [list] - foreach {key value} $list { - set value [string map { - \x20 + \x21 %21 \x2A %2A \x22 %22 \x27 %27 \x28 %28 \x29 %29 \x3B %3B - \x3A %3A \x40 %40 \x26 %26 \x3D %3D \x2B %2B \x24 %24 \x2C %2C \x2F %2F - \x3F %3F \x25 %25 \x23 %23 \x5B %5B \x5D %5D - } $value] - - lappend reslist "$key=$value" - } - join $reslist & -} - -proc htmlize {str} { string map {< < > >} $str } -proc attrize {str} { string map {< < > > \x22 \x5c\x22} $str } - -#========================================================================= - -proc cgi_env_dump {} { - - set ret "<h1>Arguments</h1><table>" - foreach {key value} [array get ::A] { - append ret "<tr><td>[htmlize $key]<td>[htmlize $value]" - } - append ret "</table>" - - append ret "<h1>Environment</h1><table>" - foreach {key value} [array get ::env] { - append ret "<tr><td>[htmlize $key]<td>[htmlize $value]" - } - append ret "</table>" - return $ret -} - -proc searchform {} { - return {} - set initial "Enter search term:" - catch { set initial $::A(q) } - return [subst { - <table style="margin: 1em auto"> <tr><td>Search SQLite docs for:<td> - <form name=f method=GET action=search4> - <input name=q type=text width=35 value="[attrize $initial]"></input> - <input name=s type=submit value="Search"></input> - <input name=s type=submit value="Lucky"></input> - </form> - </table> - <script> - document.forms.f.q.focus() - document.forms.f.q.select() - </script> - }] -} - -proc footer {} { - return { - <hr> - <table align=right> - <td> - <i>Powered by <a href="http://www.sqlite.org/src4">FTS5</a>.</i> - </table> - } -} - - -#------------------------------------------------------------------------- -# This command is similar to the builtin Tcl [time] command, except that -# it only ever runs the supplied script once. Also, instead of returning -# a string like "xxx microseconds per iteration", it returns "x.yy ms" or -# "x.yy s", depending on the magnitude of the time spent running the -# command. For example: -# -# % ttime {after 1500} -# 1.50 s -# % ttime {after 45} -# 45.02 ms -# -proc ttime {script} { - set t [lindex [time [list uplevel $script]] 0] - if {$t>1000000} { return [format "%.2f s" [expr {$t/1000000.0}]] } - return [format "%.2f ms" [expr {$t/1000.0}]] -} - -proc rank {matchinfo args} { - binary scan $matchinfo i* I - - set nPhrase [lindex $I 0] - set nCol [lindex $I 1] - - set G [lrange $I 2 [expr {1+$nCol*$nPhrase}]] - set L [lrange $I [expr {2+$nCol*$nPhrase}] end] - - foreach a $args { lappend log [expr {log10(100+$a)}] } - - set score 0.0 - set i 0 - foreach l $L g $G { - if {$l > 0} { - set div [lindex $log [expr $i%3]] - set score [expr {$score + (double($l) / double($g)) / $div}] - } - incr i - } - - return $score -} -proc erank {matchinfo args} { - eval rank [list $matchinfo] $args -} - - -proc searchresults {} { - if {![info exists ::A(q)]} return "" - #set ::A(q) [string map {' ''} $A(q)] - #regsub -all {[^-/"A-Za-z0-9]} $::A(q) { } ::A(q) - - # Count the '"' characters in $::A(q). If there is an odd number of - # occurences, add a " to the end of the query so that fts3 can parse - # it without error. - if {[regexp -all \x22 $::A(q)] % 2} { append ::A(q) \x22 } - - set ::TITLE "Results for: \"[htmlize $::A(q)]\"" - - #db func rank rank - #db func erank erank - - set score 0 - catch {set score $::A(score)} - - # Set nRes to the total number of documents that the users query matches. - # If nRes is 0, then the users query returned zero results. Return a short - # message to that effect. - # - set nRes [db one { SELECT count(*) FROM pagedata WHERE pagedata MATCH $::A(q) }] - if {$nRes == 0} { - return [subst { No results for: <b>[htmlize $::A(q)]</b> }] - } - - # Set iStart to the index of the first result to display. Results are - # indexed starting at zero from most to least relevant. - # - set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}] - - # HTML markup used to highlight keywords within FTS3 generated snippets. - # - set open {<span style="font-weight:bold; color:navy">} - set close {</span>} - set ellipsis {<b> ... </b>} - - set ret [subst { - <table border=0> - <p>Search results - [expr $iStart+1]..[expr {($nRes < $iStart+10) ? $nRes : $iStart+10}] - of $nRes for: <b>[htmlize $::A(q)]</b> - }] - - set open {<span style="font-weight:bold; color:navy">} - set close {</span>} - set ellipsis {<b> ... </b>} - - if {0==[info exists ::A(e)]} { - set sqlquery { - SELECT url, title, - snippet(pagedata, $open, $close, $ellipsis, 3, 40) AS snippet, - '' AS report - FROM pagedata WHERE pagedata MATCH $::A(q) - ORDER BY rankc(pagedata, 1.0, 5.0, 10.0, 1.0) DESC - LIMIT 10 OFFSET $iStart - } - } else { - set sqlquery { - SELECT url, title, - snippet(pagedata, $open, $close, $ellipsis, 3, 40) AS snippet, - erankc(pagedata, 1.0, 5.0, 10.0, 1.0) AS report - FROM pagedata WHERE pagedata MATCH $::A(q) - ORDER BY rankc(pagedata, 1.0, 5.0, 10.0, 1.0) DESC - LIMIT 10 OFFSET $iStart - } - } - - set resnum $iStart - db eval $sqlquery { - incr resnum - - append ret [subst -nocommands {<tr> - <td valign=top>${resnum}.</td> - <td valign=top> - <div style="white-space:wrap"> - <a href="$url">$title</a> - </div> - <div style="font-size:small;margin-left: 2ex"> - <div style="width:80ex"> $snippet </div> - <div style="margin-bottom:1em"><a href="$url">$url</a></div> - </div> - </td> - - <td width=100%> - <td valign=top style="font-size:70%;white-space:nowrap;color:darkgreen"> $report </td> - }] - } - append ret { </table> } - - - # If the query returned more than 10 results, add up to 10 links to - # each set of 10 results (first link to results 1-10, second to 11-20, - # third to 21-30, as required). - # - if {$nRes>10} { - set s(0) {border: solid #044a64 1px ; padding: 1ex ; margin: 1ex} - set s(1) "$s(0);background:#044a64;color:white" - append ret <center><p> - for {set i 0} {$i < 10 && ($i*10)<$nRes} {incr i} { - append ret [subst { - <a style="$s([expr {($iStart/10)==$i}])" - href="search4?[cgi_encode_args [list q $::A(q) i $i]]">[expr $i+1]</a> - }] - } - append ret </center> - } - - return $ret -} - -proc main {} { - global A - sqlite4 db search4.db - cgi_parse_args - - db transaction { - set t [ttime { set doc "[searchform] [searchresults] [footer]" }] - } - append doc "<p>Page generated in $t." - return $doc - - # return [cgi_env_dump] -} - -#========================================================================= - -set ::HEADER { - <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> - <html><head> - <title>$TITLE - - - - - -
    - - - -
    -
    Small. Fast. Reliable.
    Choose any three.
    - - - - -} - -if {![info exists env(REQUEST_METHOD)]} { - set env(REQUEST_METHOD) GET - set env(QUERY_STRING) rebuild=1 - set ::HEADER "" - - set env(QUERY_STRING) {q=cache+size} - set ::HEADER "" -} - - -set TITLE "Search SQLite Documentation (fts5)" - -if {0==[catch main res]} { - if {[info exists ::A(q)]} { - set ::INITSEARCH \"[attrize $::A(q)]\" - } else { - set ::INITSEARCH \"\" - } - set document [subst -nocommands $::HEADER] - append document $res -} else { - set document "
    "
    -  append document "Error: $res\n\n"
    -  append document $::errorInfo
    -  append document "
    " -} - -puts "Content-type: text/html" -puts "Content-Length: [string length $document]" -puts "" -puts $document -puts "" -flush stdout -close stdout - -exit ADDED search/wapp.tcl Index: search/wapp.tcl ================================================================== --- /dev/null +++ search/wapp.tcl @@ -0,0 +1,924 @@ +# Copyright (c) 2017 D. Richard Hipp +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the Simplified BSD License (also +# known as the "2-Clause License" or "FreeBSD License".) +# +# This program is distributed in the hope that it will be useful, +# but without any warranty; without even the implied warranty of +# merchantability or fitness for a particular purpose. +# +#--------------------------------------------------------------------------- +# +# Design rules: +# +# (1) All identifiers in the global namespace begin with "wapp" +# +# (2) Indentifiers intended for internal use only begin with "wappInt" +# + +# Add text to the end of the HTTP reply. No interpretation or transformation +# of the text is performs. The argument should be enclosed within {...} +# +proc wapp {txt} { + global wapp + dict append wapp .reply $txt +} + +# Add text to the page under construction. Do no escaping on the text. +# +# Though "unsafe" in general, there are uses for this kind of thing. +# For example, if you want to return the complete, unmodified content of +# a file: +# +# set fd [open content.html rb] +# wapp-unsafe [read $fd] +# close $fd +# +# You could do the same thing using ordinary "wapp" instead of "wapp-unsafe". +# The difference is that wapp-safety-check will complain about the misuse +# of "wapp", but it assumes that the person who write "wapp-unsafe" understands +# the risks. +# +# Though occasionally necessary, the use of this interface should be minimized. +# +proc wapp-unsafe {txt} { + global wapp + dict append wapp .reply $txt +} + +# Add text to the end of the reply under construction. The following +# substitutions are made: +# +# %html(...) Escape text for inclusion in HTML +# %url(...) Escape text for use as a URL +# %qp(...) Escape text for use as a URI query parameter +# %string(...) Escape text for use within a JSON string +# %unsafe(...) No transformations of the text +# +# The substitutions above terminate at the first ")" character. If the +# text of the TCL string in ... contains ")" characters itself, use instead: +# +# %html%(...)% +# %url%(...)% +# %qp%(...)% +# %string%(...)% +# %unsafe%(...)% +# +# In other words, use "%(...)%" instead of "(...)" to include the TCL string +# to substitute. +# +# The %unsafe substitution should be avoided whenever possible, obviously. +# In addition to the substitutions above, the text also does backslash +# escapes. +# +proc wapp-subst {txt} { + global wapp + regsub -all {%(html|url|qp|string|unsafe){1,1}?(|%)\((.+)\)\2} $txt \ + {[wappInt-enc-\1 "\3"]} txt + dict append wapp .reply [uplevel 1 [list subst -novariables $txt]] +} + +# Works like wapp-subst, but also removes whitespace from the beginning +# of lines. +# +proc wapp-trim {txt} { + global wapp + regsub -all {\n\s+} [string trim $txt] \n txt + regsub -all {%(html|url|qp|string|unsafe){1,1}?(|%)\((.+)\)\2} $txt \ + {[wappInt-enc-\1 "\3"]} txt + dict append wapp .reply [uplevel 1 [list subst -novariables $txt]] +} + +# There must be a wappInt-enc-NAME routine for each possible substitution +# in wapp-subst. Thus there are routines for "html", "url", "qp", and "unsafe". +# +# wappInt-enc-html Escape text so that it is safe to use in the +# body of an HTML document. +# +# wappInt-enc-url Escape text so that it is safe to pass as an +# argument to href= and src= attributes in HTML. +# +# wappInt-enc-qp Escape text so that it is safe to use as the +# value of a query parameter in a URL or in +# post data or in a cookie. +# +# wappInt-enc-string Escape ", ', \, and < for using inside of a +# javascript string literal. The < character +# is escaped to prevent "" from causing +# problems in embedded javascript. +# +# wappInt-enc-unsafe Perform no encoding at all. Unsafe. +# +proc wappInt-enc-html {txt} { + return [string map {& & < < > > \" " \\ \} $txt] +} +proc wappInt-enc-unsafe {txt} { + return $txt +} +proc wappInt-enc-url {s} { + if {[regsub -all {[^-{}@~?=#_.:/a-zA-Z0-9]} $s {[wappInt-%HHchar {&}]} s]} { + set s [subst -novar -noback $s] + } + if {[regsub -all {[{}]} $s {[wappInt-%HHchar \\&]} s]} { + set s [subst -novar -noback $s] + } + return $s +} +proc wappInt-enc-qp {s} { + if {[regsub -all {[^-{}_.a-zA-Z0-9]} $s {[wappInt-%HHchar {&}]} s]} { + set s [subst -novar -noback $s] + } + if {[regsub -all {[{}]} $s {[wappInt-%HHchar \\&]} s]} { + set s [subst -novar -noback $s] + } + return $s +} +proc wappInt-enc-string {s} { + return [string map {\\ \\\\ \" \\\" ' \\' < \\u003c} $s] +} + +# This is a helper routine for wappInt-enc-url and wappInt-enc-qp. It returns +# an appropriate %HH encoding for the single character c. If c is a unicode +# character, then this routine might return multiple bytes: %HH%HH%HH +# +proc wappInt-%HHchar {c} { + if {$c==" "} {return +} + return [regsub -all .. [binary encode hex [encoding convertto utf-8 $c]] {%&}] +} + + +# Undo the www-url-encoded format. +# +# HT: This code stolen from ncgi.tcl +# +proc wappInt-decode-url {str} { + set str [string map [list + { } "\\" "\\\\" \[ \\\[ \] \\\]] $str] + regsub -all -- \ + {%([Ee][A-Fa-f0-9])%([89ABab][A-Fa-f0-9])%([89ABab][A-Fa-f0-9])} \ + $str {[encoding convertfrom utf-8 [binary decode hex \1\2\3]]} str + regsub -all -- \ + {%([CDcd][A-Fa-f0-9])%([89ABab][A-Fa-f0-9])} \ + $str {[encoding convertfrom utf-8 [binary decode hex \1\2]]} str + regsub -all -- {%([0-7][A-Fa-f0-9])} $str {\\u00\1} str + return [subst -novar $str] +} + +# Reset the document back to an empty string. +# +proc wapp-reset {} { + global wapp + dict set wapp .reply {} +} + +# Change the mime-type of the result document. +# +proc wapp-mimetype {x} { + global wapp + dict set wapp .mimetype $x +} + +# Change the reply code. +# +proc wapp-reply-code {x} { + global wapp + dict set wapp .reply-code $x +} + +# Set a cookie +# +proc wapp-set-cookie {name value} { + global wapp + dict lappend wapp .new-cookies $name $value +} + +# Unset a cookie +# +proc wapp-clear-cookie {name} { + wapp-set-cookie $name {} +} + +# Add extra entries to the reply header +# +proc wapp-reply-extra {name value} { + global wapp + dict lappend wapp .reply-extra $name $value +} + +# Specifies how the web-page under construction should be cached. +# The argument should be one of: +# +# no-cache +# max-age=N (for some integer number of seconds, N) +# private,max-age=N +# +proc wapp-cache-control {x} { + wapp-reply-extra Cache-Control $x +} + +# Redirect to a different web page +# +proc wapp-redirect {uri} { + wapp-reply-code {307 Redirect} + wapp-reply-extra Location $uri +} + +# Return the value of a wapp parameter +# +proc wapp-param {name {dflt {}}} { + global wapp + if {![dict exists $wapp $name]} {return $dflt} + return [dict get $wapp $name] +} + +# Return true if a and only if the wapp parameter $name exists +# +proc wapp-param-exists {name} { + global wapp + return [dict exists $wapp $name] +} + +# Set the value of a wapp parameter +# +proc wapp-set-param {name value} { + global wapp + dict set wapp $name $value +} + +# Return all parameter names that match the GLOB pattern, or all +# names if the GLOB pattern is omitted. +# +proc wapp-param-list {{glob {*}}} { + global wapp + return [dict keys $wapp $glob] +} + +# By default, Wapp does not decode query parameters and POST parameters +# for cross-origin requests. This is a security restriction, designed to +# help prevent cross-site request forgery (CSRF) attacks. +# +# As a consequence of this restriction, URLs for sites generated by Wapp +# that contain query parameters will not work as URLs found in other +# websites. You cannot create a link from a second website into a Wapp +# website if the link contains query planner, by default. +# +# Of course, it is sometimes desirable to allow query parameters on external +# links. For URLs for which this is safe, the application should invoke +# wapp-allow-xorigin-params. This procedure tells Wapp that it is safe to +# go ahead and decode the query parameters even for cross-site requests. +# +# In other words, for Wapp security is the default setting. Individual pages +# need to actively disable the cross-site request security if those pages +# are safe for cross-site access. +# +proc wapp-allow-xorigin-params {} { + global wapp + if {![dict exists $wapp .qp] && ![dict get $wapp SAME_ORIGIN]} { + wappInt-decode-query-params + } +} + +# Set the content-security-policy. +# +# The default content-security-policy is very strict: "default-src 'self'" +# The default policy prohibits the use of in-line javascript or CSS. +# +# Provide an alternative CSP as the argument. Or use "off" to disable +# the CSP completely. +# +proc wapp-content-security-policy {val} { + global wapp + if {$val=="off"} { + dict unset wapp .csp + } else { + dict set wapp .csp $val + } +} + +# Examine the bodys of all procedures in this program looking for +# unsafe calls to various Wapp interfaces. Return a text string +# containing warnings. Return an empty string if all is ok. +# +# This routine is advisory only. It misses some constructs that are +# dangerous and flags others that are safe. +# +proc wapp-safety-check {} { + set res {} + foreach p [info procs] { + set ln 0 + foreach x [split [info body $p] \n] { + incr ln + if {[regexp {^[ \t]*wapp[ \t]+([^\n]+)} $x all tail] + && [string index $tail 0]!="\173" + && [regexp {[[$]} $tail] + } { + append res "$p:$ln: unsafe \"wapp\" call: \"[string trim $x]\"\n" + } + if {[regexp {^[ \t]*wapp-(subst|trim)[ \t]+[^\173]} $x all cx]} { + append res "$p:$ln: unsafe \"wapp-$cx\" call: \"[string trim $x]\"\n" + } + } + } + return $res +} + +# Return a string that descripts the current environment. Applications +# might find this useful for debugging. +# +proc wapp-debug-env {} { + global wapp + set out {} + foreach var [lsort [dict keys $wapp]] { + if {[string index $var 0]=="."} continue + append out "$var = [list [dict get $wapp $var]]\n" + } + append out "\[pwd\] = [list [pwd]]\n" + return $out +} + +# Tracing function for each HTTP request. This is overridden by wapp-start +# if tracing is enabled. +# +proc wappInt-trace {} {} + +# Start up a listening socket. Arrange to invoke wappInt-new-connection +# for each inbound HTTP connection. +# +# port Listen on this TCP port. 0 means to select a port +# that is not currently in use +# +# wappmode One of "scgi", "server", or "local". +# +proc wappInt-start-listener {port wappmode} { + if {$wappmode=="scgi"} { + set type SCGI + set server [list wappInt-new-connection wappInt-scgi-readable $wappmode] + } else { + set type HTTP + set server [list wappInt-new-connection wappInt-http-readable $wappmode] + } + if {$wappmode=="local"} { + set x [socket -server $server -myaddr 127.0.0.1 $port] + } else { + set x [socket -server $server $port] + } + set coninfo [chan configure $x -sockname] + set port [lindex $coninfo 2] + if {$wappmode=="local"} { + wappInt-start-browser http://127.0.0.1:$port/ + } else { + puts "Listening for $type requests on TCP port $port" + } +} + +# Start a web-browser and point it at $URL +# +proc wappInt-start-browser {url} { + global tcl_platform + if {$tcl_platform(platform)=="windows"} { + exec cmd /c start $url & + } elseif {$tcl_platform(os)=="Darwin"} { + exec open $url & + } elseif {[catch {exec xdg-open $url}]} { + exec firefox $url & + } +} + +# This routine is a "socket -server" callback. The $chan, $ip, and $port +# arguments are added by the socket command. +# +# Arrange to invoke $callback when content is available on the new socket. +# The $callback will process inbound HTTP or SCGI content. +# +proc wappInt-new-connection {callback wappmode chan ip port} { + upvar #0 wappInt-$chan W + set W [dict create REMOTE_ADDR $ip REMOTE_PORT $port WAPP_MODE $wappmode \ + .header {}] + fconfigure $chan -blocking 0 -translation binary + fileevent $chan readable [list $callback $chan] +} + +# Close an input channel +# +proc wappInt-close-channel {chan} { + if {$chan=="stdout"} { + # This happens after completing a CGI request + exit 0 + } else { + unset ::wappInt-$chan + close $chan + } +} + +# Process new text received on an inbound HTTP request +# +proc wappInt-http-readable {chan} { + if {[catch [list wappInt-http-readable-unsafe $chan] msg]} { + puts stderr "$msg\n$::errorInfo" + wappInt-close-channel $chan + } +} +proc wappInt-http-readable-unsafe {chan} { + upvar #0 wappInt-$chan W wapp wapp + if {![dict exists $W .toread]} { + # If the .toread key is not set, that means we are still reading + # the header + set line [string trimright [gets $chan]] + set n [string length $line] + if {$n>0} { + if {[dict get $W .header]=="" || [regexp {^\s+} $line]} { + dict append W .header $line + } else { + dict append W .header \n$line + } + if {[string length [dict get $W .header]]>100000} { + error "HTTP request header too big - possible DOS attack" + } + } elseif {$n==0} { + # We have reached the blank line that terminates the header. + global argv0 + set a0 [file normalize $argv0] + dict set W SCRIPT_FILENAME $a0 + dict set W DOCUMENT_ROOT [file dir $a0] + if {[wappInt-parse-header $chan]} { + catch {close $chan} + return + } + set len 0 + if {[dict exists $W CONTENT_LENGTH]} { + set len [dict get $W CONTENT_LENGTH] + } + if {$len>0} { + # Still need to read the query content + dict set W .toread $len + } else { + # There is no query content, so handle the request immediately + set wapp $W + wappInt-handle-request $chan 0 + } + } + } else { + # If .toread is set, that means we are reading the query content. + # Continue reading until .toread reaches zero. + set got [read $chan [dict get $W .toread]] + dict append W CONTENT $got + dict set W .toread [expr {[dict get $W .toread]-[string length $got]}] + if {[dict get $W .toread]<=0} { + # Handle the request as soon as all the query content is received + set wapp $W + wappInt-handle-request $chan 0 + } + } +} + +# Decode the HTTP request header. +# +# This routine is always running inside of a [catch], so if +# any problems arise, simply raise an error. +# +proc wappInt-parse-header {chan} { + upvar #0 wappInt-$chan W + set hdr [split [dict get $W .header] \n] + if {$hdr==""} {return 1} + set req [lindex $hdr 0] + dict set W REQUEST_METHOD [set method [lindex $req 0]] + if {[lsearch {GET HEAD POST} $method]<0} { + error "unsupported request method: \"[dict get $W REQUEST_METHOD]\"" + } + set uri [lindex $req 1] + set split_uri [split $uri ?] + set uri0 [lindex $split_uri 0] + if {![regexp {^/[-.a-z0-9_/]*$} $uri0]} { + error "invalid request uri: \"$uri0\"" + } + dict set W REQUEST_URI $uri0 + dict set W PATH_INFO $uri0 + set uri1 [lindex $split_uri 1] + dict set W QUERY_STRING $uri1 + set n [llength $hdr] + for {set i 1} {$i<$n} {incr i} { + set x [lindex $hdr $i] + if {![regexp {^(.+): +(.*)$} $x all name value]} { + error "invalid header line: \"$x\"" + } + set name [string toupper $name] + switch -- $name { + REFERER {set name HTTP_REFERER} + USER-AGENT {set name HTTP_USER_AGENT} + CONTENT-LENGTH {set name CONTENT_LENGTH} + CONTENT-TYPE {set name CONTENT_TYPE} + HOST {set name HTTP_HOST} + COOKIE {set name HTTP_COOKIE} + ACCEPT-ENCODING {set name HTTP_ACCEPT_ENCODING} + default {set name .hdr:$name} + } + dict set W $name $value + } + return 0 +} + +# Decode the QUERY_STRING parameters from a GET request or the +# application/x-www-form-urlencoded CONTENT from a POST request. +# +# This routine sets the ".qp" element of the ::wapp dict as a signal +# that query parameters have already been decoded. +# +proc wappInt-decode-query-params {} { + global wapp + dict set wapp .qp 1 + if {[dict exists $wapp QUERY_STRING]} { + foreach qterm [split [dict get $wapp QUERY_STRING] &] { + set qsplit [split $qterm =] + set nm [lindex $qsplit 0] + if {[regexp {^[a-z][a-z0-9]*$} $nm]} { + dict set wapp $nm [wappInt-decode-url [lindex $qsplit 1]] + } + } + } + if {[dict exists $wapp CONTENT_TYPE] && [dict exists $wapp CONTENT]} { + set ctype [dict get $wapp CONTENT_TYPE] + if {$ctype=="application/x-www-form-urlencoded"} { + foreach qterm [split [string trim [dict get $wapp CONTENT]] &] { + set qsplit [split $qterm =] + set nm [lindex $qsplit 0] + if {[regexp {^[a-z][-a-z0-9_]*$} $nm]} { + dict set wapp $nm [wappInt-decode-url [lindex $qsplit 1]] + } + } + } elseif {[string match multipart/form-data* $ctype]} { + regexp {^(.*?)\r\n(.*)$} [dict get $wapp CONTENT] all divider body + set ndiv [string length $divider] + while {[string length $body]} { + set idx [string first $divider $body] + set unit [string range $body 0 [expr {$idx-3}]] + set body [string range $body [expr {$idx+$ndiv+2}] end] + if {[regexp {^Content-Disposition: form-data; (.*?)\r\n\r\n(.*)$} \ + $unit unit hdr content] && + [regexp {name="(.*)"; filename="(.*)"\r\nContent-Type: (.*?)$}\ + $hdr hr name filename mimetype]} { + dict set wapp $name.filename \ + [string map [list \\\" \" \\\\ \\] $filename] + dict set wapp $name.mimetype $mimetype + dict set wapp $name.content $content + } + } + } + } +} + +# Invoke application-supplied methods to generate a reply to +# a single HTTP request. +# +# This routine always runs within [catch], so handle exceptions by +# invoking [error]. +# +proc wappInt-handle-request {chan useCgi} { + global wapp + dict set wapp .reply {} + dict set wapp .mimetype {text/html; charset=utf-8} + dict set wapp .reply-code {200 Ok} + dict set wapp .csp {default-src 'self'} + + # Set up additional CGI environment values + # + if {![dict exists $wapp HTTP_HOST]} { + dict set wapp BASE_URL {} + } elseif {[dict exists $wapp HTTPS]} { + dict set wapp BASE_URL https://[dict get $wapp HTTP_HOST] + } else { + dict set wapp BASE_URL http://[dict get $wapp HTTP_HOST] + } + if {![dict exists $wapp REQUEST_URI]} { + dict set wapp REQUEST_URI / + } elseif {[regsub {\?.*} [dict get $wapp REQUEST_URI] {} newR]} { + # Some servers (ex: nginx) append the query parameters to REQUEST_URI. + # These need to be stripped off + dict set wapp REQUEST_URI $newR + } + if {[dict exists $wapp SCRIPT_NAME]} { + dict append wapp BASE_URL [dict get $wapp SCRIPT_NAME] + } else { + dict set wapp SCRIPT_NAME {} + } + if {![dict exists $wapp PATH_INFO]} { + # If PATH_INFO is missing (ex: nginx) the construct it + set URI [dict get $wapp REQUEST_URI] + set skip [string length [dict get $wapp SCRIPT_NAME]] + dict set wapp PATH_INFO [string range $URI $skip end] + } + if {[regexp {^/([^/]+)(.*)$} [dict get $wapp PATH_INFO] all head tail]} { + dict set wapp PATH_HEAD $head + dict set wapp PATH_TAIL [string trimleft $tail /] + } else { + dict set wapp PATH_INFO {} + dict set wapp PATH_HEAD {} + dict set wapp PATH_TAIL {} + } + dict set wapp SELF_URL [dict get $wapp BASE_URL]/[dict get $wapp PATH_HEAD] + + # Parse query parameters from the query string, the cookies, and + # POST data + # + if {[dict exists $wapp HTTP_COOKIE]} { + foreach qterm [split [dict get $wapp HTTP_COOKIE] {;}] { + set qsplit [split [string trim $qterm] =] + set nm [lindex $qsplit 0] + if {[regexp {^[a-z][-a-z0-9_]*$} $nm]} { + dict set wapp $nm [wappInt-decode-url [lindex $qsplit 1]] + } + } + } + set same_origin 0 + if {[dict exists $wapp HTTP_REFERER]} { + set referer [dict get $wapp HTTP_REFERER] + set base [dict get $wapp BASE_URL] + if {$referer==$base || [string match $base/* $referer]} { + set same_origin 1 + } + } + dict set wapp SAME_ORIGIN $same_origin + if {$same_origin} { + wappInt-decode-query-params + } + + # Invoke the application-defined handler procedure for this page + # request. If an error occurs while running that procedure, generate + # an HTTP reply that contains the error message. + # + wapp-before-dispatch-hook + wappInt-trace + set mname [dict get $wapp PATH_HEAD] + if {[catch { + if {$mname!="" && [llength [info proc wapp-page-$mname]]>0} { + wapp-page-$mname + } else { + wapp-default + } + } msg]} { + if {[wapp-param WAPP_MODE]=="local" || [wapp-param WAPP_MODE]=="server"} { + puts "ERROR: $::errorInfo" + } + wapp-reset + wapp-reply-code "500 Internal Server Error" + wapp-mimetype text/html + wapp-trim { +

    Wapp Application Error

    +
    %html($::errorInfo)
    + } + dict unset wapp .new-cookies + } + + # Transmit the HTTP reply + # + if {$chan=="stdout"} { + puts $chan "Status: [dict get $wapp .reply-code]\r" + } else { + puts $chan "HTTP/1.1 [dict get $wapp .reply-code]\r" + puts $chan "Server: wapp\r" + puts $chan "Connection: close\r" + } + if {[dict exists $wapp .reply-extra]} { + foreach {name value} [dict get $wapp .reply-extra] { + puts $chan "$name: $value\r" + } + } + if {[dict exists $wapp .csp]} { + puts $chan "Content-Security-Policy: [dict get $wapp .csp]\r" + } + set mimetype [dict get $wapp .mimetype] + puts $chan "Content-Type: $mimetype\r" + if {[dict exists $wapp .new-cookies]} { + foreach {nm val} [dict get $wapp .new-cookies] { + if {[regexp {^[a-z][-a-z0-9_]*$} $nm]} { + if {$val==""} { + puts $chan "Set-Cookie: $nm=; HttpOnly; Path=/; Max-Age=1\r" + } else { + set val [wappInt-enc-url $val] + puts $chan "Set-Cookie: $nm=$val; HttpOnly; Path=/\r" + } + } + } + } + if {[string match text/* $mimetype]} { + set reply [encoding convertto utf-8 [dict get $wapp .reply]] + if {[regexp {\ygzip\y} [wapp-param HTTP_ACCEPT_ENCODING]]} { + catch { + set x [zlib gzip $reply] + set reply $x + puts $chan "Content-Encoding: gzip\r" + } + } + } else { + set reply [dict get $wapp .reply] + } + puts $chan "Content-Length: [string length $reply]\r" + puts $chan \r + puts $chan $reply + flush $chan + wappInt-close-channel $chan +} + +# This routine runs just prior to request-handler dispatch. The +# default implementation is a no-op, but applications can override +# to do additional transformations or checks. +# +proc wapp-before-dispatch-hook {} {return} + +# Process a single CGI request +# +proc wappInt-handle-cgi-request {} { + global wapp env + foreach key { + CONTENT_LENGTH + CONTENT_TYPE + DOCUMENT_ROOT + HTTP_ACCEPT_ENCODING + HTTP_COOKIE + HTTP_HOST + HTTP_REFERER + HTTP_USER_AGENT + HTTPS + PATH_INFO + QUERY_STRING + REMOTE_ADDR + REQUEST_METHOD + REQUEST_URI + REMOTE_USER + SCRIPT_FILENAME + SCRIPT_NAME + SERVER_NAME + SERVER_PORT + SERVER_PROTOCOL + } { + if {[info exists env($key)]} { + dict set wapp $key $env($key) + } + } + set len 0 + if {[dict exists $wapp CONTENT_LENGTH]} { + set len [dict get $wapp CONTENT_LENGTH] + } + if {$len>0} { + fconfigure stdin -translation binary + dict set wapp CONTENT [read stdin $len] + } + dict set wapp WAPP_MODE cgi + fconfigure stdout -translation binary + wappInt-handle-request stdout 1 +} + +# Process new text received on an inbound SCGI request +# +proc wappInt-scgi-readable {chan} { + if {[catch [list wappInt-scgi-readable-unsafe $chan] msg]} { + puts stderr "$msg\n$::errorInfo" + wappInt-close-channel $chan + } +} +proc wappInt-scgi-readable-unsafe {chan} { + upvar #0 wappInt-$chan W wapp wapp + if {![dict exists $W .toread]} { + # If the .toread key is not set, that means we are still reading + # the header. + # + # An SGI header is short. This implementation assumes the entire + # header is available all at once. + # + set req [read $chan 15] + set n [string length $req] + scan $req %d:%s len hdr + incr len [string length "$len:,"] + append hdr [read $chan [expr {$len-15}]] + foreach {nm val} [split $hdr \000] { + if {$nm==","} break + dict set W $nm $val + } + set len 0 + if {[dict exists $W CONTENT_LENGTH]} { + set len [dict get $W CONTENT_LENGTH] + } + if {$len>0} { + # Still need to read the query content + dict set W .toread $len + } else { + # There is no query content, so handle the request immediately + set wapp $W + wappInt-handle-request $chan 0 + } + } else { + # If .toread is set, that means we are reading the query content. + # Continue reading until .toread reaches zero. + set got [read $chan [dict get $W .toread]] + dict append W CONTENT $got + dict set W .toread [expr {[dict get $W .toread]-[string length $got]}] + if {[dict get $W .toread]<=0} { + # Handle the request as soon as all the query content is received + set wapp $W + wappInt-handle-request $chan 0 + } + } +} + +# Start up the wapp framework. Parameters are a list passed as the +# single argument. +# +# -server $PORT Listen for HTTP requests on this TCP port $PORT +# +# -local $PORT Listen for HTTP requests on 127.0.0.1:$PORT +# +# -scgi $PORT Listen for SCGI requests on TCP port $PORT +# +# -cgi Handle a single CGI request +# +# With no arguments, the behavior is called "auto". In "auto" mode, +# if the GATEWAY_INTERFACE environment variable indicates CGI, then run +# as CGI. Otherwise, start an HTTP server bound to the loopback address +# only, on an arbitrary TCP port, and automatically launch a web browser +# on that TCP port. +# +# Additional options: +# +# -trace "puts" each request URL as it is handled, for +# debugging +# +# -lint Run wapp-safety-check on the application instead +# of running the application itself +# +# -Dvar=value Set TCL global variable "var" to "value" +# +# +proc wapp-start {arglist} { + global env + set mode auto + set port 0 + set n [llength $arglist] + for {set i 0} {$i<$n} {incr i} { + set term [lindex $arglist $i] + if {[string match --* $term]} {set term [string range $term 1 end]} + switch -glob -- $term { + -server { + incr i; + set mode "server" + set port [lindex $arglist $i] + } + -local { + incr i; + set mode "local" + set port [lindex $arglist $i] + } + -scgi { + incr i; + set mode "scgi" + set port [lindex $arglist $i] + } + -cgi { + set mode "cgi" + } + -trace { + proc wappInt-trace {} { + set q [wapp-param QUERY_STRING] + set uri [wapp-param BASE_URL][wapp-param PATH_INFO] + if {$q!=""} {append uri ?$q} + puts $uri + } + } + -lint { + set res [wapp-safety-check] + if {$res!=""} { + puts "Potential problems in this code:" + puts $res + exit 1 + } else { + exit + } + } + -D*=* { + if {[regexp {^.D([^=]+)=(.*)$} $term all var val]} { + set ::$var $val + } + } + default { + error "unknown option: $term" + } + } + } + if {($mode=="auto" + && [info exists env(GATEWAY_INTERFACE)] + && [string match CGI/1.* $env(GATEWAY_INTERFACE)]) + || $mode=="cgi" + } { + wappInt-handle-cgi-request + return + } + if {$mode=="scgi"} { + wappInt-start-listener $port scgi + } elseif {$mode=="server"} { + wappInt-start-listener $port server + } else { + wappInt-start-listener $port local + } + vwait ::forever +} + +# Call this version 1.0 +package provide wapp 1.0