Documentation Source Text

Artifact Content
Login

Artifact c2ac8f495a20b28f304252d8e86a9ee6e924ee5c:


#!/usr/bin/tclsqlite3.fts3

#=========================================================================
# This proc is called to parse the arguments passed to this invocation of
# the CGI program (via either the GET or POST method). It returns a
# key/value list containing the arguments suitable for passing to [array
# set]. For example, if the CGI is invoked via a GET request on the URI:
#
#   http://www.sqlite.org/search?query=fts3+table&results=10
#
# then the returned list value is: 
#
#   {query {fts3 table} results 10}
#
proc cgi_parse_args {} {
  global env

  set charmap {
    + { } %20 { } %21 ! %22 \x22 %23 # %24 {$} %25 % %26 & %27 ' %28 ( 
    %29 ) %2A * %2B + %2C , %2D - %2E . %2F / %30 0 %31 1 %32 2 %33 3 
    %34 4 %35 5 %36 6 %37 7 %38 8 %39 9 %3A : %3B {;} %3C < %3D = %3E > 
    %3F ? %40 @ %41 A %42 B %43 C %44 D %45 E %46 F %47 G %48 H %49 I 
    %4A J %4B K %4C L %4D M %4E N %4F O %50 P %51 Q %52 R %53 S %54 T 
    %55 U %56 V %57 W %58 X %59 Y %5A Z %5B {[} %5C \\ %5D \] %5E ^ 
    %5F _ %60 ` %61 a %62 b %63 c %64 d %65 e %66 f %67 g %68 h %69 i 
    %6A j %6B k %6C l %6D m %6E n %6F o %70 p %71 q %72 r %73 s %74 t 
    %75 u %76 v %77 w %78 x %79 y %7A z %7B \{ %7C | %7D \} %7E ~
  }

  if {$env(REQUEST_METHOD) == "GET"} {
    foreach q [split $::env(QUERY_STRING) &] {
      if {0==[regexp {([a-z]*)=(.*)} $q -> var value]} {
        error "Failed to parse: $q"
      }

      set A($var) [string map $charmap $value]
    }
  } elseif {$env(REQUEST_METHOD) == "POST"} {
    # TODO!
  } else {
    error "Unrecognized method: $env(REQUEST_METHOD)"
  }

  array get A
}


#=========================================================================
# Redirect the web-browser to URL $url. This command does not return.
#
proc cgi_redirect {url} {
  set server $::env(SERVER_NAME)
  set path [file dirname $::env(REQUEST_URI)]
  if {[string range $path end end]!="/"} {
    append path /
  }

  puts "Status: 302 Redirect"
  puts "Location: http://${server}${path}${url}"
  puts "Content-Length: 0"
  puts ""
  exit
}

#=========================================================================
# The argument contains a key value list. The values in the list are
# transformed to an HTTP query key value list. For example:
#
#   % cgi_encode_args {s "search string" t "search \"type\""}
#   s=search+string&t=search+%22type%22
#
proc cgi_encode_args {list} {
  set reslist [list]
  foreach {key value} $list {
    set value [string map {
      \x20 +   \x21 %21 \x2A %2A \x22 %22 \x27 %27 \x28 %28 \x29 %29 \x3B %3B 
      \x3A %3A \x40 %40 \x26 %26 \x3D %3D \x2B %2B \x24 %24 \x2C %2C \x2F %2F 
      \x3F %3F \x25 %25 \x23 %23 \x5B %5B \x5D %5D
    } $value]

    lappend reslist "$key=$value"
  }
  join $reslist &
}

proc htmlize {str} { string map {< &lt; > &gt;} $str }
proc attrize {str} { string map {< &lt; > &gt; \x22 &quot;} $str }

#=========================================================================

proc cgi_env_dump {} {

  set ret "<h1>Arguments</h1><table>"
  foreach {key value} [array get ::A] {
    append ret "<tr><td>[htmlize $key]<td>[htmlize $value]"
  }
  append ret "</table>"

  append ret "<h1>Environment</h1><table>"
  foreach {key value} [array get ::env] {
    append ret "<tr><td>[htmlize $key]<td>[htmlize $value]"
  }
  append ret "</table>"
  return $ret
}

proc searchform {} {
  set initial "Enter search term:"
  catch { set initial $::A(q) }
  return [subst {
    <table style="margin: 1em auto"> <tr><td>Search SQLite docs for:<td>
      <form name=f method=GET action=search>
        <input name=q type=text width=35 value="[attrize $initial]"></input>
        <input name=s type=submit value="Search"></input>
        <input name=s type=submit value="Lucky"></input>
      </form>
    </table>
    <script> 
      document.forms.f.q.focus()
      document.forms.f.q.select()
    </script>
  }]
}

proc rebuildform {} {
  return {
    <hr>
    <table align=right>
    <td><form name=r method=GET action=search>
      <input name=rebuild type=submit value="Rebuild Db"></input>
    </form>
    </table>
  }
}


#-------------------------------------------------------------------------
# This command is similar to the builtin Tcl [time] command, except that
# it only ever runs the supplied script once. Also, instead of returning
# a string like "xxx microseconds per iteration", it returns "x.yy ms" or
# "x.yy s", depending on the magnitude of the time spent running the 
# command. For example:
#
#   % ttime {after 1500}
#   1.50 s
#   % ttime {after 45}
#   45.02 ms
#
proc ttime {script} {
  set t [lindex [time [list uplevel $script]] 0]
  if {$t>1000000} { return [format "%.2f s" [expr {$t/1000000.0}]] }
  return [format "%.2f ms" [expr {$t/1000.0}]]
}


proc searchresults {} {
  if {![info exists ::A(q)]} return ""

  set ::TITLE "Results for: \"[htmlize $::A(q)]\""

  # If the user has clicked the "Lucky" button and the query returns one or
  # more results, redirect the browser to the highest ranked result. If the
  # query returns zero results, fall through and display the "No results"
  # page as if the user had clicked "Search".
  #
  if {[info exists ::A(s)] && $::A(s) == "Lucky"} {
    set url [db one {
      SELECT url FROM page, pagedata 
      WHERE page MATCH $::A(q) AND page.docid = pagedata.docid
      ORDER BY rank(matchinfo(page), nk, nt, nc) DESC
    }]
    if {$url != ""} { cgi_redirect $url }
  }

  # Set nRes to the total number of documents that the users query matches.
  # If nRes is 0, then the users query returned zero results. Return a short 
  # message to that effect.
  #
  set nRes [db one { SELECT count(*) FROM page WHERE page MATCH $::A(q) }]
  if {$nRes == 0} {
    return [subst { No results for: <b>[htmlize $::A(q)]</b> }]
  }

  # Set iStart to the index of the first result to display. Results are
  # indexed starting at zero from most to least relevant.
  #
  set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}]

  # HTML markup used to highlight keywords within FTS3 generated snippets.
  #
  set open {<span style="font-weight:bold; color:navy">}
  set close {</span>}
  set ellipsis {<b>&nbsp;...&nbsp;</b>}

  set ret [subst {
    <table border=0>
    <p>Search results 
       [expr $iStart+1]..[expr {($nRes < $iStart+10) ? $nRes : $iStart+10}] 
       of $nRes for: <b>[htmlize $::A(q)]</b>
  }]
  db eval {
    SELECT 
      result.rowid+$iStart AS resnum,
      COALESCE(title, 'No Title.') AS title,
      snippet(page, $open, $close, $ellipsis, 0, 10) AS keywords,
      snippet(page, $open, $close, $ellipsis, 2, 40) AS snippet,
      url,
      erank(matchinfo(page), nk, nt, nc) AS report
    FROM page, (
        SELECT 
          page.docid AS docid, url, nk, nt, nc
        FROM page, pagedata 
          WHERE page MATCH $::A(q) AND page.docid = pagedata.docid
          ORDER BY rank(matchinfo(page), nk, nt, nc)
          DESC LIMIT 10 OFFSET $iStart
      ) AS result
    WHERE page MATCH $::A(q) AND page.docid = result.docid
    ORDER BY resnum;
  } {
    append ret [subst -nocommands {<tr>
      <td valign=top>${resnum}.</td>
      <td valign=top>
        <div style="white-space:nowrap">
          <a href="$url">$title</a>
          <span style="font-size:small:margin-left:2ex">$keywords</span>
        </div>
        <div style="font-size:small;margin-left: 2ex">
          <div style="width:80ex"> $snippet </div>
          <div style="margin-bottom:1em"><a href="$url">$url</a></div>
        </div>
      </td>

      <td width=100%>
      <td valign=top style="color:grey;white-space:nowrap"> $report </td>
    }]
  }
  append ret { </table> }


  # If the query returned more than 10 results, add up to 10 links to 
  # each set of 10 results (first link to results 1-10, second to 11-20, 
  # third to 21-30, as required).
  #
  if {$nRes>10} {
    set s(0) {border: solid #80a796 1px ; padding: 1ex ; margin: 1ex}
    set s(1) "$s(0);background:#80a796;color:white"
    append ret <center><p>
    for {set i 0} {$i < 10 && ($i*10)<$nRes} {incr i} {
      append ret [subst {
        <a style="$s([expr {($iStart/10)==$i}])" 
           href="search?[cgi_encode_args [list q $::A(q) i $i]]">[expr $i+1]</a>
      }]
    }
    append ret </center>
  }

  return $ret
}

# Return a list of relative paths to documents that should be included 
# in the index.
#
proc document_list {} {
  set files [list]
  foreach f [glob *.html c3ref/*.html releaselog/*.html] {
    if {![string match *crossref* $f]} { lappend files $f }
  }
  return $files
}

proc readfile {zFile} {
  set fd [open $zFile]
  set ret [read $fd]
  close $fd
  return $ret
}

proc keywordparse_callback {tag details} {
  global K P
  switch -- [string tolower $tag] {
    "" {
      if {[info exists K(hyperlink)]} {
        append K($K(hyperlink)) $details
      }
    }
    "a" {
      array set D $details
      if {[info exists D(href)]} { set K(hyperlink) $D(href) }
    }
    "/a" {
      unset -nocomplain P(hyperlink)
    }
  }
}

#-------------------------------------------------------------------------
# This function is used as the callback when parsing ordinary documents 
# (not the keywords document).
#
# Rules for extracting fragment "titles". A fragment title consists of
# all text that follows the tag that opens the fragment until either:
#
#   1. 80 characters have been parsed, or
#   2. 8 characters have been parsed and one of the following is 
#        encountered:
#      a) A block element opening or closing tag, or
#      b) A <br> element, or
#      c) A "." character.
#
#   3. 8 characters have been parsed and a <br> tag or "." character is
#      encountered
#
proc docparse_callback {tag details} {
  global P
  set tag [string tolower $tag]
  switch -glob -- $tag {
    "" {
      append P(text) " $details"
      if {$P(isTitle)} { append P(title) $details }
      if {[llength $P(fragments)]} { 
        append P(ftext) " $details" 
      }
    }

    "/table" {
      if {[info exists P(skipheader)]} {
        set P(text) ""
        unset P(skipheader)
      }
    }

    "title"  { set P(isTitle) 1 }
    "/title" { set P(isTitle) 0 }

    "a" { 
      array set D $details
      if {[info exists D(name)]} {
        if {[llength $P(fragments)]} { 
          lappend P(fragments) $P(ftitle) $P(ftext) 
        }
        lappend P(fragments) $D(name)
        set P(ftext) ""
        set P(ftitle) ""
        catch { unset P(ftitleclose) }
      }
    }
    "h*" {
      array set D $details
      if {[info exists D(id)]} {
        if {[llength $P(fragments)]} { 
          lappend P(fragments) $P(ftitle) $P(ftext) 
        }
        lappend P(fragments) $D(id)
        set P(ftext) ""
        set P(ftitle) ""
      }
    }
  }

  set ftext [string trim $P(ftext) " \v\n"]
  if {[string length $ftext]>4 && $P(ftitle) == ""} {
    set blocktags [list                               \
      br td /td th /th p /p                           \
      h1 h2 h3 h4 h5 h /h1 /h2 /h3 /h4 /h5 /h
    ]
    if {[lsearch $blocktags $tag]>=0} {
      set P(ftitle) $ftext
      set P(ftext)  ""
      puts "setting title to \"$P(ftitle)\""
    } elseif {[string length $ftext]>80} {
      set idx [string last " " [string range $ftext 0 79]]
      if {$idx<0} { set idx 80 }
      set P(ftitle) [string range $ftext 0 [expr $idx-1]]
      set P(ftext)  [string range $ftext $idx end]
      puts "setting title to \"$P(ftitle)\""
    } 
  }
}

proc insert_entry {url keywords title content} {
  set nTitle    [tokencount $::tokenizer $title]
  set nKeywords [tokencount $::tokenizer $keywords]
  set nContent  [tokencount $::tokenizer $content]
  db eval {
    INSERT INTO page VALUES($keywords, $title, $content);
    INSERT INTO pagedata VALUES(NULL, $nKeywords, $nTitle, $nContent, $url);
  }
}

proc rebuild_database {} {
  set report [list]

  db transaction {
    # Create the database schema. If the schema already exists, then those
    # tables that contain document data are dropped and recreated by this
    # proc. The 'config' table is left untouched.
    #
    db eval {
      CREATE TABLE IF NOT EXISTS config(item TEXT, value TEXT);

      DROP TABLE IF EXISTS page;
      DROP TABLE IF EXISTS pagedata;
      CREATE VIRTUAL TABLE page USING fts3(
        keywords,                           -- Document keywords
        title,                              -- Title (or first heading)
        content,                            -- Complete document text
        tokenize porter                     -- Built-in porter tokenizer
      );
      CREATE TABLE pagedata(
        docid INTEGER PRIMARY KEY,          -- Maps to docid of page
        nk INTEGER, nt INTEGER, nc INTEGER, -- Tokens in each ft field
        url TEXT                            -- Relative path to document
      );
    }

    set zHtml [readfile keyword_index.html]
    parsehtml $zHtml keywordparse_callback

    # Scan the file-system for HTML documents. Add each document found to
    # the page and pagedata tables.
    foreach file [document_list] {
      set zHtml [readfile $file]

      array unset ::P
      set ::P(text) ""                 ;# The full document text
      set ::P(isTitle) 0               ;# True while parsing contents of <title>
      set ::P(fragments) [list]        ;# List of document fragments parsed
      set ::P(ftext) ""                ;# Text of current document fragment 

      # If the document appears to contain something that looks like the 
      # header on all website pages, set the P(skipheader) variable. This
      # tells the document parser to ignore all text (except for the document
      # title) up to the first </table> tag in the HTML.
      set tagline {<div class="tagline">Small. Fast. Reliable.}
      if {[string first $tagline $zHtml]>0} {
        set ::P(skipheader) 1
      }

      parsehtml $zHtml docparse_callback
      if {[info exists ::P(ftitle)]} {
        lappend ::P(fragments) $::P(ftitle) $::P(ftext)
      }

      set len [string length $::P(text)]
      set keyword ""
      catch { set keyword $::K($file) }
      if {![info exists ::P(title)]} {set ::P(title) "No Title"}
      insert_entry $file $keyword $::P(title) $::P(text)

      foreach {name title text} $::P(fragments) {
        set url "$file#$name"
        set keyword ""
        catch { set keyword [string trim $::K($url) " \n\v"] }
        insert_entry $url $keyword $title $text
      }
    }

    db eval { INSERT INTO page(page) VALUES('optimize') }
  }
  db eval VACUUM

  set report
}

proc main {} {
  sqlite3 db search.db

  array set ::A [cgi_parse_args]

  set ::tokenizer [db one {SELECT fts3_tokenizer('porter')}]

  # If ::A(rebuild) is set and this is a POST request, it is a command to
  # rebuild the full-text database.
  if {[info exists ::A(rebuild)] && (1 || $::env(REQUEST_METHOD) == "POST")} {
    set t [ttime {set report [rebuild_database]}]
    set msg "<p><b>Database rebuilt. ($t)</b>"
    return "[searchform] $msg $report [rebuildform]"
  }

  # If ::A(q) is set, this is a query. Search for documents that MATCH $::A(q).
  db transaction {
    set t [ttime { set doc "[searchform] [searchresults] [rebuildform]" }]
  }
  append doc "<p>Page generated in $t"
  return $doc

  return [cgi_env_dump]
}

#=========================================================================

set ::HEADER {
  <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
  "http://www.w3.org/TR/html4/strict.dtd">
  <html><head>
  <title>$TITLE</title>
  <style type="text/css">
  body {
    margin: auto;
    font-family: Verdana, sans-serif;
    padding: 8px 1%;
  }

  a { color: #45735f }
  a:visited { color: #734559 }

  .logo { position:absolute; margin:3px; }
  .tagline {
    float:right;
    text-align:right;
    font-style:italic;
    width:300px;
    margin:12px;
    margin-top:58px;
  }

  .toolbar {
    font-variant: small-caps;
    text-align: center;
    line-height: 1.6em;
    margin: 0;
    padding:1px 8px;
  }
  .toolbar a { color: white;
  text-decoration: none; padding: 6px
  12px; }
  .toolbar a:visited { color: white; }
  .toolbar a:hover { color: #80a796;
  background: white; }

  .content    { margin: 5%; }
  .content dt { font-weight:bold; }
  .content dd { margin-bottom: 25px; margin-left:20%; }
  .content ul { padding:0px; padding-left: 15px; margin:0px; }

  /* rounded corners */
  .se  { background: url(images/se.png) 100% 100% no-repeat #80a796 }
  .sw  { background: url(images/sw.png) 0% 100% no-repeat }
  .ne  { background: url(images/ne.png) 100% 0% no-repeat }
  .nw  { background: url(images/nw.png) 0% 0% no-repeat }
  </style>
  <meta http-equiv="content-type" content="text/html; charset=UTF-8">
    
  </head>
  <body>
  <div><!-- container div to satisfy validator -->

  <a href="index.html">
  <img class="logo" src="images/SQLite.gif" alt="SQLite Logo" border="0"></a>
    <div><!-- IE hack to prevent disappearing logo--></div>
    <div class="tagline">Small. Fast. Reliable.<br>Choose any three.</div>

    <table width=100% style="clear:both"><tr><td>
      <div class="se"><div class="sw"><div class="ne"><div class="nw">
        <div class="toolbar">
        <a href="about.html">About</a>
        <a href="sitemap.html">Sitemap</a>
        <a href="docs.html">Documentation</a>
        <a href="download.html">Download</a>

        <a href="copyright.html">License</a>
        <a href="news.html">News</a>
        <a href="support.html">Support</a>
        <a href="http://www.sqlite.org/search">Search</a>
        </div></div></div></div></div>
    </td></tr></table>
}

if {![info exists env(REQUEST_METHOD)]} {
  set env(REQUEST_METHOD) GET
  set env(QUERY_STRING) rebuild=1
  set ::HEADER ""

  #set env(QUERY_STRING) {q="one+two+three+four"+eleven}
  set env(QUERY_STRING) {q=windows}
  set ::HEADER ""
}


set TITLE "Search SQLite Documentation"

if {0==[catch main res]} {
  set document [subst -nocommands $::HEADER]
  append document $res
} else {
  set document "<pre>"
  append document "Error: $res\n\n"
  append document $::errorInfo
  append document "</pre>"
}

puts "Content-type: text/html" 
puts "Content-Length: [string length $document]"
puts ""
puts $document
puts ""
flush stdout
close stdout

exit