Documentation Source Text

Check-in [4dd2793f98]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Tweak the search engine's ranking algorithm slightly. Add a logging database and a way for admins to view the most recent queries.
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 4dd2793f9806ce82e559b1af9614fd241f0636cb
User & Date: dan 2016-08-30 17:05:33
Context
2016-08-31
06:35
Store the search database, search-log database and search "admin" script in a separate directory. check-in: 28aa410b46 user: dan tags: trunk
2016-08-30
17:05
Tweak the search engine's ranking algorithm slightly. Add a logging database and a way for admins to view the most recent queries. check-in: 4dd2793f98 user: dan tags: trunk
2016-08-29
20:44
Fix stray characters that managed to get into a commit of the header generator. check-in: 3cb5fe9998 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to document_header.tcl.

215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
        }
        return false;
      }
    </script>
    <td>
        <div style="padding:0 1em 0px 0;white-space:nowrap">
        <form name=f method="GET" action="${path}search">
          <input id=q name=q type=text
           onfocus="entersearch()" onblur="leavesearch()" style="width:24ex;padding:1px 1ex; border:solid white 1px; font-size:0.9em ; $initstyle;" value="$initval">
          <input type=submit value="Go" style="border:solid white 1px;background-color:#044a64;color:white;font-size:0.9em;padding:0 1ex">
        </form>
        </div>
      </table>
    </div>    <!-- matches "div class=nosearch" -->
  }]

  return $ret
}







|










215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
        }
        return false;
      }
    </script>
    <td>
        <div style="padding:0 1em 0px 0;white-space:nowrap">
        <form name=f method="GET" action="${path}search">
          <input id=q name=q type=search
           onfocus="entersearch()" onblur="leavesearch()" style="width:24ex;padding:1px 1ex; border:solid white 1px; font-size:0.9em ; $initstyle;" value="$initval">
          <input type=submit value="Go" style="border:solid white 1px;background-color:#044a64;color:white;font-size:0.9em;padding:0 1ex">
        </form>
        </div>
      </table>
    </div>    <!-- matches "div class=nosearch" -->
  }]

  return $ret
}

Changes to search/buildsearchdb.tcl.

386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
        url UNINDEXED,                      -- Indexed URL
        tokenize='stoken unicode61 tokenchars _' -- Tokenizer definition
      );

      DROP TABLE IF EXISTS weight;
      CREATE TABLE weight(id INTEGER PRIMARY KEY, percent FLOAT);

      INSERT INTO page(page, rank) VALUES('rank', 'bm25(20.0,20.0,10.0,10.0)');
    }

    foreach doc [document_list lang] {
      puts "Indexing $doc..."
      lang_document_import $doc
    }








|







386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
        url UNINDEXED,                      -- Indexed URL
        tokenize='stoken unicode61 tokenchars _' -- Tokenizer definition
      );

      DROP TABLE IF EXISTS weight;
      CREATE TABLE weight(id INTEGER PRIMARY KEY, percent FLOAT);

      INSERT INTO page(page, rank) VALUES('rank', 'bm25(10.0,10.0,20.0,20.0)');
    }

    foreach doc [document_list lang] {
      puts "Indexing $doc..."
      lang_document_import $doc
    }

Changes to search/fts5ext.c.

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
..
64
65
66
67
68
69
70

71
72
73
74

75

76
77
78
79

80
81
82
83
84
85





86
87

88
89
90
91

92
93
94
95
96
97
98
99
100
101
**     keywords,                  -- Keywords
**     title1,                    -- Document title
**     title2,                    -- Heading title, if any
**     content,                   -- Document text
**
** This function returns the following integer values:
**
**   10000 - all phrases present in (the combination of) "apis" or "keywords".
**    1000 - all phrases present in (the combination of) "apis", "keywords"
**           or either "title[12] column.
**
** It adds a bonus of 100 if either of the above and the condition 
** (xRowid()>1000 && (xRowid() % 1000)==1) is true.
**
*/
void srankFunc(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
................................................................................
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  int nPhrase;                    /* Number of phrases in query */
  int i;                          /* Used to iterate through phrases */
  int rc;                         /* Return code */
  int n1 = 0;
  int n2 = 0;

  int iScore = 0;                 /* Returned value */
  sqlite3_int64 iRowid;           /* Rowid for current row */

  iRowid = pApi->xRowid(pFts);

  if( iRowid<1000 ) return;

  nPhrase = pApi->xPhraseCount(pFts);
  for(i=0; i<nPhrase; i++){
    Fts5PhraseIter iter;
    int ic, io;

    rc = pApi->xPhraseFirst(pFts, i, &iter, &ic, &io);
    if( rc!=SQLITE_OK ){
      sqlite3_result_error(pCtx, "Error in xPhraseFirst", -1);
      return;
    }






    if( ic==0 || ic==1 ) n1++;
    if( ic==2 || ic==3 ) n2++;

  }

  if( n1==nPhrase ){ iScore = 10000; }
  else if( n1+n2==nPhrase ){ iScore = 1000; }


  if( iScore && iRowid>1000 && (iRowid % 1000)==1 ){
    iScore += 100;
  }

  sqlite3_result_int(pCtx, iScore);
}










|
|
|

|







 







>




>

>




>


|



>
>
>
>
>
|

>




>


|







44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
..
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
**     keywords,                  -- Keywords
**     title1,                    -- Document title
**     title2,                    -- Heading title, if any
**     content,                   -- Document text
**
** This function returns the following integer values:
**
**   10000 - all phrases present in "keywords".
**   1000 - all phrases present in "keywords", "title1" or "title2".
**   100 - all phrases present in "keywords", "title1" or "title2" or "apis".
**
** It adds a bonus of 10 if any of the above applies and the condition 
** (xRowid()>1000 && (xRowid() % 1000)==1) is true.
**
*/
void srankFunc(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
................................................................................
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  int nPhrase;                    /* Number of phrases in query */
  int i;                          /* Used to iterate through phrases */
  int rc;                         /* Return code */
  int n1 = 0;
  int n2 = 0;
  int n3 = 0;
  int iScore = 0;                 /* Returned value */
  sqlite3_int64 iRowid;           /* Rowid for current row */

  iRowid = pApi->xRowid(pFts);
#if 0
  if( iRowid<1000 ) return;
#endif
  nPhrase = pApi->xPhraseCount(pFts);
  for(i=0; i<nPhrase; i++){
    Fts5PhraseIter iter;
    int ic, io;

    rc = pApi->xPhraseFirst(pFts, i, &iter, &ic, &io);
    if( rc!=SQLITE_OK ){
      sqlite3_result_error(pCtx, "Error in xPhraseFirst/xPhraseNext", -1);
      return;
    }

    if( ic==0 ){
      while( ic==0 ) pApi->xPhraseNext(pFts, &iter, &ic, &io);
      if( ic<0 ) ic = 0;
    }

    if( ic==1 ) n1++;
    if( ic==2 || ic==3 ) n2++;
    if( ic==0 ) n3++;
  }

  if( n1==nPhrase ){ iScore = 10000; }
  else if( n1+n2==nPhrase ){ iScore = 1000; }
  else if( n1+n2+n3==nPhrase ){ iScore = 100; }

  if( iScore && iRowid>1000 && (iRowid % 1000)==1 ){
    iScore += 10;
  }

  sqlite3_result_int(pCtx, iScore);
}



Changes to search/search.tcl.

100
101
102
103
104
105
106















































































































107
108
109
110
111
112
113
...
164
165
166
167
168
169
170




171
172
173
174
175
176
177
...
262
263
264
265
266
267
268







269
270
271
272
273
274
275
    <table align=right>
    <td>
      <i>Powered by <a href="http://www.sqlite.org/fts5.html">FTS5</a>.</i>
    </table>
  }
}

















































































































#-------------------------------------------------------------------------
# This command is similar to the builtin Tcl [time] command, except that
# it only ever runs the supplied script once. Also, instead of returning
# a string like "xxx microseconds per iteration", it returns "x.yy ms" or
# "x.yy s", depending on the magnitude of the time spent running the 
# command. For example:
................................................................................
    } else {
      lappend lRes $rowid
    }
  }

  set nRes [llength $lRes]
  set lRes [lrange $lRes $iStart [expr $iStart+9]]





  # If there are no results, return a message to that effect.
  #
  if {[llength $lRes] == 0} {
    return [subst { No results for: <b>[htmlize $::A(q)]</b> }]
  }
  
................................................................................
  return $ret
}

proc main {} {
  global A
  sqlite3 db search.db
  cgi_parse_args








  db transaction {
    set t [ttime { 
      if {[catch searchresults srchout]} {
        set A(q) [string tolower $A(q)]
        set srchout [searchresults]
      }







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>







 







>
>
>
>
>
>
>







100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
...
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
...
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
    <table align=right>
    <td>
      <i>Powered by <a href="http://www.sqlite.org/fts5.html">FTS5</a>.</i>
    </table>
  }
}

#-------------------------------------------------------------------------
# Add an entry to the log database (searchlog.db) recording the current
# query, held in ::A(q), and the number of results it returned.
#
#   nRes - number of results the current query produced.
#
# Nothing is logged when the "donotlog" CGI parameter is present. The
# client address is taken from the REMOTE_ADDR environment variable; if
# that variable is not set (for example when the script is not run by a
# web server) an empty string is logged instead of raising an error.
#
# Any database error is propagated to the caller, but only after the
# log database handle has been closed.
#
proc search_add_log_entry {nRes} {
  if {[info exists ::A(donotlog)]} return

  set ip ""
  if {[info exists ::env(REMOTE_ADDR)]} {
    set ip $::env(REMOTE_ADDR)
  }
  set query $::A(q)

  sqlite3 db2 searchlog.db
  db2 timeout 10000

  # $ip, $query and $nRes are bound as SQL parameters by the SQLite Tcl
  # interface; they are not substituted into the SQL text.
  set rc [catch {
    db2 eval {
      PRAGMA synchronous=OFF;
      BEGIN;
        CREATE TABLE IF NOT EXISTS log(
          ip,                  -- IP query was made from
          query,               -- Fts5 query string
          nres,                -- Number of results
          timestamp DEFAULT CURRENT_TIMESTAMP
        );

        INSERT INTO log(ip, query, nres) VALUES($ip, $query, $nRes);
      COMMIT;
    }
  } msg]

  # Always release the handle, even if the statements above failed.
  db2 close

  if {$rc} { return -code error $msg }
}

# Return $text as an SQL string literal: wrapped in single quotes, with
# every embedded single quote doubled.
proc sqlize {text} {
  set escaped [string map {' ''} $text]
  return "'${escaped}'"
}

# Build and return the HTML for the admin screen: a small filter form
# followed by a table of the most recent entries in the search log
# database (searchlog.db), newest first.
#
# CGI parameters read from ::A:
#
#   ip     - if non-empty, only show queries logged for this address.
#   unique - if true, show each distinct query string only once.
#   limit  - maximum number of rows to show. Defaults to 10; a value
#            that is not a positive integer is ignored.
#
# All logged values are escaped before being written into the page,
# because the query text is supplied by arbitrary site visitors.
#
proc admin_list {} {
  set res ""

  # Optional IP filter. The value is bound as an SQL parameter: the
  # braces stop Tcl from substituting $ipfilter, so SQLite sees the
  # variable reference itself and binds it when the query runs.
  set where ""
  set ipfilter ""
  if {[info exists ::A(ip)] && $::A(ip) ne ""} {
    set ipfilter $::A(ip)
    set where {WHERE ip = $ipfilter}
  }

  # Treat a malformed "unique" value as off rather than raising an error.
  set unique 0
  if {[info exists ::A(unique)]} {
    catch { set unique [expr {$::A(unique) ? 1 : 0}] }
  }
  set checked ""
  if {$unique} { set checked "checked" }

  # Only accept a positive integer limit, so a bogus value cannot
  # disable the row cap.
  set limit 10
  if {[info exists ::A(limit)] && [string is integer -strict $::A(limit)]} {
    if {$::A(limit) > 0} { set limit $::A(limit) }
  }
  set s10 ""
  set s100 ""
  set s1000 ""
  if {$limit==10} {set s10 selected}
  if {$limit==100} {set s100 selected}
  if {$limit==1000} {set s1000 selected}

  append res "
    <div style=\"margin:2em\">
    <center>
    <form action=search method=get>
      <input type=hidden name=admin>
      Results: <select name=limit onChange=\"this.form.submit()\">
        <option $s10 value=\"10\">10</option>
        <option $s100 value=\"100\">100</option>
        <option $s1000 value=\"1000\">1000</option>
      </select>
      IP: <input type=text name=ip value=\"[attrize $ipfilter]\"> 
      Unique: <input 
        type=checkbox name=unique value=1 
        $checked
        onChange=\"this.form.submit()\"
      >
      <input type=submit>
    </form>
    </center>
    </div>
  "

  append res "<table border=1 cellpadding=10 align=center>\n"
  append res "<tr><td><th>IP <th>Query <th> Results <th> Timestamp\n"

  sqlite3 db2 searchlog.db
  set rc [catch {
    # The log table is created lazily by the first logged search, so it
    # may not exist yet. In that case the table is simply left empty.
    set hasLog [db2 exists {
      SELECT 1 FROM sqlite_master WHERE type='table' AND name='log'
    }]
    if {$hasLog} {
      set i 0
      db2 eval "
        SELECT rowid, ip, query, nres, timestamp FROM log $where
        ORDER BY rowid DESC
      " {
        if {$unique} {
          if {[info exists seen($query)]} continue
          set seen($query) 1
        }

        # Escape everything that came out of the log before emitting it.
        set querylink \
          "<a href=\"?q=[attrize $query]&donotlog=1\">[htmlize $query]</a>"
        set iplink "<a href=\"?admin=1&ip=[attrize $ip]\">[htmlize $ip]</a>"

        append res "  <tr> <td> $rowid <td> $iplink <td> $querylink"
        append res "       <td> [htmlize $nres] <td> [htmlize $timestamp]\n"

        incr i
        if {$i >= $limit} break
      }
    }
  } msg]

  # Always release the handle, even if the query failed.
  db2 close
  if {$rc} { return -code error $msg }

  append res "</table>\n"

  return $res
}


#-------------------------------------------------------------------------
# This command is similar to the builtin Tcl [time] command, except that
# it only ever runs the supplied script once. Also, instead of returning
# a string like "xxx microseconds per iteration", it returns "x.yy ms" or
# "x.yy s", depending on the magnitude of the time spent running the 
# command. For example:
................................................................................
    } else {
      lappend lRes $rowid
    }
  }

  set nRes [llength $lRes]
  set lRes [lrange $lRes $iStart [expr $iStart+9]]

  # Add an entry to the log database.
  #
  search_add_log_entry $nRes

  # If there are no results, return a message to that effect.
  #
  if {[llength $lRes] == 0} {
    return [subst { No results for: <b>[htmlize $::A(q)]</b> }]
  }
  
................................................................................
  return $ret
}

proc main {} {
  global A
  sqlite3 db search.db
  cgi_parse_args

  # If "env=1" is specified, dump the environment variables instead
  # of running any search.
  if {[info exists ::A(env)]} { return [cgi_env_dump] }
  
  # If "admin=1" is specified, jump to the admin screen.
  if {[info exists ::A(admin)]} { return [admin_list]}

  db transaction {
    set t [ttime { 
      if {[catch searchresults srchout]} {
        set A(q) [string tolower $A(q)]
        set srchout [searchresults]
      }