Documentation Source Text

Check-in [c7628dcb37]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the way sub-sections are selected by the search script.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | experimental
Files: files | file ages | folders
SHA1: c7628dcb371ab43aeab21ce55a786ef37895ad3a
User & Date: dan 2016-08-25 17:29:24.418
Context
2016-08-25
20:47
Merge trunk changes into this branch. (check-in: a3888fbe0e user: dan tags: experimental)
17:29
Change the way sub-sections are selected by the search script. (check-in: c7628dcb37 user: dan tags: experimental)
12:18
Add <fancy_format> or <table_of_contents> markup to a few more documents. To ensure that there are enough anchors in the longer documents for the search script to use. (check-in: 066e5931ce user: dan tags: experimental)
Changes
Unified Diff Ignore Whitespace Patch
Changes to pages/fts3.in.
1
2
3
4
5
6
7
8
9
10
11
12
13
14

<tcl>hd_keywords *fts3 FTS3 {full-text search}</tcl>
<title>SQLite FTS3 and FTS4 Extensions</title>

<table_of_contents>

<h2 style="margin-left:1.0em" notoc> Overview</h2>

<p>
  FTS3 and FTS4 are SQLite virtual table modules that allow users to perform 
  full-text searches on a set of documents. The most common (and effective) 
  way to describe full-text searches is "what Google, Yahoo, and Bing do
  with documents placed on the World Wide Web". Users input a term, or series 
  of terms, perhaps connected by a binary operator or grouped together into a 






|







1
2
3
4
5
6
7
8
9
10
11
12
13
14

<tcl>hd_keywords *fts3 FTS3 {full-text search}</tcl>
<title>SQLite FTS3 and FTS4 Extensions</title>

<table_of_contents>

<h2 id=overview style="margin-left:1.0em" notoc> Overview</h2>

<p>
  FTS3 and FTS4 are SQLite virtual table modules that allow users to perform 
  full-text searches on a set of documents. The most common (and effective) 
  way to describe full-text searches is "what Google, Yahoo, and Bing do
  with documents placed on the World Wide Web". Users input a term, or series 
  of terms, perhaps connected by a binary operator or grouped together into a 
Changes to search/buildsearchdb.tcl.
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
      set nosearch(releaselog/current.html) 1


      # As of version 3.7.16, sub-release changelogs duplicated the entries
      # from the major release. This block does the following:
      #
      #   * sets the weight of a changelog containing superseded content
      #     to 25%
      #   * sets the weights of other changelogs to 50%.
      #
      foreach f [glob releaselog/*.html] { 
        set tail [file tail $f]
        set ::weight($f) 50
        if {[regexp {^(3_8_[0-9]*).*} $tail -> prefix]
         || [regexp {^(3_7_16).*} $tail -> prefix]
         || [regexp {^(3_9_).*} $tail -> prefix]
         || [regexp {^(3_[1-9][0-9]).*} $tail -> prefix]
        } {
          set f1 [lindex [lsort -decreasing [glob releaselog/$prefix*.html]] 0]
          if {$f!=$f1} { set ::weight($f) 25 }
        } 
      }

      foreach f [glob releaselog/*.html] { 
        if {[info exists nosearch($f)]==0} { 
          lappend lFiles $f 
        }







|
|



|






|







63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
      set nosearch(releaselog/current.html) 1


      # As of version 3.7.16, sub-release changelogs duplicated the entries
      # from the major release. This block does the following:
      #
      #   * sets the weight of a changelog containing superseded content
      #     to 10%
      #   * sets the weights of other changelogs to 25%.
      #
      foreach f [glob releaselog/*.html] { 
        set tail [file tail $f]
        set ::weight($f) 25
        if {[regexp {^(3_8_[0-9]*).*} $tail -> prefix]
         || [regexp {^(3_7_16).*} $tail -> prefix]
         || [regexp {^(3_9_).*} $tail -> prefix]
         || [regexp {^(3_[1-9][0-9]).*} $tail -> prefix]
        } {
          set f1 [lindex [lsort -decreasing [glob releaselog/$prefix*.html]] 0]
          if {$f!=$f1} { set ::weight($f) 10 }
        } 
      }

      foreach f [glob releaselog/*.html] { 
        if {[info exists nosearch($f)]==0} { 
          lappend lFiles $f 
        }
Changes to search/fts5ext.c.
32
33
34
35
36
37
38






























































39
40
41
42
43
44
45
    memcpy(&pRet, sqlite3_column_blob(pStmt, 0), sizeof(pRet));
  }
  sqlite3_finalize(pStmt);
  return pRet;
}
/************************************************************************/
































































typedef struct STokenizer STokenizer;
typedef struct STokenCtx STokenCtx;

/*
** Tokenizer type. Casts to Fts5Tokenizer.
*/







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
    memcpy(&pRet, sqlite3_column_blob(pStmt, 0), sizeof(pRet));
  }
  sqlite3_finalize(pStmt);
  return pRet;
}
/************************************************************************/

/*
** Simple ranking function used by search script. Assumes the queried
** table has the following 5 indexed columns:
**
**     apis,                      -- C APIs 
**     keywords,                  -- Keywords
**     title1,                    -- Document title
**     title2,                    -- Heading title, if any
**     content,                   -- Document text
**
** This function returns the following integer values:
**
**   10000 - all phrases present in (the combination of) "apis" or "keywords".
**    1000 - all phrases present in (the combination of) "apis", "keywords"
**           or either "title[12]" column.
**       0 - otherwise.
**
** It adds a bonus of 100 if either of the above and the condition 
** (xRowid()>1000 && (xRowid() % 1000)==1) is true.
**
** If xRowid() is less than 1000 the function returns without setting any
** result on pCtx. If xPhraseFirst() fails, an error message is set on pCtx
** instead of a score. The nVal and apVal arguments are not used.
*/
void srankFunc(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  int nPhrase;                    /* Number of phrases in query */
  int i;                          /* Used to iterate through phrases */
  int rc;                         /* Return code */
  int n1 = 0;                     /* Phrases first found in column 0 or 1 */
  int n2 = 0;                     /* Phrases first found in column 2 or 3 */
  int iScore = 0;                 /* Returned value */
  sqlite3_int64 iRowid;           /* Rowid for current row */

  iRowid = pApi->xRowid(pFts);
  /* No result is set on pCtx for rows with a rowid below 1000. */
  if( iRowid<1000 ) return;
  nPhrase = pApi->xPhraseCount(pFts);
  for(i=0; i<nPhrase; i++){
    Fts5PhraseIter iter;
    int ic, io;
    /* Only the instance reported by xPhraseFirst() is examined for each
    ** phrase; xPhraseNext() is never called. Per the FTS5 API, ic receives
    ** the column of that instance and io its offset (io is not used here).
    ** NOTE(review): this presumably relies on the first instance being in
    ** the lowest-numbered matching column - confirm against the FTS5 docs. */
    rc = pApi->xPhraseFirst(pFts, i, &iter, &ic, &io);
    if( rc!=SQLITE_OK ){
      sqlite3_result_error(pCtx, "Error in xPhraseFirst", -1);
      return;
    }

    if( ic==0 || ic==1 ) n1++;
    if( ic==2 || ic==3 ) n2++;
  }

  /* Every phrase must have been counted for a non-zero score. Note that
  ** n1==nPhrase also holds when nPhrase is 0. */
  if( n1==nPhrase ){ iScore = 10000; }
  else if( n1+n2==nPhrase ){ iScore = 1000; }

  /* The bonus is only ever added to an already non-zero score. */
  if( iScore && iRowid>1000 && (iRowid % 1000)==1 ){
    iScore += 100;
  }

  sqlite3_result_int(pCtx, iScore);
}



typedef struct STokenizer STokenizer;
typedef struct STokenCtx STokenCtx;

/*
** Tokenizer type. Casts to Fts5Tokenizer.
*/
148
149
150
151
152
153
154

155
156
157
158
159
160
161
162
163
164

165



166

167

168
169
170
171
172
173
174
175
176
177
178

  return rc;
}

/*
** Register the "stoken" tokenizer with the FTS5 API obtained from database
** handle db.
**
** If fts5_api_from_db() returns 0, *pzErr is set to a message built with
** sqlite3_mprintf() and SQLITE_ERROR is returned. Otherwise the return
** value is whatever xCreateTokenizer() returns. Parameter p is not used.
*/
static int register_tokenizer(sqlite3 *db, char **pzErr, void *p){
  fts5_api *pApi;
  fts5_tokenizer t;


  pApi = fts5_api_from_db(db);
  if( pApi==0 ){
    *pzErr = sqlite3_mprintf("fts5_api_from_db: %s", sqlite3_errmsg(db));
    return SQLITE_ERROR;
  }

  t.xCreate = stokenCreate;
  t.xDelete = stokenDelete;
  t.xTokenize = stokenTokenize;





  /* The fts5_api pointer itself is passed as the tokenizer's user data. */
  return pApi->xCreateTokenizer(pApi, "stoken", (void*)pApi, &t, 0);

}


/*
** When compiled with USE_TCL_STUBS, initialize the Tcl stubs interface
** (requesting version "8.4") and return TCL_ERROR if that fails. Then
** install register_tokenizer() via sqlite3_auto_extension() and return
** TCL_OK. The return value of sqlite3_auto_extension() is not checked.
**
** NOTE(review): presumably this is the entry point Tcl calls when the
** extension is loaded - confirm against the build/loader setup.
*/
int Fts5ext_Init(Tcl_Interp *interp){
#ifdef USE_TCL_STUBS
  if (Tcl_InitStubs(interp, "8.4", 0) == 0) {
    return TCL_ERROR;
  }
#endif
  /* register_tokenizer() is cast to the generic function-pointer type
  ** that sqlite3_auto_extension() accepts. */
  sqlite3_auto_extension((void (*)(void))register_tokenizer);
  return TCL_OK;
}








>










>

>
>
>
|
>

>











210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247

  return rc;
}

/*
** Register the "stoken" tokenizer and the "srank" function (implemented by
** srankFunc) with the FTS5 API obtained from database handle db.
**
** If fts5_api_from_db() returns 0, *pzErr is set to a message built with
** sqlite3_mprintf() and SQLITE_ERROR is returned. Otherwise the return
** value is that of xCreateTokenizer() if it fails, or that of
** xCreateFunction() if the tokenizer was registered successfully.
** Parameter p is not used.
*/
static int register_tokenizer(sqlite3 *db, char **pzErr, void *p){
  fts5_api *pApi;
  fts5_tokenizer t;
  int rc;

  pApi = fts5_api_from_db(db);
  if( pApi==0 ){
    *pzErr = sqlite3_mprintf("fts5_api_from_db: %s", sqlite3_errmsg(db));
    return SQLITE_ERROR;
  }

  t.xCreate = stokenCreate;
  t.xDelete = stokenDelete;
  t.xTokenize = stokenTokenize;
  /* The fts5_api pointer itself is passed as the tokenizer's user data. */
  rc = pApi->xCreateTokenizer(pApi, "stoken", (void*)pApi, &t, 0);

  /* Only attempt to register "srank" if the tokenizer was registered. */
  if( rc==SQLITE_OK ){
    rc = pApi->xCreateFunction(pApi, "srank", 0, srankFunc, 0);
  }

  return rc;
}


/*
** When compiled with USE_TCL_STUBS, initialize the Tcl stubs interface
** (requesting version "8.4") and return TCL_ERROR if that fails. Then
** install register_tokenizer() via sqlite3_auto_extension() and return
** TCL_OK. The return value of sqlite3_auto_extension() is not checked.
**
** NOTE(review): presumably this is the entry point Tcl calls when the
** extension is loaded - confirm against the build/loader setup.
*/
int Fts5ext_Init(Tcl_Interp *interp){
#ifdef USE_TCL_STUBS
  if (Tcl_InitStubs(interp, "8.4", 0) == 0) {
    return TCL_ERROR;
  }
#endif
  /* register_tokenizer() is cast to the generic function-pointer type
  ** that sqlite3_auto_extension() accepts. */
  sqlite3_auto_extension((void (*)(void))register_tokenizer);
  return TCL_OK;
}

Changes to search/search.tcl.
138
139
140
141
142
143
144

145
146
147
148
149
150
151
152
  #
  set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}]

  # Grab a list of rowid results.
  #
  set q {
    SELECT rowid FROM page WHERE page MATCH $::A(q) 

    ORDER BY rank * COALESCE(
      (SELECT percent FROM weight WHERE id=page.rowid), 100
    );
  }
  if {[catch { set lRowid [db eval $q] }]} {
    set x ""
    foreach word [split $::A(q) " "] {
      append x " \"[string map [list "\"" "\"\""] $word]\""







>
|







138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
  #
  set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}]

  # Grab a list of rowid results.
  #
  set q {
    SELECT rowid FROM page WHERE page MATCH $::A(q) 
    ORDER BY srank(page) DESC,
    rank * COALESCE(
      (SELECT percent FROM weight WHERE id=page.rowid), 100
    );
  }
  if {[catch { set lRowid [db eval $q] }]} {
    set x ""
    foreach word [split $::A(q) " "] {
      append x " \"[string map [list "\"" "\"\""] $word]\""
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238

      append s_content " $data($childid,s_content)"
    }

    append ret [subst -nocommands {<tr>
      <td valign=top style="line-height:150%">
        <div style="white-space:wrap;font-size:larger" class=nounderline>
          <a href="$url">$s_title1</a> 
          <div style="float:right;font-size:smaller;color:#BBB">($url)</div>
        </div>
          <div style="margin-left: 10ex; font:larger monospace">$s_apis</div>
        <div style="margin-left: 4ex; margin-bottom:1.5em">
           $s_content 
        </div>
      </td>







|







225
226
227
228
229
230
231
232
233
234
235
236
237
238
239

      append s_content " $data($childid,s_content)"
    }

    append ret [subst -nocommands {<tr>
      <td valign=top style="line-height:150%">
        <div style="white-space:wrap;font-size:larger" class=nounderline>
          <a href="$url">$s_title1 </a> 
          <div style="float:right;font-size:smaller;color:#BBB">($url)</div>
        </div>
          <div style="margin-left: 10ex; font:larger monospace">$s_apis</div>
        <div style="margin-left: 4ex; margin-bottom:1.5em">
           $s_content 
        </div>
      </td>