Documentation Source Text

Check-in [c7628dcb37]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the way sub-sections are selected by the search script.
Timelines: family | ancestors | descendants | both | experimental
Files: files | file ages | folders
SHA1: c7628dcb371ab43aeab21ce55a786ef37895ad3a
User & Date: dan 2016-08-25 17:29:24
Context
2016-08-25
20:47
Merge trunk changes into this branch. check-in: a3888fbe0e user: dan tags: experimental
17:29
Change the way sub-sections are selected by the search script. check-in: c7628dcb37 user: dan tags: experimental
12:18
Add <fancy_format> or <table_of_contents> markup to a few more documents, to ensure that there are enough anchors in the longer documents for the search script to use. check-in: 066e5931ce user: dan tags: experimental
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to pages/fts3.in.

     1      1   
     2      2   <tcl>hd_keywords *fts3 FTS3 {full-text search}</tcl>
     3      3   <title>SQLite FTS3 and FTS4 Extensions</title>
     4      4   
     5      5   <table_of_contents>
     6      6   
     7         -<h2 style="margin-left:1.0em" notoc> Overview</h2>
            7  +<h2 id=overview style="margin-left:1.0em" notoc> Overview</h2>
     8      8   
     9      9   <p>
    10     10     FTS3 and FTS4 are SQLite virtual table modules that allow users to perform 
    11     11     full-text searches on a set of documents. The most common (and effective) 
    12     12     way to describe full-text searches is "what Google, Yahoo, and Bing do
    13     13     with documents placed on the World Wide Web". Users input a term, or series 
    14     14     of terms, perhaps connected by a binary operator or grouped together into a 

Changes to search/buildsearchdb.tcl.

    63     63         set nosearch(releaselog/current.html) 1
    64     64   
    65     65   
    66     66         # As of version 3.7.16, sub-release changelogs duplicated the entries
    67     67         # from the major release. This block does the following:
    68     68         #
    69     69         #   * sets the weight of a changelog containing superseded content
    70         -      #     to 25%
    71         -      #   * sets the weights of other changelogs to 50%.
           70  +      #     to 10%
           71  +      #   * sets the weights of other changelogs to 25%.
    72     72         #
    73     73         foreach f [glob releaselog/*.html] { 
    74     74           set tail [file tail $f]
    75         -        set ::weight($f) 50
           75  +        set ::weight($f) 25
    76     76           if {[regexp {^(3_8_[0-9]*).*} $tail -> prefix]
    77     77            || [regexp {^(3_7_16).*} $tail -> prefix]
    78     78            || [regexp {^(3_9_).*} $tail -> prefix]
    79     79            || [regexp {^(3_[1-9][0-9]).*} $tail -> prefix]
    80     80           } {
    81     81             set f1 [lindex [lsort -decreasing [glob releaselog/$prefix*.html]] 0]
    82         -          if {$f!=$f1} { set ::weight($f) 25 }
           82  +          if {$f!=$f1} { set ::weight($f) 10 }
    83     83           } 
    84     84         }
    85     85   
    86     86         foreach f [glob releaselog/*.html] { 
    87     87           if {[info exists nosearch($f)]==0} { 
    88     88             lappend lFiles $f 
    89     89           }

Changes to search/fts5ext.c.

    32     32       memcpy(&pRet, sqlite3_column_blob(pStmt, 0), sizeof(pRet));
    33     33     }
    34     34     sqlite3_finalize(pStmt);
    35     35     return pRet;
    36     36   }
    37     37   /************************************************************************/
    38     38   
           39  +/*
           40  +** Simple ranking function used by search script. Assumes the queried
           41  +** table has the following 5 indexed columns:
           42  +**
           43  +**     apis,                      -- C APIs 
           44  +**     keywords,                  -- Keywords
           45  +**     title1,                    -- Document title
           46  +**     title2,                    -- Heading title, if any
           47  +**     content,                   -- Document text
           48  +**
           49  +** This function returns the following integer values:
           50  +**
           51  +**   10000 - all phrases present in (the combination of) "apis" or "keywords".
           52  +**    1000 - all phrases present in (the combination of) "apis", "keywords"
           53  +**           or either "title[12]" column.
           54  +**
           55  +** It adds a bonus of 100 if either of the above applies and the condition
           56  +** (xRowid()>1000 && (xRowid() % 1000)==1) is true.
           57  +**
           58  +*/
           59  +void srankFunc(
           60  +  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
           61  +  Fts5Context *pFts,              /* First arg to pass to pApi functions */
           62  +  sqlite3_context *pCtx,          /* Context for returning result/error */
           63  +  int nVal,                       /* Number of values in apVal[] array */
           64  +  sqlite3_value **apVal           /* Array of trailing arguments */
           65  +){
           66  +  int nPhrase;                    /* Number of phrases in query */
           67  +  int i;                          /* Used to iterate through phrases */
           68  +  int rc;                         /* Return code */
           69  +  int n1 = 0;
           70  +  int n2 = 0;
           71  +  int iScore = 0;                 /* Returned value */
           72  +  sqlite3_int64 iRowid;           /* Rowid for current row */
           73  +
           74  +  iRowid = pApi->xRowid(pFts);
           75  +  if( iRowid<1000 ) return;
           76  +  nPhrase = pApi->xPhraseCount(pFts);
           77  +  for(i=0; i<nPhrase; i++){
           78  +    Fts5PhraseIter iter;
           79  +    int ic, io;
           80  +    rc = pApi->xPhraseFirst(pFts, i, &iter, &ic, &io);
           81  +    if( rc!=SQLITE_OK ){
           82  +      sqlite3_result_error(pCtx, "Error in xPhraseFirst", -1);
           83  +      return;
           84  +    }
           85  +
           86  +    if( ic==0 || ic==1 ) n1++;
           87  +    if( ic==2 || ic==3 ) n2++;
           88  +  }
           89  +
           90  +  if( n1==nPhrase ){ iScore = 10000; }
           91  +  else if( n1+n2==nPhrase ){ iScore = 1000; }
           92  +
           93  +  if( iScore && iRowid>1000 && (iRowid % 1000)==1 ){
           94  +    iScore += 100;
           95  +  }
           96  +
           97  +  sqlite3_result_int(pCtx, iScore);
           98  +}
           99  +
          100  +
    39    101   
    40    102   typedef struct STokenizer STokenizer;
    41    103   typedef struct STokenCtx STokenCtx;
    42    104   
    43    105   /*
    44    106   ** Tokenizer type. Casts to Fts5Tokenizer.
    45    107   */
................................................................................
   148    210   
   149    211     return rc;
   150    212   }
   151    213   
   152    214   static int register_tokenizer(sqlite3 *db, char **pzErr, void *p){
   153    215     fts5_api *pApi;
   154    216     fts5_tokenizer t;
          217  +  int rc;
   155    218   
   156    219     pApi = fts5_api_from_db(db);
   157    220     if( pApi==0 ){
   158    221       *pzErr = sqlite3_mprintf("fts5_api_from_db: %s", sqlite3_errmsg(db));
   159    222       return SQLITE_ERROR;
   160    223     }
   161    224   
   162    225     t.xCreate = stokenCreate;
   163    226     t.xDelete = stokenDelete;
   164    227     t.xTokenize = stokenTokenize;
          228  +  rc = pApi->xCreateTokenizer(pApi, "stoken", (void*)pApi, &t, 0);
          229  +
          230  +  if( rc==SQLITE_OK ){
          231  +    rc = pApi->xCreateFunction(pApi, "srank", 0, srankFunc, 0);
          232  +  }
   165    233   
   166         -  return pApi->xCreateTokenizer(pApi, "stoken", (void*)pApi, &t, 0);
          234  +  return rc;
   167    235   }
          236  +
   168    237   
   169    238   int Fts5ext_Init(Tcl_Interp *interp){
   170    239   #ifdef USE_TCL_STUBS
   171    240     if (Tcl_InitStubs(interp, "8.4", 0) == 0) {
   172    241       return TCL_ERROR;
   173    242     }
   174    243   #endif
   175    244     sqlite3_auto_extension((void (*)(void))register_tokenizer);
   176    245     return TCL_OK;
   177    246   }
   178    247   

Changes to search/search.tcl.

   138    138     #
   139    139     set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}]
   140    140   
   141    141     # Grab a list of rowid results.
   142    142     #
   143    143     set q {
   144    144       SELECT rowid FROM page WHERE page MATCH $::A(q) 
   145         -    ORDER BY rank * COALESCE(
          145  +    ORDER BY srank(page) DESC,
          146  +    rank * COALESCE(
   146    147         (SELECT percent FROM weight WHERE id=page.rowid), 100
   147    148       );
   148    149     }
   149    150     if {[catch { set lRowid [db eval $q] }]} {
   150    151       set x ""
   151    152       foreach word [split $::A(q) " "] {
   152    153         append x " \"[string map [list "\"" "\"\""] $word]\""
................................................................................
   224    225   
   225    226         append s_content " $data($childid,s_content)"
   226    227       }
   227    228   
   228    229       append ret [subst -nocommands {<tr>
   229    230         <td valign=top style="line-height:150%">
   230    231           <div style="white-space:wrap;font-size:larger" class=nounderline>
   231         -          <a href="$url">$s_title1</a> 
          232  +          <a href="$url">$s_title1 </a> 
   232    233             <div style="float:right;font-size:smaller;color:#BBB">($url)</div>
   233    234           </div>
   234    235             <div style="margin-left: 10ex; font:larger monospace">$s_apis</div>
   235    236           <div style="margin-left: 4ex; margin-bottom:1.5em">
   236    237              $s_content 
   237    238           </div>
   238    239         </td>