Documentation Source Text

Check-in [12433ed2a3]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: When searching the changelog only, leave hyperlinks and other markup in the result summary.
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 12433ed2a34e5f976fd5f84e3a2a5f4197753833c9cfe686c128f0dd5f40167c
User & Date: dan 2017-08-02 14:08:58
Context
2017-08-03
13:02
Increased detail on pointer types in the bindptr.html document. check-in: 1316d197a4 user: drh tags: trunk
2017-08-02
14:08
When searching the changelog only, leave hyperlinks and other markup in the result summary. check-in: 12433ed2a3 user: dan tags: trunk
2017-08-01
19:29
Add a website option to search the changelog instead of the documentation. Do not add any changelog files to the documentation index. check-in: 8f15082015 user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to search/buildsearchdb.tcl.

359
360
361
362
363
364
365






366
367
368
369
370
371
372
373





374
375
376
377
378
379
380
...
385
386
387
388
389
390
391
392

393
394
395
396
397
398
399
...
420
421
422
423
424
425
426
427

428
429
430
431
432
433
434
    foreach { tag hdr text } $section {}
    if {[string trim $text]==""} continue
    incr i
    set url "${doc}#${tag}"
    insert_entry -rowid $i -url $url -title1 $title -title2 $hdr -content $text
  }
}







proc changelog_document_import {doc} {

  set content [readfile $doc]
  set end [string first "Changes carried forward from version " $content]
  if {$end>0} { set content [string range $content 0 $end] }

  set dom [::hdom::parse $content]






  # Extract the version number from the document name.
  set version 0.0.0
  regexp {releaselog/(.*).html} $doc -> version
  set version [string map {_ .} $version]

  # Find each of the <li> nodes in the document.
................................................................................
    set ol [$li parent]
    if {$ol=="" || [$ol tag]!="ol"} {error UNTHINKABLE!}
    foreach c [$ol children] {
      if {$c==$li} break
      if {[$c tag]=="li"} {incr i}
    }

    set t [$li text]

    db eval { INSERT INTO change VALUES($doc, $version, $i, $t) }
  }
}

proc rebuild_database {} {

  db transaction {
................................................................................
      INSERT INTO page(page, rank) VALUES('rank', 'bm25(10.0,10.0,20.0,20.0)');

      DROP TABLE IF EXISTS change;
      CREATE VIRTUAL TABLE change USING fts5(
          url UNINDEXED,          -- Path to document
          version UNINDEXED,      -- SQLite version number
          idx UNINDEXED,          -- Bullet point number
          text                    -- Text of change log entry

      );
    }

    foreach doc [document_list changelog] { 
      puts "Indexing $doc..."
      changelog_document_import $doc 
    }







>
>
>
>
>
>








>
>
>
>
>







 







|
>







 







|
>







359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
...
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
...
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
    foreach { tag hdr text } $section {}
    if {[string trim $text]==""} continue
    incr i
    set url "${doc}#${tag}"
    insert_entry -rowid $i -url $url -title1 $title -title2 $hdr -content $text
  }
}

# Return the "inner HTML" of node $n: the concatenation, in order, of
# whatever each child of $n returns from its [html] subcommand. A node
# with no children yields an empty string.
#
# $n is invoked as a command and must support a [children] subcommand that
# returns a list of child node commands.
proc node_innerhtml {n} {
  set html {}
  foreach child [$n children] {
    append html [$child html]
  }
  return $html
}

proc changelog_document_import {doc} {

  set content [readfile $doc]
  set end [string first "Changes carried forward from version " $content]
  if {$end>0} { set content [string range $content 0 $end] }

  set dom [::hdom::parse $content]

  foreach n [[$dom root] search p] {
    set c [lindex [$n children] 0]
    if {$c!="" && [$c tag]=="b"} { $n detach }
  }

  # Extract the version number from the document name.
  set version 0.0.0
  regexp {releaselog/(.*).html} $doc -> version
  set version [string map {_ .} $version]

  # Find each of the <li> nodes in the document.
................................................................................
    set ol [$li parent]
    if {$ol=="" || [$ol tag]!="ol"} {error UNTHINKABLE!}
    foreach c [$ol children] {
      if {$c==$li} break
      if {[$c tag]=="li"} {incr i}
    }

    #set t [$li text]
    set t [node_innerhtml $li]
    db eval { INSERT INTO change VALUES($doc, $version, $i, $t) }
  }
}

proc rebuild_database {} {

  db transaction {
................................................................................
      INSERT INTO page(page, rank) VALUES('rank', 'bm25(10.0,10.0,20.0,20.0)');

      DROP TABLE IF EXISTS change;
      CREATE VIRTUAL TABLE change USING fts5(
          url UNINDEXED,          -- Path to document
          version UNINDEXED,      -- SQLite version number
          idx UNINDEXED,          -- Bullet point number
          text,                   -- Text of change log entry
          tokenize='html stoken unicode61 tokenchars _' -- Tokenizer definition
      );
    }

    foreach doc [document_list changelog] { 
      puts "Indexing $doc..."
      changelog_document_import $doc 
    }

Changes to search/fts5ext.c.

193
194
195
196
197
198
199


























































































200
201
202
203
204
205
206
...
224
225
226
227
228
229
230

231
232
233
234
235
236
237
...
240
241
242
243
244
245
246







247
248
249
250
251
252
253
  if( rc==SQLITE_OK && nToken>8 && 0==memcmp("sqlite3_", pToken, 8) ){
    rc = p->xToken(
        p->pCtx, FTS5_TOKEN_COLOCATED, pToken+8, nToken-8, iStart, iEnd);
  }

  return rc;
}



























































































static int stokenTokenize(
  Fts5Tokenizer *pTokenizer, 
  void *pCtx,
  int flags,            /* Mask of FTS5_TOKENIZE_* flags */
  const char *pText, int nText, 
  int (*xToken)(
................................................................................
    );
  }else{
    rc = p->porter.xTokenize(p->pPorter, pCtx, flags, pText, nText, xToken);
  }

  return rc;
}


static int register_tokenizer(sqlite3 *db, char **pzErr, void *p){
  fts5_api *pApi;
  fts5_tokenizer t;
  int rc;

  pApi = fts5_api_from_db(db);
................................................................................
    return SQLITE_ERROR;
  }

  t.xCreate = stokenCreate;
  t.xDelete = stokenDelete;
  t.xTokenize = stokenTokenize;
  rc = pApi->xCreateTokenizer(pApi, "stoken", (void*)pApi, &t, 0);








  if( rc==SQLITE_OK ){
    rc = pApi->xCreateFunction(pApi, "srank", 0, srankFunc, 0);
  }

  return rc;
}







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>







 







>
>
>
>
>
>
>







193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
...
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
...
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
  if( rc==SQLITE_OK && nToken>8 && 0==memcmp("sqlite3_", pToken, 8) ){
    rc = p->xToken(
        p->pCtx, FTS5_TOKEN_COLOCATED, pToken+8, nToken-8, iStart, iEnd);
  }

  return rc;
}

/*
** Per-instance state of the "html" tokenizer. Pointers to this structure
** are handed to FTS5 cast to (Fts5Tokenizer*) and cast back again by the
** html* methods.
**
** The "html" tokenizer is a wrapper: it delegates the real tokenization
** work to another tokenizer, whose methods and instance are stored here.
*/
typedef struct HtmlTokenizer {
  fts5_tokenizer tokenizer;       /* Methods of the wrapped tokenizer */
  Fts5Tokenizer *pTokenizer;      /* Instance of the wrapped tokenizer */
} HtmlTokenizer;

/*
** xCreate method of the "html" tokenizer.
**
** pCtx is the fts5_api pointer. azArg[0] names the tokenizer to wrap, and
** the remaining nArg-1 arguments are passed through to that tokenizer's
** own xCreate method.
**
** Returns SQLITE_OK and stores the new object in *ppOut on success.
** Returns SQLITE_ERROR (with *ppOut set to NULL) if no wrapped tokenizer
** is named, SQLITE_NOMEM if the allocation fails (*ppOut is not written in
** that case), or the error code from looking up or creating the wrapped
** tokenizer (with *ppOut set to NULL).
*/
static int htmlCreate(
  void *pCtx, 
  const char **azArg, int nArg, 
  Fts5Tokenizer **ppOut
){
  fts5_api *pFts5 = (fts5_api*)pCtx;
  HtmlTokenizer *pNew;
  void *pInnerCtx = 0;
  int rc;

  /* At least the name of the tokenizer to wrap is required. */
  if( nArg==0 ){
    *ppOut = 0;
    return SQLITE_ERROR;
  }

  /* Allocate and zero the wrapper object. */
  pNew = sqlite3_malloc(sizeof(HtmlTokenizer));
  if( pNew==0 ){
    return SQLITE_NOMEM;
  }
  memset(pNew, 0, sizeof(HtmlTokenizer));

  /* Look up the wrapped tokenizer by name, then instantiate it with the
  ** arguments that follow the name. */
  rc = pFts5->xFindTokenizer(pFts5, azArg[0], &pInnerCtx, &pNew->tokenizer);
  if( rc==SQLITE_OK ){
    rc = pNew->tokenizer.xCreate(
        pInnerCtx, &azArg[1], nArg-1, &pNew->pTokenizer
    );
  }

  /* On any failure discard the half-built wrapper and hand back NULL. */
  if( rc!=SQLITE_OK ){
    sqlite3_free(pNew);
    pNew = 0;
  }
  *ppOut = (Fts5Tokenizer*)pNew;
  return rc;
}

/*
** xDelete method of the "html" tokenizer. Destroys the wrapped tokenizer
** instance using its own xDelete method, then frees the wrapper itself.
*/
static void htmlDelete(Fts5Tokenizer *pTokenizer){
  HtmlTokenizer *pHtml = (HtmlTokenizer*)pTokenizer;
  Fts5Tokenizer *pInner = pHtml->pTokenizer;

  pHtml->tokenizer.xDelete(pInner);
  sqlite3_free(pHtml);
}

/*
** xTokenize method of the "html" tokenizer.
**
** Makes a copy of the input in which every byte from a '<' up to and
** including the next '>' is replaced by a space, then passes that copy to
** the wrapped tokenizer. The copy has exactly the same length as the
** input and unblanked bytes keep their positions, so offsets into the
** copy are also valid offsets into pText.
**
** A '<' with no matching '>' blanks everything to the end of the input.
** A '>' that appears outside of a tag is copied through unchanged.
**
** Returns SQLITE_NOMEM if the temporary buffer cannot be allocated,
** otherwise whatever the wrapped tokenizer's xTokenize returns.
*/
static int htmlTokenize(
  Fts5Tokenizer *pTokenizer, 
  void *pCtx,
  int flags,            /* Mask of FTS5_TOKENIZE_* flags */
  const char *pText, int nText, 
  int (*xToken)(
    void *pCtx,         /* Copy of 2nd argument to xTokenize() */
    int tflags,         /* Mask of FTS5_TOKEN_* flags */
    const char *pToken, /* Pointer to buffer containing token */
    int nToken,         /* Size of token in bytes */
    int iStart,         /* Byte offset of token within input text */
    int iEnd            /* Byte offset of end of token within input text */
  )
){
  HtmlTokenizer *pHtml = (HtmlTokenizer*)pTokenizer;
  char *zStripped;                /* Copy of pText with tags blanked out */
  int inTag = 0;                  /* True while between '<' and '>' */
  int iByte;
  int rc;

  /* NOTE(review): the extra byte is never written; presumably it is there
  ** so that an empty input does not request a zero-byte allocation. */
  zStripped = sqlite3_malloc(nText+1);
  if( zStripped==0 ){
    return SQLITE_NOMEM;
  }

  for(iByte=0; iByte<nText; iByte++){
    char ch = pText[iByte];
    if( inTag ){
      /* Inside a tag: blank the byte; a '>' closes the tag. */
      zStripped[iByte] = ' ';
      if( ch=='>' ) inTag = 0;
    }else if( ch=='<' ){
      /* Start of a tag: blank the '<' itself as well. */
      zStripped[iByte] = ' ';
      inTag = 1;
    }else{
      zStripped[iByte] = ch;
    }
  }

  rc = pHtml->tokenizer.xTokenize(
      pHtml->pTokenizer, pCtx, flags, zStripped, nText, xToken
  );
  sqlite3_free(zStripped);
  return rc;
}

static int stokenTokenize(
  Fts5Tokenizer *pTokenizer, 
  void *pCtx,
  int flags,            /* Mask of FTS5_TOKENIZE_* flags */
  const char *pText, int nText, 
  int (*xToken)(
................................................................................
    );
  }else{
    rc = p->porter.xTokenize(p->pPorter, pCtx, flags, pText, nText, xToken);
  }

  return rc;
}


static int register_tokenizer(sqlite3 *db, char **pzErr, void *p){
  fts5_api *pApi;
  fts5_tokenizer t;
  int rc;

  pApi = fts5_api_from_db(db);
................................................................................
    return SQLITE_ERROR;
  }

  t.xCreate = stokenCreate;
  t.xDelete = stokenDelete;
  t.xTokenize = stokenTokenize;
  rc = pApi->xCreateTokenizer(pApi, "stoken", (void*)pApi, &t, 0);

  if( rc==SQLITE_OK ){
    t.xCreate = htmlCreate;
    t.xDelete = htmlDelete;
    t.xTokenize = htmlTokenize;
    rc = pApi->xCreateTokenizer(pApi, "html", (void*)pApi, &t, 0);
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xCreateFunction(pApi, "srank", 0, srankFunc, 0);
  }

  return rc;
}