SQLite

Check-in [d8180af2ad]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Have the fts5 snippet() function avoid favouring snippets that begin with sentences that do not contain search terms. Add an extra bias in favour of the first sentence in the document.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5-snippet-bias
Files: files | file ages | folders
SHA1: d8180af2adfc101dfce95a0498b5cd7b30643b30
User & Date: dan 2016-08-19 14:25:38.928
Context
2016-08-19
18:37
Register any built-in fts5 module before loading automatic extensions. This allows automatic extensions to register fts5 tokenizers and auxiliary functions. (check-in: b10e31dce8 user: dan tags: fts5-snippet-bias)
14:25
Have the fts5 snippet() function avoid favouring snippets that begin with sentences that do not contain search terms. Add an extra bias in favour of the first sentence in the document. (check-in: d8180af2ad user: dan tags: fts5-snippet-bias)
2016-08-18
14:47
Adjust some tests to account for recent changes to the fts5 snippet function. (check-in: 184ecbe9c0 user: dan tags: fts5-snippet-bias)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_aux.c.
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457






















458
459
460
461
462
463
464
      rc = pApi->xTokenize(pFts, 
          sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
      );
      if( rc!=SQLITE_OK ) break;
      rc = pApi->xColumnSize(pFts, i, &nDocsize);
      if( rc!=SQLITE_OK ) break;

      for(ii=0; rc==SQLITE_OK && ii<sFinder.nFirst; ii++){
        int nScore;
        memset(aSeen, 0, nPhrase);
        rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, 
            sFinder.aFirst[ii], nToken, &nScore, 0
        );

        /* Bonus of 100 points for starting at the start of a sentence */
        nScore += 100;            

        if( rc==SQLITE_OK && nScore>nBestScore ){
          nBestScore = nScore;
          iBestCol = i;
          iBestStart = sFinder.aFirst[ii];
          nColSize = nDocsize;
        }
      }

      for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){
        int ip, ic, io;
        int nScore;
        rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
        if( ic!=i || rc!=SQLITE_OK ) continue;
        memset(aSeen, 0, nPhrase);
        rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
            io, nToken, &nScore, &io
        );
        if( rc==SQLITE_OK && nScore>nBestScore ){
          nBestScore = nScore;
          iBestCol = i;
          iBestStart = io;
          nColSize = nDocsize;
        }






















      }
    }
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
  }







|
|
<
<
<
<
|
<
|
|
<
<
<
<
<
|
<
<
<
<
<




|




|


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







418
419
420
421
422
423
424
425
426




427

428
429





430





431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
      rc = pApi->xTokenize(pFts, 
          sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
      );
      if( rc!=SQLITE_OK ) break;
      rc = pApi->xColumnSize(pFts, i, &nDocsize);
      if( rc!=SQLITE_OK ) break;

      for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){
        int ip, ic, io;




        int iAdj;

        int nScore;
        int jj;











        rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
        if( ic!=i || rc!=SQLITE_OK ) continue;
        memset(aSeen, 0, nPhrase);
        rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
            io, nToken, &nScore, &iAdj
        );
        if( rc==SQLITE_OK && nScore>nBestScore ){
          nBestScore = nScore;
          iBestCol = i;
          iBestStart = iAdj;
          nColSize = nDocsize;
        }

        if( rc==SQLITE_OK && sFinder.nFirst ){
          for(jj=0; jj<(sFinder.nFirst-1); jj++){
            if( sFinder.aFirst[jj+1]>io ) break;
          }

          if( sFinder.aFirst[jj]<io ){
            int nScore;
            memset(aSeen, 0, nPhrase);
            rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, 
              sFinder.aFirst[jj], nToken, &nScore, 0
            );

            nScore += (sFinder.aFirst[jj]==0 ? 120 : 100);
            if( rc==SQLITE_OK && nScore>nBestScore ){
              nBestScore = nScore;
              iBestCol = i;
              iBestStart = sFinder.aFirst[jj];
              nColSize = nDocsize;
            }
          }
        }
      }
    }
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
  }