/ Check-in [d8180af2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Have the fts5 snippet() function avoid favouring snippets that begin with sentences that do not contain search terms. Add an extra bias in favour of the first sentence in the document.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5-snippet-bias
Files: files | file ages | folders
SHA1: d8180af2adfc101dfce95a0498b5cd7b30643b30
User & Date: dan 2016-08-19 14:25:38
Context
2016-08-19
18:37
Register any built-in fts5 module before loading automatic extensions. This allows automatic extensions to register fts5 tokenizers and auxiliary functions. check-in: b10e31dc user: dan tags: fts5-snippet-bias
14:25
Have the fts5 snippet() function avoid favouring snippets that begin with sentences that do not contain search terms. Add an extra bias in favour of the first sentence in the document. check-in: d8180af2 user: dan tags: fts5-snippet-bias
2016-08-18
14:47
Adjust some tests to account for recent changes to the fts5 snippet function. check-in: 184ecbe9 user: dan tags: fts5-snippet-bias
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_aux.c.

   418    418         rc = pApi->xTokenize(pFts, 
   419    419             sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
   420    420         );
   421    421         if( rc!=SQLITE_OK ) break;
   422    422         rc = pApi->xColumnSize(pFts, i, &nDocsize);
   423    423         if( rc!=SQLITE_OK ) break;
   424    424   
   425         -      for(ii=0; rc==SQLITE_OK && ii<sFinder.nFirst; ii++){
   426         -        int nScore;
   427         -        memset(aSeen, 0, nPhrase);
   428         -        rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, 
   429         -            sFinder.aFirst[ii], nToken, &nScore, 0
   430         -        );
   431         -
   432         -        /* Bonus of 100 points for starting at the start of a sentence */
   433         -        nScore += 100;            
   434         -
   435         -        if( rc==SQLITE_OK && nScore>nBestScore ){
   436         -          nBestScore = nScore;
   437         -          iBestCol = i;
   438         -          iBestStart = sFinder.aFirst[ii];
   439         -          nColSize = nDocsize;
   440         -        }
   441         -      }
   442         -
   443    425         for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){
   444    426           int ip, ic, io;
          427  +        int iAdj;
   445    428           int nScore;
          429  +        int jj;
          430  +
   446    431           rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
   447    432           if( ic!=i || rc!=SQLITE_OK ) continue;
   448    433           memset(aSeen, 0, nPhrase);
   449    434           rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
   450         -            io, nToken, &nScore, &io
          435  +            io, nToken, &nScore, &iAdj
   451    436           );
   452    437           if( rc==SQLITE_OK && nScore>nBestScore ){
   453    438             nBestScore = nScore;
   454    439             iBestCol = i;
   455         -          iBestStart = io;
          440  +          iBestStart = iAdj;
   456    441             nColSize = nDocsize;
   457    442           }
          443  +
          444  +        if( rc==SQLITE_OK && sFinder.nFirst ){
          445  +          for(jj=0; jj<(sFinder.nFirst-1); jj++){
          446  +            if( sFinder.aFirst[jj+1]>io ) break;
          447  +          }
          448  +
          449  +          if( sFinder.aFirst[jj]<io ){
          450  +            int nScore;
          451  +            memset(aSeen, 0, nPhrase);
          452  +            rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, 
          453  +              sFinder.aFirst[jj], nToken, &nScore, 0
          454  +            );
          455  +
          456  +            nScore += (sFinder.aFirst[jj]==0 ? 120 : 100);
          457  +            if( rc==SQLITE_OK && nScore>nBestScore ){
          458  +              nBestScore = nScore;
          459  +              iBestCol = i;
          460  +              iBestStart = sFinder.aFirst[jj];
          461  +              nColSize = nDocsize;
          462  +            }
          463  +          }
          464  +        }
   458    465         }
   459    466       }
   460    467     }
   461    468   
   462    469     if( rc==SQLITE_OK ){
   463    470       rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
   464    471     }