/ Check-in [bdc58fd2]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add a snippet() function to fts5.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: bdc58fd28a63ac9632c3df6c7768a9a236566605
User & Date: dan 2014-07-23 19:31:56
Context
2014-07-25
20:30
Add extension apis xRowCount, xQueryPhrase, xSetAuxdata and xGetAuxdata. And a ranking function that uses all of the above. check-in: c4d50428 user: dan tags: fts5
2014-07-23
19:31
Add a snippet() function to fts5. check-in: bdc58fd2 user: dan tags: fts5
2014-07-21
15:45
Fix DELETE and UPDATE operations on fts5 tables. check-in: d44d3a85 user: dan tags: fts5
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5.c.

   682    682     return rc;
   683    683   }
   684    684   
   685    685   static int fts5ApiPoslist(
   686    686     Fts5Context *pCtx, 
   687    687     int iPhrase, 
   688    688     int *pi, 
   689         -  int *piCol, 
   690         -  int *piOff
          689  +  i64 *piPos 
   691    690   ){
   692    691     Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
   693    692     const u8 *a; int n;             /* Poslist for phrase iPhrase */
   694    693     n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, &a);
   695         -  return sqlite3Fts5PoslistNext(a, n, pi, piCol, piOff);
          694  +  return sqlite3Fts5PoslistNext64(a, n, pi, piPos);
   696    695   }
   697    696   
   698    697   static void fts5ApiCallback(
   699    698     sqlite3_context *context,
   700    699     int argc,
   701    700     sqlite3_value **argv
   702    701   ){

Changes to ext/fts5/fts5.h.

    65     65   **
    66     66   ** xRowid:
    67     67   **   Returns the rowid of the current row.
    68     68   **
    69     69   ** xPoslist:
    70     70   **   Iterate through instances of phrase iPhrase in the current row. 
    71     71   **
           72  +**   At EOF, a non-zero value is returned and output variable *piPos is set to -1.
           73  +**
    72     74   ** xTokenize:
    73     75   **   Tokenize text using the tokenizer belonging to the FTS5 table.
    74     76   */
    75     77   struct Fts5ExtensionApi {
    76     78     int iVersion;                   /* Currently always set to 1 */
    77     79   
    78     80     void *(*xUserData)(Fts5Context*);
................................................................................
    87     89   
    88     90     int (*xPhraseCount)(Fts5Context*);
    89     91     int (*xPhraseSize)(Fts5Context*, int iPhrase);
    90     92   
    91     93     sqlite3_int64 (*xRowid)(Fts5Context*);
    92     94     int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
    93     95     int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
    94         -  int (*xPoslist)(Fts5Context*, int iPhrase, int *pi, int *piCol, int *piOff);
           96  +  int (*xPoslist)(Fts5Context*, int iPhrase, int *pi, sqlite3_int64 *piPos);
    95     97   };
    96     98   
           99  +#define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32)
          100  +#define FTS5_POS2OFFSET(iPos) (int)(iPos & 0xFFFFFFFF)
          101  +
    97    102   /* 
    98    103   ** CUSTOM AUXILIARY FUNCTIONS
    99    104   *************************************************************************/
   100    105   #endif /* _FTS5_H */
   101    106   

Changes to ext/fts5/fts5_aux.c.

     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   ******************************************************************************
    12     12   */
    13     13   
    14     14   #include "fts5Int.h"
    15     15   
           16  +typedef struct SnippetPhrase SnippetPhrase;
           17  +typedef struct SnippetIter SnippetIter;
           18  +typedef struct SnippetCtx SnippetCtx;
           19  +
           20  +struct SnippetPhrase {
           21  +  u64 mask;                       /* Current mask */
           22  +  int nToken;                     /* Tokens in this phrase */
           23  +  int i;                          /* Current offset in phrase poslist */
           24  +  i64 iPos;                       /* Next position in phrase (-ve -> EOF) */
           25  +};
           26  +
           27  +struct SnippetIter {
           28  +  i64 iLast;                      /* Last token position of current snippet */
           29  +  int nScore;                     /* Score of current snippet */
           30  +
           31  +  const Fts5ExtensionApi *pApi;
           32  +  Fts5Context *pFts;
           33  +  u64 szmask;                     /* Mask applied to SnippetPhrase.mask */
           34  +  int nPhrase;                    /* Number of phrases */
           35  +  SnippetPhrase aPhrase[0];       /* Array of size nPhrase */
           36  +};
           37  +
           38  +struct SnippetCtx {
           39  +  int iFirst;                     /* Offset of first token to record */
           40  +  int nToken;                     /* Size of aiStart[] and aiEnd[] arrays */
           41  +  int iSeen;                      /* Set to largest offset seen */
           42  +  int *aiStart; 
           43  +  int *aiEnd;
           44  +};
           45  +
           46  +static int fts5SnippetCallback(
           47  +  void *pContext,                 /* Pointer to SnippetCtx object */
           48  +  const char *pToken,             /* Buffer containing token */
           49  +  int nToken,                     /* Size of token in bytes */
           50  +  int iStart,                     /* Start offset of token */
           51  +  int iEnd,                       /* End offset of token */
           52  +  int iPos                        /* Position offset of token */
           53  +){
           54  +  int rc = SQLITE_OK;
           55  +  SnippetCtx *pCtx = (SnippetCtx*)pContext;
           56  +  int iOff = iPos - pCtx->iFirst;
           57  +
           58  +  if( iOff>=0 ){
           59  +    if( iOff < pCtx->nToken ){
           60  +      pCtx->aiStart[iOff] = iStart;
           61  +      pCtx->aiEnd[iOff] = iEnd;
           62  +    }
           63  +    pCtx->iSeen = iPos;
           64  +    if( iOff>=pCtx->nToken ) rc = SQLITE_DONE;
           65  +  }
           66  +
           67  +  return rc;
           68  +}
           69  +
           70  +/*
           71  +** Set pIter->nScore to the score for the current entry.
           72  +*/
           73  +static void fts5SnippetCalculateScore(SnippetIter *pIter){
           74  +  int i;
           75  +  int nScore = 0;
           76  +  assert( pIter->iLast>=0 );
           77  +
           78  +  for(i=0; i<pIter->nPhrase; i++){
           79  +    SnippetPhrase *p = &pIter->aPhrase[i];
           80  +    u64 mask = p->mask;
           81  +    if( mask ){
           82  +      u64 j;
           83  +      nScore += 1000;
           84  +      for(j=1; j & pIter->szmask; j<<=1){
           85  +        if( mask & j ) nScore++;
           86  +      }
           87  +    }
           88  +  }
           89  +
           90  +  pIter->nScore = nScore;
           91  +}
           92  +
           93  +/*
           94  +** Allocate a new snippet iter.
           95  +*/
           96  +static int fts5SnippetIterNew(
           97  +  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
           98  +  Fts5Context *pFts,              /* First arg to pass to pApi functions */
           99  +  int nToken,                     /* Number of tokens in snippets */
          100  +  SnippetIter **ppIter            /* OUT: New object */
          101  +){
          102  +  int i;                          /* Counter variable */
          103  +  SnippetIter *pIter;             /* New iterator object */
          104  +  int nByte;                      /* Bytes of space to allocate */
          105  +  int nPhrase;                    /* Number of phrases in query */
          106  +
          107  +  *ppIter = 0;
          108  +  nPhrase = pApi->xPhraseCount(pFts);
          109  +  nByte = sizeof(SnippetIter) + nPhrase * sizeof(SnippetPhrase);
          110  +  pIter = (SnippetIter*)sqlite3_malloc(nByte);
          111  +  if( pIter==0 ) return SQLITE_NOMEM;
          112  +  memset(pIter, 0, nByte);
          113  +
          114  +  pIter->nPhrase = nPhrase;
          115  +  pIter->pApi = pApi;
          116  +  pIter->pFts = pFts;
          117  +  pIter->szmask = ((u64)1 << nToken) - 1;
          118  +  assert( nToken<=63 );
          119  +
          120  +  for(i=0; i<nPhrase; i++){
          121  +    pIter->aPhrase[i].nToken = pApi->xPhraseSize(pFts, i);
          122  +  }
          123  +
          124  +  *ppIter = pIter;
          125  +  return SQLITE_OK;
          126  +}
          127  +
          128  +/*
          129  +** Set the iterator to point to the first candidate snippet.
          130  +*/
          131  +static void fts5SnippetIterFirst(SnippetIter *pIter){
          132  +  const Fts5ExtensionApi *pApi = pIter->pApi;
          133  +  Fts5Context *pFts = pIter->pFts;
          134  +  int i;                          /* Used to iterate through phrases */
          135  +  SnippetPhrase *pMin = 0;        /* Phrase with first match */
          136  +
          137  +  memset(pIter->aPhrase, 0, sizeof(SnippetPhrase) * pIter->nPhrase);
          138  +
          139  +  for(i=0; i<pIter->nPhrase; i++){
          140  +    SnippetPhrase *p = &pIter->aPhrase[i];
          141  +    p->nToken = pApi->xPhraseSize(pFts, i);
          142  +    pApi->xPoslist(pFts, i, &p->i, &p->iPos);
          143  +    if( p->iPos>=0 && (pMin==0 || p->iPos<pMin->iPos) ){
          144  +      pMin = p;
          145  +    }
          146  +  }
          147  +  assert( pMin );
          148  +
          149  +  pIter->iLast = pMin->iPos + pMin->nToken - 1;
          150  +  pMin->mask = 0x01;
          151  +  pApi->xPoslist(pFts, pMin - pIter->aPhrase, &pMin->i, &pMin->iPos);
          152  +  fts5SnippetCalculateScore(pIter);
          153  +}
          154  +
          155  +/*
          156  +** Advance the snippet iterator to the next candidate snippet.
          157  +*/
          158  +static void fts5SnippetIterNext(SnippetIter *pIter){
          159  +  const Fts5ExtensionApi *pApi = pIter->pApi;
          160  +  Fts5Context *pFts = pIter->pFts;
          161  +  int nPhrase = pIter->nPhrase;
          162  +  int i;                          /* Used to iterate through phrases */
          163  +  SnippetPhrase *pMin = 0;
          164  +
          165  +  for(i=0; i<nPhrase; i++){
          166  +    SnippetPhrase *p = &pIter->aPhrase[i];
          167  +    if( p->iPos>=0 && (pMin==0 || p->iPos<pMin->iPos) ) pMin = p;
          168  +  }
          169  +
          170  +  if( pMin==0 ){
          171  +    /* pMin==0 indicates that the SnippetIter is at EOF. */
          172  +    pIter->iLast = -1;
          173  +  }else{
          174  +    i64 nShift = pMin->iPos - pIter->iLast;
          175  +    assert( nShift>=0 );
          176  +    for(i=0; i<nPhrase; i++){
          177  +      SnippetPhrase *p = &pIter->aPhrase[i];
          178  +      if( nShift>=63 ){
          179  +        p->mask = 0;
          180  +      }else{
          181  +        p->mask = p->mask << (int)nShift;
          182  +        p->mask &= pIter->szmask;
          183  +      }
          184  +    }
          185  +
          186  +    pIter->iLast = pMin->iPos;
          187  +    pMin->mask |= 0x01;
          188  +    fts5SnippetCalculateScore(pIter);
          189  +    pApi->xPoslist(pFts, pMin - pIter->aPhrase, &pMin->i, &pMin->iPos);
          190  +  }
          191  +}
          192  +
          193  +static void fts5SnippetIterFree(SnippetIter *pIter){
          194  +  if( pIter ){
          195  +    sqlite3_free(pIter);
          196  +  }
          197  +}
          198  +
          199  +static int fts5SnippetText(
          200  +  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
          201  +  Fts5Context *pFts,              /* First arg to pass to pApi functions */
          202  +  SnippetIter *pIter,             /* Snippet to write to buffer */
          203  +  int nToken,                     /* Size of desired snippet in tokens */
          204  +  const char *zStart,
          205  +  const char *zFinal,
          206  +  const char *zEllip,
          207  +  Fts5Buffer *pBuf                /* Write output to this buffer */
          208  +){
          209  +  SnippetCtx ctx;
          210  +  int i;
          211  +  u64 all = 0;
          212  +  const char *zCol;               /* Column text to extract snippet from */
          213  +  int nCol;                       /* Size of column text in bytes */
          214  +  int rc;
          215  +  int nShift;
          216  +
          217  +  rc = pApi->xColumnText(pFts, FTS5_POS2COLUMN(pIter->iLast), &zCol, &nCol);
          218  +  if( rc!=SQLITE_OK ) return rc;
          219  +
          220  +  /* At this point pIter->iLast is the offset of the last token in the
          221  +  ** proposed snippet. However, in all cases pIter->iLast contains the
          222  +  ** final token of one of the phrases. This makes the snippet look
          223  +  ** unbalanced. For example:
          224  +  **
          225  +  **     "...x x x x x <b>term</b>..."
          226  +  **
          227  +  ** It is better to increase iLast a little so that the snippet looks
          228  +  ** more like:
          229  +  **
          230  +  **     "...x x x <b>term</b> y y..."
          231  +  **
          232  +  ** The problem is that there is no easy way to discover how many
          233  +  ** tokens, if any, are present in the column following "term". 
          234  +  */
          235  +
          236  +  /* Set variable nShift to the number of tokens by which the snippet
          237  +  ** should be shifted, assuming there are sufficient tokens to the right
          238  +  ** of iLast in the column value.  */
          239  +  for(i=0; i<pIter->nPhrase; i++){
          240  +    int iToken;
          241  +    for(iToken=0; iToken<pIter->aPhrase[i].nToken; iToken++){
          242  +      all |= (pIter->aPhrase[i].mask << iToken);
          243  +    }
          244  +  }
          245  +  for(i=nToken-1; i>=0; i--){
          246  +    if( all & ((u64)1 << i) ) break;
          247  +  }
          248  +  assert( i>=0 );
          249  +  nShift = (nToken - i) / 2;
          250  +
          251  +  memset(&ctx, 0, sizeof(SnippetCtx));
          252  +  ctx.nToken = nToken + nShift;
          253  +  ctx.iFirst = FTS5_POS2OFFSET(pIter->iLast) - nToken + 1;
          254  +  if( ctx.iFirst<0 ){
          255  +    nShift += ctx.iFirst;
          256  +    if( nShift<0 ) nShift = 0;
          257  +    ctx.iFirst = 0;
          258  +  }
          259  +  ctx.aiStart = (int*)sqlite3_malloc(sizeof(int) * ctx.nToken * 2);
          260  +  if( ctx.aiStart==0 ) return SQLITE_NOMEM;
          261  +  ctx.aiEnd = &ctx.aiStart[ctx.nToken];
          262  +
          263  +  rc = pApi->xTokenize(pFts, zCol, nCol, (void*)&ctx, fts5SnippetCallback);
          264  +  if( rc==SQLITE_OK ){
          265  +    int i1;                       /* First token from input to include */
          266  +    int i2;                       /* Last token from input to include */
          267  +
          268  +    int iPrint;
          269  +    int iMatchto;
          270  +    int iBit0;
          271  +    int iLast;
          272  +
          273  +    int *aiStart = ctx.aiStart - ctx.iFirst;
          274  +    int *aiEnd = ctx.aiEnd - ctx.iFirst;
          275  +
          276  +    /* Ideally we want to start the snippet with token (ctx.iFirst + nShift).
          277  +    ** However, this is only possible if there are sufficient tokens within
          278  +    ** the column. This block sets variables i1 and i2 to the first and last
          279  +    ** input tokens to include in the snippet.  */
          280  +    if( (ctx.iFirst + nShift + nToken)<=ctx.iSeen ){
          281  +      i1 = ctx.iFirst + nShift;
          282  +      i2 = i1 + nToken - 1;
          283  +    }else{
          284  +      i2 = ctx.iSeen;
          285  +      i1 = ctx.iSeen - nToken + 1;
          286  +      assert( i1>=0 || ctx.iFirst==0 );
          287  +      if( i1<0 ) i1 = 0;
          288  +    }
          289  +
          290  +    /* If required, append the preceding ellipsis. */
          291  +    if( i1>0 ) sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%s", zEllip);
          292  +
          293  +    iLast = FTS5_POS2OFFSET(pIter->iLast);
          294  +    iPrint = i1;
          295  +    iMatchto = -1;
          296  +
          297  +    for(i=i1; i<=i2; i++){
          298  +
          299  +      /* Check if this is the first token of any phrase match. */
          300  +      int ip;
          301  +      for(ip=0; ip<pIter->nPhrase; ip++){
          302  +        SnippetPhrase *pPhrase = &pIter->aPhrase[ip];
          303  +        u64 m = (1 << (iLast - i - pPhrase->nToken + 1));
          304  +
          305  +        if( i<=iLast && (pPhrase->mask & m) ){
          306  +          if( iMatchto<0 ){
          307  +            sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s%s",
          308  +                aiStart[i] - aiStart[iPrint],
          309  +                &zCol[aiStart[iPrint]],
          310  +                zStart
          311  +            );
          312  +            iPrint = i;
          313  +          }
          314  +          if( i>iMatchto ) iMatchto = i + pPhrase->nToken - 1;
          315  +        }
          316  +      }
          317  +
          318  +      if( i==iMatchto ){
          319  +        sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s%s",
          320  +            aiEnd[i] - aiStart[iPrint],
          321  +            &zCol[aiStart[iPrint]],
          322  +            zFinal
          323  +        );
          324  +        iMatchto = -1;
          325  +        iPrint = i+1;
          326  +
          327  +        if( i<i2 ){
          328  +          sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s",
          329  +              aiStart[i+1] - aiEnd[i],
          330  +              &zCol[aiEnd[i]]
          331  +          );
          332  +        }
          333  +      }
          334  +    }
          335  +
          336  +    if( iPrint<=i2 ){
          337  +      sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s", 
          338  +          aiEnd[i2] - aiStart[iPrint], 
          339  +          &zCol[aiStart[iPrint]]
          340  +      );
          341  +      if( iMatchto>=0 ){
          342  +        sqlite3Fts5BufferAppendString(&rc, pBuf, zFinal);
          343  +      }
          344  +    }
          345  +
          346  +    /* If required, append the trailing ellipsis. */
          347  +    if( i2<ctx.iSeen ) sqlite3Fts5BufferAppendString(&rc, pBuf, zEllip);
          348  +  }
          349  +
          350  +  sqlite3_free(ctx.aiStart);
          351  +  return rc;
          352  +}
          353  +
          354  +/*
          355  +** A default snippet() implementation. This is compatible with the FTS3
          356  +** snippet() function.
          357  +*/
    16    358   static void fts5SnippetFunction(
          359  +  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
          360  +  Fts5Context *pFts,              /* First arg to pass to pApi functions */
          361  +  sqlite3_context *pCtx,          /* Context for returning result/error */
          362  +  int nVal,                       /* Number of values in apVal[] array */
          363  +  sqlite3_value **apVal           /* Array of trailing arguments */
          364  +){
          365  +  const char *zStart = "<b>";
          366  +  const char *zFinal = "</b>";
          367  +  const char *zEllip = "<b>...</b>";
          368  +  int nToken = -15;
          369  +  int nAbs;
          370  +  int nFrag;                      /* Number of fragments to return */
          371  +  int rc;
          372  +  SnippetIter *pIter = 0;
          373  +
          374  +  if( nVal>=1 ) zStart = (const char*)sqlite3_value_text(apVal[0]);
          375  +  if( nVal>=2 ) zFinal = (const char*)sqlite3_value_text(apVal[1]);
          376  +  if( nVal>=3 ) zEllip = (const char*)sqlite3_value_text(apVal[2]);
          377  +  if( nVal>=4 ){
          378  +    nToken = sqlite3_value_int(apVal[3]);
          379  +    if( nToken==0 ) nToken = -15;
          380  +  }
          381  +  nAbs = nToken * (nToken<0 ? -1 : 1);
          382  +
          383  +  rc = fts5SnippetIterNew(pApi, pFts, nAbs, &pIter);
          384  +  if( rc==SQLITE_OK ){
          385  +    Fts5Buffer buf;               /* Result buffer */
          386  +    int nBestScore = 0;           /* Score of best snippet found */
          387  +    int n;                        /* Size in bytes of the column the snippet is from */
          388  +    int i;                        /* Used to iterate through phrases */
          389  +
          390  +    for(fts5SnippetIterFirst(pIter); 
          391  +        pIter->iLast>=0; 
          392  +        fts5SnippetIterNext(pIter)
          393  +    ){
          394  +      if( pIter->nScore>nBestScore ) nBestScore = pIter->nScore;
          395  +    }
          396  +    for(fts5SnippetIterFirst(pIter); 
          397  +        pIter->iLast>=0; 
          398  +        fts5SnippetIterNext(pIter)
          399  +    ){
          400  +      if( pIter->nScore==nBestScore ) break;
          401  +    }
          402  +
          403  +    memset(&buf, 0, sizeof(Fts5Buffer));
          404  +    rc = fts5SnippetText(pApi, pFts, pIter, nAbs, zStart, zFinal, zEllip, &buf);
          405  +    if( rc==SQLITE_OK ){
          406  +      sqlite3_result_text(pCtx, (const char*)buf.p, buf.n, SQLITE_TRANSIENT);
          407  +    }
          408  +    sqlite3_free(buf.p);
          409  +  }
          410  +
          411  +  fts5SnippetIterFree(pIter);
          412  +  if( rc!=SQLITE_OK ){
          413  +    sqlite3_result_error_code(pCtx, rc);
          414  +  }
          415  +}
          416  +
          417  +static void fts5Bm25Function(
    17    418     const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
    18    419     Fts5Context *pFts,              /* First arg to pass to pApi functions */
    19    420     sqlite3_context *pCtx,          /* Context for returning result/error */
    20    421     int nVal,                       /* Number of values in apVal[] array */
    21    422     sqlite3_value **apVal           /* Array of trailing arguments */
    22    423   ){
    23    424     assert( 0 );
................................................................................
   142    543       Fts5Buffer s3;
   143    544       memset(&s3, 0, sizeof(s3));
   144    545   
   145    546   
   146    547       for(i=0; i<nPhrase; i++){
   147    548         Fts5Buffer s2;                  /* List of positions for phrase/column */
   148    549         int j = 0;
   149         -      int iOff = 0;
   150         -      int iCol = 0;
          550  +      i64 iPos = 0;
   151    551         int nElem = 0;
   152    552   
   153    553         memset(&s2, 0, sizeof(s2));
   154         -      while( 0==pApi->xPoslist(pFts, i, &j, &iCol, &iOff) ){
          554  +      while( 0==pApi->xPoslist(pFts, i, &j, &iPos) ){
          555  +        int iOff = FTS5_POS2OFFSET(iPos);
          556  +        int iCol = FTS5_POS2COLUMN(iPos);
   155    557           if( nElem!=0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s2, " ");
   156    558           sqlite3Fts5BufferAppendPrintf(&rc, &s2, "%d.%d", iCol, iOff);
   157    559           nElem++;
   158    560         }
   159    561   
   160    562         if( i!=0 ){
   161    563           sqlite3Fts5BufferAppendPrintf(&rc, &s3, " ");

Changes to ext/fts5/fts5_buffer.c.

   142    142     const u8 *a, int n,             /* Buffer containing poslist */
   143    143     int *pi,                        /* IN/OUT: Offset within a[] */
   144    144     i64 *piOff                      /* IN/OUT: Current offset */
   145    145   ){
   146    146     int i = *pi;
   147    147     if( i>=n ){
   148    148       /* EOF */
          149  +    *piOff = -1;
   149    150       return 1;  
   150    151     }else{
   151    152       i64 iOff = *piOff;
   152    153       int iVal;
   153    154       i += getVarint32(&a[i], iVal);
   154    155       if( iVal==1 ){
   155    156         i += getVarint32(&a[i], iVal);

Changes to ext/fts5/fts5_expr.c.

   385    385     Fts5NearTrimmer *a = aStatic;
   386    386   
   387    387     Fts5ExprPhrase **apPhrase = pNear->apPhrase;
   388    388   
   389    389     int i;
   390    390     int rc = SQLITE_OK;
   391    391     int bMatch;
   392         -  i64 iMax;
   393    392   
   394    393     assert( pNear->nPhrase>1 );
   395    394   
   396    395     /* If the aStatic[] array is not large enough, allocate a large array
   397    396     ** using sqlite3_malloc(). This approach could be improved upon. */
   398    397     if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){
   399    398       int nByte = sizeof(Fts5LookaheadReader) * pNear->nPhrase;

Changes to ext/fts5/fts5_storage.c.

   481    481     /* Write the averages record */
   482    482     if( rc==SQLITE_OK ){
   483    483       rc = fts5StorageSaveTotals(p);
   484    484     }
   485    485   
   486    486     return rc;
   487    487   }
          488  +
          489  +static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){
          490  +  Fts5Config *pConfig = p->pConfig;
          491  +  char *zSql;
          492  +  int rc;
          493  +
          494  +  zSql = sqlite3_mprintf("SELECT count(*) FROM %Q.'%q_%s'", 
          495  +      pConfig->zDb, pConfig->zName, zSuffix
          496  +  );
          497  +  if( zSql==0 ){
          498  +    rc = SQLITE_NOMEM;
          499  +  }else{
          500  +    sqlite3_stmt *pCnt = 0;
          501  +    rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0);
          502  +    if( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pCnt) ){
          503  +      *pnRow = sqlite3_column_int64(pCnt, 0);
          504  +    }
          505  +    rc = sqlite3_finalize(pCnt);
          506  +  }
          507  +
          508  +  sqlite3_free(zSql);
          509  +  return rc;
          510  +}
   488    511   
   489    512   /*
   490    513   ** Context object used by sqlite3Fts5StorageIntegrity().
   491    514   */
   492    515   typedef struct Fts5IntegrityCtx Fts5IntegrityCtx;
   493    516   struct Fts5IntegrityCtx {
   494    517     i64 iRowid;
   495    518     int iCol;
          519  +  int szCol;
   496    520     u64 cksum;
   497    521     Fts5Config *pConfig;
   498    522   };
   499    523   
   500    524   /*
   501    525   ** Tokenization callback used by integrity check.
   502    526   */
................................................................................
   508    532     int iEnd,                       /* End offset of token */
   509    533     int iPos                        /* Position offset of token */
   510    534   ){
   511    535     Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
   512    536     pCtx->cksum ^= sqlite3Fts5IndexCksum(
   513    537         pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
   514    538     );
          539  +  pCtx->szCol = iPos+1;
   515    540     return SQLITE_OK;
   516    541   }
   517    542   
   518    543   /*
   519    544   ** Check that the contents of the FTS index match that of the %_content
   520    545   ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return
   521    546   ** some other SQLite error code if an error occurs while attempting to
   522    547   ** determine this.
   523    548   */
   524    549   int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
   525    550     Fts5Config *pConfig = p->pConfig;
   526    551     int rc;                         /* Return code */
          552  +  int *aColSize;                  /* Array of size pConfig->nCol */
          553  +  i64 *aTotalSize;                /* Array of size pConfig->nCol */
   527    554     Fts5IntegrityCtx ctx;
   528    555     sqlite3_stmt *pScan;
   529    556   
   530    557     memset(&ctx, 0, sizeof(Fts5IntegrityCtx));
   531    558     ctx.pConfig = p->pConfig;
          559  +  aTotalSize = (i64*)sqlite3_malloc(pConfig->nCol * (sizeof(int)+sizeof(i64)));
          560  +  if( !aTotalSize ) return SQLITE_NOMEM;
          561  +  aColSize = (int*)&aTotalSize[pConfig->nCol];
          562  +  memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol);
   532    563   
   533    564     /* Generate the expected index checksum based on the contents of the
   534    565     ** %_content table. This block stores the checksum in ctx.cksum. */
   535    566     rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN_ASC, &pScan);
   536    567     if( rc==SQLITE_OK ){
   537    568       int rc2;
   538    569       while( SQLITE_ROW==sqlite3_step(pScan) ){
   539    570         int i;
   540    571         ctx.iRowid = sqlite3_column_int64(pScan, 0);
          572  +      ctx.szCol = 0;
          573  +      rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
   541    574         for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
   542    575           ctx.iCol = i;
   543    576           rc = sqlite3Fts5Tokenize(
   544    577               pConfig, 
   545    578               (const char*)sqlite3_column_text(pScan, i+1),
   546    579               sqlite3_column_bytes(pScan, i+1),
   547    580               (void*)&ctx,
   548    581               fts5StorageIntegrityCallback
   549    582           );
          583  +        if( ctx.szCol!=aColSize[i] ) rc = SQLITE_CORRUPT_VTAB;
          584  +        aTotalSize[i] += ctx.szCol;
   550    585         }
          586  +      if( rc!=SQLITE_OK ) break;
   551    587       }
   552    588       rc2 = sqlite3_reset(pScan);
   553    589       if( rc==SQLITE_OK ) rc = rc2;
   554    590     }
          591  +
          592  +  /* Test that the "totals" (sometimes called "averages") record looks Ok */
          593  +  if( rc==SQLITE_OK ){
          594  +    int i;
          595  +    rc = fts5StorageLoadTotals(p);
          596  +    for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
          597  +      if( p->aTotalSize[i]!=aTotalSize[i] ) rc = SQLITE_CORRUPT_VTAB;
          598  +    }
          599  +  }
          600  +
          601  +  /* Check that the %_docsize and %_content tables contain the expected
          602  +  ** number of rows.  */
          603  +  if( rc==SQLITE_OK ){
          604  +    i64 nRow;
          605  +    rc = fts5StorageCount(p, "content", &nRow);
          606  +    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
          607  +  }
          608  +  if( rc==SQLITE_OK ){
          609  +    i64 nRow;
          610  +    rc = fts5StorageCount(p, "docsize", &nRow);
          611  +    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
          612  +  }
   555    613   
   556    614     /* Pass the expected checksum down to the FTS index module. It will
   557    615     ** verify, amongst other things, that it matches the checksum generated by
   558    616     ** inspecting the index itself.  */
   559    617     if( rc==SQLITE_OK ){
   560    618       rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum);
   561    619     }
   562    620   
          621  +  sqlite3_free(aTotalSize);
   563    622     return rc;
   564    623   }
   565    624   
   566    625   /*
   567    626   ** Obtain an SQLite statement handle that may be used to read data from the
   568    627   ** %_content table.
   569    628   */

Added test/fts5af.test.

            1  +# 2014 June 17
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +# This file implements regression tests for SQLite library.  The
           12  +# focus of this script is testing the FTS5 module.
           13  +# 
           14  +# More specifically, the tests in this file focus on the built-in 
           15  +# snippet() function.
           16  +#
           17  +
           18  +set testdir [file dirname $argv0]
           19  +source $testdir/tester.tcl
           20  +set testprefix fts5af
           21  +
           22  +# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
           23  +ifcapable !fts3 {
           24  +  finish_test
           25  +  return
           26  +}
           27  +
           28  +
           29  +do_execsql_test 1.0 {
           30  +  CREATE VIRTUAL TABLE t1 USING fts5(x, y);
           31  +}
           32  +
           33  +
           34  +foreach {tn doc res} {
           35  +
           36  +  1.1 {X o o o o o o} {[X] o o o o o o}
           37  +  1.2 {o X o o o o o} {o [X] o o o o o}
           38  +  1.3 {o o X o o o o} {o o [X] o o o o}
           39  +  1.4 {o o o X o o o} {o o o [X] o o o}
           40  +  1.5 {o o o o X o o} {o o o o [X] o o}
           41  +  1.6 {o o o o o X o} {o o o o o [X] o}
           42  +  1.7 {o o o o o o X} {o o o o o o [X]}
           43  +
           44  +  2.1 {X o o o o o o o} {[X] o o o o o o...}
           45  +  2.2 {o X o o o o o o} {o [X] o o o o o...}
           46  +  2.3 {o o X o o o o o} {o o [X] o o o o...}
           47  +  2.4 {o o o X o o o o} {o o o [X] o o o...}
           48  +  2.5 {o o o o X o o o} {...o o o [X] o o o}
           49  +  2.6 {o o o o o X o o} {...o o o o [X] o o}
           50  +  2.7 {o o o o o o X o} {...o o o o o [X] o}
           51  +  2.8 {o o o o o o o X} {...o o o o o o [X]}
           52  +
           53  +  3.1 {X o o o o o o o o} {[X] o o o o o o...}
           54  +  3.2 {o X o o o o o o o} {o [X] o o o o o...}
           55  +  3.3 {o o X o o o o o o} {o o [X] o o o o...}
           56  +  3.4 {o o o X o o o o o} {o o o [X] o o o...}
           57  +  3.5 {o o o o X o o o o} {...o o o [X] o o o...}
           58  +  3.6 {o o o o o X o o o} {...o o o [X] o o o}
           59  +  3.7 {o o o o o o X o o} {...o o o o [X] o o}
           60  +  3.8 {o o o o o o o X o} {...o o o o o [X] o}
           61  +  3.9 {o o o o o o o o X} {...o o o o o o [X]}
           62  +
           63  +  4.1 {X o o o o o X o o} {[X] o o o o o [X]...}
           64  +  4.2 {o X o o o o o X o} {...[X] o o o o o [X]...}
           65  +  4.3 {o o X o o o o o X} {...[X] o o o o o [X]}
           66  +
           67  +  5.1 {X o o o o X o o o} {[X] o o o o [X] o...}
           68  +  5.2 {o X o o o o X o o} {...[X] o o o o [X] o...}
           69  +  5.3 {o o X o o o o X o} {...[X] o o o o [X] o}
           70  +  5.4 {o o o X o o o o X} {...o [X] o o o o [X]}
           71  +
           72  +  6.1 {X o o o X o o o} {[X] o o o [X] o o...}
           73  +  6.2 {o X o o o X o o o} {o [X] o o o [X] o...}
           74  +  6.3 {o o X o o o X o o} {...o [X] o o o [X] o...}
           75  +  6.4 {o o o X o o o X o} {...o [X] o o o [X] o}
           76  +  6.5 {o o o o X o o o X} {...o o [X] o o o [X]}
           77  +
           78  +  7.1 {X o o X o o o o o} {[X] o o [X] o o o...}
           79  +  7.2 {o X o o X o o o o} {o [X] o o [X] o o...}
           80  +  7.3 {o o X o o X o o o} {...o [X] o o [X] o o...}
           81  +  7.4 {o o o X o o X o o} {...o [X] o o [X] o o}
           82  +  7.5 {o o o o X o o X o} {...o o [X] o o [X] o}
           83  +  7.6 {o o o o o X o o X} {...o o o [X] o o [X]}
           84  +} {
           85  +  do_execsql_test 1.$tn.1 {
           86  +    DELETE FROM t1;
           87  +    INSERT INTO t1 VALUES($doc, NULL);
           88  +    SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X';
           89  +  } [list $res]
           90  +
           91  +  do_execsql_test 1.$tn.2 {
           92  +    DELETE FROM t1;
           93  +    INSERT INTO t1 VALUES(NULL, $doc);
           94  +    SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X';
           95  +  } [list $res]
           96  +}
           97  +
           98  +foreach {tn doc res} {
           99  +  1.1 {X Y o o o o o} {[X Y] o o o o o}
          100  +  1.2 {o X Y o o o o} {o [X Y] o o o o}
          101  +  1.3 {o o X Y o o o} {o o [X Y] o o o}
          102  +  1.4 {o o o X Y o o} {o o o [X Y] o o}
          103  +  1.5 {o o o o X Y o} {o o o o [X Y] o}
          104  +  1.6 {o o o o o X Y} {o o o o o [X Y]}
          105  +
          106  +  2.1 {X Y o o o o o o} {[X Y] o o o o o...}
          107  +  2.2 {o X Y o o o o o} {o [X Y] o o o o...}
          108  +  2.3 {o o X Y o o o o} {o o [X Y] o o o...}
          109  +  2.4 {o o o X Y o o o} {...o o [X Y] o o o}
          110  +  2.5 {o o o o X Y o o} {...o o o [X Y] o o}
          111  +  2.6 {o o o o o X Y o} {...o o o o [X Y] o}
          112  +  2.7 {o o o o o o X Y} {...o o o o o [X Y]}
          113  +
          114  +  3.1 {X Y o o o o o o o} {[X Y] o o o o o...}
          115  +  3.2 {o X Y o o o o o o} {o [X Y] o o o o...}
          116  +  3.3 {o o X Y o o o o o} {o o [X Y] o o o...}
          117  +  3.4 {o o o X Y o o o o} {...o o [X Y] o o o...}
          118  +  3.5 {o o o o X Y o o o} {...o o [X Y] o o o}
          119  +  3.6 {o o o o o X Y o o} {...o o o [X Y] o o}
          120  +  3.7 {o o o o o o X Y o} {...o o o o [X Y] o}
          121  +  3.8 {o o o o o o o X Y} {...o o o o o [X Y]}
          122  +
          123  +} {
          124  +  do_execsql_test 2.$tn.1 {
          125  +    DELETE FROM t1;
          126  +    INSERT INTO t1 VALUES($doc, NULL);
          127  +    SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X+Y';
          128  +  } [list $res]
          129  +
          130  +  do_execsql_test 2.$tn.2 {
          131  +    DELETE FROM t1;
          132  +    INSERT INTO t1 VALUES(NULL, $doc);
          133  +    SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X+Y';
          134  +  } [list $res]
          135  +}
          136  +
          137  +finish_test
          138  +

Changes to test/permutations.test.

   222    222     fts4growth.test fts4growth2.test
   223    223   }
   224    224   
   225    225   test_suite "fts5" -prefix "" -description {
   226    226     All FTS5 tests.
   227    227   } -files {
   228    228     fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test
          229  +  fts5af.test
   229    230   }
   230    231   
   231    232   test_suite "nofaultsim" -prefix "" -description {
   232    233     "Very" quick test suite. Runs in less than 5 minutes on a workstation. 
   233    234     This test suite is the same as the "quick" tests, except that some files
   234    235     that test malloc and IO errors are omitted.
   235    236   } -files [