Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add an implementation of snippet() and its associated mi apis to fts5.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | matchinfo
Files: files | file ages | folders
SHA1: 8d94102cd33b75d9c1f84bf5b8be5065e80e51cf
User & Date: dan 2013-01-07 19:52:49.064
Context
2013-01-08
11:45
Fix an fts5 problem to do with initializing the global size record. Also have the checksum routine ignore size records when calculating the index checksum. check-in: e7b52edf68 user: dan tags: matchinfo
2013-01-07
19:52
Add an implementation of snippet() and its associated mi apis to fts5. check-in: 8d94102cd3 user: dan tags: matchinfo
2013-01-04
18:37
Allow an fts5 tokenizer to split a single document into multiple streams (i.e. sub-fields within a single column value). Modify the matchinfo APIs so that a ranking function may handle streams and/or columns separately or otherwise. check-in: f3ac136843 user: dan tags: matchinfo
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/fts5.c.
141
142
143
144
145
146
147

148
149
150
151
152
153
154

/*
** Structure types used by this module.
*/
typedef struct Fts5Expr Fts5Expr;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5List Fts5List;

typedef struct Fts5Parser Fts5Parser;
typedef struct Fts5ParserToken Fts5ParserToken;
typedef struct Fts5Phrase Fts5Phrase;
typedef struct Fts5Prefix Fts5Prefix;
typedef struct Fts5Size Fts5Size;
typedef struct Fts5Str Fts5Str;
typedef struct Fts5Token Fts5Token;







>







141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

/*
** Structure types used by this module.
*/
typedef struct Fts5Expr Fts5Expr;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5List Fts5List;
typedef struct Fts5MatchIter Fts5MatchIter;
typedef struct Fts5Parser Fts5Parser;
typedef struct Fts5ParserToken Fts5ParserToken;
typedef struct Fts5Phrase Fts5Phrase;
typedef struct Fts5Prefix Fts5Prefix;
typedef struct Fts5Size Fts5Size;
typedef struct Fts5Str Fts5Str;
typedef struct Fts5Token Fts5Token;
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254

255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
  Fts5ExprNode *pLeft;
  Fts5ExprNode *pRight;
  const u8 *aPk;                  /* Primary key of current entry (or null) */
  int nPk;                        /* Size of aPk[] in bytes */
};

struct Fts5Expr {
  Fts5ExprNode *pRoot;

  int nPhrase;                    /* Number of Fts5Str objects in query */
  Fts5Str **apPhrase;             /* All Fts5Str objects */
};

/*
** FTS5 specific cursor data.
*/
struct Fts5Cursor {
  sqlite4 *db;
  Fts5Info *pInfo;
  Fts5Expr *pExpr;                /* MATCH expression for this cursor */
  char *zExpr;                    /* Full text of MATCH expression */
  KVByteArray *aKey;              /* Buffer for primary key */
  int nKeyAlloc;                  /* Bytes allocated at aKey[] */

  KVCursor *pCsr;                 /* Cursor used to retrive values */
  Mem *aMem;                      /* Array of column values */


  Fts5Size *pSz;                  /* Local size data */
  Fts5Size *pGlobal;              /* Global size data */
  i64 nGlobal;                    /* Total number of rows in table */
  int *anRow;

#if 1
  i64 *aGlobal;

  /* Size of each column of current row (in tokens). */
  int bSzValid;
  int *aSz;
#endif
};

/*
** A deserialized 'size record' (see above).
*/
struct Fts5Size {
  int nCol;                       /* Number of columns in indexed table */







|
<

















>






<
<
|
<
<
<
<







230
231
232
233
234
235
236
237

238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261


262




263
264
265
266
267
268
269
  Fts5ExprNode *pLeft;
  Fts5ExprNode *pRight;
  const u8 *aPk;                  /* Primary key of current entry (or null) */
  int nPk;                        /* Size of aPk[] in bytes */
};

struct Fts5Expr {
  Fts5ExprNode *pRoot;            /* Root node of expression */

  int nPhrase;                    /* Number of Fts5Str objects in query */
  Fts5Str **apPhrase;             /* All Fts5Str objects */
};

/*
** FTS5 specific cursor data.
**
** The aMem[]/bMemValid pair is a per-row cache: sqlite4Fts5Next() clears
** bMemValid and sqlite4_mi_column_value() sets it after decoding the
** current row into aMem[].
*/
struct Fts5Cursor {
  sqlite4 *db;                    /* Handle passed to the sqlite4DbXXX() allocator calls */
  Fts5Info *pInfo;                /* Description of the FTS index (see struct Fts5Info) */
  Fts5Expr *pExpr;                /* MATCH expression for this cursor */
  char *zExpr;                    /* Full text of MATCH expression */
  KVByteArray *aKey;              /* Buffer for primary key */
  int nKeyAlloc;                  /* Bytes allocated at aKey[] */

  KVCursor *pCsr;                 /* Cursor used to retrieve values */
  Mem *aMem;                      /* Array of column values */
  int bMemValid;                  /* True if contents of aMem[] are valid */

  Fts5Size *pSz;                  /* Local size data */
  Fts5Size *pGlobal;              /* Global size data */
  i64 nGlobal;                    /* Total number of rows in table */
  int *anRow;                     /* NOTE(review): undocumented; freed by sqlite4Fts5Close() */



  Fts5MatchIter *pIter;           /* Used by mi_match_detail() */




};

/*
** A deserialized 'size record' (see above).
*/
struct Fts5Size {
  int nCol;                       /* Number of columns in indexed table */
286
287
288
289
290
291
292











293
294
295
296
297
298
299
  int iList;

  /* The current entry */
  int iCol;
  int iStream;
  int iOff;
};












/*
** Return true for EOF, or false if the next entry is valid.
*/
static int fts5InstanceListNext(InstanceList *p){
  int i = p->iList;
  int bRet = 1;







>
>
>
>
>
>
>
>
>
>
>







281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
  int iList;

  /* The current entry */
  int iCol;
  int iStream;
  int iOff;
};

/*
** An instance of this structure is used by the sqlite4_mi_match_detail()
** API to iterate through matches. 
**
** sqlite4_mi_match_detail() allocates the object and the aList[] array in a
** single allocation, with aList pointing at the memory that immediately
** follows the structure. fts5IterSetCurrent() maintains iCurrent, setting
** it to -1 once every entry of aList[] is at EOF.
*/
struct Fts5MatchIter {
  int bValid;                     /* True if aList[] is current row */
  int iCurrent;                   /* Current index in aList[] (or -1) */
  int iMatch;                     /* Current iMatch value */
  InstanceList *aList;            /* One iterator for each phrase in expr */
};

/*
** Return true for EOF, or false if the next entry is valid.
*/
static int fts5InstanceListNext(InstanceList *p){
  int i = p->iList;
  int bRet = 1;
777
778
779
780
781
782
783

784
785
786
787
788
789
790
    sqlite4DbFree(db, pNode);
  }
}

static void fts5ExpressionFree(sqlite4 *db, Fts5Expr *pExpr){
  if( pExpr ){
    fts5FreeExprNode(db, pExpr->pRoot);

    sqlite4DbFree(db, pExpr);
  }
}

typedef struct ExprHier ExprHier;
struct ExprHier {
  Fts5ExprNode **ppNode;







>







783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
    sqlite4DbFree(db, pNode);
  }
}

/*
** Release an Fts5Expr object. The root node is handed to fts5FreeExprNode(),
** then the apPhrase[] array and the Fts5Expr structure itself are returned
** to the allocator. Passing a NULL pExpr is a harmless no-op.
*/
static void fts5ExpressionFree(sqlite4 *db, Fts5Expr *pExpr){
  if( pExpr==0 ) return;

  fts5FreeExprNode(db, pExpr->pRoot);
  sqlite4DbFree(db, pExpr->apPhrase);
  sqlite4DbFree(db, pExpr);
}

typedef struct ExprHier ExprHier;
struct ExprHier {
  Fts5ExprNode **ppNode;
833
834
835
836
837
838
839















840
841
842
843
844
845
846
  *pp = pNode;
  (*paHier)[*pnHier].ppNode = &pNode->pRight;
  (*paHier)[*pnHier].nOpen = 0;
  (*pnHier)++;

  return SQLITE4_OK;
}
















static int fts5ParseExpression(
  sqlite4 *db,                    /* Database handle */
  Fts5Tokenizer *pTokenizer,      /* Tokenizer module */
  sqlite4_tokenizer *p,           /* Tokenizer instance */
  int iRoot,                      /* Root page number of FTS index */
  char **azCol,                   /* Array of column names (nul-term'd) */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
  *pp = pNode;
  (*paHier)[*pnHier].ppNode = &pNode->pRight;
  (*paHier)[*pnHier].nOpen = 0;
  (*pnHier)++;

  return SQLITE4_OK;
}

/*
** Walk the expression tree rooted at p. For every TOKEN_PRIMITIVE node, a
** pointer to each element of the node's pPhrase->aStr[] array is written to
** the output cursor *papStr, which is advanced by one slot per pointer
** stored. A node's own strings are emitted before those of its pLeft
** subtree, which in turn precede those of its pRight subtree.
**
** The caller is responsible for providing an output array with enough
** slots; no bounds checking is done here.
*/
static void fts5FindStrings(Fts5ExprNode *p, Fts5Str ***papStr){
  if( p==0 ) return;

  if( p->eType==TOKEN_PRIMITIVE ){
    int iStr = 0;
    while( iStr<p->pPhrase->nStr ){
      Fts5Str **ppOut = *papStr;
      *ppOut = &p->pPhrase->aStr[iStr];
      *papStr = ppOut + 1;
      iStr++;
    }
  }

  fts5FindStrings(p->pLeft, papStr);
  fts5FindStrings(p->pRight, papStr);
}

static int fts5ParseExpression(
  sqlite4 *db,                    /* Database handle */
  Fts5Tokenizer *pTokenizer,      /* Tokenizer module */
  sqlite4_tokenizer *p,           /* Tokenizer instance */
  int iRoot,                      /* Root page number of FTS index */
  char **azCol,                   /* Array of column names (nul-term'd) */
972
973
974
975
976
977
978











979
980
981
982
983
984
985
986

987
988
989
990
991
992
993

  if( rc==SQLITE4_OK && *aHier[nHier-1].ppNode==0 ){
    rc = SQLITE4_ERROR;
  }
  for(i=0; rc==SQLITE4_OK && i<nHier; i++){
    if( aHier[i].nOpen>0 ) rc = SQLITE4_ERROR;
  }












  if( rc!=SQLITE4_OK ){
    fts5ExpressionFree(db, pExpr);
    *pzErr = sParse.zErr;
  }else{
    pExpr->nPhrase = nStr;
    *ppExpr = pExpr;
  }

  sqlite4DbFree(db, aHier);
  return rc;
}

/*
** Search for the Fts5Tokenizer object named zName. Return a pointer to it
** if it exists, or NULL otherwise.







>
>
>
>
>
>
>
>
>
>
>




<
<
|

>







994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015


1016
1017
1018
1019
1020
1021
1022
1023
1024
1025

  if( rc==SQLITE4_OK && *aHier[nHier-1].ppNode==0 ){
    rc = SQLITE4_ERROR;
  }
  for(i=0; rc==SQLITE4_OK && i<nHier; i++){
    if( aHier[i].nOpen>0 ) rc = SQLITE4_ERROR;
  }

  if( rc==SQLITE4_OK ){
    pExpr->nPhrase = nStr;
    pExpr->apPhrase = (Fts5Str**)sqlite4DbMallocZero(db, sizeof(Fts5Str*)*nStr);
    if( pExpr->apPhrase==0 ){
      rc = SQLITE4_NOMEM;
    }else{
      Fts5Str **a = pExpr->apPhrase;
      fts5FindStrings(pExpr->pRoot, &a);
    }
  }

  if( rc!=SQLITE4_OK ){
    fts5ExpressionFree(db, pExpr);
    *pzErr = sParse.zErr;


    pExpr = 0;
  }
  *ppExpr = pExpr;
  sqlite4DbFree(db, aHier);
  return rc;
}

/*
** Search for the Fts5Tokenizer object named zName. Return a pointer to it
** if it exists, or NULL otherwise.
1595
1596
1597
1598
1599
1600
1601

1602
1603
1604
1605
1606
1607
1608
    nByte += nCol * sizeof(char *);
  }

  pInfo = sqlite4DbMallocZero(db, nByte);
  if( pInfo ){
    pInfo->iDb = sqlite4SchemaToIndex(db, pIdx->pSchema);
    pInfo->iRoot = pIdx->tnum;

    pInfo->nCol = pIdx->pTable->nCol;
    fts5TokenizerCreate(pParse, pIdx->pFts, &pInfo->pTokenizer, &pInfo->p);

    if( pInfo->p==0 ){
      assert( pParse->nErr );
      sqlite4DbFree(db, pInfo);
      pInfo = 0;







>







1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
    nByte += nCol * sizeof(char *);
  }

  pInfo = sqlite4DbMallocZero(db, nByte);
  if( pInfo ){
    pInfo->iDb = sqlite4SchemaToIndex(db, pIdx->pSchema);
    pInfo->iRoot = pIdx->tnum;
    sqlite4FindPrimaryKey(pIdx->pTable, &pInfo->iTbl);
    pInfo->nCol = pIdx->pTable->nCol;
    fts5TokenizerCreate(pParse, pIdx->pFts, &pInfo->pTokenizer, &pInfo->p);

    if( pInfo->p==0 ){
      assert( pParse->nErr );
      sqlite4DbFree(db, pInfo);
      pInfo = 0;
2201
2202
2203
2204
2205
2206
2207

2208
2209
2210
2211
2212
2213
2214
static int fts5OpenCursors(sqlite4 *db, Fts5Info *pInfo, Fts5Cursor *pCsr){
  return fts5OpenExprCursors(db, pInfo, pCsr->pExpr->pRoot);
}

void sqlite4Fts5Close(sqlite4 *db, Fts5Cursor *pCsr){
  if( pCsr ){
    fts5ExpressionFree(db, pCsr->pExpr);

    sqlite4DbFree(db, pCsr->aKey);
    sqlite4DbFree(db, pCsr->anRow);
    sqlite4DbFree(db, pCsr);
  }
}

static int fts5TokenAdvanceToMatch(







>







2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
static int fts5OpenCursors(sqlite4 *db, Fts5Info *pInfo, Fts5Cursor *pCsr){
  return fts5OpenExprCursors(db, pInfo, pCsr->pExpr->pRoot);
}

/*
** Close an FTS5 cursor. This releases the MATCH expression, the match
** iterator (pIter), the primary key buffer (aKey), the anRow[] array and
** finally the cursor object itself. A NULL pCsr is a no-op.
**
** NOTE(review): pCsr->aMem and pCsr->pCsr are populated lazily by
** sqlite4_mi_column_value() but neither is released here. Confirm that
** they are cleaned up elsewhere - otherwise they leak.
*/
void sqlite4Fts5Close(sqlite4 *db, Fts5Cursor *pCsr){
  if( pCsr==0 ) return;

  fts5ExpressionFree(db, pCsr->pExpr);
  sqlite4DbFree(db, pCsr->pIter);
  sqlite4DbFree(db, pCsr->aKey);
  sqlite4DbFree(db, pCsr->anRow);
  sqlite4DbFree(db, pCsr);
}

static int fts5TokenAdvanceToMatch(
2519
2520
2521
2522
2523
2524
2525

2526
2527
2528
2529
2530
2531
2532
  assert( rc!=SQLITE4_NOTFOUND );
  return rc;
}

int sqlite4Fts5Next(Fts5Cursor *pCsr){
  sqlite4DbFree(pCsr->db, pCsr->pSz);
  pCsr->pSz = 0;

  return fts5ExprAdvance(pCsr->db, pCsr->pExpr->pRoot, 0);
}

int sqlite4Fts5Open(
  sqlite4 *db,                    /* Database handle */
  Fts5Info *pInfo,                /* Index description */
  const char *zMatch,             /* Match expression */







>







2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
  assert( rc!=SQLITE4_NOTFOUND );
  return rc;
}

/*
** Advance the cursor. The per-row state cached on the cursor (the local
** size record pSz and the decoded column values flagged by bMemValid) is
** discarded first, then the root of the MATCH expression is advanced. The
** value returned is whatever fts5ExprAdvance() returns.
*/
int sqlite4Fts5Next(Fts5Cursor *pCsr){
  sqlite4 *db = pCsr->db;

  /* Anything cached for the row being left behind is now stale. */
  pCsr->bMemValid = 0;
  sqlite4DbFree(db, pCsr->pSz);
  pCsr->pSz = 0;

  return fts5ExprAdvance(db, pCsr->pExpr->pRoot, 0);
}

int sqlite4Fts5Open(
  sqlite4 *db,                    /* Database handle */
  Fts5Info *pInfo,                /* Index description */
  const char *zMatch,             /* Match expression */
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
*/
int sqlite4Fts5Valid(Fts5Cursor *pCsr){
  return( pCsr->pExpr->pRoot->aPk!=0 );
}

int sqlite4Fts5Pk(
  Fts5Cursor *pCsr, 
  int iTbl, 
  KVByteArray **paKey, 
  KVSize *pnKey
){
  int i;
  int nReq;
  const u8 *aPk;
  int nPk;








|
|







2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
*/
int sqlite4Fts5Valid(Fts5Cursor *pCsr){
  return( pCsr->pExpr->pRoot->aPk!=0 );
}

int sqlite4Fts5Pk(
  Fts5Cursor *pCsr, 
  int iTbl,
  KVByteArray **paKey,
  KVSize *pnKey
){
  int i;
  int nReq;
  const u8 *aPk;
  int nPk;

2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706


2707









































































2708



2709
2710
2711
2712
2713
2714
2715
  }else{
    rc = fts5CsrLoadGlobal(pCsr);
    if( rc==SQLITE4_OK ) *pn = pCsr->nGlobal;
  }
  return rc;
}


int sqlite4_mi_column_value(
  sqlite4_context *pCtx, 
  int iCol, 
  sqlite4_value **ppVal
){
  int rc = SQLITE4_OK;
  if( pCtx->pFts ){


  }else{









































































    rc = SQLITE4_MISUSE;



  }
  return rc;
}

static Fts5Str *fts5FindStr(Fts5ExprNode *p, int *piStr){
  Fts5Str *pRet = 0;
  if( p->eType==TOKEN_PRIMITIVE ){







<






|
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>







2727
2728
2729
2730
2731
2732
2733

2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
  }else{
    rc = fts5CsrLoadGlobal(pCsr);
    if( rc==SQLITE4_OK ) *pn = pCsr->nGlobal;
  }
  return rc;
}


/*
** Set *ppVal to point to the value stored in column iCol of the row the
** FTS cursor currently points to.
**
** Column values are decoded lazily. The first call for a given row seeks
** a KV cursor to the row's primary key, decodes all pInfo->nCol columns
** into pCsr->aMem[] and sets pCsr->bMemValid. Later calls for the same row
** are served from aMem[]. sqlite4Fts5Next() clears bMemValid.
**
** Return values:
**
**   SQLITE4_OK           *ppVal has been set.
**   SQLITE4_MISUSE       pCtx has no FTS cursor attached, or iCol is not
**                        in the range 0..nCol-1.
**   SQLITE4_NOMEM        The aMem[] array could not be allocated.
**   SQLITE4_CORRUPT_BKPT The row identified by the index entry was not
**                        found in the table.
**
** Any other error returned by a KV-store or decoder call is passed
** through unchanged. *ppVal is not written on failure.
*/
int sqlite4_mi_column_value(
  sqlite4_context *pCtx, 
  int iCol, 
  sqlite4_value **ppVal
){
  int rc = SQLITE4_OK;
  Fts5Cursor *pCsr = pCtx->pFts;
  if( pCsr==0 ){
    rc = SQLITE4_MISUSE;
  }else if( iCol<0 || iCol>=pCsr->pInfo->nCol ){
    /* aMem[] holds exactly pInfo->nCol entries. Reject any other index
    ** rather than handing back a pointer outside the array. */
    rc = SQLITE4_MISUSE;
  }else{
    if( pCsr->bMemValid==0 ){
      sqlite4 *db = pCsr->db;

      Fts5Info *pInfo = pCsr->pInfo;
      if( pCsr->aMem==0 ){
        int nByte = sizeof(Mem) * pInfo->nCol;
        pCsr->aMem = (Mem *)sqlite4DbMallocZero(db, nByte);
        if( pCsr->aMem==0 ){
          rc = SQLITE4_NOMEM;
        }else{
          int i;
          for(i=0; i<pInfo->nCol; i++){
            pCsr->aMem[i].db = db;
          }
        }
      }

      /* The KV cursor is opened once and then reused for every row. */
      if( pCsr->pCsr==0 && rc==SQLITE4_OK ){
        KVStore *pStore = db->aDb[pInfo->iDb].pKV;
        rc = sqlite4KVStoreOpenCursor(pStore, &pCsr->pCsr);
      }

      if( rc==SQLITE4_OK ){
        u8 *aKey = 0; int nKey;     /* Primary key for current row */
        const u8 *aData; int nData; /* Data record for current row */

        rc = sqlite4Fts5Pk(pCsr, pInfo->iTbl, &aKey, &nKey);
        if( rc==SQLITE4_OK ){
          rc = sqlite4KVCursorSeek(pCsr->pCsr, aKey, nKey, 0);
          if( rc==SQLITE4_NOTFOUND ){
            /* The index refers to a row that is missing from the table. */
            rc = SQLITE4_CORRUPT_BKPT;
          }
        }

        if( rc==SQLITE4_OK ){
          rc = sqlite4KVCursorData(pCsr->pCsr, 0, -1, &aData, &nData);
        }

        if( rc==SQLITE4_OK ){
          int i;
          ValueDecoder *pCodec = 0;   /* The decoder object */

          rc = sqlite4VdbeCreateDecoder(db, aData, nData, pInfo->nCol, &pCodec);
          for(i=0; rc==SQLITE4_OK && i<pInfo->nCol; i++){
            rc = sqlite4VdbeDecodeValue(pCodec, i, 0, &pCsr->aMem[i]);
          }
          sqlite4VdbeDestroyDecoder(pCodec);
        }

        if( rc==SQLITE4_OK ) pCsr->bMemValid = 1;
      }
    }

    if( rc==SQLITE4_OK ){
      assert( pCsr->bMemValid );
      *ppVal = &pCsr->aMem[iCol];
    }
  }

  return rc;
}

/*
** Tokenize the nText bytes of text at zText using the tokenizer configured
** for the FTS index that pCtx belongs to. The callback x and its context
** pointer p are handed straight through to the tokenizer's xTokenize()
** method, whose return value becomes the return value of this function.
**
** SQLITE4_MISUSE is returned if pCtx has no FTS cursor attached.
*/
int sqlite4_mi_tokenize(
  sqlite4_context *pCtx,
  const char *zText,
  int nText,
  void *p,
  int(*x)(void *, int, int, const char *, int, int, int)
){
  Fts5Cursor *pCsr = pCtx->pFts;
  Fts5Info *pInfo;

  if( pCsr==0 ) return SQLITE4_MISUSE;

  pInfo = pCsr->pInfo;
  return pInfo->pTokenizer->xTokenize(p, pInfo->p, zText, nText, x);
}

static Fts5Str *fts5FindStr(Fts5ExprNode *p, int *piStr){
  Fts5Str *pRet = 0;
  if( p->eType==TOKEN_PRIMITIVE ){
2893
2894
2895
2896
2897
2898
2899

















































































2900
2901
2902
2903
2904
2905
2906
        for(i=0; i<nStream; i++) nRow += aRow[nStream*iC + iS];
      }else if( iC<nCol && iS<nStream ){
        nRow = aRow[iC * nStream + iS];
      }

      *pn = nRow;
    }

















































































  }
  return rc;
}

/**************************************************************************
***************************************************************************
** Below this point is test code.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
        for(i=0; i<nStream; i++) nRow += aRow[nStream*iC + iS];
      }else if( iC<nCol && iS<nStream ){
        nRow = aRow[iC * nStream + iS];
      }

      *pn = nRow;
    }
  }
  return rc;
}

/*
** Set pIter->iCurrent to the index of the entry in pIter->aList[0..nList-1]
** whose current instance comes first, comparing by column number and then
** by token offset. Lists that are at EOF are ignored. Where two lists are
** positioned on the same (column, offset) pair the one with the lower index
** wins. If every list is at EOF, iCurrent is set to -1.
*/
static void fts5IterSetCurrent(Fts5MatchIter *pIter, int nList){
  int iBest = -1;                 /* Index of best candidate so far */
  int i;

  for(i=0; i<nList; i++){
    InstanceList *p = &pIter->aList[i];
    if( fts5InstanceListEof(p) ) continue;

    if( iBest>=0 ){
      InstanceList *pBest = &pIter->aList[iBest];
      if( p->iCol>pBest->iCol ) continue;
      if( p->iCol==pBest->iCol && p->iOff>=pBest->iOff ) continue;
    }
    iBest = i;
  }

  pIter->iCurrent = iBest;
}

/*
** Retrieve the details of the iMatch'th match (of any phrase) in the
** current row. Matches are visited in the order chosen by
** fts5IterSetCurrent(): by column number, then by token offset.
**
** On success SQLITE4_OK is returned and *piOff, *piC, *piS and *piP are set
** to the token offset, column number, stream number and phrase number of
** the match. If the row has iMatch or fewer matches, SQLITE4_NOTFOUND is
** returned and the output parameters are left untouched. SQLITE4_MISUSE is
** returned if pCtx has no FTS cursor, and SQLITE4_NOMEM if the iterator
** object cannot be allocated.
**
** The iterator (pCsr->pIter) is allocated on first use, together with one
** InstanceList per query phrase, and is kept on the cursor.
**
** NOTE(review): nothing visible here ever sets pIter->bValid, so the
** iterator is rewound to match 0 on every call. That gives correct answers
** but makes a full scan of the matches quadratic - confirm whether bValid
** is meant to be set here and cleared when the cursor moves to a new row.
*/
int sqlite4_mi_match_detail(
  sqlite4_context *pCtx,          /* Context object passed to mi function */
  int iMatch,                     /* Index of match */
  int *piOff,                     /* OUT: Token offset of match */
  int *piC,                       /* OUT: Column number of match iMatch */
  int *piS,                       /* OUT: Stream number of match iMatch */
  int *piP                        /* OUT: Phrase number of match iMatch */
){
  int rc = SQLITE4_OK;
  Fts5Cursor *pCsr = pCtx->pFts;
  if( pCsr==0 ){
    rc = SQLITE4_MISUSE;
  }else{
    int nPhrase = pCsr->pExpr->nPhrase;
    Fts5MatchIter *pIter = pCsr->pIter;
    if( pIter==0 ){
      /* Single allocation: the Fts5MatchIter header followed by aList[]. */
      pCsr->pIter = pIter = (Fts5MatchIter *)sqlite4DbMallocZero(
          pCsr->db, sizeof(Fts5MatchIter) + sizeof(InstanceList)*nPhrase
      );
      if( pIter ){
        pIter->aList = (InstanceList *)&pIter[1];
      }else{
        rc = SQLITE4_NOMEM;
      }
    }

    /* Rewind to the first match if the iterator is not known to be valid
    ** for this row, or if the caller is asking for an earlier match than
    ** the one the iterator currently points to. */
    if( rc==SQLITE4_OK && (pIter->bValid==0 || iMatch<pIter->iMatch) ){
      int i;
      for(i=0; i<pCsr->pExpr->nPhrase; i++){
        Fts5Str *pStr = pCsr->pExpr->apPhrase[i];
        fts5InstanceListInit(pStr->aList, pStr->nList, &pIter->aList[i]);
        fts5InstanceListNext(&pIter->aList[i]);
      }

      pIter->iMatch = 0;
      fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase);
    }

    if( rc==SQLITE4_OK ){
      assert( pIter->iMatch<=iMatch );
      while( pIter->iCurrent>=0 && pIter->iMatch<iMatch ){
        fts5InstanceListNext(&pIter->aList[pIter->iCurrent]);
        fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase);
        /* One step taken: without this the loop would only stop once
        ** every list had hit EOF, so iMatch>0 could never be found. */
        pIter->iMatch++;
      }
      if( pIter->iCurrent<0 ){
        rc = SQLITE4_NOTFOUND;
      }else{
        InstanceList *p = &pIter->aList[pIter->iCurrent];
        *piOff = p->iOff;
        *piC = p->iCol;
        *piS = p->iStream;
        *piP = pIter->iCurrent;
      }
    }
  }
  return rc;
}

/**************************************************************************
***************************************************************************
** Below this point is test code.
Changes to src/fts5func.c.
147
148
149
150
151
152
153
154



























































































































































































155














156
157
158
159
160
161
162
163
164
165
166
167

  if( rc==SQLITE4_OK ){
    sqlite4_result_double(pCtx, rank);
  }else{
    sqlite4_result_error_code(pCtx, rc);
  }
}




























































































































































































static void fts5Snippet(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){














}

static int fts5SimpleTokenize(
  void *pCtx, 
  sqlite4_tokenizer *p,
  const char *zDoc,
  int nDoc,
  int(*x)(void*, int, int, const char*, int, int, int)
){
  sqlite4_env *pEnv = (sqlite4_env *)p;
  char *aBuf;
  int nBuf;








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>



<
|







147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359

360
361
362
363
364
365
366
367

  if( rc==SQLITE4_OK ){
    sqlite4_result_double(pCtx, rank);
  }else{
    sqlite4_result_error_code(pCtx, rc);
  }
}

/*
** Context object passed (as the void* argument) to fts5SnippetCb() while
** fts5SnippetText() re-tokenizes a document in order to build a snippet.
** The output text is accumulated in zOut by fts5SnippetAppend().
*/
typedef struct SnippetCtx SnippetCtx;
struct SnippetCtx {
  sqlite4 *db;                    /* Database handle */
  int nToken;                     /* Number of tokens in snippet */
  int iOff;                       /* First token in snippet */
  u64 mask;                       /* Snippet mask. Highlight these terms */

  char *zOut;                     /* Pointer to snippet text */
  int nOut;                       /* Size of zOut in bytes */
  int nAlloc;                     /* Bytes of space allocated at zOut */

  /* Byte offset within zText of the first byte that has not yet been
  ** copied to zOut. fts5SnippetCb() sets it to -1 once it has appended
  ** the trailing "...". */
  int iFrom;
  const char *zText;              /* Document to extract snippet from */

  int rc;                         /* Set to NOMEM if OOM is encountered */
};

/*
** Append the n bytes at z to the output buffer p->zOut, growing the buffer
** if required. No nul-terminator is written; p->nOut records the length.
**
** This is a no-op if an earlier append has already failed (p->rc is not
** SQLITE4_OK) or if n is zero or negative. The latter guard keeps memcpy()
** away from a buffer that has not been allocated yet (p->zOut==0) and from
** a negative length being converted to a huge size_t.
**
** If the buffer cannot be grown, p->rc is set to SQLITE4_NOMEM and p->zOut
** is left NULL (sqlite4DbReallocOrFree() has released the old buffer).
*/
static void fts5SnippetAppend(SnippetCtx *p, const char *z, int n){
  if( p->rc!=SQLITE4_OK || n<=0 ) return;

  if( (p->nOut + n) > p->nAlloc ){
    /* Over-allocate so that a run of small appends does not realloc
    ** every time. */
    int nNew = (p->nOut+n) * 2;

    p->zOut = sqlite4DbReallocOrFree(p->db, p->zOut, nNew);
    if( p->zOut==0 ){
      p->rc = SQLITE4_NOMEM;
      return;
    }
    p->nAlloc = sqlite4DbMallocSize(p->db, p->zOut);
  }

  memcpy(&p->zOut[p->nOut], z, n);
  p->nOut += n;
}

static int fts5SnippetCb(
  void *pCtx, 
  int iStream, 
  int iOff, 
  const char *z, int n,
  int iSrc, int nSrc
){
  SnippetCtx *p = (SnippetCtx *)pCtx;

  if( iOff<p->iOff ){
    return 0;
  }else if( iOff>(p->iOff + p->nToken) ){
    fts5SnippetAppend(p, &p->zText[p->iFrom], iSrc - p->iFrom);
    fts5SnippetAppend(p, "...", 3);
    p->iFrom = -1;
    return 1;
  }else{
    int bHighlight;               /* True to highlight term */

    bHighlight = (p->mask & (1 << (p->iOff+p->nToken - iOff - 1))) ? 1 : 0;

    if( p->iFrom==0 && p->iOff!=0 ){
      p->iFrom = iSrc;
      fts5SnippetAppend(p, "...", 3);
    }

    if( bHighlight ){
      fts5SnippetAppend(p, &p->zText[p->iFrom], iSrc - p->iFrom);
      fts5SnippetAppend(p, "[", 1);
      fts5SnippetAppend(p, &p->zText[iSrc], nSrc);
      fts5SnippetAppend(p, "]", 1);
      p->iFrom = iSrc+nSrc;
    }
  }

  return 0;
}

/*
** Build the snippet text for column iCol of the current row and set it as
** the result of the SQL function being evaluated by pCtx.
**
** The column value is fetched with sqlite4_mi_column_value() and run back
** through the tokenizer with fts5SnippetCb() as the callback. iOff, nToken
** and mask are copied into the SnippetCtx that the callback consults. Any
** document text left over after the last token the callback consumed is
** appended afterwards, unless the callback already closed the snippet
** (iFrom<=0).
**
** Returns the error from sqlite4_mi_column_value() if the value cannot be
** loaded, otherwise the accumulated SnippetCtx.rc.
**
** NOTE(review): the return value of sqlite4_mi_tokenize() is ignored.
*/
static int fts5SnippetText(
  sqlite4_context *pCtx, 
  int iCol,
  int iOff,
  int nToken,
  u64 mask
){
  sqlite4_value *pVal = 0;
  SnippetCtx sCtx;
  int nText;
  int rc;

  rc = sqlite4_mi_column_value(pCtx, iCol, &pVal);
  if( rc!=SQLITE4_OK ) return rc;

  nText = sqlite4_value_bytes(pVal);
  memset(&sCtx, 0, sizeof(sCtx));
  sCtx.zText = (const char *)sqlite4_value_text(pVal);
  sCtx.db = sqlite4_context_db_handle(pCtx);
  sCtx.mask = mask;
  sCtx.iOff = iOff;
  sCtx.nToken = nToken;

  sqlite4_mi_tokenize(pCtx, sCtx.zText, nText, &sCtx, fts5SnippetCb);

  /* Flush the tail of the document that follows the last highlight. */
  if( sCtx.rc==SQLITE4_OK && sCtx.iFrom>0 ){
    fts5SnippetAppend(&sCtx, &sCtx.zText[sCtx.iFrom], nText - sCtx.iFrom);
  }

  sqlite4_result_text(pCtx, sCtx.zOut, sCtx.nOut, SQLITE4_TRANSIENT);
  sqlite4DbFree(sCtx.db, sCtx.zOut);
  return sCtx.rc;
}

/*
** Scan every match in the current row and pick the position that scores
** highest for an nToken-wide snippet.
**
** One u64 bitmask is kept per phrase. Bit 0 of aMask[i] is set when the
** match being visited belongs to phrase i. Before each new match all masks
** are shifted left by the number of tokens between it and the previous
** match, so bit k means "phrase i matched k tokens back". Masks are cleared
** when the column changes or the gap is 64 tokens or more. A phrase with
** any bit set inside the low nToken bits scores 1, or 100 if it also
** intersects the caller-supplied mask.
**
** Outputs, written whether or not an error occurs:
**
**   *piOff   Token offset of the match that produced the best score.
**   *piCol   Column of that match.
**   *pMask   Union of the phrase masks at that point, limited to nToken bits.
**
** Returns SQLITE4_OK on success - running out of matches is the normal way
** for the scan to end and is not an error. Returns SQLITE4_NOMEM if the
** mask array cannot be allocated, or the error from
** sqlite4_mi_phrase_count() or sqlite4_mi_match_detail().
*/
static int fts5BestSnippet(
  sqlite4_context *pCtx, 
  u64 mask,                       /* Mask of high-priority phrases */
  int nToken,
  int *piOff,
  int *piCol,
  u64 *pMask
){
  sqlite4 *db = sqlite4_context_db_handle(pCtx);
  int nPhrase = 0;
  int rc = SQLITE4_OK;
  int i;
  int iPrev = 0;
  int iPrevCol = 0;
  u64 *aMask;
  u64 lmask;                      /* Low nToken bits set */

  int iBestOff = 0;
  int iBestCol = 0;
  int nBest = 0;
  u64 bmask = 0;

  /* Shifting a 64-bit value by 64 or more, or by a negative count, is
  ** undefined. Clamp instead. */
  if( nToken>=64 ){
    lmask = ~(u64)0;
  }else if( nToken<=0 ){
    lmask = 0;
  }else{
    lmask = (((u64)1) << nToken) - 1;
  }

  rc = sqlite4_mi_phrase_count(pCtx, &nPhrase);
  if( rc!=SQLITE4_OK ) return rc;
  aMask = sqlite4DbMallocZero(db, sizeof(u64) * nPhrase);
  if( !aMask ) return SQLITE4_NOMEM;

  /* Iterate through all matches for all phrases */
  for(i=0; rc==SQLITE4_OK; i++){
    int iOff;
    int iCol;
    int iStream;
    int iPhrase;
    u64 tmask = 0;

    rc = sqlite4_mi_match_detail(pCtx, i, &iOff, &iCol, &iStream, &iPhrase);
    if( rc==SQLITE4_OK ){
      int iMask;
      int nShift; 
      int nScore = 0;

      /* 100 is just "far enough to clear every mask" on a column change. */
      nShift = ((iPrevCol==iCol) ? (iOff-iPrev) : 100);

      for(iMask=0; iMask<nPhrase; iMask++){
        if( nShift<64){
          aMask[iMask] = aMask[iMask] << nShift;
        }else{
          aMask[iMask] = 0;
        }
      }
      /* Record this match against its own phrase. The loop above leaves
      ** iMask==nPhrase, so aMask[iMask] must not be read here. */
      aMask[iPhrase] = aMask[iPhrase] | 0x0001;

      for(iMask=0; iMask<nPhrase; iMask++){
        if( (aMask[iMask] & lmask) ){
          nScore += ((aMask[iMask] & mask) ? 100 : 1);
        }
        tmask = tmask | aMask[iMask];
      }

      if( nScore>nBest ){
        bmask = (tmask & lmask);
        nBest = nScore;
        iBestOff = iOff;
        iBestCol = iCol;
      }

      iPrev = iOff;
      iPrevCol = iCol;
    }
  }

  /* SQLITE4_NOTFOUND only means the matches have run out. */
  if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK;

  *piOff = iBestOff;
  *piCol = iBestCol;
  *pMask = bmask;

  sqlite4DbFree(db, aMask);
  return rc;
}

/*
** Implementation of the snippet() SQL function. The best snippet position
** is located with fts5BestSnippet() and the snippet text is then produced
** by fts5SnippetText(), which also sets the function result. The snippet
** width is fixed at 15 tokens and no phrases are prioritised (mask==0).
** The nArg/apArg arguments are not consulted.
**
** If either step fails, its error code becomes the function result.
*/
static void fts5Snippet(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){
  int nToken = 15;                /* Width of snippet in tokens */
  u64 mask = 0;                   /* High-priority phrase mask (none) */
  u64 hlmask = 0;                 /* OUT: terms to highlight */
  int iCol = 0;                   /* OUT: column of best snippet */
  int iOff = 0;                   /* OUT: token offset of best snippet */
  int rc = fts5BestSnippet(pCtx, mask, nToken, &iOff, &iCol, &hlmask);

  if( rc==SQLITE4_OK ){
    rc = fts5SnippetText(pCtx, iCol, iOff, nToken, hlmask);
  }
  if( rc!=SQLITE4_OK ) sqlite4_result_error_code(pCtx, rc);
}

static int fts5SimpleTokenize(

  void *pCtx, sqlite4_tokenizer *p,
  const char *zDoc,
  int nDoc,
  int(*x)(void*, int, int, const char*, int, int, int)
){
  sqlite4_env *pEnv = (sqlite4_env *)p;
  char *aBuf;
  int nBuf;
Changes to src/sqlite.h.in.
4414
4415
4416
4417
4418
4419
4420


4421
4422
4423
4424
4425
4426
4427
  int enc,
  void *p,
  void (*xFunc)(sqlite4_context*,int,sqlite4_value **),
  void (*xDestroy)(void *)
);

/*


** Special functions that may be called from within matchinfo UDFs. All
** return an SQLite error code - SQLITE4_OK if successful, or some other
** error code otherwise.
**
** sqlite4_mi_column_count():
**   Set *pn to the number of columns in the queried table.
**







>
>







4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
  int enc,
  void *p,
  void (*xFunc)(sqlite4_context*,int,sqlite4_value **),
  void (*xDestroy)(void *)
);

/*
** CAPIREF: Matchinfo APIs.
**
** Special functions that may be called from within matchinfo UDFs. All
** return an SQLite error code - SQLITE4_OK if successful, or some other
** error code otherwise.
**
** sqlite4_mi_column_count():
**   Set *pn to the number of columns in the queried table.
**
4460
4461
4462
4463
4464
4465
4466








4467
4468


4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489




4490
4491
4492
4493
4494
4495
4496
**   identified by parameters iC and iS.
**
**   Parameter iP may also be negative. In this case, the output value is
**   set to the total number of occurrences of all query phrases in the
**   current row, subject to the constraints imposed by iC and iS.
**
** sqlite4_mi_match_detail():








**   This function may be used to iterate through all matches in the
**   current row in order of occurrence.


**
** sqlite4_mi_column_value():
**   Set *ppVal to point to an sqlite4_value object containing the value
**   read from column iCol of the current row. This object is valid until
**   the function callback returns.
*/
int sqlite4_mi_column_count(sqlite4_context *, int *pn);
int sqlite4_mi_phrase_count(sqlite4_context *, int *pn);
int sqlite4_mi_stream_count(sqlite4_context *, int *pn);

int sqlite4_mi_total_size(sqlite4_context *, int iC, int iS, int *pn);
int sqlite4_mi_total_rows(sqlite4_context *, int *pn);

int sqlite4_mi_row_count(sqlite4_context *, int iC, int iS, int iP, int *pn);

int sqlite4_mi_size(sqlite4_context *, int iC, int iS, int *pn);
int sqlite4_mi_match_count(sqlite4_context *, int iC, int iS, int iP, int *pn);
int sqlite4_mi_match_detail(
    sqlite4_context *, int iMatch, int *piOff, int *piC, int *piS, int *piP
);
int sqlite4_mi_column_value(sqlite4_context *, int iCol, sqlite4_value **ppVal);







/*
** Undo the hack that converts floating point types to integer for
** builds on processors without floating point support.
*/







>
>
>
>
>
>
>
>
|
|
>
>




















|
>
>
>
>







4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
**   identified by parameters iC and iS.
**
**   Parameter iP may also be negative. In this case, the output value is
**   set to the total number of occurrences of all query phrases in the
**   current row, subject to the constraints imposed by iC and iS.
**
** sqlite4_mi_match_detail():
**   This function is used to access the details of the iMatch'th match
**   (of any phrase) in the current row. Matches are sorted in order of
**   occurrence. If parameter iMatch is equal to or greater than the number 
**   of matches in the current row, SQLITE4_NOTFOUND is returned. Otherwise,
**   unless an error occurs, SQLITE4_OK is returned and the *piOff, *piC, *piS,
**   and *piP output parameters are set to the token offset, column number,
**   stream number and phrase number respectively.
**
**   It is anticipated that this function will be used to iterate through matches 
**   in order of occurrence. It is optimized so that it is fastest when 
**   called with the iMatch parameter set to 0, P or P+1, where P is the 
**   iMatch value passed to the previous call.
**
** sqlite4_mi_column_value():
**   Set *ppVal to point to an sqlite4_value object containing the value
**   read from column iCol of the current row. This object is valid until
**   the function callback returns.
*/
int sqlite4_mi_column_count(sqlite4_context *, int *pn);
int sqlite4_mi_phrase_count(sqlite4_context *, int *pn);
int sqlite4_mi_stream_count(sqlite4_context *, int *pn);

int sqlite4_mi_total_size(sqlite4_context *, int iC, int iS, int *pn);
int sqlite4_mi_total_rows(sqlite4_context *, int *pn);

int sqlite4_mi_row_count(sqlite4_context *, int iC, int iS, int iP, int *pn);

int sqlite4_mi_size(sqlite4_context *, int iC, int iS, int *pn);
int sqlite4_mi_match_count(sqlite4_context *, int iC, int iS, int iP, int *pn);
int sqlite4_mi_match_detail(
    sqlite4_context *, int iMatch, int *piOff, int *piC, int *piS, int *piP
);
int sqlite4_mi_column_value(sqlite4_context *, int iC, sqlite4_value **ppVal);

int sqlite4_mi_tokenize(sqlite4_context *, const char *, int, void *,
  int(*x)(void *, int, int, const char *, int, int, int)
);



/*
** Undo the hack that converts floating point types to integer for
** builds on processors without floating point support.
*/
Changes to src/sqliteInt.h.
2485
2486
2487
2488
2489
2490
2491

2492
2493
2494
2495
2496
2497
2498
/*
** An instance of this structure is used as the p4 argument to some fts5
** related vdbe opcodes.
*/
struct Fts5Info {
  int iDb;                        /* Database containing this index */
  int iRoot;                      /* Root page number of index */

  int nCol;                       /* Number of columns in indexed table */
  char **azCol;                   /* Column names for table */
  Fts5Tokenizer *pTokenizer;      /* Tokenizer module */
  sqlite4_tokenizer *p;           /* Tokenizer instance */
};

int sqlite4WalkExpr(Walker*, Expr*);







>







2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
/*
** An instance of this structure is used as the p4 argument to some fts5
** related vdbe opcodes.
**
** iTbl is filled in via sqlite4FindPrimaryKey() when the object is built.
** sqlite4_mi_column_value() passes it to sqlite4Fts5Pk() to construct the
** key used to look up the current row in the indexed table.
*/
struct Fts5Info {
  int iDb;                        /* Database containing this index */
  int iRoot;                      /* Root page number of index */
  int iTbl;                       /* Root page number of indexed table */
  int nCol;                       /* Number of columns in indexed table */
  char **azCol;                   /* Column names for table */
  Fts5Tokenizer *pTokenizer;      /* Tokenizer module */
  sqlite4_tokenizer *p;           /* Tokenizer instance */
};

int sqlite4WalkExpr(Walker*, Expr*);