SQLite

Check-in [059092379f]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add the auxiliary highlight() function to fts5.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 059092379f981eb919b500ce447006f9e645fc5a
User & Date: dan 2014-11-24 16:24:33.456
Context
2014-11-27
20:03
Add a %_config table to fts5. (check-in: 83491c5666 user: dan tags: fts5)
2014-11-24
16:24
Add the auxiliary highlight() function to fts5. (check-in: 059092379f user: dan tags: fts5)
2014-11-15
20:07
Fix the customization interfaces so that they match the documentation. (check-in: fba0b5fc7e user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5.c.
161
162
163
164
165
166
167



168
169
170
171
172
173
174
  char *zSpecial;                 /* Result of special query */

  /* Variables used by auxiliary functions */
  i64 iCsrId;                     /* Cursor id */
  Fts5Auxiliary *pAux;            /* Currently executing extension function */
  Fts5Auxdata *pAuxdata;          /* First in linked list of saved aux-data */
  int *aColumnSize;               /* Values for xColumnSize() */



};

/*
** Values for Fts5Cursor.csrflags
*/
#define FTS5CSR_REQUIRE_CONTENT   0x01
#define FTS5CSR_REQUIRE_DOCSIZE   0x02







>
>
>







161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
  char *zSpecial;                 /* Result of special query */

  /* Variables used by auxiliary functions */
  i64 iCsrId;                     /* Cursor id */
  Fts5Auxiliary *pAux;            /* Currently executing extension function */
  Fts5Auxdata *pAuxdata;          /* First in linked list of saved aux-data */
  int *aColumnSize;               /* Values for xColumnSize() */

  int nInstCount;                 /* Number of phrase instances */
  int *aInst;                     /* 3 integers per phrase instance */
};

/*
** Values for Fts5Cursor.csrflags
*/
#define FTS5CSR_REQUIRE_CONTENT   0x01
#define FTS5CSR_REQUIRE_DOCSIZE   0x02
483
484
485
486
487
488
489












490
491
492
493
494
495
496
497
498
499
500
501

502
503
504
505
506
507
508

/*
** Return the FTS5_STMT_XXX constant that corresponds to the query plan
** encoded in idxNum: one of the two scan statements (chosen by the
** FTS5_ORDER_ASC bit) for FTS5_PLAN_SCAN, or FTS5_STMT_LOOKUP for any
** other plan.
*/
static int fts5StmtType(int idxNum){
  int eStmt = FTS5_STMT_LOOKUP;
  if( FTS5_PLAN(idxNum)==FTS5_PLAN_SCAN ){
    if( idxNum & FTS5_ORDER_ASC ){
      eStmt = FTS5_STMT_SCAN_ASC;
    }else{
      eStmt = FTS5_STMT_SCAN_DESC;
    }
  }
  return eStmt;
}













/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
  Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
  Fts5Cursor **pp;
  Fts5Auxdata *pData;
  Fts5Auxdata *pNext;


  if( pCsr->pStmt ){
    int eStmt = fts5StmtType(pCsr->idxNum);
    sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt);
  }
  if( pCsr->pSorter ){
    Fts5Sorter *pSorter = pCsr->pSorter;
    sqlite3_finalize(pSorter->pStmt);







>
>
>
>
>
>
>
>
>
>
>
>












>







486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524

/*
** Return the FTS5_STMT_XXX constant that corresponds to the query plan
** encoded in idxNum: one of the two scan statements (chosen by the
** FTS5_ORDER_ASC bit) for FTS5_PLAN_SCAN, or FTS5_STMT_LOOKUP for any
** other plan.
*/
static int fts5StmtType(int idxNum){
  int eStmt = FTS5_STMT_LOOKUP;
  if( FTS5_PLAN(idxNum)==FTS5_PLAN_SCAN ){
    if( idxNum & FTS5_ORDER_ASC ){
      eStmt = FTS5_STMT_SCAN_ASC;
    }else{
      eStmt = FTS5_STMT_SCAN_DESC;
    }
  }
  return eStmt;
}

/*
** Discard everything the cursor has cached about the row it previously
** pointed to. Called each time the cursor is repositioned (and when it is
** closed):
**
**   * the REQUIRE_CONTENT and REQUIRE_DOCSIZE flags are set, and
**   * the cached phrase-instance array is freed and its count zeroed.
*/
static void fts5CsrNewrow(Fts5Cursor *pCsr){
  int *aStale = pCsr->aInst;      /* Instance array of the previous row */

  pCsr->aInst = 0;
  pCsr->nInstCount = 0;
  sqlite3_free(aStale);

  CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
}

/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
  Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
  Fts5Cursor **pp;
  Fts5Auxdata *pData;
  Fts5Auxdata *pNext;

  fts5CsrNewrow(pCsr);
  if( pCsr->pStmt ){
    int eStmt = fts5StmtType(pCsr->idxNum);
    sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt);
  }
  if( pCsr->pSorter ){
    Fts5Sorter *pSorter = pCsr->pSorter;
    sqlite3_finalize(pSorter->pStmt);
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
      a += getVarint32(a, iVal);
      iOff += iVal;
      pSorter->aIdx[i] = iOff;
    }
    pSorter->aIdx[i] = &aBlob[nBlob] - a;

    pSorter->aPoslist = a;
    CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
  }

  return rc;
}

/*
** Advance the cursor to the next row in the table that matches the 







|







569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
      a += getVarint32(a, iVal);
      iOff += iVal;
      pSorter->aIdx[i] = iOff;
    }
    pSorter->aIdx[i] = &aBlob[nBlob] - a;

    pSorter->aPoslist = a;
    fts5CsrNewrow(pCsr);
  }

  return rc;
}

/*
** Advance the cursor to the next row in the table that matches the 
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
  switch( ePlan ){
    case FTS5_PLAN_MATCH:
    case FTS5_PLAN_SOURCE:
      rc = sqlite3Fts5ExprNext(pCsr->pExpr);
      if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
        CsrFlagSet(pCsr, FTS5CSR_EOF);
      }
      CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
      break;

    case FTS5_PLAN_SPECIAL: {
      CsrFlagSet(pCsr, FTS5CSR_EOF);
      break;
    }








|







595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
  switch( ePlan ){
    case FTS5_PLAN_MATCH:
    case FTS5_PLAN_SOURCE:
      rc = sqlite3Fts5ExprNext(pCsr->pExpr);
      if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
        CsrFlagSet(pCsr, FTS5CSR_EOF);
      }
      fts5CsrNewrow(pCsr);
      break;

    case FTS5_PLAN_SPECIAL: {
      CsrFlagSet(pCsr, FTS5CSR_EOF);
      break;
    }

662
663
664
665
666
667
668
669
670
671
672
673
674
675
676

/*
** Position the expression belonging to cursor pCsr at its first match,
** iterating in the direction selected by bAsc. The cursor is flagged as
** EOF if the expression has no matches, and the REQUIRE_CONTENT and
** REQUIRE_DOCSIZE flags are set in every case. Returns the result of
** sqlite3Fts5ExprFirst().
*/
static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){
  int rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc);

  if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
    CsrFlagSet(pCsr, FTS5CSR_EOF);
  }
  CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );

  return rc;
}

/*
** Process a "special" query. A special query is identified as one with a
** MATCH expression that begins with a '*' character. The remainder of
** the text passed to the MATCH operator are used as  the special query







|







678
679
680
681
682
683
684
685
686
687
688
689
690
691
692

/*
** Position the expression belonging to cursor pCsr at its first match,
** iterating in the direction selected by bAsc. The cursor is flagged as
** EOF if the expression has no matches, and any per-row data cached by
** the cursor is discarded in every case. Returns the result of
** sqlite3Fts5ExprFirst().
*/
static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){
  int rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc);

  if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
    CsrFlagSet(pCsr, FTS5CSR_EOF);
  }
  fts5CsrNewrow(pCsr);

  return rc;
}

/*
** Process a "special" query. A special query is identified as one with a
** MATCH expression that begins with a '*' character. The remainder of
** the text passed to the MATCH operator are used as  the special query
1039
1040
1041
1042
1043
1044
1045


































































































1046
1047
1048
1049
1050
1051
1052
  return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
}

/*
** Implementation of the xPhraseSize() extension API method. Forwards the
** request for phrase iPhrase to the expression attached to the cursor
** that pCtx refers to.
*/
static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){
  return sqlite3Fts5ExprPhraseSize(((Fts5Cursor*)pCtx)->pExpr, iPhrase);
}



































































































/*
** Implementation of the xRowid() extension API method. The context handle
** is the cursor itself; return whatever fts5CursorRowid() reports for it.
*/
static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return fts5CursorRowid(pCsr);
}

static int fts5ApiColumnText(
  Fts5Context *pCtx, 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
  return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
}

/*
** Implementation of the xPhraseSize() extension API method. Forwards the
** request for phrase iPhrase to the expression attached to the cursor
** that pCtx refers to.
*/
static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){
  return sqlite3Fts5ExprPhraseSize(((Fts5Cursor*)pCtx)->pExpr, iPhrase);
}

/*
** Set *pa to point to the position list for phrase iPhrase of the current
** row and return its size.
**
** If the cursor has a sorter, the list is the slice of the sorter's
** aPoslist[] buffer that ends at aIdx[iPhrase] and begins at aIdx[iPhrase-1]
** (or at 0 for the first phrase). Otherwise the request is passed to
** sqlite3Fts5ExprPoslist().
*/
static int fts5CsrPoslist(Fts5Cursor *pCsr, int iPhrase, const u8 **pa){
  Fts5Sorter *pSorter = pCsr->pSorter;
  int iFirst = 0;                 /* Offset of the slice in aPoslist[] */
  int nList;                      /* Size of the slice */

  if( pSorter==0 ){
    return sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa);
  }

  if( iPhrase!=0 ){
    iFirst = pSorter->aIdx[iPhrase-1];
  }
  nList = pSorter->aIdx[iPhrase] - iFirst;
  *pa = &pSorter->aPoslist[iFirst];
  return nList;
}

/*
** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated
** for the row the cursor currently points to. Return SQLITE_OK if
** successful, or an SQLite error code otherwise. This is a no-op if aInst[]
** is already populated.
**
** The array is built by merging the position lists of all phrases in the
** query: at each step the phrase iterator with the smallest current
** position is selected, so entries are appended in ascending position
** order. Each entry occupies three integers:
**
**     aInst[3*i + 0]    phrase number
**     aInst[3*i + 1]    FTS5_POS2COLUMN() of the position
**     aInst[3*i + 2]    FTS5_POS2OFFSET() of the position
**
** If an error occurs the partially built array is discarded and the cursor
** cache is left empty, so that a later call starts again from scratch
** instead of finding a truncated array with a mismatched count.
*/
static int fts5CacheInstArray(Fts5Cursor *pCsr){
  int rc = SQLITE_OK;
  if( pCsr->aInst==0 ){
    Fts5PoslistReader *aIter;     /* One iterator for each phrase */
    int nIter;                    /* Number of iterators/phrases */
    int nByte;
    
    nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
    nByte = sizeof(Fts5PoslistReader) * nIter;
    aIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte);
    if( aIter ){
      Fts5Buffer buf = {0, 0, 0}; /* Build up aInst[] here */
      int nInst = 0;              /* Number instances seen so far */
      int i;

      /* Initialize all iterators */
      for(i=0; i<nIter; i++){
        const u8 *a;
        int n = fts5CsrPoslist(pCsr, i, &a);
        sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[i]);
      }

      while( 1 ){
        int *aInst;
        int iBest = -1;

        /* Find the iterator that is not at EOF and has the smallest 
        ** position. The comparison must be position against position;
        ** iBest itself is only an index into aIter[]. */
        for(i=0; i<nIter; i++){
          if( aIter[i].bEof==0 
           && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos) 
          ){
            iBest = i;
          }
        }

        if( iBest<0 ) break;
        nInst++;
        if( sqlite3Fts5BufferGrow(&rc, &buf, nInst * sizeof(int) * 3) ) break;

        aInst = &((int*)buf.p)[3 * (nInst-1)];
        aInst[0] = iBest;
        aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos);
        aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos);
        sqlite3Fts5PoslistReaderNext(&aIter[iBest]);
      }

      if( rc==SQLITE_OK ){
        pCsr->aInst = (int*)buf.p;
        pCsr->nInstCount = nInst;
      }else{
        /* NOTE(review): assumes sqlite3Fts5BufferGrow() leaves buf.p owned
        ** by the caller when it fails - confirm against fts5_buffer.c. */
        sqlite3_free(buf.p);
      }
      sqlite3_free(aIter);
    }
  }
  return rc;
}

/*
** Implementation of the xInstCount() extension API method. Populate the
** cursor's phrase-instance cache if required and, if that succeeds, write
** the number of instances to *pnInst. *pnInst is left untouched if an
** error code is returned.
*/
static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  int rc = fts5CacheInstArray(pCsr);

  if( rc!=SQLITE_OK ) return rc;
  *pnInst = pCsr->nInstCount;
  return SQLITE_OK;
}

/*
** Implementation of the xInst() extension API method. Populate the
** cursor's phrase-instance cache if required, then copy the three values
** stored for instance iIdx (phrase number, column and offset) into the
** output variables.
**
** Returns SQLITE_RANGE if iIdx is negative or not smaller than the number
** of cached instances, or the error code from building the cache if that
** fails. The output variables are only written when SQLITE_OK is returned.
*/
static int fts5ApiInst(
  Fts5Context *pCtx, 
  int iIdx, 
  int *piPhrase, 
  int *piCol, 
  int *piOff
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  const int *aEntry;              /* The 3 integers of instance iIdx */
  int rc;

  rc = fts5CacheInstArray(pCsr);
  if( rc!=SQLITE_OK ) return rc;
  if( iIdx<0 || iIdx>=pCsr->nInstCount ) return SQLITE_RANGE;

  aEntry = &pCsr->aInst[iIdx*3];
  *piPhrase = aEntry[0];
  *piCol = aEntry[1];
  *piOff = aEntry[2];
  return SQLITE_OK;
}

/*
** Implementation of the xRowid() extension API method. The context handle
** is the cursor itself; return whatever fts5CursorRowid() reports for it.
*/
static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return fts5CursorRowid(pCsr);
}

static int fts5ApiColumnText(
  Fts5Context *pCtx, 
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
  Fts5Context *pCtx, 
  int iPhrase, 
  int *pi, 
  i64 *piPos 
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  const u8 *a; int n;             /* Poslist for phrase iPhrase */
  if( pCsr->pSorter ){
    Fts5Sorter *pSorter = pCsr->pSorter;
    int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
    n = pSorter->aIdx[iPhrase] - i1;
    a = &pSorter->aPoslist[i1];
  }else{
    n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, &a);
  }
  return sqlite3Fts5PoslistNext64(a, n, pi, piPos);
}

static int fts5ApiSetAuxdata(
  Fts5Context *pCtx,              /* Fts5 context */
  void *pPtr,                     /* Pointer to save as auxdata */
  void(*xDelete)(void*)           /* Destructor for pPtr (or NULL) */







<
<
<
<
<
<
|
<







1198
1199
1200
1201
1202
1203
1204






1205

1206
1207
1208
1209
1210
1211
1212
  Fts5Context *pCtx, 
  int iPhrase, 
  int *pi, 
  i64 *piPos 
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  const u8 *a; int n;             /* Poslist for phrase iPhrase */






  n = fts5CsrPoslist(pCsr, iPhrase, &a);

  return sqlite3Fts5PoslistNext64(a, n, pi, piPos);
}

static int fts5ApiSetAuxdata(
  Fts5Context *pCtx,              /* Fts5 context */
  void *pPtr,                     /* Pointer to save as auxdata */
  void(*xDelete)(void*)           /* Destructor for pPtr (or NULL) */
1158
1159
1160
1161
1162
1163
1164


1165
1166
1167
1168
1169
1170
1171
  fts5ApiUserData,
  fts5ApiColumnCount,
  fts5ApiRowCount,
  fts5ApiColumnTotalSize,
  fts5ApiTokenize,
  fts5ApiPhraseCount,
  fts5ApiPhraseSize,


  fts5ApiRowid,
  fts5ApiColumnText,
  fts5ApiColumnSize,
  fts5ApiPoslist,
  fts5ApiQueryPhrase,
  fts5ApiSetAuxdata,
  fts5ApiGetAuxdata,







>
>







1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
  fts5ApiUserData,
  fts5ApiColumnCount,
  fts5ApiRowCount,
  fts5ApiColumnTotalSize,
  fts5ApiTokenize,
  fts5ApiPhraseCount,
  fts5ApiPhraseSize,
  fts5ApiInstCount,
  fts5ApiInst,
  fts5ApiRowid,
  fts5ApiColumnText,
  fts5ApiColumnSize,
  fts5ApiPoslist,
  fts5ApiQueryPhrase,
  fts5ApiSetAuxdata,
  fts5ApiGetAuxdata,
Changes to ext/fts5/fts5.h.
66
67
68
69
70
71
72














73
74
75
76

77

78
79













80
81
82
83
84
85
86
** xPhraseCount:
**   Returns the number of phrases in the current query expression.
**
** xPhraseSize:
**   Returns the number of tokens in phrase iPhrase of the query. Phrases
**   are numbered starting from zero.
**














** xRowid:
**   Returns the rowid of the current row.
**
** xPoslist:

**   Iterate through instances of phrase iPhrase in the current row. 

**
**   At EOF, a non-zero value is returned and output variable iPos set to -1.













**
** xTokenize:
**   Tokenize text using the tokenizer belonging to the FTS5 table.
**
**
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
**   This API function is used to query the FTS table for phrase iPhrase







>
>
>
>
>
>
>
>
>
>
>
>
>
>




>
|
>

|
>
>
>
>
>
>
>
>
>
>
>
>
>







66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
** xPhraseCount:
**   Returns the number of phrases in the current query expression.
**
** xPhraseSize:
**   Returns the number of tokens in phrase iPhrase of the query. Phrases
**   are numbered starting from zero.
**
** xInstCount:
**   Set *pnInst to the total number of occurrences of all phrases within
**   the query within the current row. Return SQLITE_OK if successful, or
**   an error code (e.g. SQLITE_NOMEM) if an error occurs.
**
** xInst:
**   Query for the details of phrase match iIdx within the current row.
**   Phrase matches are numbered starting from zero, so the iIdx argument
**   should be greater than or equal to zero and smaller than the value
**   output by xInstCount().
**
**   Returns SQLITE_OK if successful, or an error code (e.g. SQLITE_NOMEM)
**   if an error occurs.
**
** xRowid:
**   Returns the rowid of the current row.
**
** xPoslist:
**   Iterate through phrase instances in the current row. If the iPhrase
**   argument is 0 or greater, then only instances of phrase iPhrase are
**   visited. If it is less than 0, instances of all phrases are visited.
**
**   At EOF, -1 is returned and output variable iPos set to -1.
**
**     <pre>
**       sqlite3_int64 iPos;
**       int iPhrase;
**       int ii = 0;
**
**       while( (iPhrase = pFts->xPoslist(pFts, -1, &ii, &iPos)) >= 0 ){
**         int iCol = FTS5_POS2COLUMN(iPos);
**         int iOff = FTS5_POS2OFFSET(iPos);
**         // An instance of phrase iPhrase at offset iOff of column iCol.
**       }
**     </pre>
**
**
** xTokenize:
**   Tokenize text using the tokenizer belonging to the FTS5 table.
**
**
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
**   This API function is used to query the FTS table for phrase iPhrase
156
157
158
159
160
161
162



163
164
165
166
167
168
169
    void *pCtx,                   /* Context passed to xToken() */
    int (*xToken)(void*, const char*, int, int, int, int)    /* Callback */
  );

  int (*xPhraseCount)(Fts5Context*);
  int (*xPhraseSize)(Fts5Context*, int iPhrase);




  sqlite3_int64 (*xRowid)(Fts5Context*);
  int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
  int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
  int (*xPoslist)(Fts5Context*, int iPhrase, int *pi, sqlite3_int64 *piPos);

  int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
    int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)







>
>
>







185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
    void *pCtx,                   /* Context passed to xToken() */
    int (*xToken)(void*, const char*, int, int, int, int)    /* Callback */
  );

  int (*xPhraseCount)(Fts5Context*);
  int (*xPhraseSize)(Fts5Context*, int iPhrase);

  int (*xInstCount)(Fts5Context*, int *pnInst);
  int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);

  sqlite3_int64 (*xRowid)(Fts5Context*);
  int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
  int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
  int (*xPoslist)(Fts5Context*, int iPhrase, int *pi, sqlite3_int64 *piPos);

  int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
    int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
Changes to ext/fts5/fts5Int.h.
297
298
299
300
301
302
303



304
305
306
307
308
309
310

/*
** Return the total number of entries read from the %_data table by 
** this connection since it was created.
*/
int sqlite3Fts5IndexReads(Fts5Index *p);




/*
** End of interface to code in fts5_index.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_hash.c. 
*/







>
>
>







297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313

/*
** Return the total number of entries read from the %_data table by 
** this connection since it was created.
*/
int sqlite3Fts5IndexReads(Fts5Index *p);

/* Malloc utility */
void *sqlite3Fts5MallocZero(int *pRc, int nByte);

/*
** End of interface to code in fts5_index.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_hash.c. 
*/
Changes to ext/fts5/fts5_aux.c.
10
11
12
13
14
15
16




































































































































17
18
19
20
21
22
23
**
******************************************************************************
*/

#include "fts5Int.h"
#include <math.h>





































































































































typedef struct SnipPhrase SnipPhrase;
typedef struct SnipIter SnipIter;
typedef struct SnippetCtx SnippetCtx;

struct SnipPhrase {
  u64 mask;                       /* Current mask */
  int nToken;                     /* Tokens in this phrase */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
**
******************************************************************************
*/

#include "fts5Int.h"
#include <math.h>

/*************************************************************************
** Start of highlight() implementation.
*/
/*
** State shared between fts5HighlightFunction() and the tokenizer callback
** fts5HighlightCb() while a single highlight() result is being built.
**
** iStart and iEnd are compared by the callback against the position (iPos)
** of each token it is passed. Both are set to -1 when there is no further
** phrase instance to highlight in column iCol.
**
** zOut is built up with sqlite3_mprintf() and released by
** fts5HighlightFunction() with sqlite3_free().
*/
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
  const Fts5ExtensionApi *pApi;   /* API offered by current FTS version */
  Fts5Context *pFts;              /* First arg to pass to pApi functions */
  int iInst;                      /* Current phrase instance index */
  int iStart;                     /* First token of current phrase */
  int iEnd;                       /* Last token of current phrase */

  const char *zOpen;              /* Opening highlight */
  const char *zClose;             /* Closing highlight */
  int iCol;                       /* Column to read from */

  const char *zIn;                /* Input text */
  int nIn;                        /* Size of input text in bytes */
  int iOff;                       /* Current offset within zIn[] */
  char *zOut;                     /* Output value */
};

/*
** Append n bytes of text from buffer z to the output string p->zOut. If n
** is negative, z is a nul-terminated string and all of it is appended.
**
** A NULL value for z is treated as an empty string. This matters because
** the open/close markup strings are obtained with sqlite3_value_text(),
** which returns NULL for an SQL NULL argument; calling strlen() on such a
** pointer is undefined behaviour.
**
** The previous value of p->zOut is consumed by the %z conversion. Returns
** SQLITE_OK if successful, or SQLITE_NOMEM if the new string could not be
** allocated (in which case p->zOut is NULL).
*/
static int fts5HighlightAppend(HighlightContext *p, const char *z, int n){
  if( z==0 ){
    z = "";
    n = 0;
  }else if( n<0 ){
    n = (int)strlen(z);
  }
  p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
  if( p->zOut==0 ) return SQLITE_NOMEM;
  return SQLITE_OK;
}

/*
** Tokenizer callback used by highlight(). It is invoked once for each token
** in the column text and copies the input through to the output, inserting
** the open markup before the first token of the current phrase instance
** and the close markup after its last token.
**
** When the last token of the current instance is reached, the following
** instances are examined in order until one is found that extends past the
** current one:
**
**   * Instances that end at or before the current end position are wholly
**     contained in text that is already highlighted and are skipped.
**   * If the next instance starts at or before the current end position it
**     overlaps the current one, so the close markup is suppressed and the
**     highlighted region simply extends to the end of that instance.
**   * If there are no more instances (xInst() returns SQLITE_RANGE), or the
**     next one is in a different column, highlighting is finished and
**     iStart/iEnd are set to -1.
**
** Any other error returned by xInst() is passed back to the caller.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd,                       /* End offset of token */
  int iPos                        /* Position offset of token */
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;

  if( iPos==p->iStart ){
    /* Flush the text between the previous token and this one, then open
    ** the highlight. */
    rc = fts5HighlightAppend(p, &p->zIn[p->iOff], iStart - p->iOff);
    p->iOff = iStart;
    if( rc==SQLITE_OK ){
      rc = fts5HighlightAppend(p, p->zOpen, -1);
    }
  }
  
  if( rc==SQLITE_OK ){
    /* Copy everything up to and including this token. */
    rc = fts5HighlightAppend(p, &p->zIn[p->iOff], iEnd - p->iOff);
    p->iOff = iEnd;
  }

  if( rc==SQLITE_OK && iPos==p->iEnd ){
    int bClose = 1;
    while( 1 ){
      int iP, iPCol, iOff;
      int iLast;                  /* Last token of the candidate instance */

      rc = p->pApi->xInst(p->pFts, ++p->iInst, &iP, &iPCol, &iOff);
      if( rc!=SQLITE_OK && rc!=SQLITE_RANGE ){
        /* Genuine error. The output variables are not valid. */
        break;
      }
      if( rc==SQLITE_RANGE || iPCol!=p->iCol ){
        p->iStart = -1;
        p->iEnd = -1;
        rc = SQLITE_OK;
        break;
      }

      iLast = iOff - 1 + p->pApi->xPhraseSize(p->pFts, iP);
      if( iLast<=p->iEnd ) continue;
      if( iOff<=p->iEnd ) bClose = 0;
      p->iStart = iOff;
      p->iEnd = iLast;
      break;
    }

    if( rc==SQLITE_OK && bClose ){
      rc = fts5HighlightAppend(p, p->zClose, -1);
    }
  }

  return rc;
}

/*
** Implementation of the highlight() auxiliary function. Exactly three
** trailing arguments are expected:
**
**     apVal[0]    index of the column to read text from
**     apVal[1]    text to insert before each highlighted region
**     apVal[2]    text to insert after each highlighted region
**
** The column text is re-tokenized with fts5HighlightCb() as the callback,
** which assembles the marked-up copy. The result is returned as text, or
** as an error code if any step fails.
*/
static void fts5HighlightFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext sHl;           /* State shared with fts5HighlightCb() */
  int rc;

  if( nVal!=3 ){
    sqlite3_result_error(pCtx, 
        "wrong number of arguments to function highlight()", -1
    );
    return;
  }

  memset(&sHl, 0, sizeof(sHl));
  sHl.pApi = pApi;
  sHl.pFts = pFts;
  sHl.iStart = -1;
  sHl.iEnd = -1;
  sHl.iCol = sqlite3_value_int(apVal[0]);
  sHl.zOpen = (const char*)sqlite3_value_text(apVal[1]);
  sHl.zClose = (const char*)sqlite3_value_text(apVal[2]);
  rc = pApi->xColumnText(pFts, sHl.iCol, &sHl.zIn, &sHl.nIn);

  /* Step through the phrase instances until the first one in column iCol
  ** is found. The loop also ends when xInst() returns an error, which 
  ** includes SQLITE_RANGE once the instances are exhausted. */
  for( ; rc==SQLITE_OK; sHl.iInst++ ){
    int iPhrase, iInstCol, iInstOff;
    rc = pApi->xInst(pFts, sHl.iInst, &iPhrase, &iInstCol, &iInstOff);
    if( rc!=SQLITE_OK || iInstCol!=sHl.iCol ) continue;
    sHl.iStart = iInstOff;
    sHl.iEnd = iInstOff - 1 + pApi->xPhraseSize(pFts, iPhrase);
    break;
  }

  /* SQLITE_RANGE just means there is nothing to highlight in this column.
  ** The text is still tokenized so that it is copied to the output. */
  if( rc==SQLITE_OK || rc==SQLITE_RANGE ){
    rc = pApi->xTokenize(pFts, sHl.zIn, sHl.nIn, (void*)&sHl, fts5HighlightCb);
  }

  /* Append whatever follows the final token. */
  if( rc==SQLITE_OK ){
    rc = fts5HighlightAppend(&sHl, &sHl.zIn[sHl.iOff], sHl.nIn - sHl.iOff);
  }

  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }else{
    sqlite3_result_text(pCtx, (const char*)sHl.zOut, -1, SQLITE_TRANSIENT);
  }
  sqlite3_free(sHl.zOut);
}
/*
**************************************************************************/

typedef struct SnipPhrase SnipPhrase;
typedef struct SnipIter SnipIter;
typedef struct SnippetCtx SnippetCtx;

struct SnipPhrase {
  u64 mask;                       /* Current mask */
  int nToken;                     /* Tokens in this phrase */
791
792
793
794
795
796
797
















798
799
800
801
802
803
804
      const char *z;
      int n;
      rc = pApi->xColumnText(pFts, i, &z, &n);
      if( i!=0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " ");
      sqlite3Fts5BufferAppendListElem(&rc, &s, z, n);
    }
  }

















  /*
  ** xPhraseCount()
  */
  if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " phrasecount ");
  nPhrase = pApi->xPhraseCount(pFts);
  if( 0==zReq || 0==sqlite3_stricmp(zReq, "phrasecount") ){







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
      const char *z;
      int n;
      rc = pApi->xColumnText(pFts, i, &z, &n);
      if( i!=0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " ");
      sqlite3Fts5BufferAppendListElem(&rc, &s, z, n);
    }
  }

  /*
  ** xInst()
  */
  if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " inst ");
  if( 0==zReq || 0==sqlite3_stricmp(zReq, "inst") ){
    int nInst;
    rc = pApi->xInstCount(pFts, &nInst);
    for(i=0; rc==SQLITE_OK && i<nInst; i++){
      int iPhrase, iCol, iOff;
      rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
      sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d.%d.%d",
          (i==0 ? "" : " "), iPhrase, iCol, iOff
      );
    }
  }

  /*
  ** xPhraseCount()
  */
  if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " phrasecount ");
  nPhrase = pApi->xPhraseCount(pFts);
  if( 0==zReq || 0==sqlite3_stricmp(zReq, "phrasecount") ){
962
963
964
965
966
967
968

969
970
971
972
973
974
975
    void *pUserData;              /* User-data pointer */
    fts5_extension_function xFunc;/* Callback function */
    void (*xDestroy)(void*);      /* Destructor function */
  } aBuiltin [] = {
    { "bm25debug", (void*)1, fts5Bm25Function,    0 },
    { "snippet",   0, fts5SnippetFunction, 0 },
    { "fts5_test", 0, fts5TestFunction,    0 },

    { "bm25",      0, fts5Bm25Function,    0 },
  };

  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* To iterate through builtin functions */

  for(i=0; rc==SQLITE_OK && i<sizeof(aBuiltin)/sizeof(aBuiltin[0]); i++){







>







1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
    void *pUserData;              /* User-data pointer */
    fts5_extension_function xFunc;/* Callback function */
    void (*xDestroy)(void*);      /* Destructor function */
  } aBuiltin [] = {
    { "bm25debug", (void*)1, fts5Bm25Function,    0 },
    { "snippet",   0, fts5SnippetFunction, 0 },
    { "fts5_test", 0, fts5TestFunction,    0 },
    { "highlight", 0, fts5HighlightFunction, 0 },
    { "bm25",      0, fts5Bm25Function,    0 },
  };

  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* To iterate through builtin functions */

  for(i=0; rc==SQLITE_OK && i<sizeof(aBuiltin)/sizeof(aBuiltin[0]); i++){
Changes to ext/fts5/fts5_index.c.
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
    p->rc = SQLITE_NOMEM;
  }else{
    memset(pRet, 0, nByte);
  }
  return pRet;
}

static void *fts5MallocZero(int *pRc, int nByte){
  void *pRet = 0;
  if( *pRc==SQLITE_OK ){
    pRet = sqlite3_malloc(nByte);
    if( pRet==0 && nByte>0 ){
      *pRc = SQLITE_NOMEM;
    }else{
      memset(pRet, 0, nByte);







|







688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
    p->rc = SQLITE_NOMEM;
  }else{
    memset(pRet, 0, nByte);
  }
  return pRet;
}

void *sqlite3Fts5MallocZero(int *pRc, int nByte){
  void *pRet = 0;
  if( *pRc==SQLITE_OK ){
    pRet = sqlite3_malloc(nByte);
    if( pRet==0 && nByte>0 ){
      *pRc = SQLITE_NOMEM;
    }else{
      memset(pRet, 0, nByte);
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
  ** structure record.  */
  i = getVarint32(&pData[i], nLevel);
  i += getVarint32(&pData[i], nSegment);
  nByte = (
      sizeof(Fts5Structure) +                    /* Main structure */
      sizeof(Fts5StructureLevel) * (nLevel)      /* aLevel[] array */
  );
  pRet = (Fts5Structure*)fts5MallocZero(&rc, nByte);

  if( pRet ){
    pRet->nLevel = nLevel;
    i += sqlite3GetVarint(&pData[i], &pRet->nWriteCounter);

    for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
      int nTotal;
      int iSeg;

      i += getVarint32(&pData[i], pLvl->nMerge);
      i += getVarint32(&pData[i], nTotal);
      assert( nTotal>=pLvl->nMerge );
      pLvl->aSeg = (Fts5StructureSegment*)fts5MallocZero(&rc, 
          nTotal * sizeof(Fts5StructureSegment)
      );

      if( rc==SQLITE_OK ){
        pLvl->nSeg = nTotal;
        for(iSeg=0; iSeg<nTotal; iSeg++){
          i += getVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);







|













|







977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
  ** structure record.  */
  i = getVarint32(&pData[i], nLevel);
  i += getVarint32(&pData[i], nSegment);
  nByte = (
      sizeof(Fts5Structure) +                    /* Main structure */
      sizeof(Fts5StructureLevel) * (nLevel)      /* aLevel[] array */
  );
  pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);

  if( pRet ){
    pRet->nLevel = nLevel;
    i += sqlite3GetVarint(&pData[i], &pRet->nWriteCounter);

    for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
      int nTotal;
      int iSeg;

      i += getVarint32(&pData[i], pLvl->nMerge);
      i += getVarint32(&pData[i], nTotal);
      assert( nTotal>=pLvl->nMerge );
      pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, 
          nTotal * sizeof(Fts5StructureSegment)
      );

      if( rc==SQLITE_OK ){
        pLvl->nSeg = nTotal;
        for(iSeg=0; iSeg<nTotal; iSeg++){
          i += getVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
Changes to test/fts5ae.test.
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY bm25(t8) DESC;
  } $res

  do_execsql_test 8.2.$tn.2 {
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY +rank DESC;
  } $res

  do_execsql_test 8.3.$tn.3 {
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY rank DESC;
  } $res
}


finish_test








|







265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY bm25(t8) DESC;
  } $res

  do_execsql_test 8.2.$tn.2 {
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY +rank DESC;
  } $res

  do_execsql_test 8.2.$tn.3 {
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY rank DESC;
  } $res
}


finish_test

Added test/fts5ak.test.






























































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# 2014 November 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#
# Specifically, the auxiliary function "highlight".
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
# The prefix is prepended to test case names; keep it in step with the
# name of this file (fts5ak.test).
set testprefix fts5ak

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Create a single-column fts5 table and populate it with ten rows, each
# consisting of ten single-letter tokens.
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x);
  INSERT INTO ft1 VALUES('i d d a g i b g d d');
  INSERT INTO ft1 VALUES('h d b j c c g a c a');
  INSERT INTO ft1 VALUES('e j a e f h b f h h');
  INSERT INTO ft1 VALUES('j f h d g h i b d f');
  INSERT INTO ft1 VALUES('d c j d c j b c g e');
  INSERT INTO ft1 VALUES('i a d e g j g d a a');
  INSERT INTO ft1 VALUES('j f c e d a h j d b');
  INSERT INTO ft1 VALUES('i c c f a d g h j e');
  INSERT INTO ft1 VALUES('i d i g c d c h b f');
  INSERT INTO ft1 VALUES('g d a e h a b c f j');
}

# Single-token query. Every occurrence of the token in a matching row is
# wrapped in the open/close markers.
do_execsql_test 1.2 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e';
} {
  {g d a [e] h a b c f j}
  {i c c f a d g h j [e]}
  {j f c [e] d a h j d b}
  {i a d [e] g j g d a a}
  {d c j d c j b c g [e]}
  {[e] j a [e] f h b f h h}
}

# Two-token phrase. Both tokens of each phrase instance are enclosed by a
# single pair of markers.
do_execsql_test 1.3 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'h + d';
} {
  {j f [h d] g h i b d f} 
  {[h d] b j c c g a c a}
}

# A row that contains more than one instance of the phrase.
do_execsql_test 1.4 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d';
} {
  {i [d d] a g i b g [d d]}
}

# The same token repeated in the query. Instances that coincide must
# produce only one pair of markers, so the result is the same as for 1.2.
do_execsql_test 1.5 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e e e'
} {
  {g d a [e] h a b c f j}
  {i c c f a d g h j [e]}
  {j f c [e] d a h j d b}
  {i a d [e] g j g d a a}
  {d c j d c j b c g [e]}
  {[e] j a [e] f h b f h h}
}

# As 1.5, but with a repeated two-token phrase. Same result as 1.4.
do_execsql_test 1.6 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d d + d';
} {
  {i [d d] a g i b g [d d]}
}

# A second table holding a single row, used to test how phrase instances
# that overlap or adjoin each other are highlighted.
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE ft2 USING fts5(x);
  INSERT INTO ft2 VALUES('a b c d e f g h i j');
}

# Two overlapping instances are merged into one highlighted region.
do_execsql_test 2.2 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c+d+e'
} {{a [b c d e] f g h i j}}

# Adjacent instances that do not overlap each get their own markers.
do_execsql_test 2.3 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d e+f+g'
} {
  {a [b c d] [e f g] h i j}
}

# An instance wholly contained within another adds no extra markers.
do_execsql_test 2.4 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c'
} {
  {a [b c d] e f g h i j}
}

# An overlapping instance that extends beyond the end of the first one
# lengthens the highlighted region.
do_execsql_test 2.5 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c c+d+e'
} {
  {a [b c d e] f g h i j}
}


finish_test