SQLite

Check-in [71011a4f9b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add code for the matchinfo 'longest common substring' feature.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts3-experimental
Files: files | file ages | folders
SHA1: 71011a4f9baf09ec6935ad591145252bf3c286ed
User & Date: dan 2010-11-24 19:26:19.000
Context
2010-11-25
10:33
Fix bugs in fts3 function matchinfo() when used with deferred tokens. (check-in: ddc2b7ec26 user: dan tags: fts3-experimental)
2010-11-24
19:26
Add code for the matchinfo 'longest common substring' feature. (check-in: 71011a4f9b user: dan tags: fts3-experimental)
15:02
Fix crashes that can occur when queries are run on an FTS4 table containing zero rows. (check-in: ed61fd20ad user: dan tags: fts3-experimental)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3_snippet.c.
966
967
968
969
970
971
972
973




















































































































974
975
976
977
978
979
980
  a = sqlite3_column_blob(pStmt, 0);
  a += sqlite3Fts3GetVarint(a, &nDoc);
  *pnDoc = (u32)nDoc;

  if( paLen ) *paLen = a;
  return SQLITE_OK;
}
  





















































































































static int fts3MatchinfoValues(
  Fts3Cursor *pCsr,               /* FTS3 cursor object */
  int bGlobal,                    /* True to grab the global stats */
  MatchInfo *pInfo,               /* Matchinfo context object */
  const char *zArg                /* Matchinfo format string */
){







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
  a = sqlite3_column_blob(pStmt, 0);
  a += sqlite3Fts3GetVarint(a, &nDoc);
  *pnDoc = (u32)nDoc;

  if( paLen ) *paLen = a;
  return SQLITE_OK;
}

typedef struct LcsIterator LcsIterator;
struct LcsIterator {
  Fts3Expr *pExpr;                /* Pointer to phrase expression */
  char *pRead;                    /* Cursor used to iterate through aDoclist */
  int iPosOffset;                 /* Tokens count up to end of this phrase */
  int iCol;                       /* Current column number */
  int iPos;                       /* Current position */
};

#define LCS_ITERATOR_FINISHED 0x7FFFFFFF;

static int fts3MatchinfoLcsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  LcsIterator *aIter = (LcsIterator *)pCtx;
  aIter[iPhrase].pExpr = pExpr;
  return SQLITE_OK;
}

static int fts3LcsIteratorAdvance(LcsIterator *pIter){
  char *pRead = pIter->pRead;
  sqlite3_int64 iRead;
  int rc = 0;

  pRead += sqlite3Fts3GetVarint(pRead, &iRead);
  if( iRead==0 ){
    pIter->iCol = LCS_ITERATOR_FINISHED;
    rc = 1;
  }else{
    if( iRead==1 ){
      pRead += sqlite3Fts3GetVarint(pRead, &iRead);
      pIter->iCol = iRead;
      pIter->iPos = pIter->iPosOffset;
      pRead += sqlite3Fts3GetVarint(pRead, &iRead);
      rc = 1;
    }
    pIter->iPos += (iRead-2);
  }

  pIter->pRead = pRead;
  return rc;
}
  
static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
  LcsIterator *aIter;
  int i;
  int iCol;
  int nToken = 0;

  /* Allocate and populate the array of LcsIterator objects. The array
  ** contains one element for each matchable phrase in the query.
  **/
  aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
  if( !aIter ) return SQLITE_NOMEM;
  memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
  (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);

  for(i=0; i<pInfo->nPhrase; i++){
    LcsIterator *pIter = &aIter[i];
    nToken -= pIter->pExpr->pPhrase->nToken;
    pIter->iPosOffset = nToken;
    pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1);
    if( pIter->pRead ){
      pIter->iPos = pIter->iPosOffset;
      fts3LcsIteratorAdvance(&aIter[i]);
    }else{
      pIter->iCol = LCS_ITERATOR_FINISHED;
    }
  }

  for(iCol=0; iCol<pInfo->nCol; iCol++){
    int nLcs = 0;
    int nLive = 0;

    for(i=0; i<pInfo->nPhrase; i++){
      assert( aIter[i].iCol>=iCol );
      if( aIter[i].iCol==iCol ) nLive++;
    }

    while( nLive>0 ){
      LcsIterator *pAdv = 0;
      int nThisLcs = 0;
      char *aRead;
      sqlite3_int64 iRead;

      for(i=0; i<pInfo->nPhrase; i++){
        LcsIterator *pIter = &aIter[i];
        int nToken = pIter->pExpr->pPhrase->nToken;

        if( iCol!=pIter->iCol ){  
          nThisLcs = 0;
          continue;
        }

        if( pAdv==0 || pIter->iPos<pAdv->iPos ){
          pAdv = pIter;
        }

        if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
          nThisLcs++;
        }else{
          nThisLcs = 1;
        }

        if( nThisLcs>nLcs ) nLcs = nThisLcs;
      }
      if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
    }

    pInfo->aMatchinfo[iCol] = nLcs;
  }

  sqlite3_free(aIter);
}

static int fts3MatchinfoValues(
  Fts3Cursor *pCsr,               /* FTS3 cursor object */
  int bGlobal,                    /* True to grab the global stats */
  MatchInfo *pInfo,               /* Matchinfo context object */
  const char *zArg                /* Matchinfo format string */
){
1044
1045
1046
1047
1048
1049
1050



1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
          }
          (void)fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
        }
        (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
        break;
      }





      default: 
        assert( zArg[i]==FTS3_MATCHINFO_LCS );
    }

    pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
  }

  sqlite3_reset(pSelect);
  return rc;







>
>
>

|
|







1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
          }
          (void)fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
        }
        (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
        break;
      }

      case FTS3_MATCHINFO_LCS:
        fts3MatchinfoLcs(pCsr, pInfo);
        break;

      default:
        assert( !"this cannot happen" );
    }

    pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
  }

  sqlite3_reset(pSelect);
  return rc;
Changes to ext/fts3/fts3_write.c.
323
324
325
326
327
328
329



330
331
332
333
334
335
336
int sqlite3Fts3SelectDocsize(
  Fts3Table *pTab,                /* Fts3 table handle */
  sqlite3_int64 iDocid,           /* Docid to read size data for */
  sqlite3_stmt **ppStmt           /* OUT: Statement handle */
){
  return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt);
}




/*
** Similar to fts3SqlStmt(). Except, after binding the parameters in
** array apVal[] to the SQL statement identified by eStmt, the statement
** is executed.
**
** Returns SQLITE_OK if the statement is successfully executed, or an







>
>
>







323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
int sqlite3Fts3SelectDocsize(
  Fts3Table *pTab,                /* Fts3 table handle */
  sqlite3_int64 iDocid,           /* Docid to read size data for */
  sqlite3_stmt **ppStmt           /* OUT: Statement handle */
){
  return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt);
}

void sqlite3Fts3MatchinfoLcs(Fts3Expr *pExpr, u32 *aOut){
}

/*
** Similar to fts3SqlStmt(). Except, after binding the parameters in
** array apVal[] to the SQL statement identified by eStmt, the statement
** is executed.
**
** Returns SQLITE_OK if the statement is successfully executed, or an
Changes to test/fts3defer2.test.
87
88
89
90
91
92
93

94
95
96
97
98
99
100
  1 {}
  2 { INSERT INTO t2(t2) VALUES('optimize') }
  3 { UPDATE t2_segments SET block = zeroblob(length(block)) 
      WHERE length(block)>10000;
  }
} {
  execsql $sql

  do_execsql_test 2.2.$tn {
    SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b';
  } [list                                          \
    [list 2 1  1 54 54  1 3 3  54 372 7]        \
    [list 2 1  1 54 54  1 3 3  54 372 7]        \
  ]
}







>







87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
  1 {}
  2 { INSERT INTO t2(t2) VALUES('optimize') }
  3 { UPDATE t2_segments SET block = zeroblob(length(block)) 
      WHERE length(block)>10000;
  }
} {
  execsql $sql

  do_execsql_test 2.2.$tn {
    SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b';
  } [list                                          \
    [list 2 1  1 54 54  1 3 3  54 372 7]        \
    [list 2 1  1 54 54  1 3 3  54 372 7]        \
  ]
}
Changes to test/fts3matchinfo.test.
177
178
179
180
181
182
183

184
185
186
187
188
189
190
191
192
193
194


195

196

197
198
199
200
201
202
203
204
205
206
207


208

209
210




211





































212



do_execsql_test 4.1.0 {
  CREATE VIRTUAL TABLE t4 USING fts4(x, y);
  INSERT INTO t4 VALUES('a b c d e', 'f g h i j');
  INSERT INTO t4 VALUES('f g h i j', 'a b c d e');
}

do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
  p {3 3}
  c {2 2}
  x {
    {1 1 1   0 1 1   1 1 1   0 1 1   1 1 1   0 1 1}
    {0 1 1   1 1 1   0 1 1   1 1 1   0 1 1   1 1 1}
  }
  n {2 2}
  l {{5 5} {5 5}}
  a {{5 5} {5 5}}



  xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -

}

do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
  p {1 1}
  c {2 2}
  x {
    {0 1 1   1 1 1}
    {1 1 1   0 1 1}
  }
  n {2 2}
  l {{5 5} {5 5}}
  a {{5 5} {5 5}}



  xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -

}











































finish_test








>











>
>

>

>











>
>

>


>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262


do_execsql_test 4.1.0 {
  CREATE VIRTUAL TABLE t4 USING fts4(x, y);
  INSERT INTO t4 VALUES('a b c d e', 'f g h i j');
  INSERT INTO t4 VALUES('f g h i j', 'a b c d e');
}

do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
  p {3 3}
  c {2 2}
  x {
    {1 1 1   0 1 1   1 1 1   0 1 1   1 1 1   0 1 1}
    {0 1 1   1 1 1   0 1 1   1 1 1   0 1 1   1 1 1}
  }
  n {2 2}
  l {{5 5} {5 5}}
  a {{5 5} {5 5}}

  s {{3 0} {0 3}}

  xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
  xpxsscplax -
}

do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
  p {1 1}
  c {2 2}
  x {
    {0 1 1   1 1 1}
    {1 1 1   0 1 1}
  }
  n {2 2}
  l {{5 5} {5 5}}
  a {{5 5} {5 5}}

  s {{0 1} {1 0}}

  xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
  sxsxs -
}

do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'}     { s {{2 0} {0 2}} }
do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} }
do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} }
do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'}     { s {{1 0} {0 1}} }

do_execsql_test 4.2.0 {
  CREATE VIRTUAL TABLE t5 USING fts4;
  INSERT INTO t5 VALUES('a a a a a');
  INSERT INTO t5 VALUES('a b a b a');
  INSERT INTO t5 VALUES('c b c b c');
}
do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'}         { s {2 1} }
do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'}         { s {2} }
do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'}       { s {3} }
do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'}      { s {1 2 1} }

do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')";
do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {x } 50000]')";

#do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'}         { s {2 1} }
#do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'}         { s {2} }
#do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'}       { s {3} }
#do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
#do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
#do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'}      { s {1 2 1 1} }
#
#do_execsql_test 4.4.0 {
#  UPDATE t5_segments 
#  SET block = zeroblob(length(block)) 
#  WHERE length(block)>10000;
#}
#
#do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'}         { s {2 1} }
#do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'}         { s {2} }
#do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'}       { s {3} }
#do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
#do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
#do_matchinfo_test 4.4.6 t5 {t5 MATCH 'a OR b'}      { s {1 2 1 1} }


finish_test