Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Add code for the matchinfo 'longest common substring' feature. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts3-experimental |
Files: | files | file ages | folders |
SHA1: | 71011a4f9baf09ec6935ad591145252b |
User & Date: | dan 2010-11-24 19:26:19.000 |
Context
2010-11-25
| ||
10:33 | Fix bugs in fts3 function matchinfo() when used with deferred tokens. (check-in: ddc2b7ec26 user: dan tags: fts3-experimental) | |
2010-11-24
| ||
19:26 | Add code for the matchinfo 'longest common substring' feature. (check-in: 71011a4f9b user: dan tags: fts3-experimental) | |
15:02 | Fix crashes that can occur when queries are run on an FTS4 table containing zero rows. (check-in: ed61fd20ad user: dan tags: fts3-experimental) | |
Changes
Changes to ext/fts3/fts3_snippet.c.
︙ | ︙ | |||
966 967 968 969 970 971 972 | a = sqlite3_column_blob(pStmt, 0); a += sqlite3Fts3GetVarint(a, &nDoc); *pnDoc = (u32)nDoc; if( paLen ) *paLen = a; return SQLITE_OK; } | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 | a = sqlite3_column_blob(pStmt, 0); a += sqlite3Fts3GetVarint(a, &nDoc); *pnDoc = (u32)nDoc; if( paLen ) *paLen = a; return SQLITE_OK; } typedef struct LcsIterator LcsIterator; struct LcsIterator { Fts3Expr *pExpr; /* Pointer to phrase expression */ char *pRead; /* Cursor used to iterate through aDoclist */ int iPosOffset; /* Tokens count up to end of this phrase */ int iCol; /* Current column number */ int iPos; /* Current position */ }; #define LCS_ITERATOR_FINISHED 0x7FFFFFFF; static int fts3MatchinfoLcsCb( Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, /* Phrase number (numbered from zero) */ void *pCtx /* Pointer to MatchInfo structure */ ){ LcsIterator *aIter = (LcsIterator *)pCtx; aIter[iPhrase].pExpr = pExpr; return SQLITE_OK; } static int fts3LcsIteratorAdvance(LcsIterator *pIter){ char *pRead = pIter->pRead; sqlite3_int64 iRead; int rc = 0; pRead += sqlite3Fts3GetVarint(pRead, &iRead); if( iRead==0 ){ pIter->iCol = LCS_ITERATOR_FINISHED; 
rc = 1; }else{ if( iRead==1 ){ pRead += sqlite3Fts3GetVarint(pRead, &iRead); pIter->iCol = iRead; pIter->iPos = pIter->iPosOffset; pRead += sqlite3Fts3GetVarint(pRead, &iRead); rc = 1; } pIter->iPos += (iRead-2); } pIter->pRead = pRead; return rc; } static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ LcsIterator *aIter; int i; int iCol; int nToken = 0; /* Allocate and populate the array of LcsIterator objects. The array ** contains one element for each matchable phrase in the query. **/ aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); if( !aIter ) return SQLITE_NOMEM; memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); for(i=0; i<pInfo->nPhrase; i++){ LcsIterator *pIter = &aIter[i]; nToken -= pIter->pExpr->pPhrase->nToken; pIter->iPosOffset = nToken; pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1); if( pIter->pRead ){ pIter->iPos = pIter->iPosOffset; fts3LcsIteratorAdvance(&aIter[i]); }else{ pIter->iCol = LCS_ITERATOR_FINISHED; } } for(iCol=0; iCol<pInfo->nCol; iCol++){ int nLcs = 0; int nLive = 0; for(i=0; i<pInfo->nPhrase; i++){ assert( aIter[i].iCol>=iCol ); if( aIter[i].iCol==iCol ) nLive++; } while( nLive>0 ){ LcsIterator *pAdv = 0; int nThisLcs = 0; char *aRead; sqlite3_int64 iRead; for(i=0; i<pInfo->nPhrase; i++){ LcsIterator *pIter = &aIter[i]; int nToken = pIter->pExpr->pPhrase->nToken; if( iCol!=pIter->iCol ){ nThisLcs = 0; continue; } if( pAdv==0 || pIter->iPos<pAdv->iPos ){ pAdv = pIter; } if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ nThisLcs++; }else{ nThisLcs = 1; } if( nThisLcs>nLcs ) nLcs = nThisLcs; } if( fts3LcsIteratorAdvance(pAdv) ) nLive--; } pInfo->aMatchinfo[iCol] = nLcs; } sqlite3_free(aIter); } static int fts3MatchinfoValues( Fts3Cursor *pCsr, /* FTS3 cursor object */ int bGlobal, /* True to grab the global stats */ MatchInfo *pInfo, /* Matchinfo context object */ const char *zArg /* Matchinfo format string */ ){ |
︙ | ︙ | |||
1044 1045 1046 1047 1048 1049 1050 1051 | } (void)fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); } (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); break; } | > > > | | | 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 | } (void)fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); } (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); break; } case FTS3_MATCHINFO_LCS: fts3MatchinfoLcs(pCsr, pInfo); break; default: assert( !"this cannot happen" ); } pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); } sqlite3_reset(pSelect); return rc; |
︙ | ︙ |
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
323 324 325 326 327 328 329 330 331 332 333 334 335 336 | int sqlite3Fts3SelectDocsize( Fts3Table *pTab, /* Fts3 table handle */ sqlite3_int64 iDocid, /* Docid to read size data for */ sqlite3_stmt **ppStmt /* OUT: Statement handle */ ){ return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt); } /* ** Similar to fts3SqlStmt(). Except, after binding the parameters in ** array apVal[] to the SQL statement identified by eStmt, the statement ** is executed. ** ** Returns SQLITE_OK if the statement is successfully executed, or an | > > > | 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 | int sqlite3Fts3SelectDocsize( Fts3Table *pTab, /* Fts3 table handle */ sqlite3_int64 iDocid, /* Docid to read size data for */ sqlite3_stmt **ppStmt /* OUT: Statement handle */ ){ return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt); } void sqlite3Fts3MatchinfoLcs(Fts3Expr *pExpr, u32 *aOut){ } /* ** Similar to fts3SqlStmt(). Except, after binding the parameters in ** array apVal[] to the SQL statement identified by eStmt, the statement ** is executed. ** ** Returns SQLITE_OK if the statement is successfully executed, or an |
︙ | ︙ |
Changes to test/fts3defer2.test.
︙ | ︙ | |||
87 88 89 90 91 92 93 94 95 96 97 98 99 100 | 1 {} 2 { INSERT INTO t2(t2) VALUES('optimize') } 3 { UPDATE t2_segments SET block = zeroblob(length(block)) WHERE length(block)>10000; } } { execsql $sql do_execsql_test 2.2.$tn { SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b'; } [list \ [list 2 1 1 54 54 1 3 3 54 372 7] \ [list 2 1 1 54 54 1 3 3 54 372 7] \ ] } | > | 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | 1 {} 2 { INSERT INTO t2(t2) VALUES('optimize') } 3 { UPDATE t2_segments SET block = zeroblob(length(block)) WHERE length(block)>10000; } } { execsql $sql do_execsql_test 2.2.$tn { SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b'; } [list \ [list 2 1 1 54 54 1 3 3 54 372 7] \ [list 2 1 1 54 54 1 3 3 54 372 7] \ ] } |
︙ | ︙ |
Changes to test/fts3matchinfo.test.
︙ | ︙ | |||
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 | do_execsql_test 4.1.0 { CREATE VIRTUAL TABLE t4 USING fts4(x, y); INSERT INTO t4 VALUES('a b c d e', 'f g h i j'); INSERT INTO t4 VALUES('f g h i j', 'a b c d e'); } do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} { p {3 3} c {2 2} x { {1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1} {0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1} } n {2 2} l {{5 5} {5 5}} a {{5 5} {5 5}} xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - } do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} { p {1 1} c {2 2} x { {0 1 1 1 1 1} {1 1 1 0 1 1} } n {2 2} l {{5 5} {5 5}} a {{5 5} {5 5}} xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - } finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 | do_execsql_test 4.1.0 { CREATE VIRTUAL TABLE t4 USING fts4(x, y); INSERT INTO t4 VALUES('a b c d e', 'f g h i j'); INSERT INTO t4 VALUES('f g h i j', 'a b c d e'); } do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} { p {3 3} c {2 2} x { {1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1} {0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1} } n {2 2} l {{5 5} {5 5}} a {{5 5} {5 5}} s {{3 0} {0 3}} xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - xpxsscplax - } do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} { p {1 1} c {2 2} x { {0 1 1 1 1 1} {1 1 1 0 1 1} } n {2 2} l {{5 5} {5 5}} a {{5 5} {5 5}} s {{0 1} {1 0}} xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - sxsxs - } do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'} { s {{2 0} {0 2}} } do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a 
b" c'} { s {{2 0} {0 2}} } do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} } do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'} { s {{1 0} {0 1}} } do_execsql_test 4.2.0 { CREATE VIRTUAL TABLE t5 USING fts4; INSERT INTO t5 VALUES('a a a a a'); INSERT INTO t5 VALUES('a b a b a'); INSERT INTO t5 VALUES('c b c b c'); } do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'} { s {2 1} } do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'} { s {2} } do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'} { s {3} } do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'} { s {3 1} } do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} } do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')"; do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {x } 50000]')"; #do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { s {2 1} } #do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'} { s {2} } #do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'} { s {3} } #do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'} { s {3 1} } #do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } #do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} } # #do_execsql_test 4.4.0 { # UPDATE t5_segments # SET block = zeroblob(length(block)) # WHERE length(block)>10000; #} # #do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'} { s {2 1} } #do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'} { s {2} } #do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'} { s {3} } #do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'} { s {3 1} } #do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } #do_matchinfo_test 4.4.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} } finish_test |