Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch fts5-snippet-bias Excluding Merge-Ins
This is equivalent to a diff from a861713c to b174ed2b
2016-08-27
| ||
18:34 | Fixes to fts5 snippet() function. (check-in: d464a7b1 user: dan tags: trunk) | |
2016-08-24
| ||
18:50 | Fix a bug in the fts5 snippet function causing it to return text containing zero phrase instances. (Closed-Leaf check-in: b174ed2b user: dan tags: fts5-snippet-bias) | |
2016-08-20
| ||
17:23 | Fix other minor problems with the fts5 snippet() function. (check-in: b56b2a85 user: dan tags: fts5-snippet-bias) | |
2016-08-18
| ||
22:19 | Change the name of Db.zName to Db.zDbSName for improved long-term code maintainability. (check-in: cb9865e1 user: drh tags: trunk) | |
14:33 | Rename the Db.zName field to Db.zDbSName to make it more descriptive and to distinguish it from all of the other "zName" variables scattered throughout the code. (check-in: 92a22f01 user: drh tags: zDbSName) | |
2016-08-17
| ||
19:05 | Bias the fts5 snippet() function to return snippets that look like they start at the start of sentences. (check-in: 60de1594 user: dan tags: fts5-snippet-bias) | |
11:14 | Fix a problem in the fts5 snippet() auxiliary function. (check-in: a861713c user: dan tags: trunk) | |
2016-08-16
| ||
16:46 | Fix a bug in destructor processing of Lemon. That has no impact on the SQLite grammar. The bug was introduced by prior work to optimize the Lemon-generated parser used by SQLite. (check-in: f9035b8e user: drh tags: trunk) | |
Changes to ext/fts5/fts5_aux.c.
︙ | ︙ | |||
241 242 243 244 245 246 247 248 249 250 251 252 253 254 | if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); } } /* ** End of highlight() implementation. **************************************************************************/ /* ** Implementation of snippet() function. */ static void fts5SnippetFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 | if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); } } /* ** End of highlight() implementation. **************************************************************************/ /* ** Context object passed to the fts5SentenceFinderCb() function. */ typedef struct Fts5SFinder Fts5SFinder; struct Fts5SFinder { int iPos; /* Current token position */ int nFirstAlloc; /* Allocated size of aFirst[] */ int nFirst; /* Number of entries in aFirst[] */ int *aFirst; /* Array of first token in each sentence */ const char *zDoc; /* Document being tokenized */ }; /* ** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if ** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an ** error occurs. */ static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){ if( p->nFirstAlloc==p->nFirst ){ int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64; int *aNew; aNew = (int*)sqlite3_realloc(p->aFirst, nNew*sizeof(int)); if( aNew==0 ) return SQLITE_NOMEM; p->aFirst = aNew; p->nFirstAlloc = nNew; } p->aFirst[p->nFirst++] = iAdd; return SQLITE_OK; } /* ** This function is an xTokenize() callback used by the auxiliary snippet() ** function. Its job is to identify tokens that are the first in a sentence. ** For each such token, an entry is added to the SFinder.aFirst[] array. */ static int fts5SentenceFinderCb( void *pContext, /* Pointer to HighlightContext object */ int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start offset of token */ int iEndOff /* End offset of token */ ){ int rc = SQLITE_OK; if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){ Fts5SFinder *p = (Fts5SFinder*)pContext; if( p->iPos>0 ){ int i; char c = 0; for(i=iStartOff-1; i>=0; i--){ c = p->zDoc[i]; if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break; } if( i!=iStartOff-1 && (c=='.' || c==':') ){ rc = fts5SentenceFinderAdd(p, p->iPos); } }else{ rc = fts5SentenceFinderAdd(p, 0); } p->iPos++; } return rc; } static int fts5SnippetScore( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ int nDocsize, /* Size of column in tokens */ unsigned char *aSeen, /* Array with one element per query phrase */ int iCol, /* Column to score */ int iPos, /* Starting offset to score */ int nToken, /* Max tokens per snippet */ int *pnScore, /* OUT: Score */ int *piPos /* OUT: Adjusted offset */ ){ int rc; int i; int ip = 0; int ic = 0; int iOff = 0; int iFirst = -1; int nInst; int nScore = 0; int iLast = 0; rc = pApi->xInstCount(pFts, &nInst); for(i=0; i<nInst && rc==SQLITE_OK; i++){ rc = pApi->xInst(pFts, i, &ip, &ic, &iOff); if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<(iPos+nToken) ){ nScore += (aSeen[ip] ? 1 : 1000); aSeen[ip] = 1; if( iFirst<0 ) iFirst = iOff; iLast = iOff + pApi->xPhraseSize(pFts, ip); } } *pnScore = nScore; if( piPos ){ int iAdj = iFirst - (nToken - (iLast-iFirst)) / 2; if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken; if( iAdj<0 ) iAdj = 0; *piPos = iAdj; } return rc; } /* ** Implementation of snippet() function. */ static void fts5SnippetFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ |
︙ | ︙ | |||
263 264 265 266 267 268 269 | int nToken; /* 5th argument to snippet() */ int nInst = 0; /* Number of instance matches this row */ int i; /* Used to iterate through instances */ int nPhrase; /* Number of phrases in query */ unsigned char *aSeen; /* Array of "seen instance" flags */ int iBestCol; /* Column containing best snippet */ int iBestStart = 0; /* First token of best snippet */ | < > > > < < | > > > > > | > > > > > > | > > > | | > > | < | | > > > | | < | < | | | > > > | > > > | | > > > > > > > | > | | < | > > > | | | < < < < < < < < | 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 | int nToken; /* 5th argument to snippet() */ int nInst = 0; /* Number of instance matches this row */ int i; /* Used to iterate through instances */ int nPhrase; /* Number of phrases in query */ unsigned char *aSeen; /* Array of "seen instance" flags */ int iBestCol; /* Column containing best snippet */ int iBestStart = 0; /* First token of best snippet */ int nBestScore = 0; /* Score of best snippet */ int nColSize = 0; /* Total size of iBestCol in tokens */ Fts5SFinder sFinder; /* Used to find the beginnings of sentences */ int nCol; if( nVal!=5 ){ const char *zErr = "wrong number of arguments to function snippet()"; sqlite3_result_error(pCtx, zErr, -1); return; } nCol = pApi->xColumnCount(pFts); memset(&ctx, 0, sizeof(HighlightContext)); iCol = sqlite3_value_int(apVal[0]); ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); zEllips = (const char*)sqlite3_value_text(apVal[3]); nToken = sqlite3_value_int(apVal[4]); iBestCol = (iCol>=0 ? iCol : 0); nPhrase = pApi->xPhraseCount(pFts); aSeen = sqlite3_malloc(nPhrase); if( aSeen==0 ){ rc = SQLITE_NOMEM; } if( rc==SQLITE_OK ){ rc = pApi->xInstCount(pFts, &nInst); } memset(&sFinder, 0, sizeof(Fts5SFinder)); for(i=0; i<nCol; i++){ if( iCol<0 || iCol==i ){ int nDoc; int nDocsize; int ii; sFinder.iPos = 0; sFinder.nFirst = 0; rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc); if( rc!=SQLITE_OK ) break; rc = pApi->xTokenize(pFts, sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb ); if( rc!=SQLITE_OK ) break; rc = pApi->xColumnSize(pFts, i, &nDocsize); if( rc!=SQLITE_OK ) break; for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){ int ip, ic, io; int iAdj; int nScore; int jj; rc = pApi->xInst(pFts, ii, &ip, &ic, &io); if( ic!=i || rc!=SQLITE_OK ) continue; memset(aSeen, 0, nPhrase); rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, io, nToken, &nScore, &iAdj ); if( rc==SQLITE_OK && nScore>nBestScore ){ nBestScore = nScore; iBestCol = i; iBestStart = iAdj; nColSize = nDocsize; } if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){ for(jj=0; jj<(sFinder.nFirst-1); jj++){ if( sFinder.aFirst[jj+1]>io ) break; } if( sFinder.aFirst[jj]<io ){ int nScore; memset(aSeen, 0, nPhrase); rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, sFinder.aFirst[jj], nToken, &nScore, 0 ); nScore += (sFinder.aFirst[jj]==0 ? 120 : 100); if( rc==SQLITE_OK && nScore>nBestScore ){ nBestScore = nScore; iBestCol = i; iBestStart = sFinder.aFirst[jj]; nColSize = nDocsize; } } } } } } if( rc==SQLITE_OK ){ rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); } if( rc==SQLITE_OK && nColSize==0 ){ rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); } if( ctx.zIn ){ if( rc==SQLITE_OK ){ rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); } ctx.iRangeStart = iBestStart; ctx.iRangeEnd = iBestStart + nToken - 1; if( iBestStart>0 ){ fts5HighlightAppend(&rc, &ctx, zEllips, -1); } |
︙ | ︙ | |||
361 362 363 364 365 366 367 | rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); } if( ctx.iRangeEnd>=(nColSize-1) ){ fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); }else{ fts5HighlightAppend(&rc, &ctx, zEllips, -1); } | | | | | | | | < > | 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 | rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); } if( ctx.iRangeEnd>=(nColSize-1) ){ fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); }else{ fts5HighlightAppend(&rc, &ctx, zEllips, -1); } } if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); }else{ sqlite3_result_error_code(pCtx, rc); } sqlite3_free(ctx.zOut); sqlite3_free(aSeen); sqlite3_free(sFinder.aFirst); } /************************************************************************/ /* ** The first time the bm25() function is called for a query, an instance ** of the following structure is allocated and populated. |
︙ | ︙ |
Changes to ext/fts5/test/fts5af.test.
︙ | ︙ | |||
68 69 70 71 72 73 74 | 1.6 {o o o o o X o} {o o o o o [X] o} 1.7 {o o o o o o X} {o o o o o o [X]} 2.1 {X o o o o o o o} {[X] o o o o o o...} 2.2 {o X o o o o o o} {o [X] o o o o o...} 2.3 {o o X o o o o o} {o o [X] o o o o...} 2.4 {o o o X o o o o} {o o o [X] o o o...} | | | | > > > > > > > | | | | | | | | | | | | | | | | | | > > > > | | | | | | | | | > > > > > > > > > > > > > > > > > > > | 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | 1.6 {o o o o o X o} {o o o o o [X] o} 1.7 {o o o o o o X} {o o o o o o [X]} 2.1 {X o o o o o o o} {[X] o o o o o o...} 2.2 {o X o o o o o o} {o [X] o o o o o...} 2.3 {o o X o o o o o} {o o [X] o o o o...} 2.4 {o o o X o o o o} {o o o [X] o o o...} 2.5 {o o o o X o o o} {o o o o [X] o o...} 2.6 {o o o o o X o o} {o o o o o [X] o...} 2.7 {o o o o o o X o} {o o o o o o [X]...} 2.8 {o o o o o o o X} {...o o o o o o [X]} 2.9 {o o o o o o o X o} {...o o o o o [X] o} 2.10 {o o o o o o o X o o} {...o o o o [X] o o} 2.11 {o o o o o o o X o o o} {...o o o [X] o o o} 2.12 {o o o o o o o X o o o o} {...o o o [X] o o o...} 3.1 {X o o o o o o o o} {[X] o o o o o o...} 3.2 {o X o o o o o o o} {o [X] o o o o o...} 3.3 {o o X o o o o o o} {o o [X] o o o o...} 3.4 {o o o X o o o o o} {o o o [X] o o o...} 3.5 {o o o o o o o X o o o o} {...o o o [X] o o o...} 3.6 {o o o o o o o o X o o o} {...o o o [X] o o o} 3.7 {o o o o o o o o o X o o} {...o o o o [X] o o} 3.8 {o o o o o o o o o o X o} {...o o o o o [X] o} 3.9 {o o o o o o o o o o o X} {...o o o o o o [X]} 4.1 {X o o o o o X o o} {[X] o o o o o [X]...} 4.2 {o o o o o o o X o o o o o X o} {...[X] o o o o o [X]...} 4.3 {o o o o o o o o X o o o o o X} {...[X] o o o o o [X]} 5.1 {X o o o o X o o o} {[X] o o o o [X] o...} 5.2 {o o o o o o o X o o o o X o o} {...[X] o o o o [X] o...} 5.3 {o o o o o o o o X o o o o X o} {...[X] o o o o [X] o} 5.4 {o o o o o o o o o X o o o o X} {...o [X] o o o o [X]} 6.1 {X o o o X o o o} {[X] o o o [X] o o...} 6.2 {o X o o o X o o o} {o [X] o o o [X] o...} 6.3 {o o o o o o o X o o o X o o} {...o [X] o o o [X] o...} 6.4 {o o o o o o o o X o o o X o} {...o [X] o o o [X] o} 6.5 {o o o o o o o o o X o o o X} {...o o [X] o o o [X]} 7.1 {X o o X o o o o o} {[X] o o [X] o o o...} 7.2 {o X o o X o o o o} {o [X] o o [X] o o...} 7.3 {o o o o o o o X o o X o o o} {...o [X] o o [X] o o...} 7.4 {o o o o o o o o X o o X o o} {...o [X] o o [X] o o} 7.5 {o o o o o o o o o X o o X o} {...o o [X] o o [X] o} 7.6 {o o o o o o o o o o X o o X} {...o o o [X] o o [X]} 8.1 {o o o o o o o o o X o o o o o o o o o o o o o o o o X X X o o o} {...o o [X] [X] [X] o o...} 8.2 {o o o o o o o. o o X o o o o o o o o o o o o o o o o X X X o o o} {...o o [X] o o o o...} 8.3 {o o o o X o o o o o o o o o o o o o o o o o o o o o X X X o o o} {o o o o [X] o o...} } { do_snippet_test 1.$tn $doc X $res } if {[detail_is_full]} { foreach {tn doc res} { 1.1 {X Y o o o o o} {[X Y] o o o o o} 1.2 {o X Y o o o o} {o [X Y] o o o o} 1.3 {o o X Y o o o} {o o [X Y] o o o} 1.4 {o o o X Y o o} {o o o [X Y] o o} 1.5 {o o o o X Y o} {o o o o [X Y] o} 1.6 {o o o o o X Y} {o o o o o [X Y]} 2.1 {X Y o o o o o o} {[X Y] o o o o o...} 2.2 {o X Y o o o o o} {o [X Y] o o o o...} 2.3 {o o X Y o o o o} {o o [X Y] o o o...} 2.4 {o o o o o o o X Y o o o} {...o o [X Y] o o o} 2.5 {o o o o o o o o X Y o o} {...o o o [X Y] o o} 2.6 {o o o o o o o o o X Y o} {...o o o o [X Y] o} 2.7 {o o o o o o o o o o X Y} {...o o o o o [X Y]} 3.1 {X Y o o o o o o o} {[X Y] o o o o o...} 3.2 {o X Y o o o o o o} {o [X Y] o o o o...} 3.3 {o o X Y o o o o o} {o o [X Y] o o o...} 3.4 {o o o o o o o X Y o o o o} {...o o [X Y] o o o...} 3.5 {o o o o o o o o X Y o o o} {...o o [X Y] o o o} 3.6 {o o o o o o o o o X Y o o} {...o o o [X Y] o o} 3.7 {o o o o o o o o o o X Y o} {...o o o o [X Y] o} 3.8 {o o o o o o o o o o o X Y} {...o o o o o [X Y]} } { do_snippet_test 2.$tn $doc "X + Y" $res } } do_execsql_test 4.0 { CREATE VIRTUAL TABLE x1 USING fts5(a, b); INSERT INTO x1 VALUES('xyz', '1 2 3 4 5 6 7 8 9 10 11 12 13'); SELECT snippet(x1, 1, '[', ']', '...', 5) FROM x1('xyz'); } { {1 2 3 4 5...} } do_execsql_test 5.0 { CREATE VIRTUAL TABLE p1 USING fts5(a, b); INSERT INTO p1 VALUES( 'x a a a a a a a a a a', 'a a a a a a a a a a a a a a a a a a a x' ); } do_execsql_test 5.1 { SELECT snippet(p1, 0, '[', ']', '...', 6) FROM p1('x'); } {{[x] a a a a a...}} } ;# foreach_detail_mode finish_test |
Changes to ext/fts5/test/fts5unicode2.test.
︙ | ︙ | |||
156 157 158 159 160 161 162 | the maximum x value. } 3 "ROW" { ...returns the value of y on the same [row] that contains the maximum x value. } 4 "rollback" { | | | | | | 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | the maximum x value. } 3 "ROW" { ...returns the value of y on the same [row] that contains the maximum x value. } 4 "rollback" { Pending statements no longer block [ROLLBACK]. Instead, the pending statement will return SQLITE_ABORT upon... } 5 "rOllback" { Pending statements no longer block [ROLLBACK]. Instead, the pending statement will return SQLITE_ABORT upon... } 6 "lang*" { Added support for the FTS4 [languageid] option. } } { do_test 2.$tn { set q [mapdoc $query] |
︙ | ︙ |
Changes to src/main.c.
︙ | ︙ | |||
2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 | /* Register all built-in functions, but do not attempt to read the ** database schema yet. This is delayed until the first time the database ** is accessed. */ sqlite3Error(db, SQLITE_OK); sqlite3RegisterPerConnectionBuiltinFunctions(db); /* Load automatic extensions - extensions that have been registered ** using the sqlite3_automatic_extension() API. */ | > > > > > > > > > > < | 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 | /* Register all built-in functions, but do not attempt to read the ** database schema yet. This is delayed until the first time the database ** is accessed. */ sqlite3Error(db, SQLITE_OK); sqlite3RegisterPerConnectionBuiltinFunctions(db); rc = sqlite3_errcode(db); #ifdef SQLITE_ENABLE_FTS5 /* Register any built-in FTS5 module before loading the automatic ** extensions. This allows automatic extensions to register FTS5 ** tokenizers and auxiliary functions. */ if( !db->mallocFailed && rc==SQLITE_OK ){ rc = sqlite3Fts5Init(db); } #endif /* Load automatic extensions - extensions that have been registered ** using the sqlite3_automatic_extension() API. */ if( rc==SQLITE_OK ){ sqlite3AutoLoadExtensions(db); rc = sqlite3_errcode(db); if( rc!=SQLITE_OK ){ goto opendb_out; } } |
︙ | ︙ | |||
2975 2976 2977 2978 2979 2980 2981 | } #endif #ifdef SQLITE_ENABLE_FTS3 /* automatically defined by SQLITE_ENABLE_FTS4 */ if( !db->mallocFailed && rc==SQLITE_OK ){ rc = sqlite3Fts3Init(db); } | < < < < < < | 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 | } #endif #ifdef SQLITE_ENABLE_FTS3 /* automatically defined by SQLITE_ENABLE_FTS4 */ if( !db->mallocFailed && rc==SQLITE_OK ){ rc = sqlite3Fts3Init(db); } #endif #ifdef SQLITE_ENABLE_ICU if( !db->mallocFailed && rc==SQLITE_OK ){ rc = sqlite3IcuInit(db); } #endif |
︙ | ︙ |