Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | Change the way ^ tokens work in FTS so that the filtering is done as part of reading the FTS index instead of waiting until an entire doclist has been retrieved and then filtering it. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts4-content |
Files: | files | file ages | folders |
SHA1: | 9b58c59eb4efaa38ce50a3ce1b52f9ba |
User & Date: | dan 2011-10-19 11:57:13.985 |
Context
2011-10-19
| ||
15:52 | Have FTS3 ignore ^ prefixes. The ^ syntax is only supported on FTS4 tables. (Closed-Leaf check-in: df36ac9481 user: dan tags: fts4-content) | |
11:57 | Change the way ^ tokens work in FTS so that the filtering is done as part of reading the FTS index instead of waiting until an entire doclist has been retrieved and then filtering it. (check-in: 9b58c59eb4 user: dan tags: fts4-content) | |
10:18 | Add tests for FTS ^ searches and matchinfo(). (check-in: 92618c1463 user: dan tags: fts4-content) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
2344 2345 2346 2347 2348 2349 2350 | } } *pnRight = p - aOut; } /* | | | | | > | < < < | | | > | > > < | < < < < < < < | | | | | | | | | | | | | | | | | | | | | | | | | | < < < < | < | 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 | } } *pnRight = p - aOut; } /* ** Argument pList points to a position list nList bytes in size. This ** function checks to see if the position list contains any entries for ** a token in position 0 (of any column). If so, it writes argument iDelta ** to the output buffer pOut, followed by a position list consisting only ** of the entries from pList at position 0, and terminated by an 0x00 byte. ** The value returned is the number of bytes written to pOut (if any). */ int sqlite3Fts3FirstFilter( sqlite3_int64 iDelta, /* Varint that may be written to pOut */ char *pList, /* Position list (no 0x00 term) */ int nList, /* Size of pList in bytes */ char *pOut /* Write output here */ ){ int nOut = 0; int bWritten = 0; /* True once iDelta has been written */ char *p = pList; char *pEnd = &pList[nList]; if( *p!=0x01 ){ if( *p==0x02 ){ nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); pOut[nOut++] = 0x02; bWritten = 1; } fts3ColumnlistCopy(0, &p); } while( p<pEnd && *p==0x01 ){ sqlite3_int64 iCol; p++; p += sqlite3Fts3GetVarint(p, &iCol); if( *p==0x02 ){ if( bWritten==0 ){ nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); bWritten = 1; } pOut[nOut++] = 0x01; nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol); pOut[nOut++] = 0x02; } fts3ColumnlistCopy(0, &p); } if( bWritten ){ pOut[nOut++] = 0x00; } return nOut; } /* ** Merge all doclists in the TermSelect.aaOutput[] array into a single ** doclist stored in TermSelect.aaOutput[0]. 
If successful, delete all ** other doclists (except the aaOutput[0] one) and return SQLITE_OK. |
︙ | ︙ | |||
2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 | Fts3SegFilter filter; /* Segment term filter configuration */ pSegcsr = pTok->pSegcsr; memset(&tsc, 0, sizeof(TermSelect)); filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); filter.iCol = iColumn; filter.zTerm = pTok->z; filter.nTerm = pTok->n; rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); while( SQLITE_OK==rc | > | 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 | Fts3SegFilter filter; /* Segment term filter configuration */ pSegcsr = pTok->pSegcsr; memset(&tsc, 0, sizeof(TermSelect)); filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0) | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); filter.iCol = iColumn; filter.zTerm = pTok->z; filter.nTerm = pTok->n; rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); while( SQLITE_OK==rc |
︙ | ︙ | |||
3576 3577 3578 3579 3580 3581 3582 | Fts3Phrase *p, /* Phrase to merge pList/nList into */ int iToken, /* Token pList/nList corresponds to */ char *pList, /* Pointer to doclist */ int nList /* Number of bytes in pList */ ){ assert( iToken!=p->iDoclistToken ); | < < < < | 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 | Fts3Phrase *p, /* Phrase to merge pList/nList into */ int iToken, /* Token pList/nList corresponds to */ char *pList, /* Pointer to doclist */ int nList /* Number of bytes in pList */ ){ assert( iToken!=p->iDoclistToken ); if( pList==0 ){ sqlite3_free(p->doclist.aAll); p->doclist.aAll = 0; p->doclist.nAll = 0; } else if( p->iDoclistToken<0 ){ |
︙ | ︙ |
Changes to ext/fts3/fts3Int.h.
︙ | ︙ | |||
425 426 427 428 429 430 431 432 433 434 435 436 437 438 | /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 #define FTS3_SEGMENT_PREFIX 0x00000008 #define FTS3_SEGMENT_SCAN 0x00000010 /* Type passed as 4th argument to SegmentReaderIterate() */ struct Fts3SegFilter { const char *zTerm; int nTerm; int iCol; int flags; | > | 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 | /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 #define FTS3_SEGMENT_PREFIX 0x00000008 #define FTS3_SEGMENT_SCAN 0x00000010 #define FTS3_SEGMENT_FIRST 0x00000020 /* Type passed as 4th argument to SegmentReaderIterate() */ struct Fts3SegFilter { const char *zTerm; int nTerm; int iCol; int flags; |
︙ | ︙ | |||
464 465 466 467 468 469 470 | /* fts3.c */ int sqlite3Fts3PutVarint(char *, sqlite3_int64); int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); | < > | 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 | /* fts3.c */ int sqlite3Fts3PutVarint(char *, sqlite3_int64); int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *); /* fts3_tokenizer.c */ const char *sqlite3Fts3NextToken(const char *, int *); int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, sqlite3_tokenizer **, char ** ); |
︙ | ︙ |
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 | int rc = SQLITE_OK; int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); Fts3SegReader **apSegment = pCsr->apSegment; int nSegment = pCsr->nSegment; Fts3SegFilter *pFilter = pCsr->pFilter; int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp ); | > | 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 | int rc = SQLITE_OK; int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST); Fts3SegReader **apSegment = pCsr->apSegment; int nSegment = pCsr->nSegment; Fts3SegFilter *pFilter = pCsr->pFilter; int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp ); |
︙ | ︙ | |||
2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 | ){ nMerge++; } assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); if( nMerge==1 && !isIgnoreEmpty && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) ){ pCsr->nDoclist = apSegment[0]->nDoclist; if( fts3SegReaderIsPending(apSegment[0]) ){ rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); pCsr->aDoclist = pCsr->aBuffer; }else{ | > | 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 | ){ nMerge++; } assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); if( nMerge==1 && !isIgnoreEmpty && !isFirst && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) ){ pCsr->nDoclist = apSegment[0]->nDoclist; if( fts3SegReaderIsPending(apSegment[0]) ){ rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); pCsr->aDoclist = pCsr->aBuffer; }else{ |
︙ | ︙ | |||
2629 2630 2631 2632 2633 2634 2635 | pCsr->nBuffer = (nDoclist+nByte)*2; aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); if( !aNew ){ return SQLITE_NOMEM; } pCsr->aBuffer = aNew; } | > > > > > > > > > > > | | | | | | > | 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 | pCsr->nBuffer = (nDoclist+nByte)*2; aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); if( !aNew ){ return SQLITE_NOMEM; } pCsr->aBuffer = aNew; } if( isFirst ){ char *a = &pCsr->aBuffer[nDoclist]; int nWrite; nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a); if( nWrite ){ iPrev = iDocid; nDoclist += nWrite; } }else{ nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); iPrev = iDocid; if( isRequirePos ){ memcpy(&pCsr->aBuffer[nDoclist], pList, nList); nDoclist += nList; pCsr->aBuffer[nDoclist++] = '\0'; } } } fts3SegReaderSort(apSegment, nMerge, j, xCmp); } if( nDoclist>0 ){ pCsr->aDoclist = pCsr->aBuffer; |
︙ | ︙ |