Overview
Comment: | Change the way ^ tokens work in FTS so that the filtering is done as part of reading the FTS index instead of waiting until an entire doclist has been retrieved and then filtering it. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | fts4-content |
Files: | files | file ages | folders |
SHA1: | 9b58c59eb4efaa38ce50a3ce1b52f9ba578c71d6 |
User & Date: | dan 2011-10-19 11:57:13 |
Context
2011-10-19
| ||
15:52 | Have FTS3 ignore ^ prefixes. The ^ syntax is only supported on FTS4 tables. Closed-Leaf check-in: df36ac94 user: dan tags: fts4-content | |
11:57 | Change the way ^ tokens work in FTS so that the filtering is done as part of reading the FTS index instead of waiting until an entire doclist has been retrieved and then filtering it. check-in: 9b58c59e user: dan tags: fts4-content | |
10:18 | Add tests for FTS ^ searches and matchinfo(). check-in: 92618c14 user: dan tags: fts4-content | |
Changes
Changes to ext/fts3/fts3.c.
2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 .... 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 .... 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 |
} } *pnRight = p - aOut; } /* ** When this function is called, pList points to a doclist containing position ** data, length *pnList bytes. This removes all entries from the doclist that ** do not correspond to the first token in a column and overwrites pList ** with the result. *pnList is set to the length of the new doclist before ** returning. ** ** If bDescDoclist is true, then both the input and output are in descending ** order. Otherwise, ascending. */ static void fts3DoclistFirstFilter( int bDescDoclist, /* True if pList is a descending doclist */ char *pList, /* Buffer containing doclist */ int *pnList /* IN/OUT: Size of doclist */ ){ char *p = pList; char *pOut = pList; char *pEnd = &pList[*pnList]; sqlite3_int64 iDoc; sqlite3_int64 iPrev; int bFirstOut = 0; fts3GetDeltaVarint3(&p, pEnd, 0, &iDoc); while( p ){ int bWritten = 0; if( *p!=0x01 ){ if( *p==0x02 ){ fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); *pOut++ = 0x02; bWritten = 1; } fts3ColumnlistCopy(0, &p); } while( *p==0x01 ){ sqlite3_int64 iCol; p++; p += sqlite3Fts3GetVarint(p, &iCol); if( *p==0x02 ){ if( bWritten==0 ){ fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); bWritten = 1; } *pOut++ = 0x01; pOut += sqlite3Fts3PutVarint(pOut, iCol); *pOut++ = 0x02; } fts3ColumnlistCopy(0, &p); } if( bWritten ){ *pOut++ = 0x00; } assert( *p==0x00 ); p++; fts3GetDeltaVarint3(&p, pEnd, bDescDoclist, &iDoc); } *pnList = (pOut - pList); } /* ** Merge all doclists in the TermSelect.aaOutput[] array into a single ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all ** other doclists (except the aaOutput[0] one) and return SQLITE_OK. ................................................................................ Fts3SegFilter filter; /* Segment term filter configuration */ pSegcsr = pTok->pSegcsr; memset(&tsc, 0, sizeof(TermSelect)); filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); filter.iCol = iColumn; filter.zTerm = pTok->z; filter.nTerm = pTok->n; rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); while( SQLITE_OK==rc ................................................................................ Fts3Phrase *p, /* Phrase to merge pList/nList into */ int iToken, /* Token pList/nList corresponds to */ char *pList, /* Pointer to doclist */ int nList /* Number of bytes in pList */ ){ assert( iToken!=p->iDoclistToken ); if( p->aToken[iToken].bFirst ){ fts3DoclistFirstFilter(pTab->bDescIdx, pList, &nList); } if( pList==0 ){ sqlite3_free(p->doclist.aAll); p->doclist.aAll = 0; p->doclist.nAll = 0; } else if( p->iDoclistToken<0 ){ |
| | | | | | < < < < > > | > | > > < | < < < < < < < | | < > | | | | | | | | | | | < > | | | | | | | | | | | < < < < < < > > < < < < |
2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 .... 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 .... 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 |
} } *pnRight = p - aOut; } /* ** Argument pList points to a position list nList bytes in size. This ** function checks to see if the position list contains any entries for ** a token in position 0 (of any column). If so, it writes argument iDelta ** to the output buffer pOut, followed by a position list consisting only ** of the entries from pList at position 0, and terminated by an 0x00 byte. ** The value returned is the number of bytes written to pOut (if any). */ int sqlite3Fts3FirstFilter( sqlite3_int64 iDelta, /* Varint that may be written to pOut */ char *pList, /* Position list (no 0x00 term) */ int nList, /* Size of pList in bytes */ char *pOut /* Write output here */ ){ int nOut = 0; int bWritten = 0; /* True once iDelta has been written */ char *p = pList; char *pEnd = &pList[nList]; if( *p!=0x01 ){ if( *p==0x02 ){ nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); pOut[nOut++] = 0x02; bWritten = 1; } fts3ColumnlistCopy(0, &p); } while( p<pEnd && *p==0x01 ){ sqlite3_int64 iCol; p++; p += sqlite3Fts3GetVarint(p, &iCol); if( *p==0x02 ){ if( bWritten==0 ){ nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); bWritten = 1; } pOut[nOut++] = 0x01; nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol); pOut[nOut++] = 0x02; } fts3ColumnlistCopy(0, &p); } if( bWritten ){ pOut[nOut++] = 0x00; } return nOut; } /* ** Merge all doclists in the TermSelect.aaOutput[] array into a single ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all ** other doclists (except the aaOutput[0] one) and return SQLITE_OK. ................................................................................ Fts3SegFilter filter; /* Segment term filter configuration */ pSegcsr = pTok->pSegcsr; memset(&tsc, 0, sizeof(TermSelect)); filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0) | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); filter.iCol = iColumn; filter.zTerm = pTok->z; filter.nTerm = pTok->n; rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); while( SQLITE_OK==rc ................................................................................ Fts3Phrase *p, /* Phrase to merge pList/nList into */ int iToken, /* Token pList/nList corresponds to */ char *pList, /* Pointer to doclist */ int nList /* Number of bytes in pList */ ){ assert( iToken!=p->iDoclistToken ); if( pList==0 ){ sqlite3_free(p->doclist.aAll); p->doclist.aAll = 0; p->doclist.nAll = 0; } else if( p->iDoclistToken<0 ){ |
Changes to ext/fts3/fts3Int.h.
425
426
427
428
429
430
431
432
433
434
435
436
437
438
...
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
|
/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS 0x00000001
#define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
#define FTS3_SEGMENT_PREFIX 0x00000008
#define FTS3_SEGMENT_SCAN 0x00000010
/* Type passed as 4th argument to SegmentReaderIterate() */
struct Fts3SegFilter {
const char *zTerm;
int nTerm;
int iCol;
int flags;
................................................................................
/* fts3.c */
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
int sqlite3Fts3GetVarint32(const char *, int *);
int sqlite3Fts3VarintLen(sqlite3_uint64);
void sqlite3Fts3Dequote(char *);
void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *,
sqlite3_tokenizer **, char **
);
|
>
<
>
|
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
...
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
|
/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 #define FTS3_SEGMENT_PREFIX 0x00000008 #define FTS3_SEGMENT_SCAN 0x00000010 #define FTS3_SEGMENT_FIRST 0x00000020 /* Type passed as 4th argument to SegmentReaderIterate() */ struct Fts3SegFilter { const char *zTerm; int nTerm; int iCol; int flags; ................................................................................ /* fts3.c */ int sqlite3Fts3PutVarint(char *, sqlite3_int64); int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *); /* fts3_tokenizer.c */ const char *sqlite3Fts3NextToken(const char *, int *); int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, sqlite3_tokenizer **, char ** ); |
Changes to ext/fts3/fts3_write.c.
2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 .... 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 .... 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 |
int rc = SQLITE_OK; int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); Fts3SegReader **apSegment = pCsr->apSegment; int nSegment = pCsr->nSegment; Fts3SegFilter *pFilter = pCsr->pFilter; int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp ); ................................................................................ ){ nMerge++; } assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); if( nMerge==1 && !isIgnoreEmpty && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) ){ pCsr->nDoclist = apSegment[0]->nDoclist; if( fts3SegReaderIsPending(apSegment[0]) ){ rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); pCsr->aDoclist = pCsr->aBuffer; }else{ ................................................................................ pCsr->nBuffer = (nDoclist+nByte)*2; aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); if( !aNew ){ return SQLITE_NOMEM; } pCsr->aBuffer = aNew; } nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); iPrev = iDocid; if( isRequirePos ){ memcpy(&pCsr->aBuffer[nDoclist], pList, nList); nDoclist += nList; pCsr->aBuffer[nDoclist++] = '\0'; } } fts3SegReaderSort(apSegment, nMerge, j, xCmp); } if( nDoclist>0 ){ pCsr->aDoclist = pCsr->aBuffer; |
> > > > > > > > > > > > > | | | | | | > |
2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 .... 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 .... 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 |
int rc = SQLITE_OK; int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST); Fts3SegReader **apSegment = pCsr->apSegment; int nSegment = pCsr->nSegment; Fts3SegFilter *pFilter = pCsr->pFilter; int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp ); ................................................................................ ){ nMerge++; } assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); if( nMerge==1 && !isIgnoreEmpty && !isFirst && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) ){ pCsr->nDoclist = apSegment[0]->nDoclist; if( fts3SegReaderIsPending(apSegment[0]) ){ rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); pCsr->aDoclist = pCsr->aBuffer; }else{ ................................................................................ pCsr->nBuffer = (nDoclist+nByte)*2; aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); if( !aNew ){ return SQLITE_NOMEM; } pCsr->aBuffer = aNew; } if( isFirst ){ char *a = &pCsr->aBuffer[nDoclist]; int nWrite; nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a); if( nWrite ){ iPrev = iDocid; nDoclist += nWrite; } }else{ nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); iPrev = iDocid; if( isRequirePos ){ memcpy(&pCsr->aBuffer[nDoclist], pList, nList); nDoclist += nList; pCsr->aBuffer[nDoclist++] = '\0'; } } } fts3SegReaderSort(apSegment, nMerge, j, xCmp); } if( nDoclist>0 ){ pCsr->aDoclist = pCsr->aBuffer; |