Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Require a minimum fanout for interior nodes. This prevents cases where excessively large terms keep the tree from finding a single root. A downside is that this could result in large interior nodes in the presence of large terms, which may be prone to fragmentation, though if the nodes were smaller that would translate into more levels in the tree, which would also have that problem. (CVS 3510) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
64b7e3406134ac4891113b9bb432ad97 |
User & Date: | shess 2006-11-13 21:00:55.000 |
Context
2006-11-13
| ||
21:09 | Delta-encode docids. This is good for around 22% reduction in index size with DL_POSITIONS. It improves performance about 5%-6%. (CVS 3511) (check-in: 9b6d413d75 user: shess tags: trunk) | |
21:00 | Require a minimum fanout for interior nodes. This prevents cases where excessively large terms keep the tree from finding a single root. A downside is that this could result in large interior nodes in the presence of large terms, which may be prone to fragmentation, though if the nodes were smaller that would translate into more levels in the tree, which would also have that problem. (CVS 3510) (check-in: 64b7e34061 user: shess tags: trunk) | |
20:15 | Allow backing tables to be missing on dropping fts table. Fixes http://www.sqlite.org/cvstrac/tktview?tn=1992,35 . (CVS 3509) (check-in: 9628a61a6f user: shess tags: trunk) | |
Changes
Changes to ext/fts2/fts2.c.
︙ | ︙ | |||
155 156 157 158 159 160 161 | ** is encoded. The subtree at iBlockid will contain all terms less ** than the first term encoded (or all terms if no term is encoded). ** Otherwise, for terms greater than or equal to pTerm[i] but less ** than pTerm[i+1], the subtree for that term will be rooted at ** iBlockid+i. ** ** New data is spilled to a new interior node at the same height when | | > > | | | 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | ** is encoded. The subtree at iBlockid will contain all terms less ** than the first term encoded (or all terms if no term is encoded). ** Otherwise, for terms greater than or equal to pTerm[i] but less ** than pTerm[i+1], the subtree for that term will be rooted at ** iBlockid+i. ** ** New data is spilled to a new interior node at the same height when ** the current node exceeds INTERIOR_MAX bytes (default 2048). ** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing ** interior nodes and making the tree too skinny. The interior nodes ** at a given height are naturally tracked by interior nodes at ** height+1, and so on. ** ** **** Segment directory **** ** The segment directory in table %_segdir stores meta-information for ** merging and deleting segments, and also the root node of the ** segment's tree. ** |
︙ | ︙ | |||
3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 | /* InteriorWriter is used to collect terms and block references into ** interior nodes in %_segments. See commentary at top of file for ** format. */ /* How large interior nodes can grow. */ #define INTERIOR_MAX 2048 /* ROOT_MAX controls how much data is stored inline in the segment ** directory. */ /* TODO(shess) Push ROOT_MAX down to whoever is writing things. It's ** only here so that interiorWriterRootInfo() and leafWriterRootInfo() ** can both see it, but if the caller passed it in, we wouldn't even | > > > > > > > > > > | 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 | /* InteriorWriter is used to collect terms and block references into ** interior nodes in %_segments. See commentary at top of file for ** format. */ /* How large interior nodes can grow. */ #define INTERIOR_MAX 2048 /* Minimum number of terms per interior node (except the root). This ** prevents large terms from making the tree too skinny - must be >0 ** so that the tree always makes progress. Note that the min tree ** fanout will be INTERIOR_MIN_TERMS+1. */ #define INTERIOR_MIN_TERMS 7 #if INTERIOR_MIN_TERMS<1 # error INTERIOR_MIN_TERMS must be greater than 0. #endif /* ROOT_MAX controls how much data is stored inline in the segment ** directory. */ /* TODO(shess) Push ROOT_MAX down to whoever is writing things. It's ** only here so that interiorWriterRootInfo() and leafWriterRootInfo() ** can both see it, but if the caller passed it in, we wouldn't even |
︙ | ︙ | |||
3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 | } typedef struct InteriorWriter { int iHeight; /* from 0 at leaves. */ InteriorBlock *first, *last; struct InteriorWriter *parentWriter; #ifndef NDEBUG sqlite_int64 iLastChildBlock; /* for consistency checks. */ #endif } InteriorWriter; /* Initialize an interior node where pTerm[nTerm] marks the leftmost ** term in the tree. iChildBlock is the leftmost child block at the ** next level down the tree. */ static void interiorWriterInit(int iHeight, const char *pTerm, int nTerm, sqlite_int64 iChildBlock, InteriorWriter *pWriter){ InteriorBlock *block; assert( iHeight>0 ); CLEAR(pWriter); pWriter->iHeight = iHeight; #ifndef NDEBUG pWriter->iLastChildBlock = iChildBlock; #endif block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->first = block; } | > > | 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 | } typedef struct InteriorWriter { int iHeight; /* from 0 at leaves. */ InteriorBlock *first, *last; struct InteriorWriter *parentWriter; sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */ #ifndef NDEBUG sqlite_int64 iLastChildBlock; /* for consistency checks. */ #endif } InteriorWriter; /* Initialize an interior node where pTerm[nTerm] marks the leftmost ** term in the tree. iChildBlock is the leftmost child block at the ** next level down the tree. */ static void interiorWriterInit(int iHeight, const char *pTerm, int nTerm, sqlite_int64 iChildBlock, InteriorWriter *pWriter){ InteriorBlock *block; assert( iHeight>0 ); CLEAR(pWriter); pWriter->iHeight = iHeight; pWriter->iOpeningChildBlock = iChildBlock; #ifndef NDEBUG pWriter->iLastChildBlock = iChildBlock; #endif block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->first = block; } |
︙ | ︙ | |||
3676 3677 3678 3679 3680 3681 3682 | int n = putVarint(c, nTerm); #ifndef NDEBUG pWriter->iLastChildBlock++; #endif assert( pWriter->iLastChildBlock==iChildBlock ); | > > > | < > > | 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 | int n = putVarint(c, nTerm); #ifndef NDEBUG pWriter->iLastChildBlock++; #endif assert( pWriter->iLastChildBlock==iChildBlock ); /* Overflow to a new block if the new term makes the current block ** too big, and the current block already has enough terms. */ if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX && iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){ pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->last->next; pWriter->iOpeningChildBlock = iChildBlock; }else{ dataBufferAppend2(&pWriter->last->data, c, n, pTerm, nTerm); } } /* Free the space used by pWriter, including the linked-list of ** InteriorBlocks, and parentWriter, if present. |
︙ | ︙ |