Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Refactoring groundwork for coming work on interior nodes. Change LeafWriter to use empty data buffer (instead of empty term) to detect an empty block. Code to validate interior nodes. Moderate revisions to leaf-node and doclist validation. Recast leafWriterStep() in terms of LeafWriterStepMerge(). (CVS 3512) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
f30771d5c7ef2b502af95d81a18796b7 |
User & Date: | shess 2006-11-17 21:12:16.000 |
Context
2006-11-18
| ||
00:12 | Store minimal terms in interior nodes. Whenever there's a break between leaf nodes, instead of storing the entire leftmost term of the rightmost child, store only that portion of the leftmost term necessary to distinguish it from the rightmost term of the leftmost child. (CVS 3513) (check-in: f6e0b080dc user: shess tags: trunk) | |
2006-11-17
| ||
21:12 | Refactoring groundwork for coming work on interior nodes. Change LeafWriter to use empty data buffer (instead of empty term) to detect an empty block. Code to validate interior nodes. Moderate revisions to leaf-node and doclist validation. Recast leafWriterStep() in terms of LeafWriterStepMerge(). (CVS 3512) (check-in: f30771d5c7 user: shess tags: trunk) | |
2006-11-13
| ||
21:09 | Delta-encode docids. This is good for around 22% reduction in index size with DL_POSITIONS. It improves performance about 5%-6%. (CVS 3511) (check-in: 9b6d413d75 user: shess tags: trunk) | |
Changes
Changes to ext/fts2/fts2.c.
︙ | ︙ | |||
78 79 80 81 82 83 84 | ** information. A DL_DOCIDS doclist omits both the position and ** offset information, becoming an array of varint-encoded docids. ** ** On-disk data is stored as type DL_DEFAULT, so we don't serialize ** the type. Due to how deletion is implemented in the segmentation ** system, on-disk doclists MUST store at least positions. ** | < < < < | 78 79 80 81 82 83 84 85 86 87 88 89 90 91 | ** information. A DL_DOCIDS doclist omits both the position and ** offset information, becoming an array of varint-encoded docids. ** ** On-disk data is stored as type DL_DEFAULT, so we don't serialize ** the type. Due to how deletion is implemented in the segmentation ** system, on-disk doclists MUST store at least positions. ** ** **** Segment leaf nodes **** ** Segment leaf nodes store terms and doclists, ordered by term. Leaf ** nodes are written using LeafWriter, and read using LeafReader (to ** iterate through a single leaf node's data) and LeavesReader (to ** iterate through a segment's entire leaf layer). Leaf nodes have ** the format: |
︙ | ︙ | |||
399 400 401 402 403 404 405 | ** ** dataBufferInit - create a buffer with given initial capacity. ** dataBufferReset - forget buffer's data, retaining capacity. ** dataBufferDestroy - free buffer's data. ** dataBufferExpand - expand capacity without adding data. ** dataBufferAppend - append data. ** dataBufferAppend2 - append two pieces of data at once. | < | 395 396 397 398 399 400 401 402 403 404 405 406 407 408 | ** ** dataBufferInit - create a buffer with given initial capacity. ** dataBufferReset - forget buffer's data, retaining capacity. ** dataBufferDestroy - free buffer's data. ** dataBufferExpand - expand capacity without adding data. ** dataBufferAppend - append data. ** dataBufferAppend2 - append two pieces of data at once. ** dataBufferReplace - replace buffer's data. */ typedef struct DataBuffer { char *pData; /* Pointer to malloc'ed buffer. */ int nCapacity; /* Size of pData buffer. */ int nData; /* End of data loaded into pData. */ } DataBuffer; |
︙ | ︙ | |||
449 450 451 452 453 454 455 | assert( nSource1>0 && pSource1!=NULL ); assert( nSource2>0 && pSource2!=NULL ); dataBufferExpand(pBuffer, nSource1+nSource2); memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1); memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2); pBuffer->nData += nSource1+nSource2; } | < < < < < < | 444 445 446 447 448 449 450 451 452 453 454 455 456 457 | assert( nSource1>0 && pSource1!=NULL ); assert( nSource2>0 && pSource2!=NULL ); dataBufferExpand(pBuffer, nSource1+nSource2); memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1); memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2); pBuffer->nData += nSource1+nSource2; } static void dataBufferReplace(DataBuffer *pBuffer, const char *pSource, int nSource){ dataBufferReset(pBuffer); dataBufferAppend(pBuffer, pSource, nSource); } /* StringBuffer is a null-terminated version of DataBuffer. */ |
︙ | ︙ | |||
645 646 647 648 649 650 651 | } #ifndef NDEBUG /* Verify that the doclist can be validly decoded. Also returns the ** last docid found because it's convenient in other assertions for ** DLWriter. */ | | | > | | 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 | } #ifndef NDEBUG /* Verify that the doclist can be validly decoded. Also returns the ** last docid found because it's convenient in other assertions for ** DLWriter. */ static void docListValidate(DocListType iType, const char *pData, int nData, sqlite_int64 *pLastDocid){ sqlite_int64 iPrevDocid = 0; assert( nData>0 ); assert( pData!=0 ); assert( pData+nData>pData ); while( nData!=0 ){ sqlite_int64 iDocidDelta; int n = getVarint(pData, &iDocidDelta); iPrevDocid += iDocidDelta; if( iType>DL_DOCIDS ){ int iDummy; while( 1 ){ |
︙ | ︙ | |||
673 674 675 676 677 678 679 | } } assert( n<=nData ); pData += n; nData -= n; } if( pLastDocid ) *pLastDocid = iPrevDocid; | < > > > | 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 | } } assert( n<=nData ); pData += n; nData -= n; } if( pLastDocid ) *pLastDocid = iPrevDocid; } #define ASSERT_VALID_DOCLIST(i, p, n, o) docListValidate(i, p, n, o) #else #define ASSERT_VALID_DOCLIST(i, p, n, o) assert( 1 ) #endif /*******************************************************************/ /* DLWriter is used to write doclist data to a DataBuffer. DLWriter ** always appends to the buffer and does not own it. ** ** dlwInit - initialize to write a given type doclistto a buffer. |
︙ | ︙ | |||
732 733 734 735 736 737 738 | assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) ); nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid); /* Verify that the incoming doclist is valid AND that it ends with ** the expected docid. This is essential because we'll trust this ** docid in future delta-encoding. */ | | | 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 | assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) ); nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid); /* Verify that the incoming doclist is valid AND that it ends with ** the expected docid. This is essential because we'll trust this ** docid in future delta-encoding. */ ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta); assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta ); /* Append recoded initial docid and everything else. Rest of docids ** should have been delta-encoded from previous initial docid. */ if( nFirstOld<nData ){ dataBufferAppend2(pWriter->b, c, nFirstNew, |
︙ | ︙ | |||
3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 | n = putVarint(c, iHeight); n += putVarint(c+n, iChildBlock); dataBufferInit(&block->data, INTERIOR_MAX); dataBufferReplace(&block->data, c, n); return block; } typedef struct InteriorWriter { int iHeight; /* from 0 at leaves. */ InteriorBlock *first, *last; struct InteriorWriter *parentWriter; sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 | n = putVarint(c, iHeight); n += putVarint(c+n, iChildBlock); dataBufferInit(&block->data, INTERIOR_MAX); dataBufferReplace(&block->data, c, n); return block; } #ifndef NDEBUG /* Verify that the data is readable as an interior node. */ static void interiorBlockValidate(InteriorBlock *pBlock){ const char *pData = pBlock->data.pData; int nData = pBlock->data.nData; int n, iDummy; sqlite_int64 iBlockid; assert( nData>0 ); assert( pData!=0 ); assert( pData+nData>pData ); /* Must lead with height of node as a varint(n), n>0 */ n = getVarint32(pData, &iDummy); assert( n>0 ); assert( iDummy>0 ); assert( n<nData ); pData += n; nData -= n; /* Must contain iBlockid. */ n = getVarint(pData, &iBlockid); assert( n>0 ); assert( n<=nData ); pData += n; nData -= n; /* Zero or more terms of positive length */ while( nData!=0 ){ n = getVarint32(pData, &iDummy); assert( n>0 ); assert( iDummy>0 ); assert( n+iDummy>0); assert( n+iDummy<=nData ); pData += n+iDummy; nData -= n+iDummy; } } #define ASSERT_VALID_INTERIOR_BLOCK(x) interiorBlockValidate(x) #else #define ASSERT_VALID_INTERIOR_BLOCK(x) assert( 1 ) #endif typedef struct InteriorWriter { int iHeight; /* from 0 at leaves. */ InteriorBlock *first, *last; struct InteriorWriter *parentWriter; sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */ |
︙ | ︙ | |||
3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 | pWriter->iHeight = iHeight; pWriter->iOpeningChildBlock = iChildBlock; #ifndef NDEBUG pWriter->iLastChildBlock = iChildBlock; #endif block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->first = block; } /* Append the child node rooted at iChildBlock to the interior node, ** with pTerm[nTerm] as the leftmost term in iChildBlock's subtree. */ static void interiorWriterAppend(InteriorWriter *pWriter, const char *pTerm, int nTerm, sqlite_int64 iChildBlock){ char c[VARINT_MAX+VARINT_MAX]; int n = putVarint(c, nTerm); #ifndef NDEBUG pWriter->iLastChildBlock++; #endif assert( pWriter->iLastChildBlock==iChildBlock ); /* Overflow to a new block if the new term makes the current block ** too big, and the current block already has enough terms. */ if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX && iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){ pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->last->next; pWriter->iOpeningChildBlock = iChildBlock; }else{ dataBufferAppend2(&pWriter->last->data, c, n, pTerm, nTerm); } } /* Free the space used by pWriter, including the linked-list of ** InteriorBlocks, and parentWriter, if present. */ static int interiorWriterDestroy(InteriorWriter *pWriter){ InteriorBlock *block = pWriter->first; | > > > > | 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 | pWriter->iHeight = iHeight; pWriter->iOpeningChildBlock = iChildBlock; #ifndef NDEBUG pWriter->iLastChildBlock = iChildBlock; #endif block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->first = block; ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); } /* Append the child node rooted at iChildBlock to the interior node, ** with pTerm[nTerm] as the leftmost term in iChildBlock's subtree. */ static void interiorWriterAppend(InteriorWriter *pWriter, const char *pTerm, int nTerm, sqlite_int64 iChildBlock){ char c[VARINT_MAX+VARINT_MAX]; int n = putVarint(c, nTerm); ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); #ifndef NDEBUG pWriter->iLastChildBlock++; #endif assert( pWriter->iLastChildBlock==iChildBlock ); /* Overflow to a new block if the new term makes the current block ** too big, and the current block already has enough terms. */ if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX && iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){ pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->last->next; pWriter->iOpeningChildBlock = iChildBlock; }else{ dataBufferAppend2(&pWriter->last->data, c, n, pTerm, nTerm); } ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); } /* Free the space used by pWriter, including the linked-list of ** InteriorBlocks, and parentWriter, if present. */ static int interiorWriterDestroy(InteriorWriter *pWriter){ InteriorBlock *block = pWriter->first; |
︙ | ︙ | |||
3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 | *pnRootInfo = block->data.nData; return SQLITE_OK; } /* Flush the first block to %_segments, and create a new level of ** interior node. */ rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid); if( rc!=SQLITE_OK ) return rc; *piEndBlockid = iBlockid; pWriter->parentWriter = malloc(sizeof(*pWriter->parentWriter)); interiorWriterInit(pWriter->iHeight+1, block->term.pData, block->term.nData, iBlockid, pWriter->parentWriter); /* Flush additional blocks and append to the higher interior ** node. */ for(block=block->next; block!=NULL; block=block->next){ rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid); if( rc!=SQLITE_OK ) return rc; *piEndBlockid = iBlockid; interiorWriterAppend(pWriter->parentWriter, block->term.pData, block->term.nData, iBlockid); } | > > | 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 | *pnRootInfo = block->data.nData; return SQLITE_OK; } /* Flush the first block to %_segments, and create a new level of ** interior node. */ ASSERT_VALID_INTERIOR_BLOCK(block); rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid); if( rc!=SQLITE_OK ) return rc; *piEndBlockid = iBlockid; pWriter->parentWriter = malloc(sizeof(*pWriter->parentWriter)); interiorWriterInit(pWriter->iHeight+1, block->term.pData, block->term.nData, iBlockid, pWriter->parentWriter); /* Flush additional blocks and append to the higher interior ** node. */ for(block=block->next; block!=NULL; block=block->next){ ASSERT_VALID_INTERIOR_BLOCK(block); rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid); if( rc!=SQLITE_OK ) return rc; *piEndBlockid = iBlockid; interiorWriterAppend(pWriter->parentWriter, block->term.pData, block->term.nData, iBlockid); } |
︙ | ︙ | |||
3921 3922 3923 3924 3925 3926 3927 | DataBuffer data; /* encoding buffer */ InteriorWriter parentWriter; /* if we overflow */ int has_parent; } LeafWriter; static void leafWriterInit(int iLevel, int idx, LeafWriter *pWriter){ | < < < < < | > > | > | > > > > > > | > > > > > | > > > > > > | < > > > | | 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 | DataBuffer data; /* encoding buffer */ InteriorWriter parentWriter; /* if we overflow */ int has_parent; } LeafWriter; static void leafWriterInit(int iLevel, int idx, LeafWriter *pWriter){ CLEAR(pWriter); pWriter->iLevel = iLevel; pWriter->idx = idx; dataBufferInit(&pWriter->term, 32); /* Start out with a reasonably sized block, though it can grow. */ dataBufferInit(&pWriter->data, LEAF_MAX); } #ifndef NDEBUG /* Verify that the data is readable as a leaf node. */ static void leafNodeValidate(const char *pData, int nData){ int n, iDummy; if( nData==0 ) return; assert( nData>0 ); assert( pData!=0 ); assert( pData+nData>pData ); /* Must lead with a varint(0) */ n = getVarint32(pData, &iDummy); assert( iDummy==0 ); assert( n>0 ); assert( n<nData ); pData += n; nData -= n; /* Leading term length and data must fit in buffer. */ n = getVarint32(pData, &iDummy); assert( n>0 ); assert( iDummy>0 ); assert( n+iDummy>0 ); assert( n+iDummy<nData ); pData += n+iDummy; nData -= n+iDummy; /* Leading term's doclist length and data must fit. */ n = getVarint32(pData, &iDummy); assert( n>0 ); assert( iDummy>0 ); assert( n+iDummy>0 ); assert( n+iDummy<=nData ); ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL); pData += n+iDummy; nData -= n+iDummy; /* Verify that trailing terms and doclists also are readable. */ while( nData!=0 ){ n = getVarint32(pData, &iDummy); assert( n>0 ); assert( iDummy>=0 ); assert( n<nData ); pData += n; nData -= n; n = getVarint32(pData, &iDummy); assert( n>0 ); assert( iDummy>0 ); assert( n+iDummy>0 ); assert( n+iDummy<nData ); pData += n+iDummy; nData -= n+iDummy; n = getVarint32(pData, &iDummy); assert( n>0 ); assert( iDummy>0 ); assert( n+iDummy>0 ); assert( n+iDummy<=nData ); ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL); pData += n+iDummy; nData -= n+iDummy; } } #define ASSERT_VALID_LEAF_NODE(p, n) leafNodeValidate(p, n) #else #define ASSERT_VALID_LEAF_NODE(p, n) assert( 1 ) #endif /* Flush the current leaf node to %_segments, and adding the resulting ** blockid and the starting term to the interior node which will ** contain it. */ static int leafWriterInternalFlush(fulltext_vtab *v, LeafWriter *pWriter, int iData, int nData){ sqlite_int64 iBlockid = 0; const char *pStartingTerm; int nStartingTerm, rc, n; /* Must have the leading varint(0) flag, plus at least some ** valid-looking data. */ assert( nData>2 ); assert( iData>=0 ); assert( iData+nData<=pWriter->data.nData ); ASSERT_VALID_LEAF_NODE(pWriter->data.pData+iData, nData); rc = block_insert(v, pWriter->data.pData+iData, nData, &iBlockid); if( rc!=SQLITE_OK ) return rc; assert( iBlockid!=0 ); /* Reconstruct the first term in the leaf for purposes of building ** the interior node. |
︙ | ︙ | |||
4035 4036 4037 4038 4039 4040 4041 | return SQLITE_OK; } static int leafWriterFlush(fulltext_vtab *v, LeafWriter *pWriter){ int rc = leafWriterInternalFlush(v, pWriter, 0, pWriter->data.nData); if( rc!=SQLITE_OK ) return rc; /* Re-initialize the output buffer. */ | < | | 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 | return SQLITE_OK; } static int leafWriterFlush(fulltext_vtab *v, LeafWriter *pWriter){ int rc = leafWriterInternalFlush(v, pWriter, 0, pWriter->data.nData); if( rc!=SQLITE_OK ) return rc; /* Re-initialize the output buffer. */ dataBufferReset(&pWriter->data); return SQLITE_OK; } /* Fetch the root info for the segment. If the entire leaf fits ** within ROOT_MAX, then it will be returned directly, otherwise it ** will be flushed and the root info will be returned from the |
︙ | ︙ | |||
4060 4061 4062 4063 4064 4065 4066 | *ppRootInfo = pWriter->data.pData; *pnRootInfo = pWriter->data.nData; *piEndBlockid = 0; return SQLITE_OK; } /* Flush remaining leaf data. */ | | | 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 | *ppRootInfo = pWriter->data.pData; *pnRootInfo = pWriter->data.nData; *piEndBlockid = 0; return SQLITE_OK; } /* Flush remaining leaf data. */ if( pWriter->data.nData>0 ){ int rc = leafWriterFlush(v, pWriter); if( rc!=SQLITE_OK ) return rc; } /* We must have flushed a leaf at some point. */ assert( pWriter->has_parent ); |
︙ | ︙ | |||
4092 4093 4094 4095 4096 4097 4098 | char *pRootInfo; int rc, nRootInfo; rc = leafWriterRootInfo(v, pWriter, &pRootInfo, &nRootInfo, &iEndBlockid); if( rc!=SQLITE_OK ) return rc; /* Don't bother storing an entirely empty segment. */ | | > > > | | > | > | < | > | > > < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 | char *pRootInfo; int rc, nRootInfo; rc = leafWriterRootInfo(v, pWriter, &pRootInfo, &nRootInfo, &iEndBlockid); if( rc!=SQLITE_OK ) return rc; /* Don't bother storing an entirely empty segment. */ if( iEndBlockid==0 && nRootInfo==0 ) return SQLITE_OK; return segdir_set(v, pWriter->iLevel, pWriter->idx, pWriter->iStartBlockid, pWriter->iEndBlockid, iEndBlockid, pRootInfo, nRootInfo); } static void leafWriterDestroy(LeafWriter *pWriter){ if( pWriter->has_parent ) interiorWriterDestroy(&pWriter->parentWriter); dataBufferDestroy(&pWriter->term); dataBufferDestroy(&pWriter->data); } /* Encode a term into the leafWriter, delta-encoding as appropriate. */ static void leafWriterEncodeTerm(LeafWriter *pWriter, const char *pTerm, int nTerm){ char c[VARINT_MAX+VARINT_MAX]; int n; if( pWriter->data.nData==0 ){ /* Encode the node header and leading term as: ** varint(0) ** varint(nTerm) ** char pTerm[nTerm] */ n = putVarint(c, '\0'); n += putVarint(c+n, nTerm); dataBufferAppend2(&pWriter->data, c, n, pTerm, nTerm); }else{ /* Delta-encode the term as: ** varint(nPrefix) ** varint(nSuffix) ** char pTermSuffix[nSuffix] */ int nPrefix = 0; assert( nTerm>0 ); while( nPrefix<pWriter->term.nData && pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){ nPrefix++; /* Failing this implies that the terms weren't in order. */ assert( nPrefix<nTerm ); } n = putVarint(c, nPrefix); n += putVarint(c+n, nTerm-nPrefix); dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix); } dataBufferReplace(&pWriter->term, pTerm, nTerm); } /* Used to avoid a memmove when a large amount of doclist data is in ** the buffer. This constructs a node and term header before ** iDoclistData and flushes the resulting complete node using ** leafWriterInternalFlush(). */ static int leafWriterInlineFlush(fulltext_vtab *v, LeafWriter *pWriter, const char *pTerm, int nTerm, |
︙ | ︙ | |||
4210 4211 4212 4213 4214 4215 4216 | static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter, const char *pTerm, int nTerm, DLReader *pReaders, int nReaders){ char c[VARINT_MAX+VARINT_MAX]; int iTermData = pWriter->data.nData, iDoclistData; int i, nData, n, nActualData, nActual, rc; | | | | | | 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 | static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter, const char *pTerm, int nTerm, DLReader *pReaders, int nReaders){ char c[VARINT_MAX+VARINT_MAX]; int iTermData = pWriter->data.nData, iDoclistData; int i, nData, n, nActualData, nActual, rc; ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData); leafWriterEncodeTerm(pWriter, pTerm, nTerm); iDoclistData = pWriter->data.nData; /* Estimate the length of the merged doclist so we can leave space ** to encode it. */ for(i=0, nData=0; i<nReaders; i++){ nData += dlrAllDataBytes(&pReaders[i]); } n = putVarint(c, nData); dataBufferAppend(&pWriter->data, c, n); docListMerge(&pWriter->data, pReaders, nReaders); ASSERT_VALID_DOCLIST(DL_DEFAULT, pWriter->data.pData+iDoclistData+n, pWriter->data.nData-iDoclistData-n, NULL); /* The actual amount of doclist data at this point could be smaller ** than the length we encoded. Additionally, the space required to ** encode this length could be smaller. For small doclists, this is ** not a big deal, we can just use memmove() to adjust things. */ nActualData = pWriter->data.nData-(iDoclistData+n); |
︙ | ︙ | |||
4250 4251 4252 4253 4254 4255 4256 | /* TODO(shess) This test matches leafWriterStep(), which does this ** test before it knows the cost to varint-encode the term and ** doclist lengths. At some point, change to ** pWriter->data.nData-iTermData>STANDALONE_MIN. */ if( nTerm+nActualData>STANDALONE_MIN ){ /* Push leaf node from before this term. */ | | < | > | 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 | /* TODO(shess) This test matches leafWriterStep(), which does this ** test before it knows the cost to varint-encode the term and ** doclist lengths. At some point, change to ** pWriter->data.nData-iTermData>STANDALONE_MIN. */ if( nTerm+nActualData>STANDALONE_MIN ){ /* Push leaf node from before this term. */ if( iTermData>0 ){ rc = leafWriterInternalFlush(v, pWriter, 0, iTermData); if( rc!=SQLITE_OK ) return rc; } /* Fix the encoded doclist length. */ iDoclistData += n - nActual; memcpy(pWriter->data.pData+iDoclistData, c, nActual); /* Push the standalone leaf node. */ rc = leafWriterInlineFlush(v, pWriter, pTerm, nTerm, iDoclistData); if( rc!=SQLITE_OK ) return rc; /* Leave the node empty. */ dataBufferReset(&pWriter->data); return rc; } /* At this point, we know that the doclist was small, so do the ** memmove if indicated. */ if( nActual<n ){ |
︙ | ︙ | |||
4313 4314 4315 4316 4317 4318 4319 | assert( 2*STANDALONE_MIN<=LEAF_MAX ); assert( n+pWriter->data.nData-iDoclistData<iDoclistData ); memcpy(pWriter->data.pData+n, pWriter->data.pData+iDoclistData, pWriter->data.nData-iDoclistData); pWriter->data.nData -= iDoclistData-n; } | | > > > > > > > > > > > > > > > > > > > | 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 | assert( 2*STANDALONE_MIN<=LEAF_MAX ); assert( n+pWriter->data.nData-iDoclistData<iDoclistData ); memcpy(pWriter->data.pData+n, pWriter->data.pData+iDoclistData, pWriter->data.nData-iDoclistData); pWriter->data.nData -= iDoclistData-n; } ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData); return SQLITE_OK; } /* Push pTerm[nTerm] along with the doclist data to the leaf layer of ** %_segments. */ /* TODO(shess) Revise writeZeroSegment() so that doclists are ** constructed directly in pWriter->data. */ static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter, const char *pTerm, int nTerm, const char *pData, int nData){ int rc; DLReader reader; dlrInit(&reader, DL_DEFAULT, pData, nData); rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1); dlrDestroy(&reader); return rc; } /****************************************************************/ /* LeafReader is used to iterate over an individual leaf node. */ typedef struct LeafReader { DataBuffer term; /* copy of current term. */ |
︙ | ︙ |