Index: ext/fts2/fts2.c ================================================================== --- ext/fts2/fts2.c +++ ext/fts2/fts2.c @@ -157,13 +157,15 @@ ** Otherwise, for terms greater than or equal to pTerm[i] but less ** than pTerm[i+1], the subtree for that term will be rooted at ** iBlockid+i. ** ** New data is spilled to a new interior node at the same height when -** the current node exceeds INTERIOR_MAX bytes (default 2048). The -** interior nodes at a given height are naturally tracked by interior -** nodes at height+1, and so on. +** the current node exceeds INTERIOR_MAX bytes (default 2048). +** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing +** interior nodes and making the tree too skinny. The interior nodes +** at a given height are naturally tracked by interior nodes at +** height+1, and so on. ** ** **** Segment directory **** ** The segment directory in table %_segdir stores meta-information for ** merging and deleting segments, and also the root node of the @@ -3595,10 +3597,20 @@ ** format. */ /* How large interior nodes can grow. */ #define INTERIOR_MAX 2048 + +/* Minimum number of terms per interior node (except the root). This +** prevents large terms from making the tree too skinny - must be >0 +** so that the tree always makes progress. Note that the min tree +** fanout will be INTERIOR_MIN_TERMS+1. +*/ +#define INTERIOR_MIN_TERMS 7 +#if INTERIOR_MIN_TERMS<1 +# error INTERIOR_MIN_TERMS must be greater than 0. +#endif /* ROOT_MAX controls how much data is stored inline in the segment ** directory. */ /* TODO(shess) Push ROOT_MAX down to whoever is writing things. It's @@ -3640,10 +3652,11 @@ typedef struct InteriorWriter { int iHeight; /* from 0 at leaves. */ InteriorBlock *first, *last; struct InteriorWriter *parentWriter; + sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */ #ifndef NDEBUG sqlite_int64 iLastChildBlock; /* for consistency checks. */ #endif } InteriorWriter; @@ -3657,10 +3670,11 @@ InteriorBlock *block; assert( iHeight>0 ); CLEAR(pWriter); pWriter->iHeight = iHeight; + pWriter->iOpeningChildBlock = iChildBlock; #ifndef NDEBUG pWriter->iLastChildBlock = iChildBlock; #endif block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->first = block; @@ -3678,15 +3692,19 @@ #ifndef NDEBUG pWriter->iLastChildBlock++; #endif assert( pWriter->iLastChildBlock==iChildBlock ); - if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX ){ - /* Overflow to a new block. */ + /* Overflow to a new block if the new term makes the current block + ** too big, and the current block already has enough terms. + */ + if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX && + iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){ pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->last->next; + pWriter->iOpeningChildBlock = iChildBlock; }else{ dataBufferAppend2(&pWriter->last->data, c, n, pTerm, nTerm); } }