SQLite

Check-in [64b7e34061]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Require a minimum fanout for interior nodes. This prevents cases where excessively large terms keep the tree from finding a single root. A downside is that this could result in large interior nodes in the presence of large terms, which may be prone to fragmentation, though if the nodes were smaller that would translate into more levels in the tree, which would also have that problem. (CVS 3510)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 64b7e3406134ac4891113b9bb432ad97504268bb
User & Date: shess 2006-11-13 21:00:55.000
Context
2006-11-13
21:09
Delta-encode docids. This is good for around 22% reduction in index size with DL_POSITIONS. It improves performance about 5%-6%. (CVS 3511) (check-in: 9b6d413d75 user: shess tags: trunk)
21:00
Require a minimum fanout for interior nodes. This prevents cases where excessively large terms keep the tree from finding a single root. A downside is that this could result in large interior nodes in the presence of large terms, which may be prone to fragmentation, though if the nodes were smaller that would translate into more levels in the tree, which would also have that problem. (CVS 3510) (check-in: 64b7e34061 user: shess tags: trunk)
20:15
Allow backing tables to be missing on dropping fts table. Fixes http://www.sqlite.org/cvstrac/tktview?tn=1992,35 . (CVS 3509) (check-in: 9628a61a6f user: shess tags: trunk)
Changes
Unified Diff Show Whitespace Changes Patch
Changes to ext/fts2/fts2.c.
155
156
157
158
159
160
161
162


163
164
165
166
167
168
169
170
171
** is encoded.  The subtree at iBlockid will contain all terms less
** than the first term encoded (or all terms if no term is encoded).
** Otherwise, for terms greater than or equal to pTerm[i] but less
** than pTerm[i+1], the subtree for that term will be rooted at
** iBlockid+i.
**
** New data is spilled to a new interior node at the same height when
** the current node exceeds INTERIOR_MAX bytes (default 2048).  The


** interior nodes at a given height are naturally tracked by interior
** nodes at height+1, and so on.
**
**
**** Segment directory ****
** The segment directory in table %_segdir stores meta-information for
** merging and deleting segments, and also the root node of the
** segment's tree.
**







|
>
>
|
|







155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
** is encoded.  The subtree at iBlockid will contain all terms less
** than the first term encoded (or all terms if no term is encoded).
** Otherwise, for terms greater than or equal to pTerm[i] but less
** than pTerm[i+1], the subtree for that term will be rooted at
** iBlockid+i.
**
** New data is spilled to a new interior node at the same height when
** the current node exceeds INTERIOR_MAX bytes (default 2048).
** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing
** interior nodes and making the tree too skinny.  The interior nodes
** at a given height are naturally tracked by interior nodes at
** height+1, and so on.
**
**
**** Segment directory ****
** The segment directory in table %_segdir stores meta-information for
** merging and deleting segments, and also the root node of the
** segment's tree.
**
3593
3594
3595
3596
3597
3598
3599










3600
3601
3602
3603
3604
3605
3606
/* InteriorWriter is used to collect terms and block references into
** interior nodes in %_segments.  See commentary at top of file for
** format.
*/

/* How large interior nodes can grow. */
#define INTERIOR_MAX 2048











/* ROOT_MAX controls how much data is stored inline in the segment
** directory.
*/
/* TODO(shess) Push ROOT_MAX down to whoever is writing things.  It's
** only here so that interiorWriterRootInfo() and leafWriterRootInfo()
** can both see it, but if the caller passed it in, we wouldn't even







>
>
>
>
>
>
>
>
>
>







3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
/* InteriorWriter is used to collect terms and block references into
** interior nodes in %_segments.  See commentary at top of file for
** format.
*/

/* How large interior nodes can grow. */
#define INTERIOR_MAX 2048

/* Minimum number of terms per interior node (except the root). This
** prevents large terms from making the tree too skinny - must be >0
** so that the tree always makes progress.  Note that the min tree
** fanout will be INTERIOR_MIN_TERMS+1.
*/
#define INTERIOR_MIN_TERMS 7
#if INTERIOR_MIN_TERMS<1
# error INTERIOR_MIN_TERMS must be greater than 0.
#endif

/* ROOT_MAX controls how much data is stored inline in the segment
** directory.
*/
/* TODO(shess) Push ROOT_MAX down to whoever is writing things.  It's
** only here so that interiorWriterRootInfo() and leafWriterRootInfo()
** can both see it, but if the caller passed it in, we wouldn't even
3638
3639
3640
3641
3642
3643
3644

3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661

3662
3663
3664
3665
3666
3667
3668
}

typedef struct InteriorWriter {
  int iHeight;                   /* from 0 at leaves. */
  InteriorBlock *first, *last;
  struct InteriorWriter *parentWriter;


#ifndef NDEBUG
  sqlite_int64 iLastChildBlock;  /* for consistency checks. */
#endif
} InteriorWriter;

/* Initialize an interior node where pTerm[nTerm] marks the leftmost
** term in the tree.  iChildBlock is the leftmost child block at the
** next level down the tree.
*/
static void interiorWriterInit(int iHeight, const char *pTerm, int nTerm,
                               sqlite_int64 iChildBlock,
                               InteriorWriter *pWriter){
  InteriorBlock *block;
  assert( iHeight>0 );
  CLEAR(pWriter);

  pWriter->iHeight = iHeight;

#ifndef NDEBUG
  pWriter->iLastChildBlock = iChildBlock;
#endif
  block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm);
  pWriter->last = pWriter->first = block;
}








>

















>







3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
}

typedef struct InteriorWriter {
  int iHeight;                   /* from 0 at leaves. */
  InteriorBlock *first, *last;
  struct InteriorWriter *parentWriter;

  sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */
#ifndef NDEBUG
  sqlite_int64 iLastChildBlock;  /* for consistency checks. */
#endif
} InteriorWriter;

/* Initialize an interior node where pTerm[nTerm] marks the leftmost
** term in the tree.  iChildBlock is the leftmost child block at the
** next level down the tree.
*/
static void interiorWriterInit(int iHeight, const char *pTerm, int nTerm,
                               sqlite_int64 iChildBlock,
                               InteriorWriter *pWriter){
  InteriorBlock *block;
  assert( iHeight>0 );
  CLEAR(pWriter);

  pWriter->iHeight = iHeight;
  pWriter->iOpeningChildBlock = iChildBlock;
#ifndef NDEBUG
  pWriter->iLastChildBlock = iChildBlock;
#endif
  block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm);
  pWriter->last = pWriter->first = block;
}

3676
3677
3678
3679
3680
3681
3682



3683
3684

3685
3686
3687

3688
3689
3690
3691
3692
3693
3694
  int n = putVarint(c, nTerm);

#ifndef NDEBUG
  pWriter->iLastChildBlock++;
#endif
  assert( pWriter->iLastChildBlock==iChildBlock );




  if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX ){
    /* Overflow to a new block. */

    pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock,
                                           pTerm, nTerm);
    pWriter->last = pWriter->last->next;

  }else{
    dataBufferAppend2(&pWriter->last->data, c, n, pTerm, nTerm);
  }
}

/* Free the space used by pWriter, including the linked-list of
** InteriorBlocks, and parentWriter, if present.







>
>
>
|
<
>



>







3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700

3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
  int n = putVarint(c, nTerm);

#ifndef NDEBUG
  pWriter->iLastChildBlock++;
#endif
  assert( pWriter->iLastChildBlock==iChildBlock );

  /* Overflow to a new block if the new term makes the current block
  ** too big, and the current block already has enough terms.
  */
  if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX &&

      iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){
    pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock,
                                           pTerm, nTerm);
    pWriter->last = pWriter->last->next;
    pWriter->iOpeningChildBlock = iChildBlock;
  }else{
    dataBufferAppend2(&pWriter->last->data, c, n, pTerm, nTerm);
  }
}

/* Free the space used by pWriter, including the linked-list of
** InteriorBlocks, and parentWriter, if present.