SQLite

Check-in [f30771d5c7]

Overview
Comment: Refactoring groundwork for coming work on interior nodes. Change LeafWriter to use empty data buffer (instead of empty term) to detect an empty block. Code to validate interior nodes. Moderate revisions to leaf-node and doclist validation. Recast leafWriterStep() in terms of LeafWriterStepMerge(). (CVS 3512)
SHA1: f30771d5c7ef2b502af95d81a18796b75271ada4
User & Date: shess 2006-11-17 21:12:16.000
Context
2006-11-18 00:12  Store minimal terms in interior nodes. Whenever there's a break between leaf nodes, instead of storing the entire leftmost term of the rightmost child, store only that portion of the leftmost term necessary to distinguish it from the rightmost term of the leftmost child. (CVS 3513) (check-in: f6e0b080dc user: shess tags: trunk)
2006-11-17 21:12  Refactoring groundwork for coming work on interior nodes. Change LeafWriter to use empty data buffer (instead of empty term) to detect an empty block. Code to validate interior nodes. Moderate revisions to leaf-node and doclist validation. Recast leafWriterStep() in terms of LeafWriterStepMerge(). (CVS 3512) (check-in: f30771d5c7 user: shess tags: trunk)
2006-11-13 21:09  Delta-encode docids. This is good for around 22% reduction in index size with DL_POSITIONS. It improves performance about 5%-6%. (CVS 3511) (check-in: 9b6d413d75 user: shess tags: trunk)
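
The heart of this check-in is a change in how LeafWriter detects an empty block. Previously leafWriterInit() pre-seeded the data buffer with the leaf header varint(0), so emptiness had to be inferred from the term buffer; now the buffer stays genuinely empty until the first term is encoded, and flushing simply resets it. A minimal sketch of the before/after test, using a simplified stand-in struct (field names mirror fts2.c, but the struct itself is hypothetical):

  #include <assert.h>

  typedef struct Buf { char *pData; int nData; } Buf;
  typedef struct Writer { Buf term; Buf data; } Writer; /* simplified LeafWriter */

  /* Before: data always held at least the varint(0) header byte, so
  ** the empty-block test had to consult the last term written. */
  static int writerEmptyOld(const Writer *pW){ return pW->term.nData==0; }

  /* After: nothing is pre-seeded, so the data buffer itself answers
  ** the question, and a flush can simply reset it. */
  static int writerEmptyNew(const Writer *pW){ return pW->data.nData==0; }
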
Changes
Changes to ext/fts2/fts2.c.
@@ -78,18 +78,14 @@
 ** information.  A DL_DOCIDS doclist omits both the position and
 ** offset information, becoming an array of varint-encoded docids.
 **
 ** On-disk data is stored as type DL_DEFAULT, so we don't serialize
 ** the type.  Due to how deletion is implemented in the segmentation
 ** system, on-disk doclists MUST store at least positions.
 **
-** TODO(shess) Delta-encode docids.  This provides a 10% win versus
-** DL_POSITIONS_OFFSETS on the first 100,000 documents of the Enron
-** corpus, greater versus DL_POSITIONS.
-**
 **
 **** Segment leaf nodes ****
 ** Segment leaf nodes store terms and doclists, ordered by term.  Leaf
 ** nodes are written using LeafWriter, and read using LeafReader (to
 ** iterate through a single leaf node's data) and LeavesReader (to
 ** iterate through a segment's entire leaf layer).  Leaf nodes have
 ** the format:
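
All of these doclist flavors rest on the same varint primitive: seven payload bits per byte, high bit set on continuation bytes, least-significant group first. A self-contained sketch of walking a DL_DOCIDS doclist, where getVarintSketch is a simplified stand-in for the static getVarint in fts2.c:

  #include <stdio.h>

  typedef long long i64;

  static int getVarintSketch(const char *p, i64 *pVal){
    i64 v = 0;
    int i = 0, shift = 0;
    do{
      v |= (i64)(p[i] & 0x7f) << shift;
      shift += 7;
    }while( p[i++] & 0x80 );
    *pVal = v;
    return i;  /* bytes consumed */
  }

  int main(void){
    /* Three docids: 1, 200, 3 (200 needs two varint bytes). */
    const char doclist[] = { 0x01, (char)0xc8, 0x01, 0x03 };
    int i = 0;
    while( i<(int)sizeof(doclist) ){
      i64 iDocid;
      i += getVarintSketch(doclist+i, &iDocid);
      printf("docid %lld\n", iDocid);
    }
    return 0;
  }
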
@@ -399,15 +395,14 @@
 **
 ** dataBufferInit - create a buffer with given initial capacity.
 ** dataBufferReset - forget buffer's data, retaining capacity.
 ** dataBufferDestroy - free buffer's data.
 ** dataBufferExpand - expand capacity without adding data.
 ** dataBufferAppend - append data.
 ** dataBufferAppend2 - append two pieces of data at once.
-** dataBufferAppendLenData - append a varint-encoded length plus data.
 ** dataBufferReplace - replace buffer's data.
 */
 typedef struct DataBuffer {
   char *pData;          /* Pointer to malloc'ed buffer. */
   int nCapacity;        /* Size of pData buffer. */
   int nData;            /* End of data loaded into pData. */
 } DataBuffer;
@@ -449,20 +444,14 @@
   assert( nSource1>0 && pSource1!=NULL );
   assert( nSource2>0 && pSource2!=NULL );
   dataBufferExpand(pBuffer, nSource1+nSource2);
   memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1);
   memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2);
   pBuffer->nData += nSource1+nSource2;
 }
-static void dataBufferAppendLenData(DataBuffer *pBuffer,
-                                    const char *pSource, int nSource){
-  char c[VARINT_MAX];
-  int n = putVarint(c, nSource);
-  dataBufferAppend2(pBuffer, c, n, pSource, nSource);
-}
 static void dataBufferReplace(DataBuffer *pBuffer,
                               const char *pSource, int nSource){
   dataBufferReset(pBuffer);
   dataBufferAppend(pBuffer, pSource, nSource);
 }
 
 /* StringBuffer is a null-terminated version of DataBuffer. */
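
With dataBufferAppendLenData() removed, a caller that wants a varint length prefix now encodes the varint itself and hands both pieces to dataBufferAppend2(), as the reworked leafWriterEncodeTerm() does below. A usage sketch of the surviving API; it assumes compilation inside fts2.c, where these static helpers are visible, and the demo function itself is hypothetical:

  static void dataBufferDemo(void){
    DataBuffer buf;
    dataBufferInit(&buf, 16);                  /* capacity 16, nData 0 */
    dataBufferAppend(&buf, "abc", 3);          /* holds "abc" */
    dataBufferAppend2(&buf, "de", 2, "f", 1);  /* "abcdef" in one call */
    dataBufferReset(&buf);                     /* nData 0, capacity kept */
    dataBufferReplace(&buf, "xyz", 3);         /* reset, then append */
    dataBufferDestroy(&buf);                   /* frees pData */
  }
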
@@ -645,19 +634,20 @@
 }
 
 #ifndef NDEBUG
 /* Verify that the doclist can be validly decoded.  Also returns the
 ** last docid found because it's convenient in other assertions for
 ** DLWriter.
 */
-static int docListValidate(DocListType iType, const char *pData, int nData,
-                           sqlite_int64 *pLastDocid){
+static void docListValidate(DocListType iType, const char *pData, int nData,
+                            sqlite_int64 *pLastDocid){
   sqlite_int64 iPrevDocid = 0;
+  assert( nData>0 );
   assert( pData!=0 );
-  assert( nData!=0 );
+  assert( pData+nData>pData );
   while( nData!=0 ){
     sqlite_int64 iDocidDelta;
     int n = getVarint(pData, &iDocidDelta);
     iPrevDocid += iDocidDelta;
     if( iType>DL_DOCIDS ){
       int iDummy;
       while( 1 ){
@@ -673,16 +663,18 @@
       }
     }
     assert( n<=nData );
     pData += n;
     nData -= n;
   }
   if( pLastDocid ) *pLastDocid = iPrevDocid;
-  return 1;
 }
+#define ASSERT_VALID_DOCLIST(i, p, n, o) docListValidate(i, p, n, o)
+#else
+#define ASSERT_VALID_DOCLIST(i, p, n, o) assert( 1 )
 #endif
 
 /*******************************************************************/
 /* DLWriter is used to write doclist data to a DataBuffer.  DLWriter
 ** always appends to the buffer and does not own it.
 **
 ** dlwInit - initialize to write a given type doclistto a buffer.
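
The shape introduced here recurs twice more below, for interior blocks and for leaf nodes: the validator now returns void and asserts internally, and a wrapper macro compiles the whole call away under NDEBUG. The old form, assert( docListValidate(...) ), needed an int return value purely so it could sit inside an assert. A generic sketch of the pattern, with hypothetical names:

  #include <assert.h>

  #ifndef NDEBUG
  static void somethingValidate(const char *pData, int nData){
    assert( pData!=0 );
    assert( nData>0 );
    /* ...walk the structure, asserting each invariant in turn... */
  }
  # define ASSERT_VALID_SOMETHING(p, n) somethingValidate(p, n)
  #else
  # define ASSERT_VALID_SOMETHING(p, n) assert( 1 )
  #endif
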
@@ -732,15 +724,15 @@
   assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) );
   nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid);
 
   /* Verify that the incoming doclist is valid AND that it ends with
   ** the expected docid.  This is essential because we'll trust this
   ** docid in future delta-encoding.
   */
-  assert( docListValidate(pWriter->iType, pData, nData, &iLastDocidDelta) );
+  ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta);
   assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta );
 
   /* Append recoded initial docid and everything else.  Rest of docids
   ** should have been delta-encoded from previous initial docid.
   */
   if( nFirstOld<nData ){
     dataBufferAppend2(pWriter->b, c, nFirstNew,
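
A worked example of the recoding that assertion protects: docids inside a doclist are delta-coded, so docids {5, 9, 12} are stored as deltas {5, 4, 3}. When that doclist is appended to a writer whose iPrevDocid is 3, only the first delta is rewritten: 5-3 = 2, giving {2, 4, 3}, and the validated last docid (3+2+4+3 = 12) seeds the next append. The arithmetic as a hypothetical helper:

  typedef long long i64;

  /* First delta of an appended doclist, taken relative to the
  ** writer's previous docid rather than to zero. */
  static i64 recodeFirstDelta(i64 iFirstDocid, i64 iPrevDocid){
    return iFirstDocid - iPrevDocid;
  }
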
@@ -3662,14 +3654,57 @@
   n = putVarint(c, iHeight);
   n += putVarint(c+n, iChildBlock);
   dataBufferInit(&block->data, INTERIOR_MAX);
   dataBufferReplace(&block->data, c, n);
 
   return block;
 }
+
+#ifndef NDEBUG
+/* Verify that the data is readable as an interior node. */
+static void interiorBlockValidate(InteriorBlock *pBlock){
+  const char *pData = pBlock->data.pData;
+  int nData = pBlock->data.nData;
+  int n, iDummy;
+  sqlite_int64 iBlockid;
+
+  assert( nData>0 );
+  assert( pData!=0 );
+  assert( pData+nData>pData );
+
+  /* Must lead with height of node as a varint(n), n>0 */
+  n = getVarint32(pData, &iDummy);
+  assert( n>0 );
+  assert( iDummy>0 );
+  assert( n<nData );
+  pData += n;
+  nData -= n;
+
+  /* Must contain iBlockid. */
+  n = getVarint(pData, &iBlockid);
+  assert( n>0 );
+  assert( n<=nData );
+  pData += n;
+  nData -= n;
+
+  /* Zero or more terms of positive length */
+  while( nData!=0 ){
+    n = getVarint32(pData, &iDummy);
+    assert( n>0 );
+    assert( iDummy>0 );
+    assert( n+iDummy>0);
+    assert( n+iDummy<=nData );
+    pData += n+iDummy;
+    nData -= n+iDummy;
+  }
+}
+#define ASSERT_VALID_INTERIOR_BLOCK(x) interiorBlockValidate(x)
+#else
+#define ASSERT_VALID_INTERIOR_BLOCK(x) assert( 1 )
+#endif
 
 typedef struct InteriorWriter {
   int iHeight;                   /* from 0 at leaves. */
   InteriorBlock *first, *last;
   struct InteriorWriter *parentWriter;
 
   sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */
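
The layout interiorBlockValidate() walks is: varint(iHeight) with iHeight greater than zero, a varint holding the leftmost child blockid, then zero or more terms, each a varint length followed by that many bytes. A self-contained sketch that builds a one-term node; it cheats by writing single-byte varints, which is only valid for values below 128, and the helper is hypothetical:

  #include <string.h>

  static int buildInteriorNodeSketch(char *p, int iHeight,
                                     int iLeftChildBlockid,
                                     const char *zTerm){
    int n = 0;
    int nTerm = (int)strlen(zTerm);
    p[n++] = (char)iHeight;           /* varint(iHeight), must be >0 */
    p[n++] = (char)iLeftChildBlockid; /* varint leftmost child blockid */
    p[n++] = (char)nTerm;             /* varint term length, must be >0 */
    memcpy(p+n, zTerm, nTerm);        /* the term itself */
    return n+nTerm;                   /* bytes written */
  }
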
@@ -3692,42 +3727,46 @@
   pWriter->iHeight = iHeight;
   pWriter->iOpeningChildBlock = iChildBlock;
 #ifndef NDEBUG
   pWriter->iLastChildBlock = iChildBlock;
 #endif
   block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm);
   pWriter->last = pWriter->first = block;
+  ASSERT_VALID_INTERIOR_BLOCK(pWriter->last);
 }
 
 /* Append the child node rooted at iChildBlock to the interior node,
 ** with pTerm[nTerm] as the leftmost term in iChildBlock's subtree.
 */
 static void interiorWriterAppend(InteriorWriter *pWriter,
                                  const char *pTerm, int nTerm,
                                  sqlite_int64 iChildBlock){
   char c[VARINT_MAX+VARINT_MAX];
   int n = putVarint(c, nTerm);
 
+  ASSERT_VALID_INTERIOR_BLOCK(pWriter->last);
+
 #ifndef NDEBUG
   pWriter->iLastChildBlock++;
 #endif
   assert( pWriter->iLastChildBlock==iChildBlock );
 
   /* Overflow to a new block if the new term makes the current block
   ** too big, and the current block already has enough terms.
   */
   if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX &&
       iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){
     pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock,
                                            pTerm, nTerm);
     pWriter->last = pWriter->last->next;
     pWriter->iOpeningChildBlock = iChildBlock;
   }else{
     dataBufferAppend2(&pWriter->last->data, c, n, pTerm, nTerm);
   }
+  ASSERT_VALID_INTERIOR_BLOCK(pWriter->last);
 }
 
 /* Free the space used by pWriter, including the linked-list of
 ** InteriorBlocks, and parentWriter, if present.
 */
 static int interiorWriterDestroy(InteriorWriter *pWriter){
   InteriorBlock *block = pWriter->first;
@@ -3765,27 +3804,29 @@
     *pnRootInfo = block->data.nData;
     return SQLITE_OK;
   }
 
   /* Flush the first block to %_segments, and create a new level of
   ** interior node.
   */
+  ASSERT_VALID_INTERIOR_BLOCK(block);
   rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid);
   if( rc!=SQLITE_OK ) return rc;
   *piEndBlockid = iBlockid;
 
   pWriter->parentWriter = malloc(sizeof(*pWriter->parentWriter));
   interiorWriterInit(pWriter->iHeight+1,
                      block->term.pData, block->term.nData,
                      iBlockid, pWriter->parentWriter);
 
   /* Flush additional blocks and append to the higher interior
   ** node.
   */
   for(block=block->next; block!=NULL; block=block->next){
+    ASSERT_VALID_INTERIOR_BLOCK(block);
     rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid);
     if( rc!=SQLITE_OK ) return rc;
     *piEndBlockid = iBlockid;
 
     interiorWriterAppend(pWriter->parentWriter,
                          block->term.pData, block->term.nData, iBlockid);
   }
@@ -3921,92 +3962,109 @@
   DataBuffer data;                /* encoding buffer */
 
   InteriorWriter parentWriter;    /* if we overflow */
   int has_parent;
 } LeafWriter;
 
 static void leafWriterInit(int iLevel, int idx, LeafWriter *pWriter){
-  char c[VARINT_MAX];
-  int n;
-
   CLEAR(pWriter);
   pWriter->iLevel = iLevel;
   pWriter->idx = idx;
 
   dataBufferInit(&pWriter->term, 32);
 
   /* Start out with a reasonably sized block, though it can grow. */
   dataBufferInit(&pWriter->data, LEAF_MAX);
-  n = putVarint(c, 0);
-  dataBufferReplace(&pWriter->data, c, n);
 }
 
 #ifndef NDEBUG
 /* Verify that the data is readable as a leaf node. */
-static int leafNodeValidate(const char *pData, int nData){
+static void leafNodeValidate(const char *pData, int nData){
   int n, iDummy;
 
+  if( nData==0 ) return;
+  assert( nData>0 );
   assert( pData!=0 );
-  assert( nData!=0 );
+  assert( pData+nData>pData );
 
   /* Must lead with a varint(0) */
   n = getVarint32(pData, &iDummy);
   assert( iDummy==0 );
-  if( nData==n ) return 1;
+  assert( n>0 );
+  assert( n<nData );
   pData += n;
   nData -= n;
 
   /* Leading term length and data must fit in buffer. */
   n = getVarint32(pData, &iDummy);
+  assert( n>0 );
+  assert( iDummy>0 );
+  assert( n+iDummy>0 );
   assert( n+iDummy<nData );
   pData += n+iDummy;
   nData -= n+iDummy;
 
   /* Leading term's doclist length and data must fit. */
   n = getVarint32(pData, &iDummy);
+  assert( n>0 );
+  assert( iDummy>0 );
+  assert( n+iDummy>0 );
   assert( n+iDummy<=nData );
-  assert( docListValidate(DL_DEFAULT, pData+n, iDummy, NULL) );
+  ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL);
   pData += n+iDummy;
   nData -= n+iDummy;
 
   /* Verify that trailing terms and doclists also are readable. */
   while( nData!=0 ){
     n = getVarint32(pData, &iDummy);
-    n += getVarint32(pData+n, &iDummy);
+    assert( n>0 );
+    assert( iDummy>=0 );
+    assert( n<nData );
+    pData += n;
+    nData -= n;
+    n = getVarint32(pData, &iDummy);
+    assert( n>0 );
+    assert( iDummy>0 );
+    assert( n+iDummy>0 );
     assert( n+iDummy<nData );
     pData += n+iDummy;
     nData -= n+iDummy;
 
     n = getVarint32(pData, &iDummy);
+    assert( n>0 );
+    assert( iDummy>0 );
+    assert( n+iDummy>0 );
     assert( n+iDummy<=nData );
-    assert( docListValidate(DL_DEFAULT, pData+n, iDummy, NULL) );
+    ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL);
     pData += n+iDummy;
     nData -= n+iDummy;
   }
-  return 1;
 }
+#define ASSERT_VALID_LEAF_NODE(p, n) leafNodeValidate(p, n)
+#else
+#define ASSERT_VALID_LEAF_NODE(p, n) assert( 1 )
 #endif
 
 /* Flush the current leaf node to %_segments, and adding the resulting
 ** blockid and the starting term to the interior node which will
 ** contain it.
 */
 static int leafWriterInternalFlush(fulltext_vtab *v, LeafWriter *pWriter,
                                    int iData, int nData){
   sqlite_int64 iBlockid = 0;
   const char *pStartingTerm;
   int nStartingTerm, rc, n;
 
   /* Must have the leading varint(0) flag, plus at least some
   ** valid-looking data.
   */
   assert( nData>2 );
   assert( iData>=0 );
   assert( iData+nData<=pWriter->data.nData );
-  assert( leafNodeValidate(pWriter->data.pData+iData, nData) );
+  ASSERT_VALID_LEAF_NODE(pWriter->data.pData+iData, nData);
 
   rc = block_insert(v, pWriter->data.pData+iData, nData, &iBlockid);
   if( rc!=SQLITE_OK ) return rc;
   assert( iBlockid!=0 );
 
   /* Reconstruct the first term in the leaf for purposes of building
   ** the interior node.
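
For contrast with the interior-node layout above, the leaf shape that leafNodeValidate() now checks is: a varint(0) header marking the node as a leaf; the leading term stored whole as varint(nTerm) plus bytes; every later term prefix-coded as varint(nPrefix), varint(nSuffix), suffix bytes; and each term followed by varint(nDoclist) plus its doclist. A schematic builder, again using single-byte varints (values below 128); the doclist bytes are placeholders, since a real DL_DEFAULT doclist also carries position data:

  #include <string.h>

  static int buildLeafNodeSketch(char *p){
    int n = 0;
    p[n++] = 0;                                  /* varint(0): leaf marker */
    p[n++] = 5; memcpy(p+n, "apple", 5); n += 5; /* leading term, whole */
    p[n++] = 2; p[n++] = 1; p[n++] = 0;          /* its doclist (schematic) */
    p[n++] = 4;                                  /* nPrefix shared with "apple" */
    p[n++] = 1; p[n++] = 'y';                    /* nSuffix + suffix: "apply" */
    p[n++] = 2; p[n++] = 2; p[n++] = 0;          /* its doclist (schematic) */
    return n;                                    /* bytes written */
  }
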
@@ -4035,16 +4093,15 @@
   return SQLITE_OK;
 }
 static int leafWriterFlush(fulltext_vtab *v, LeafWriter *pWriter){
   int rc = leafWriterInternalFlush(v, pWriter, 0, pWriter->data.nData);
   if( rc!=SQLITE_OK ) return rc;
 
   /* Re-initialize the output buffer. */
-  pWriter->data.nData = putVarint(pWriter->data.pData, 0);
-  dataBufferReset(&pWriter->term);
+  dataBufferReset(&pWriter->data);
 
   return SQLITE_OK;
 }
 
 /* Fetch the root info for the segment.  If the entire leaf fits
 ** within ROOT_MAX, then it will be returned directly, otherwise it
 ** will be flushed and the root info will be returned from the
@@ -4060,15 +4117,15 @@
     *ppRootInfo = pWriter->data.pData;
     *pnRootInfo = pWriter->data.nData;
     *piEndBlockid = 0;
     return SQLITE_OK;
   }
 
   /* Flush remaining leaf data. */
-  if( pWriter->data.nData>1 ){
+  if( pWriter->data.nData>0 ){
     int rc = leafWriterFlush(v, pWriter);
     if( rc!=SQLITE_OK ) return rc;
   }
 
   /* We must have flushed a leaf at some point. */
   assert( pWriter->has_parent );
 
@@ -4092,96 +4149,65 @@
   char *pRootInfo;
   int rc, nRootInfo;
 
   rc = leafWriterRootInfo(v, pWriter, &pRootInfo, &nRootInfo, &iEndBlockid);
   if( rc!=SQLITE_OK ) return rc;
 
   /* Don't bother storing an entirely empty segment. */
-  if( iEndBlockid==0 && nRootInfo==1 ) return SQLITE_OK;
+  if( iEndBlockid==0 && nRootInfo==0 ) return SQLITE_OK;
 
   return segdir_set(v, pWriter->iLevel, pWriter->idx,
                     pWriter->iStartBlockid, pWriter->iEndBlockid,
                     iEndBlockid, pRootInfo, nRootInfo);
 }
 
 static void leafWriterDestroy(LeafWriter *pWriter){
   if( pWriter->has_parent ) interiorWriterDestroy(&pWriter->parentWriter);
   dataBufferDestroy(&pWriter->term);
   dataBufferDestroy(&pWriter->data);
 }
 
 /* Encode a term into the leafWriter, delta-encoding as appropriate. */
 static void leafWriterEncodeTerm(LeafWriter *pWriter,
                                  const char *pTerm, int nTerm){
+  char c[VARINT_MAX+VARINT_MAX];
+  int n;
+
-  if( pWriter->term.nData==0 ){
-    /* Encode the entire leading term as:
+  if( pWriter->data.nData==0 ){
+    /* Encode the node header and leading term as:
+    **  varint(0)
     **  varint(nTerm)
     **  char pTerm[nTerm]
     */
-    assert( pWriter->data.nData==1 );
-    dataBufferAppendLenData(&pWriter->data, pTerm, nTerm);
+    n = putVarint(c, '\0');
+    n += putVarint(c+n, nTerm);
+    dataBufferAppend2(&pWriter->data, c, n, pTerm, nTerm);
   }else{
     /* Delta-encode the term as:
     **  varint(nPrefix)
     **  varint(nSuffix)
     **  char pTermSuffix[nSuffix]
     */
-    char c[VARINT_MAX+VARINT_MAX];
-    int n, nPrefix = 0;
+    int nPrefix = 0;
 
-    while( nPrefix<nTerm && nPrefix<pWriter->term.nData &&
+    assert( nTerm>0 );
+    while( nPrefix<pWriter->term.nData &&
            pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
       nPrefix++;
+      /* Failing this implies that the terms weren't in order. */
+      assert( nPrefix<nTerm );
     }
 
     n = putVarint(c, nPrefix);
     n += putVarint(c+n, nTerm-nPrefix);
     dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix);
   }
   dataBufferReplace(&pWriter->term, pTerm, nTerm);
 }
 
-/* Push pTerm[nTerm] along with the doclist data to the leaf layer of
-** %_segments.
-*/
-/* TODO(shess) Revise writeZeroSegment() so that doclists are
-** constructed directly in pWriter->data.  That implies refactoring
-** leafWriterStep() and leafWriterStepMerge() to share more code.
-*/
-static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter,
-                          const char *pTerm, int nTerm,
-                          const char *pData, int nData){
-  int rc;
-
-  /* Flush existing data if this item won't fit well. */
-  if( pWriter->data.nData>1 &&
-      (nData+nTerm>STANDALONE_MIN ||
-       pWriter->data.nData+nData+nTerm>LEAF_MAX) ){
-    rc = leafWriterFlush(v, pWriter);
-    if( rc!=SQLITE_OK ) return rc;
-  }
-
-  leafWriterEncodeTerm(pWriter, pTerm, nTerm);
-
-  /* Encode the doclist as:
-  **  varint(nDoclist)
-  **  char pDoclist[nDoclist]
-  */
-  dataBufferAppendLenData(&pWriter->data, pData, nData);
-
-  /* Flush standalone blocks right out */
-  if( nData+nTerm>STANDALONE_MIN ){
-    rc = leafWriterFlush(v, pWriter);
-    if( rc!=SQLITE_OK ) return rc;
-  }
-  assert( leafNodeValidate(pWriter->data.pData, pWriter->data.nData) );
-
-  return SQLITE_OK;
-}
-
 /* Used to avoid a memmove when a large amount of doclist data is in
 ** the buffer.  This constructs a node and term header before
 ** iDoclistData and flushes the resulting complete node using
 ** leafWriterInternalFlush().
 */
 static int leafWriterInlineFlush(fulltext_vtab *v, LeafWriter *pWriter,
                                  const char *pTerm, int nTerm,
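
The delta-encoding above leans on terms arriving in sorted order: each term shares some prefix with its predecessor, and the new assert( nPrefix<nTerm ) catches out-of-order input, since a properly sorted successor can never be a prefix of (or equal to) the previous term. A self-contained sketch of the prefix computation, with hypothetical names:

  #include <stdio.h>

  static int sharedPrefix(const char *a, int na, const char *b, int nb){
    int n = 0;
    while( n<na && n<nb && a[n]==b[n] ) n++;
    return n;
  }

  int main(void){
    const char *prev = "apple", *next = "apply";
    int nPrefix = sharedPrefix(prev, 5, next, 5);
    /* "apply" after "apple" encodes as varint(4), varint(1), "y" */
    printf("nPrefix=%d nSuffix=%d suffix=%s\n",
           nPrefix, 5-nPrefix, next+nPrefix);
    return 0;
  }
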
@@ -4210,32 +4236,32 @@
 static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter,
                                const char *pTerm, int nTerm,
                                DLReader *pReaders, int nReaders){
   char c[VARINT_MAX+VARINT_MAX];
   int iTermData = pWriter->data.nData, iDoclistData;
   int i, nData, n, nActualData, nActual, rc;
 
-  assert( leafNodeValidate(pWriter->data.pData, pWriter->data.nData) );
+  ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData);
   leafWriterEncodeTerm(pWriter, pTerm, nTerm);
 
   iDoclistData = pWriter->data.nData;
 
   /* Estimate the length of the merged doclist so we can leave space
   ** to encode it.
   */
   for(i=0, nData=0; i<nReaders; i++){
     nData += dlrAllDataBytes(&pReaders[i]);
   }
   n = putVarint(c, nData);
   dataBufferAppend(&pWriter->data, c, n);
 
   docListMerge(&pWriter->data, pReaders, nReaders);
-  assert( docListValidate(DL_DEFAULT,
-                          pWriter->data.pData+iDoclistData+n,
-                          pWriter->data.nData-iDoclistData-n, NULL) );
+  ASSERT_VALID_DOCLIST(DL_DEFAULT,
+                       pWriter->data.pData+iDoclistData+n,
+                       pWriter->data.nData-iDoclistData-n, NULL);
 
   /* The actual amount of doclist data at this point could be smaller
   ** than the length we encoded.  Additionally, the space required to
   ** encode this length could be smaller.  For small doclists, this is
   ** not a big deal, we can just use memmove() to adjust things.
   */
   nActualData = pWriter->data.nData-(iDoclistData+n);
@@ -4250,30 +4276,30 @@
   /* TODO(shess) This test matches leafWriterStep(), which does this
   ** test before it knows the cost to varint-encode the term and
   ** doclist lengths.  At some point, change to
   ** pWriter->data.nData-iTermData>STANDALONE_MIN.
   */
   if( nTerm+nActualData>STANDALONE_MIN ){
     /* Push leaf node from before this term. */
-    if( iTermData>1 ){
+    if( iTermData>0 ){
       rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
       if( rc!=SQLITE_OK ) return rc;
     }
 
     /* Fix the encoded doclist length. */
     iDoclistData += n - nActual;
     memcpy(pWriter->data.pData+iDoclistData, c, nActual);
 
     /* Push the standalone leaf node. */
     rc = leafWriterInlineFlush(v, pWriter, pTerm, nTerm, iDoclistData);
     if( rc!=SQLITE_OK ) return rc;
 
     /* Leave the node empty. */
-    pWriter->data.nData = putVarint(pWriter->data.pData, 0);
-    dataBufferReset(&pWriter->term);
+    dataBufferReset(&pWriter->data);
+
     return rc;
   }
 
   /* At this point, we know that the doclist was small, so do the
   ** memmove if indicated.
   */
   if( nActual<n ){
@@ -4313,18 +4339,37 @@
     assert( 2*STANDALONE_MIN<=LEAF_MAX );
     assert( n+pWriter->data.nData-iDoclistData<iDoclistData );
     memcpy(pWriter->data.pData+n,
            pWriter->data.pData+iDoclistData,
            pWriter->data.nData-iDoclistData);
     pWriter->data.nData -= iDoclistData-n;
   }
-  assert( leafNodeValidate(pWriter->data.pData, pWriter->data.nData) );
+  ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData);
 
   return SQLITE_OK;
 }
 
+/* Push pTerm[nTerm] along with the doclist data to the leaf layer of
+** %_segments.
+*/
+/* TODO(shess) Revise writeZeroSegment() so that doclists are
+** constructed directly in pWriter->data.
+*/
+static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter,
+                          const char *pTerm, int nTerm,
+                          const char *pData, int nData){
+  int rc;
+  DLReader reader;
+
+  dlrInit(&reader, DL_DEFAULT, pData, nData);
+  rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1);
+  dlrDestroy(&reader);
+
+  return rc;
+}
+
 
 /****************************************************************/
 /* LeafReader is used to iterate over an individual leaf node. */
 typedef struct LeafReader {
   DataBuffer term;          /* copy of current term. */