SQLite

Check-in [7635c68018]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix some code and test coverage issues in fts5_index.c.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | fts5
Files: files | file ages | folders
SHA1: 7635c68018ce1656a1c5d6eebaf8f3a8e8839b59
User & Date: dan 2016-03-29 21:19:04.017
Context
2016-03-29
21:19
Fix some code and test coverage issues in fts5_index.c. (Leaf check-in: 7635c68018 user: dan tags: fts5)
2016-03-28
20:13
Add further tests for savepoint rollback. Fix various code issues and add missing comments in fts5_index.c. (check-in: a805c6f7ea user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_index.c.
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
#define FTS5_DATA_PADDING 20

typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;








<







262
263
264
265
266
267
268

269
270
271
272
273
274
275
#define FTS5_DATA_PADDING 20

typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;

typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;

352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373





374
375
376
377
378
379
380
381
382
383
384
385
386
  int nLevel;                     /* Number of levels in this index */
  Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous rowid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */





  Fts5PageWriter writer;          /* PageWriter object */
  i64 iPrevRowid;                 /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */

  /* Values to insert into the %_idx table */







<
<
<
<
<
<
<








>
>
>
>
>
|



<
<







351
352
353
354
355
356
357







358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374


375
376
377
378
379
380
381
  int nLevel;                     /* Number of levels in this index */
  Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/







struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous rowid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  int pgno;                       /* Page number for current leaf page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer of current leaf page data */
  Fts5Buffer pgidx;               /* Buffer of current leaf page-index */
  Fts5Buffer term;                /* Buffer containing previous term on leaf */

  i64 iPrevRowid;                 /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */


  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */

  /* Values to insert into the %_idx table */
1000
1001
1002
1003
1004
1005
1006








1007
1008
1009
1010
1011
1012
1013
      pRet = 0;
    }
  }

  return pRet;
}









static i64 fts5IndexDataVersion(Fts5Index *p){
  i64 iVersion = 0;

  if( p->rc==SQLITE_OK ){
    if( p->pDataVersion==0 ){
      p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, 
          sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)







>
>
>
>
>
>
>
>







995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
      pRet = 0;
    }
  }

  return pRet;
}

/*
** Execute "PRAGMA db.data_version" and return the integer value that 
** it returns.
**
** This function returns 0 if Fts5Index.rc is set to other than SQLITE_OK
** when it is called. If an error occurs while executing the PRAGMA,
** Fts5Index.rc is set to an SQLite error code before returning.
*/
static i64 fts5IndexDataVersion(Fts5Index *p){
  i64 iVersion = 0;

  if( p->rc==SQLITE_OK ){
    if( p->pDataVersion==0 ){
      p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, 
          sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
3043
3044
3045
3046
3047
3048
3049














3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
          }
        }
      }
    }while( i<nChunk );
  }
}















static void fts5ChunkIterate(
  Fts5Index *p,                   /* Index object */
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = 0;

  /* This function does notmwork with detail=none databases. */
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
    pgnoSave = pgno+1;
  }

  while( 1 ){







>
>
>
>
>
>
>
>
>
>
>
>
>
>













|







3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
          }
        }
      }
    }while( i<nChunk );
  }
}

/*
** This function is used to extract the position list associated with
** the entry that segment iterator pSeg currently points to. If the
** entire position list resides on a single leaf page, then this 
** function invokes the xChunk callback exactly once. Or, if the position
** list spans multiple leaves, xChunk is invoked once for each leaf.
**
** The first argument passed to the xChunk callback is a copy of the Fts5Index
** pointer passed as the first argument to this function. Similarly, the 
** second argument passed to xChunk is a copy of the third parameter passed
** to this function. The third and fourth arguments passed to xChunk are
** a pointer to a blob containing part of the position list and the size
** in bytes there of.
*/
static void fts5ChunkIterate(
  Fts5Index *p,                   /* Index object */
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = 0;

  /* This function does not work with detail=none databases. */
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
    pgnoSave = pgno+1;
  }

  while( 1 ){
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
static void fts5WriteBtreeTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer object */
  int nTerm, const u8 *pTerm      /* First term on new page */
){
  fts5WriteFlushBtree(p, pWriter);
  fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
  pWriter->iBtPage = pWriter->writer.pgno;
}

/*
** This function is called when flushing a leaf page that contains no
** terms at all to disk.
*/
static void fts5WriteBtreeNoTerm(







|







3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
static void fts5WriteBtreeTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer object */
  int nTerm, const u8 *pTerm      /* First term on new page */
){
  fts5WriteFlushBtree(p, pWriter);
  fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
  pWriter->iBtPage = pWriter->pgno;
}

/*
** This function is called when flushing a leaf page that contains no
** terms at all to disk.
*/
static void fts5WriteBtreeNoTerm(
3753
3754
3755
3756
3757
3758
3759




3760
3761
3762
3763


3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
  }

  /* Increment the "number of sequential leaves without a term" counter. */
  pWriter->nEmpty++;
}





static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
  i64 iRowid;
  int iOff;



  iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
  fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
  return iRowid;
}

/*
** Rowid iRowid has just been appended to the current leaf page. It is the
** first on the page. This function appends an appropriate entry to the current
** doclist-index.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  i64 iRowid
){
  int i;







>
>
>
>




>
>







|
|







3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
  }

  /* Increment the "number of sequential leaves without a term" counter. */
  pWriter->nEmpty++;
}

/*
** Buffer pBuf contains a doclist-index page. Return the first rowid 
** value on the page.
*/
static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
  i64 iRowid;
  int iOff;

  /* Skip past the flags byte and the page number of the left-most child
  ** page to the first rowid.  */
  iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
  fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
  return iRowid;
}

/*
** Rowid iRowid has just been appended to the current leaf page. It is the
** first on the page. This function appends an appropriate entry to the
** current doclist-index.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  i64 iRowid
){
  int i;
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833




3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860

3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937



3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982

3983
3984
3985
3986
3987
3988
3989
3990



3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052




4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098


4099
4100
4101
4102
4103
4104
4105
    }else{
      bDone = 1;
    }

    if( pDlidx->bPrevValid ){
      iVal = iRowid - pDlidx->iPrev;
    }else{
      i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
      assert( pDlidx->buf.n==0 );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
      iVal = iRowid;
    }

    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
    pDlidx->bPrevValid = 1;
    pDlidx->iPrev = iRowid;
  }
}





static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->writer;
  i64 iRowid;

  assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );

  /* Set the szLeaf header field. */
  assert( 0==fts5GetU16(&pPage->buf.p[2]) );
  fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);

  if( pWriter->bFirstTermInPage ){
    /* No term was written to this page. */
    assert( pPage->pgidx.n==0 );
    fts5WriteBtreeNoTerm(p, pWriter);
  }else{
    /* Append the pgidx to the page buffer. Set the szLeaf header field. */
    fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
  }

  /* Write the page out to disk */
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);
  fts5BufferZero(&pPage->pgidx);

  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->iPrevPgidx = 0;
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;
  pWriter->bFirstRowidInPage = 1;
}

/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
** already occurred, this function is a no-op.
*/
static void fts5WriteAppendTerm(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm 
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->writer;
  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;

  assert( p->rc==SQLITE_OK );
  assert( pPage->buf.n>=4 );
  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );

  /* If the current leaf page is full, flush it to disk. */
  if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
    if( pPage->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
    }
    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
  }
  
  /* TODO1: Updating pgidx here. */
  pPgidx->n += sqlite3Fts5PutVarint(
      &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
  );
  pPage->iPrevPgidx = pPage->buf.n;
#if 0
  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  pPgidx->n += 2;
#endif

  if( pWriter->bFirstTermInPage ){
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term 
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one
      ** byte longer than the longest prefix (pTerm/nTerm) shares with the
      ** previous term. 
      **
      ** Usually, the previous term is available in pPage->term. The exception
      ** is if this is the first term written in an incremental-merge step.
      ** In this case the previous term is not available, so just write a
      ** copy of (pTerm/nTerm) into the parent node. This is slightly
      ** inefficient, but still correct.  */
      int n = nTerm;
      if( pPage->term.n ){
        n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
      }
      fts5WriteBtreeTerm(p, pWriter, n, pTerm);
      pPage = &pWriter->writer;
    }
  }else{
    nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
  }




  /* Append the number of bytes of new data, then the term data itself
  ** to the page. */
  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  pWriter->bFirstTermInPage = 0;

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pPage->pgno;
}

/*
** Append a rowid and position-list size field to the writers output. 
*/
static void fts5WriteAppendRowid(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  i64 iRowid
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->writer;

    if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }

    /* If this is to be the first rowid written to the page, set the 
    ** rowid-pointer in the page-header. Also append a value to the dlidx
    ** buffer, in case a doclist-index is required.  */
    if( pWriter->bFirstRowidInPage ){
      fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
      fts5WriteDlidxAppend(p, pWriter, iRowid);
    }

    /* Write the rowid. */
    if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
    }else{
      assert( p->rc || iRowid>pWriter->iPrevRowid );

      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
    }
    pWriter->iPrevRowid = iRowid;
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;
  }
}




static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  const u8 *aData, 
  int nData
){
  Fts5PageWriter *pPage = &pWriter->writer;
  const u8 *a = aData;
  int n = nData;
  
  assert( p->pConfig->pgsz>0 );
  while( p->rc==SQLITE_OK 
     && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz 
  ){
    int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
    int nCopy = 0;
    while( nCopy<nReq ){
      i64 dummy;
      nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
    }
    fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
    a += nCopy;
    n -= nCopy;
    fts5WriteFlushLeaf(p, pWriter);
  }
  if( n>0 ){
    fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
  }
}

/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer.
*/
static void fts5WriteFinish(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,         /* Writer object */
  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
){
  int i;
  Fts5PageWriter *pLeaf = &pWriter->writer;
  if( p->rc==SQLITE_OK ){
    assert( pLeaf->pgno>=1 );
    if( pLeaf->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
    }
    *pnLeaf = pLeaf->pgno-1;
    if( pLeaf->pgno>1 ){
      fts5WriteFlushBtree(p, pWriter);
    }
  }
  fts5BufferFree(&pLeaf->term);
  fts5BufferFree(&pLeaf->buf);
  fts5BufferFree(&pLeaf->pgidx);
  fts5BufferFree(&pWriter->btterm);

  for(i=0; i<pWriter->nDlidx; i++){
    sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
  }
  sqlite3_free(pWriter->aDlidx);
}





static void fts5WriteInit(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  int iSegid
){
  const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;

  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = iSegid;

  fts5WriteDlidxGrow(p, pWriter, 1);
  pWriter->writer.pgno = 1;
  pWriter->bFirstTermInPage = 1;
  pWriter->iBtPage = 1;

  assert( pWriter->writer.buf.n==0 );
  assert( pWriter->writer.pgidx.n==0 );

  /* Grow the two buffers to pgsz + padding bytes in size. */
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);

  if( p->pIdxWriter==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
          "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", 
          pConfig->zDb, pConfig->zName
    ));
  }

  if( p->rc==SQLITE_OK ){
    /* Initialize the 4-byte leaf-page header to 0x00. */
    memset(pWriter->writer.buf.p, 0, 4);
    pWriter->writer.buf.n = 4;

    /* Bind the current output segment id to the index-writer. This is an
    ** optimization over binding the same value over and over as rows are
    ** inserted into %_idx by the current writer.  */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}

/*
** Iterator pIter was used to iterate through the input segments of on an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.


*/
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
  int i;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(Fts5Buffer));
  for(i=0; i<pIter->nSeg; i++){
    Fts5SegIter *pSeg = &pIter->aSeg[i];







|












>
>
>
>

<
<


<
<

|
|

|

|



|



|
|


|
|
>
|
|
|





<
















|
|


|
<


|
|


|

|
<
<
<
<
|
<
<
|
<
<
<

|








|





|
|


<


|
|

>
>
>



|
|

|
|
<





|











<

|







|





|


>
|







>
>
>


|
|
|

<





|

|





|





|













<

|
|


|
|



|
|
|








>
>
>
>











|
<


|
|


|
|











|
|












>
>







3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861


3862
3863


3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892

3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913

3914
3915
3916
3917
3918
3919
3920
3921
3922




3923


3924



3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944

3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960

3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977

3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013

4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046

4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083

4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
    }else{
      bDone = 1;
    }

    if( pDlidx->bPrevValid ){
      iVal = iRowid - pDlidx->iPrev;
    }else{
      i64 iPgno = (i==0 ? pWriter->pgno : pDlidx[-1].pgno);
      assert( pDlidx->buf.n==0 );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
      iVal = iRowid;
    }

    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
    pDlidx->bPrevValid = 1;
    pDlidx->iPrev = iRowid;
  }
}

/*
** Flush the writer's current leaf page to disk. Initialize various
** fields ready for the next leaf page.
*/
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){


  i64 iRowid;



  /* Set the szLeaf header field. */
  assert( 0==fts5GetU16(&pWriter->buf.p[2]) );
  fts5PutU16(&pWriter->buf.p[2], (u16)pWriter->buf.n);

  if( pWriter->pgidx.n==0 ){
    /* No term was written to this page. */
    assert( pWriter->pgidx.n==0 );
    fts5WriteBtreeNoTerm(p, pWriter);
  }else{
    /* Append the pgidx to the page buffer. Set the szLeaf header field. */
    fts5BufferSafeAppendBlob(&pWriter->buf, pWriter->pgidx.p, pWriter->pgidx.n);
  }

  /* Write the page out to disk */
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pWriter->pgno);
  fts5DataWrite(p, iRowid, pWriter->buf.p, pWriter->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pWriter->buf);
  fts5BufferZero(&pWriter->pgidx);
  memset(pWriter->buf.p, 0, 4);
  pWriter->buf.n = 4;
  pWriter->iPrevPgidx = 0;
  pWriter->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */

  pWriter->bFirstRowidInPage = 1;
}

/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
** already occurred, this function is a no-op.
*/
static void fts5WriteAppendTerm(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm 
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  int iOff;
  Fts5Buffer *pPgidx = &pWriter->pgidx;

  assert( p->rc==SQLITE_OK );
  assert( pWriter->buf.n>=4 );


  /* If the current leaf page is full, flush it to disk. */
  if( (pWriter->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
    if( pWriter->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
    }
    fts5BufferGrow(&p->rc, &pWriter->buf, nTerm+FTS5_DATA_PADDING);
  }





  iOff = pWriter->buf.n;


  if( pPgidx->n==0 ){



    nPrefix = 0;
    if( pWriter->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term 
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one
      ** byte longer than the longest prefix (pTerm/nTerm) shares with the
      ** previous term. 
      **
      ** Usually, the previous term is available in pWriter->term. The exception
      ** is if this is the first term written in an incremental-merge step.
      ** In this case the previous term is not available, so just write a
      ** copy of (pTerm/nTerm) into the parent node. This is slightly
      ** inefficient, but still correct.  */
      int n = nTerm;
      if( pWriter->term.n ){
        n = 1 + fts5PrefixCompress(pWriter->term.n, pWriter->term.p, pTerm);
      }
      fts5WriteBtreeTerm(p, pWriter, n, pTerm);

    }
  }else{
    nPrefix = fts5PrefixCompress(pWriter->term.n, pWriter->term.p, pTerm);
    fts5BufferAppendVarint(&p->rc, &pWriter->buf, nPrefix);
  }

  fts5BufferSafeAppendVarint(pPgidx, iOff - pWriter->iPrevPgidx);
  pWriter->iPrevPgidx = iOff;

  /* Append the number of bytes of new data, then the term data itself
  ** to the page. */
  fts5BufferAppendVarint(&p->rc, &pWriter->buf, nTerm - nPrefix);
  fts5BufferAppendBlob(&p->rc, &pWriter->buf, nTerm - nPrefix, &pTerm[nPrefix]);

  /* Update the Fts5SegWriter.term field. */
  fts5BufferSet(&p->rc, &pWriter->term, nTerm, pTerm);


  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pWriter->pgno;
}

/*
** Append a rowid and position-list size field to the writers output. 
*/
static void fts5WriteAppendRowid(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  i64 iRowid
){
  if( p->rc==SQLITE_OK ){


    if( (pWriter->buf.n + pWriter->pgidx.n)>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }

    /* If this is to be the first rowid written to the page, set the 
    ** rowid-pointer in the page-header. Also append a value to the dlidx
    ** buffer, in case a doclist-index is required.  */
    if( pWriter->bFirstRowidInPage ){
      fts5PutU16(pWriter->buf.p, (u16)pWriter->buf.n);
      fts5WriteDlidxAppend(p, pWriter, iRowid);
    }

    /* Write the rowid. */
    if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
      fts5BufferSafeAppendVarint(&pWriter->buf, iRowid);
    }else{
      assert( p->rc || iRowid>pWriter->iPrevRowid );
      assert( pWriter->buf.n+9 <= pWriter->buf.nSpace );
      fts5BufferSafeAppendVarint(&pWriter->buf, iRowid - pWriter->iPrevRowid);
    }
    pWriter->iPrevRowid = iRowid;
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;
  }
}

/*
** Append position list data to the writers output.
*/
static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,         /* Writer object to write to the output of */
  const u8 *aData,                /* Buffer containing data to append */
  int nData                       /* Size of buffer aData[] in bytes */
){

  const u8 *a = aData;
  int n = nData;
  
  assert( p->pConfig->pgsz>0 );
  while( p->rc==SQLITE_OK 
     && (pWriter->buf.n + pWriter->pgidx.n + n)>=p->pConfig->pgsz 
  ){
    int nReq = p->pConfig->pgsz - pWriter->buf.n - pWriter->pgidx.n;
    int nCopy = 0;
    while( nCopy<nReq ){
      i64 dummy;
      nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
    }
    fts5BufferAppendBlob(&p->rc, &pWriter->buf, nCopy, a);
    a += nCopy;
    n -= nCopy;
    fts5WriteFlushLeaf(p, pWriter);
  }
  if( n>0 ){
    fts5BufferAppendBlob(&p->rc, &pWriter->buf, n, a);
  }
}

/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer.
*/
static void fts5WriteFinish(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,         /* Writer object */
  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
){
  int i;

  if( p->rc==SQLITE_OK ){
    assert( pWriter->pgno>=1 );
    if( pWriter->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
    }
    *pnLeaf = pWriter->pgno-1;
    if( pWriter->pgno>1 ){
      fts5WriteFlushBtree(p, pWriter);
    }
  }
  fts5BufferFree(&pWriter->term);
  fts5BufferFree(&pWriter->buf);
  fts5BufferFree(&pWriter->pgidx);
  fts5BufferFree(&pWriter->btterm);

  for(i=0; i<pWriter->nDlidx; i++){
    sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
  }
  sqlite3_free(pWriter->aDlidx);
}

/*
** Initialize the Fts5SegWriter object indicated by the second argument
** to write to the segment with seg-id iSegid.
*/
static void fts5WriteInit(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  int iSegid
){
  const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;

  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = iSegid;

  fts5WriteDlidxGrow(p, pWriter, 1);
  pWriter->pgno = 1;

  pWriter->iBtPage = 1;

  assert( pWriter->buf.n==0 );
  assert( pWriter->pgidx.n==0 );

  /* Grow the two buffers to pgsz + padding bytes in size. */
  sqlite3Fts5BufferSize(&p->rc, &pWriter->pgidx, nBuffer);
  sqlite3Fts5BufferSize(&p->rc, &pWriter->buf, nBuffer);

  if( p->pIdxWriter==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
          "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", 
          pConfig->zDb, pConfig->zName
    ));
  }

  if( p->rc==SQLITE_OK ){
    /* Initialize the 4-byte leaf-page header to 0x00. */
    memset(pWriter->buf.p, 0, 4);
    pWriter->buf.n = 4;

    /* Bind the current output segment id to the index-writer. This is an
    ** optimization over binding the same value over and over as rows are
    ** inserted into %_idx by the current writer.  */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}

/*
** Iterator pIter was used to iterate through the input segments of on an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
** It removes (DELETEs) any input segment leaves that are no longer required
** from the database.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
  int i;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(Fts5Buffer));
  for(i=0; i<pIter->nSeg; i++){
    Fts5SegIter *pSeg = &pIter->aSeg[i];
4150
4151
4152
4153
4154
4155
4156





4157
4158
4159
4160
4161
4162
4163
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
  }
  fts5BufferFree(&buf);
}






static void fts5MergeChunkCallback(
  Fts5Index *p, 
  void *pCtx, 
  const u8 *pChunk, int nChunk
){
  Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
  fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);







>
>
>
>
>







4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
  }
  fts5BufferFree(&buf);
}

/*
** This function is used as an fts5ChunkIterate() callback when merging
** levels. The chunk passed to this function is appended to the output
** segment.
*/
static void fts5MergeChunkCallback(
  Fts5Index *p, 
  void *pCtx, 
  const u8 *pChunk, int nChunk
){
  Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
  fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
  if( pLvl->nMerge ){
    pLvlOut = &pStruct->aLevel[iLvl+1];
    assert( pLvlOut->nSeg>0 );
    nInput = pLvl->nMerge;
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];

    fts5WriteInit(p, &writer, pSeg->iSegid);
    writer.writer.pgno = pSeg->pgnoLast+1;
    writer.iBtPage = 0;
  }else{
    int iSegid = fts5AllocateSegid(p, pStruct);

    /* Extend the Fts5Structure object as required to ensure the output
    ** segment exists. */
    if( iLvl==pStruct->nLevel-1 ){







|







4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
  if( pLvl->nMerge ){
    pLvlOut = &pStruct->aLevel[iLvl+1];
    assert( pLvlOut->nSeg>0 );
    nInput = pLvl->nMerge;
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];

    fts5WriteInit(p, &writer, pSeg->iSegid);
    writer.pgno = pSeg->pgnoLast+1;
    writer.iBtPage = 0;
  }else{
    int iSegid = fts5AllocateSegid(p, pStruct);

    /* Extend the Fts5Structure object as required to ensure the output
    ** segment exists. */
    if( iLvl==pStruct->nLevel-1 ){
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

    if( eDetail==FTS5_DETAIL_NONE ){
      if( pSegIter->bDel ){
        fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        if( pSegIter->nPos>0 ){
          fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        }
      }
    }else{
      /* Append the position-list data to the output */
      nPos = pSegIter->nPos*2 + pSegIter->bDel;
      fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
      fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
    }
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);







|

|





|







4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

    if( eDetail==FTS5_DETAIL_NONE ){
      if( pSegIter->bDel ){
        fts5BufferAppendVarint(&p->rc, &writer.buf, 0);
        if( pSegIter->nPos>0 ){
          fts5BufferAppendVarint(&p->rc, &writer.buf, 0);
        }
      }
    }else{
      /* Append the position-list data to the output */
      nPos = pSegIter->nPos*2 + pSegIter->bDel;
      fts5BufferAppendVarint(&p->rc, &writer.buf, nPos);
      fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
    }
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
      Fts5StructureSegment *pSeg;   /* New segment within pStruct */
      Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
      Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */

      Fts5SegWriter writer;
      fts5WriteInit(p, &writer, iSegid);

      pBuf = &writer.writer.buf;
      pPgidx = &writer.writer.pgidx;

      /* fts5WriteInit() should have initialized the buffers to (most likely)
      ** the maximum space required. */
      assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
      assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

      /* Begin scanning through hash table entries. This loop runs once for each







|
|







4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
      Fts5StructureSegment *pSeg;   /* New segment within pStruct */
      Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
      Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */

      Fts5SegWriter writer;
      fts5WriteInit(p, &writer, iSegid);

      pBuf = &writer.buf;
      pPgidx = &writer.pgidx;

      /* fts5WriteInit() should have initialized the buffers to (most likely)
      ** the maximum space required. */
      assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
      assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

      /* Begin scanning through hash table entries. This loop runs once for each
5057
5058
5059
5060
5061
5062
5063

5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
    xMerge = fts5MergeRowidLists;
    xAppend = fts5AppendRowid;
  }else{
    xMerge = fts5MergePrefixLists;
    xAppend = fts5AppendPoslist;
  }


  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p);

  if( aBuf && pStruct ){
    const int flags = FTS5INDEX_QUERY_SCAN 
                    | FTS5INDEX_QUERY_SKIPEMPTY 
                    | FTS5INDEX_QUERY_NOOUTPUT;
    int i;
    i64 iLastRowid = 0;
    Fts5Iter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Data *pData;







>



|







5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
    xMerge = fts5MergeRowidLists;
    xAppend = fts5AppendRowid;
  }else{
    xMerge = fts5MergePrefixLists;
    xAppend = fts5AppendPoslist;
  }

  assert( p->pStruct );
  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p);

  if( aBuf ){
    const int flags = FTS5INDEX_QUERY_SCAN 
                    | FTS5INDEX_QUERY_SKIPEMPTY 
                    | FTS5INDEX_QUERY_NOOUTPUT;
    int i;
    i64 iLastRowid = 0;
    Fts5Iter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Data *pData;
5361
5362
5363
5364
5365
5366
5367


5368
5369
5370
5371
5372
5373
5374
  Fts5Config *pConfig = p->pConfig;
  Fts5Iter *pRet = 0;
  Fts5Buffer buf = {0, 0, 0};

  /* If the QUERY_SCAN flag is set, all other flags must be clear. This
  ** flag is used by the fts5vocab module only. */
  assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );



  if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
    int iIdx = 0;                 /* Index to search */
    memcpy(&buf.p[1], pToken, nToken);

    /* Figure out which index to search and set iIdx accordingly. If this
    ** is a prefix query for which there is no prefix index, set iIdx to







>
>







5387
5388
5389
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399
5400
5401
5402
  Fts5Config *pConfig = p->pConfig;
  Fts5Iter *pRet = 0;
  Fts5Buffer buf = {0, 0, 0};

  /* If the QUERY_SCAN flag is set, all other flags must be clear. This
  ** flag is used by the fts5vocab module only. */
  assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
  assert( p->rc==SQLITE_OK );
  assert( p->pStruct!=0 || (flags & FTS5INDEX_QUERY_PREFIX)==0 );

  if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
    int iIdx = 0;                 /* Index to search */
    memcpy(&buf.p[1], pToken, nToken);

    /* Figure out which index to search and set iIdx accordingly. If this
    ** is a prefix query for which there is no prefix index, set iIdx to
5563
5564
5565
5566
5567
5568
5569

5570
5571
5572
5573
5574
5575
5576

/*
** This is called when a new read or write transaction may be being opened.
** It ensures that the in-memory cache of the structure record is valid.
*/
int sqlite3Fts5IndexNewTrans(Fts5Index *p){
  assert( p->pStruct==0 || p->iStructVersion!=0 );

  if( p->pConfig->iCookie<0 || fts5IndexDataVersion(p)!=p->iStructVersion ){
    fts5StructureInvalidate(p);
  }
  return fts5IndexReturn(p);
}









>







5591
5592
5593
5594
5595
5596
5597
5598
5599
5600
5601
5602
5603
5604
5605

/*
** This is called when a new read or write transaction may be being opened.
** It ensures that the in-memory cache of the structure record is valid.
*/
int sqlite3Fts5IndexNewTrans(Fts5Index *p){
  assert( p->pStruct==0 || p->iStructVersion!=0 );
  assert( p->rc==SQLITE_OK );
  if( p->pConfig->iCookie<0 || fts5IndexDataVersion(p)!=p->iStructVersion ){
    fts5StructureInvalidate(p);
  }
  return fts5IndexReturn(p);
}


6453
6454
6455
6456
6457
6458
6459
6460
6461
6462
6463
6464
6465
6466
6467
6468
6469
6470
6471
6472
6473
6474
6475
6476
6477
6478
6479
6480
6481
6482
6483
6484
6485
6486
6487
6488
6489
6490
6491
6492
6493
6494
6495
6496
6497
6498
6499
6500
6501
6502
6503
6504
6505
6506
6507
6508
6509
6510
6511
6512
6513
6514
6515
6516
6517
6518
6519
6520
6521
    sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
  fts5BufferFree(&s);
}

/*
** The implementation of user-defined scalar function fts5_rowid().
*/
static void fts5RowidFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (always 2) */
  sqlite3_value **apVal           /* Function arguments */
){
  const char *zArg;
  if( nArg==0 ){
    sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
  }else{
    zArg = (const char*)sqlite3_value_text(apVal[0]);
    if( 0==sqlite3_stricmp(zArg, "segment") ){
      i64 iRowid;
      int segid, pgno;
      if( nArg!=3 ){
        sqlite3_result_error(pCtx, 
            "should be: fts5_rowid('segment', segid, pgno))", -1
        );
      }else{
        segid = sqlite3_value_int(apVal[1]);
        pgno = sqlite3_value_int(apVal[2]);
        iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
        sqlite3_result_int64(pCtx, iRowid);
      }
    }else{
      sqlite3_result_error(pCtx, 
        "first arg to fts5_rowid() must be 'segment'" , -1
      );
    }
  }
}

/*
** This is called as part of registering the FTS5 module with database
** connection db. It registers several user-defined scalar functions useful
** with FTS5.
**
** If successful, SQLITE_OK is returned. If an error occurs, some other
** SQLite error code is returned instead.
*/
int sqlite3Fts5IndexInit(sqlite3 *db){
  int rc = sqlite3_create_function(
      db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
  );

  if( rc==SQLITE_OK ){
    rc = sqlite3_create_function(
        db, "fts5_decode_none", 2, 
        SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
    );
  }

  if( rc==SQLITE_OK ){
    rc = sqlite3_create_function(
        db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
    );
  }
  return rc;
}








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<




















<
<
<
<
<



6482
6483
6484
6485
6486
6487
6488


































6489
6490
6491
6492
6493
6494
6495
6496
6497
6498
6499
6500
6501
6502
6503
6504
6505
6506
6507
6508





6509
6510
6511
    sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
  fts5BufferFree(&s);
}



































/*
** This is called as part of registering the FTS5 module with database
** connection db. It registers several user-defined scalar functions useful
** with FTS5.
**
** If successful, SQLITE_OK is returned. If an error occurs, some other
** SQLite error code is returned instead.
*/
int sqlite3Fts5IndexInit(sqlite3 *db){
  int rc = sqlite3_create_function(
      db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
  );

  if( rc==SQLITE_OK ){
    rc = sqlite3_create_function(
        db, "fts5_decode_none", 2, 
        SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
    );
  }






  return rc;
}

Changes to ext/fts5/test/fts5_common.tcl.
640
641
642
643
644
645
646







647
    }
  }
  sqlite3_fts5_create_tokenizer db tclnum tclnum_create
}
#
# End of tokenizer code.
#-------------------------------------------------------------------------















>
>
>
>
>
>
>

640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
    }
  }
  sqlite3_fts5_create_tokenizer db tclnum tclnum_create
}
#
# End of tokenizer code.
#-------------------------------------------------------------------------

proc fts5_rowid_func {segid pgno} { return [expr ($segid<<37) + $pgno] }
proc register_fts5_rowid {db} {
  $db func fts5_rowid -argcount 2 fts5_rowid_func
}



Changes to ext/fts5/test/fts5corrupt.test.
37
38
39
40
41
42
43

44
45
46
47
48
49
50
51
52

53
54
55
56
57
58
59
60
61
62
  fts5_level_segs t1
} {1}
db_save

do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
set segid [lindex [fts5_level_segids t1] 0]


do_test 1.3 {
  execsql {
    DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

do_test 1.4 {
  db_restore_and_reopen

  execsql {
    UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
    rowid = fts5_rowid('segment', $segid, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

db_restore_and_reopen
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}








>


|






>


|







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
  fts5_level_segs t1
} {1}
db_save

do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
set segid [lindex [fts5_level_segids t1] 0]

register_fts5_rowid db
do_test 1.3 {
  execsql {
    DELETE FROM t1_data WHERE rowid = fts5_rowid($segid, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

do_test 1.4 {
  db_restore_and_reopen
  register_fts5_rowid db
  execsql {
    UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
    rowid = fts5_rowid($segid, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

db_restore_and_reopen
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

Changes to ext/fts5/test/fts5faultC.test.
12
13
14
15
16
17
18


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# This file is focused on OOM errors.
#

source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5faultC
return_if_no_fts5



#--------------------------------------------------------------------------
# Test that if an OOM error occurs while trying to set a configuration
# option, the in-memory and on-disk configurations are not left in an 
# inconsistent state.
#



proc posrowid {cmd} { $cmd xRowid }
proc negrowid {cmd} { expr -1 * [$cmd xRowid] }

sqlite3_fts5_create_function db posrowid posrowid
sqlite3_fts5_create_function db negrowid negrowid

do_execsql_test 1.0.0 {







>
>






<
<
<







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26



27
28
29
30
31
32
33
# This file is focused on OOM errors.
#

source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5faultC
return_if_no_fts5

if 1 {

#--------------------------------------------------------------------------
# Test that if an OOM error occurs while trying to set a configuration
# option, the in-memory and on-disk configurations are not left in an 
# inconsistent state.
#



proc posrowid {cmd} { $cmd xRowid }
proc negrowid {cmd} { expr -1 * [$cmd xRowid] }

sqlite3_fts5_create_function db posrowid posrowid
sqlite3_fts5_create_function db negrowid negrowid

do_execsql_test 1.0.0 {
69
70
71
72
73
74
75

76










































77
78
79
  if {$res != $ex} {
    error "2: expected {$ex} got {$res}"
  }
  db2 close
}















































finish_test








>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
  if {$res != $ex} {
    error "2: expected {$ex} got {$res}"
  }
  db2 close
}


#--------------------------------------------------------------------------

reset_db
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(y);
}
do_faultsim_test 2.1 -faults * -prep {
  sqlite3 db test.db
  execsql { SELECT * FROM t1('x') }
} -body {
  execsql { SELECT * FROM t1('x') }
  execsql { SELECT * FROM t1('x') }
} -test {
  faultsim_test_result [list 0 {}]
}
do_faultsim_test 2.2 -faults * -prep {
  sqlite3 db test.db
  execsql { INSERT INTO t1 VALUES('yy') }
} -body {
  execsql { INSERT INTO t1 VALUES('yy') }
  execsql { INSERT INTO t1 VALUES('yy') }
} -test {
  faultsim_test_result [list 0 {}]
}

#--------------------------------------------------------------------------
}
reset_db
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(y);
}
faultsim_save_and_close
do_faultsim_test 3.1 -faults * -prep {
  faultsim_restore_and_reopen
  execsql { 
    BEGIN;
      INSERT INTO t1 VALUES('x y z');
      INSERT INTO t1 VALUES('a b c');
  }
} -body {
  execsql { INSERT INTO t1(t1) VALUES('optimize') }
} -test {
  faultsim_test_result [list 0 {}]
}

finish_test

Changes to ext/fts5/test/fts5rowid.test.
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

45
46
47
48
49
50
51
#
# Tests of the scalar fts5_rowid() and fts5_decode() functions.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5rowid

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_catchsql_test 1.1 {
  SELECT fts5_rowid()
} {1 {should be: fts5_rowid(subject, ....)}}

do_catchsql_test 1.2 {
  SELECT fts5_rowid('segment')
} {1 {should be: fts5_rowid('segment', segid, pgno))}}

do_execsql_test 1.3 {
  SELECT fts5_rowid('segment', 1, 1)
} {137438953473}

do_catchsql_test 1.4 {
  SELECT fts5_rowid('nosucharg');
} {1 {first arg to fts5_rowid() must be 'segment'}} 


#-------------------------------------------------------------------------
# Tests of the fts5_decode() function.
#
reset_db

do_execsql_test 2.1 { 
  CREATE VIRTUAL TABLE x1 USING fts5(a, b);
  INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
} {}

proc rnddoc {n} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]







<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<





>







11
12
13
14
15
16
17



18


















19
20
21
22
23
24
25
26
27
28
29
30
31
#
# Tests of the scalar fts5_rowid() and fts5_decode() functions.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5rowid




return_if_no_fts5



















#-------------------------------------------------------------------------
# Tests of the fts5_decode() function.
#
reset_db
register_fts5_rowid db
do_execsql_test 2.1 { 
  CREATE VIRTUAL TABLE x1 USING fts5(a, b);
  INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
} {}

proc rnddoc {n} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# This is really a corruption test...
#do_execsql_test 2.7 {
#  UPDATE x1_data SET block = X'';
#  SELECT count(fts5_decode(rowid, block)) FROM x1_data;
#} $res

do_execsql_test 2.8 {
  SELECT fts5_decode(fts5_rowid('segment', 1000, 1), X'AB')
} {corrupt}

#-------------------------------------------------------------------------
# Tests with very large tokens.
#
set strlist [list \
  "[string repeat x 400]"                       \







|







68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# This is really a corruption test...
#do_execsql_test 2.7 {
#  UPDATE x1_data SET block = X'';
#  SELECT count(fts5_decode(rowid, block)) FROM x1_data;
#} $res

do_execsql_test 2.8 {
  SELECT fts5_decode(fts5_rowid(1000, 1), X'AB')
} {corrupt}

#-------------------------------------------------------------------------
# Tests with very large tokens.
#
set strlist [list \
  "[string repeat x 400]"                       \
Changes to ext/fts5/test/fts5simple3.test.
91
92
93
94
95
96
97






























98
99
100
  BEGIN;
    CREATE VIRTUAL TABLE t1 USING fts5;
} {1 {vtable constructor failed: t1}}

do_execsql_test 4.1 { SELECT * FROM sqlite_master } {}
do_execsql_test 4.2 { COMMIT }
do_execsql_test 4.3 { SELECT * FROM sqlite_master } {}































finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
  BEGIN;
    CREATE VIRTUAL TABLE t1 USING fts5;
} {1 {vtable constructor failed: t1}}

do_execsql_test 4.1 { SELECT * FROM sqlite_master } {}
do_execsql_test 4.2 { COMMIT }
do_execsql_test 4.3 { SELECT * FROM sqlite_master } {}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a);
  INSERT INTO t1 VALUES('vtable constructor failed');
} {}
do_execsql_test 5.1 {
  SELECT quote(block) FROM t1_data WHERE rowid=10;
} {
  X'000000000101010001010101'
}
do_test 5.3 {
  sqlite3 db2 test.db
  register_fts5_rowid db2
  db2 eval {
    UPDATE t1_data SET block = X'000000000101010001A7080101' WHERE rowid=10;
    UPDATE t1_data SET rowid=fts5_rowid(5000, 1) WHERE rowid=fts5_rowid(1,1);
    UPDATE t1_idx SET segid=5000 WHERE segid=1;
  }
  db2 close
} {}
do_execsql_test 5.4 {
  SELECT rowid FROM t1('cons*');
} {1}
breakpoint
do_execsql_test 5.5 {
  INSERT INTO t1 VALUES('hello world');
}

finish_test

Changes to ext/fts5/test/fts5synonym2.test.
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
    }
  }

  list [sort_poslist $PL] $CL
}
sqlite3_fts5_create_function db fts5_test_bothlist fts5_test_bothlist

proc fts5_rowid {cmd} { expr [$cmd xColumnText -1] }
sqlite3_fts5_create_function db fts5_rowid fts5_rowid

do_execsql_test 1.$tok.0.1 "
  CREATE VIRTUAL TABLE ss USING fts5(a, b, 
       tokenize='tclnum $tok', detail=%DETAIL%);
  INSERT INTO ss(ss, rank) VALUES('rank', 'fts5_rowid()');
"

do_execsql_test 1.$tok.0.2 {
  INSERT INTO ss VALUES('5 5 five seven 3 seven i', '2 1 5 0 two 1 i');
  INSERT INTO ss VALUES('six ix iii 7 i vii iii', 'one seven nine 4 9 1 vi');
  INSERT INTO ss VALUES('6 viii i five six zero seven', '5 v iii iv iv 3');
  INSERT INTO ss VALUES('9 ii six 8 1 6', 'six 4 iv iv 7');







|
|




|







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
    }
  }

  list [sort_poslist $PL] $CL
}
sqlite3_fts5_create_function db fts5_test_bothlist fts5_test_bothlist

proc rowid_func {cmd} { expr [$cmd xColumnText -1] }
sqlite3_fts5_create_function db rowid_func rowid_func

do_execsql_test 1.$tok.0.1 "
  CREATE VIRTUAL TABLE ss USING fts5(a, b, 
       tokenize='tclnum $tok', detail=%DETAIL%);
  INSERT INTO ss(ss, rank) VALUES('rank', 'rowid_func()');
"

do_execsql_test 1.$tok.0.2 {
  INSERT INTO ss VALUES('5 5 five seven 3 seven i', '2 1 5 0 two 1 i');
  INSERT INTO ss VALUES('six ix iii 7 i vii iii', 'one seven nine 4 9 1 vi');
  INSERT INTO ss VALUES('6 viii i five six zero seven', '5 v iii iv iv 3');
  INSERT INTO ss VALUES('9 ii six 8 1 6', 'six 4 iv iv 7');
Changes to ext/fts5/test/fts5version.test.
55
56
57
58
59
60
61



















62
63
64
do_test 1.7 {
  execsql { DELETE FROM t1_config WHERE k='version' }
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 0, expected 4) - run 'rebuild'}}





















finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
do_test 1.7 {
  execsql { DELETE FROM t1_config WHERE k='version' }
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 0, expected 4) - run 'rebuild'}}

#---------------------------------------------------------------------------

do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(two);
  INSERT INTO t2 VALUES('a b c d');
}

do_test 2.2 {
  sqlite3 db2 test.db
  execsql {
    INSERT INTO t2(t2, rank) VALUES('pgsz', 512);
    UPDATE t2_config SET v=5 WHERE k='version';
  } db2
  db2 close
} {}

do_catchsql_test 2.3 {
  SELECT * FROM t2('b + c');
} {1 {SQL logic error or missing database}}

finish_test