/ Check-in [7aabb62c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix various incorrect and missing comments and other style issues in and around the FTS incremental merge code.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts4-incr-merge
Files: files | file ages | folders
SHA1:7aabb62c8ccbd2b8d216e25226f06e5820dec38a
User & Date: dan 2012-03-17 16:56:57
Context
2012-03-20
17:04
Merge trunk changes into the fts4-incr-merge branch. check-in: f61d5fb0 user: drh tags: fts4-incr-merge
2012-03-17
16:56
Fix various incorrect and missing comments and other style issues in and around the FTS incremental merge code. check-in: 7aabb62c user: dan tags: fts4-incr-merge
2012-03-16
15:54
Fix some integer overflow problems that can occur when using large langauge id values. check-in: 3475092c user: dan tags: fts4-incr-merge
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3Int.h.

209
210
211
212
213
214
215
216
217
218
219

220
221
222
223
224
225
226
227
228






229
230
231
232
233
234
235
  u8 bHasStat;                    /* True if %_stat table exists */
  u8 bHasDocsize;                 /* True if %_docsize table exists */
  u8 bDescIdx;                    /* True if doclists are in reverse order */
  int nPgsz;                      /* Page size for host database */
  char *zSegmentsTbl;             /* Name of %_segments table */
  sqlite3_blob *pSegments;        /* Blob handle open on %_segments table */

  /* TODO: Fix the first paragraph of this comment.
  **
  ** The following array of hash tables is used to buffer pending index 
  ** updates during transactions. Variable nPendingData estimates the memory 

  ** size of the pending data, including hash table overhead, not including
  ** malloc overhead.  When nPendingData exceeds nMaxPendingData, the buffer 
  ** is flushed automatically. Variable iPrevDocid is the docid of the most 
  ** recently inserted record.
  **
  ** A single FTS4 table may have multiple full-text indexes. For each index
  ** there is an entry in the aIndex[] array. Index 0 is an index of all the
  ** terms that appear in the document set. Each subsequent index in aIndex[]
  ** is an index of prefixes of a specific length.






  */
  int nIndex;                     /* Size of aIndex[] */
  struct Fts3Index {
    int nPrefix;                  /* Prefix length (0 for main terms index) */
    Fts3Hash hPending;            /* Pending terms table for this index */
  } *aIndex;
  int nMaxPendingData;            /* Max pending data before flush to disk */







|
<

|
>
|
<
<
<





>
>
>
>
>
>







209
210
211
212
213
214
215
216

217
218
219
220



221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
  u8 bHasStat;                    /* True if %_stat table exists */
  u8 bHasDocsize;                 /* True if %_docsize table exists */
  u8 bDescIdx;                    /* True if doclists are in reverse order */
  int nPgsz;                      /* Page size for host database */
  char *zSegmentsTbl;             /* Name of %_segments table */
  sqlite3_blob *pSegments;        /* Blob handle open on %_segments table */

  /* 

  ** The following array of hash tables is used to buffer pending index 
  ** updates during transactions. All pending updates buffered at any one
  ** time must share a common language-id (see the FTS4 langid= feature).
  ** The current language id is stored in variable iPrevLangid.



  **
  ** A single FTS4 table may have multiple full-text indexes. For each index
  ** there is an entry in the aIndex[] array. Index 0 is an index of all the
  ** terms that appear in the document set. Each subsequent index in aIndex[]
  ** is an index of prefixes of a specific length.
  **
  ** Variable nPendingData contains an estimate the memory consumed by the 
  ** pending data structures, including hash table overhead, but not including
  ** malloc overhead.  When nPendingData exceeds nMaxPendingData, all hash
  ** tables are flushed to disk. Variable iPrevDocid is the docid of the most 
  ** recently inserted record.
  */
  int nIndex;                     /* Size of aIndex[] */
  struct Fts3Index {
    int nPrefix;                  /* Prefix length (0 for main terms index) */
    Fts3Hash hPending;            /* Pending terms table for this index */
  } *aIndex;
  int nMaxPendingData;            /* Max pending data before flush to disk */

Changes to ext/fts3/fts3_write.c.

61
62
63
64
65
66
67
















68
69
70
71
72
73
74
...
368
369
370
371
372
373
374

375
376
377
378
379
380
381
...
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
....
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
....
3254
3255
3256
3257
3258
3259
3260

3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272

3273
3274
3275
3276
3277

3278
3279
3280
3281
3282
3283
3284
....
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
....
3316
3317
3318
3319
3320
3321
3322




3323
3324
3325
3326
3327
3328




3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
....
3352
3353
3354
3355
3356
3357
3358









3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371










3372
3373
3374
3375
3376
3377
3378
....
3400
3401
3402
3403
3404
3405
3406



3407
3408
3409
3410
3411
3412




3413
3414
3415
3416
3417
3418
3419
....
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515










































































3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531



3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615









3616
3617
3618
3619
3620
3621














3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634

3635
3636

3637
3638

3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661








3662
3663
3664
3665
3666
3667
3668
....
3669
3670
3671
3672
3673
3674
3675












3676
3677

3678
3679

3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692




3693








3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714


3715
3716
3717
3718
3719
3720
3721
....
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
....
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
....
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
....
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931




3932
3933
3934
3935
3936

3937
3938

3939
3940
3941
3942
3943
3944
3945
3946
3947
....
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
....
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067









4068
4069
4070
4071
4072
4073
4074
....
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
....
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
....
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
int test_fts3_node_chunk_threshold = (4*1024)*4;
# define FTS3_NODE_CHUNKSIZE       test_fts3_node_chunksize
# define FTS3_NODE_CHUNK_THRESHOLD test_fts3_node_chunk_threshold
#else
# define FTS3_NODE_CHUNKSIZE (4*1024) 
# define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4)
#endif

















typedef struct PendingList PendingList;
typedef struct SegmentNode SegmentNode;
typedef struct SegmentWriter SegmentWriter;

/*
** An instance of the following data structure is used to build doclists
................................................................................
    for(i=0; rc==SQLITE_OK && i<nParam; i++){
      rc = sqlite3_bind_value(pStmt, i+1, apVal[i]);
    }
  }
  *pp = pStmt;
  return rc;
}


static int fts3SelectDocsize(
  Fts3Table *pTab,                /* FTS3 table handle */
  int eStmt,                      /* Either SQL_SELECT_DOCSIZE or DOCTOTAL */
  sqlite3_int64 iDocid,           /* Docid to bind for SQL_SELECT_DOCSIZE */
  sqlite3_stmt **ppStmt           /* OUT: Statement handle */
){
................................................................................
  assert( iLangid>=0 );
  assert( p->nIndex>0 );
  assert( iIndex>=0 && iIndex<p->nIndex );

  iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL;
  return iBase + iLevel;
}

/*
** Given an absolute level number, determine the langauge-id, index
** and relative level that it corresponds to.
**
** The return value is the relative level. The language-id and index
** are returned via output variables.
*/
static int getRelativeLevel(
  Fts3Table *p,                   /* FTS table handle */
  sqlite3_int64 iAbsLevel,        /* Absolute level */
  int *piLangid,                  /* OUT: Language id */
  int *piIndex                    /* OUT: Index in p->aIndex[] */
){
  if( piLangid ) *piLangid = (iAbsLevel / FTS3_SEGDIR_MAXLEVEL) / p->nIndex;
  if( piIndex ) *piIndex = (iAbsLevel / FTS3_SEGDIR_MAXLEVEL) % p->nIndex;
  return iAbsLevel % FTS3_SEGDIR_MAXLEVEL;
}


/*
** Set *ppStmt to a statement handle that may be used to iterate through
** all rows in the %_segdir table, from oldest to newest. If successful,
** return SQLITE_OK. If an error occurs while preparing the statement, 
** return an SQLite error code.
**
................................................................................
  if( rc==SQLITE_OK ){
    /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already
    ** full, merge all segments in level iLevel into a single iLevel+1
    ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise,
    ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
    */
    if( iNext>=FTS3_MERGE_COUNT ){
sqlite3_log(SQLITE_OK, 
    "16-way merge at level=%d langid=%d index=%d", iLevel, iLangid, iIndex
);
      rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
      *piIdx = 0;
    }else{
      *piIdx = iNext;
    }
  }

................................................................................
  return rc;
}


/*
** This function opens a cursor used to read the input data for an 
** incremental merge operation. Specifically, it opens a cursor to scan

** the oldest two segments (idx=0 and idx=1) in absolute level iAbsLevel.
*/
static int fts3IncrmergeCsr(
  Fts3Table *p,                   /* FTS3 table handle */
  sqlite3_int64 iAbsLevel,        /* Absolute level to open */
  int nSeg,                       /* Number of segments to merge */
  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
){
  int rc;                         /* Return Code */
  sqlite3_stmt *pStmt = 0;        /* Statement used to read %_segdir entry */  
  int nByte;                      /* Bytes allocated at pCsr->apSegment[] */


  assert( nSeg>=2 );
  memset(pCsr, 0, sizeof(*pCsr));
  nByte = sizeof(Fts3SegReader *) * nSeg;

  pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte);

  if( pCsr->apSegment==0 ){
    rc = SQLITE_NOMEM;
  }else{
    memset(pCsr->apSegment, 0, nByte);
    pCsr->nSegment = nSeg;
    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
  }
................................................................................
    if( rc==SQLITE_OK ) rc = rc2;
  }

  return rc;
}

typedef struct IncrmergeWriter IncrmergeWriter;
typedef struct LayerWriter LayerWriter;
typedef struct Blob Blob;
typedef struct NodeReader NodeReader;

/*
** An instance of the following structure is used as a dynamic buffer
** to build up nodes or other blobs of data in.
**
................................................................................
*/
struct Blob {
  char *a;                        /* Pointer to allocation */
  int n;                          /* Number of valid bytes of data in a[] */
  int nAlloc;                     /* Allocated size of a[] (nAlloc>=n) */
};





struct LayerWriter {
  sqlite3_int64 iBlock;           /* Current block id */
  Blob key;                       /* Last key written to the current block */
  Blob block;                     /* Current block image */
};





struct IncrmergeWriter {
  int nLeafEst;                   /* Space allocated for leaf blocks */
  int nWork;                      /* Number of leaf pages flushed */
  sqlite3_int64 iAbsLevel;        /* Absolute level of input segments */
  int iIdx;                       /* Index of *output* segment in iAbsLevel+1 */
  sqlite3_int64 iStart;           /* Block number of first allocated block */
  sqlite3_int64 iEnd;             /* Block number of last allocated block */
  LayerWriter aLayer[FTS_MAX_APPENDABLE_HEIGHT];
};

/*
** An object of the following type is used to read data from a single
** FTS segment node. See the following functions:
**
**     nodeReaderInit()
................................................................................
  /* Output variables. Containing the current node entry. */
  sqlite3_int64 iChild;           /* Pointer to child node */
  Blob term;                      /* Current term */
  const char *aDoclist;           /* Pointer to doclist */
  int nDoclist;                   /* Size of doclist in bytes */
};










static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){
  if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){
    int nAlloc = nMin;
    char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc);
    if( a ){
      pBlob->nAlloc = nAlloc;
      pBlob->a = a;
    }else{
      *pRc = SQLITE_NOMEM;
    }
  }
}











static int nodeReaderNext(NodeReader *p){
  int bFirst = (p->term.n==0);    /* True for first term on the node */
  int nPrefix = 0;                /* Bytes to copy from previous term */
  int nSuffix = 0;                /* Bytes to append to the prefix */
  int rc = SQLITE_OK;             /* Return code */

  assert( p->aNode );
................................................................................
  }

  assert( p->iOff<=p->nNode );

  return rc;
}




static void nodeReaderRelease(NodeReader *p){
  sqlite3_free(p->term.a);
}

/*
** Initialize a node-reader object.




*/
static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){
  memset(p, 0, sizeof(NodeReader));
  p->aNode = aNode;
  p->nNode = nNode;

  /* Figure out if this is a leaf or an internal node. */
................................................................................
** This function is called while writing an FTS segment each time a leaf o
** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed
** to be greater than the largest key on the node just written, but smaller
** than or equal to the first key that will be written to the next leaf
** node.
**
** The block id of the leaf node just written to disk may be found in
** (pWriter->aLayer[0].iBlock) when this function is called.
*/
static int fts3IncrmergePush(
  Fts3Table *p,                   /* Fts3 table handle */
  IncrmergeWriter *pWriter,       /* Writer object */
  const char *zTerm,              /* Term to write to internal node */
  int nTerm                       /* Bytes at zTerm */
){
  sqlite3_int64 iPtr = pWriter->aLayer[0].iBlock;
  int iLayer;

  assert( nTerm>0 );
  for(iLayer=1; ALWAYS(iLayer<FTS_MAX_APPENDABLE_HEIGHT); iLayer++){
    sqlite3_int64 iNextPtr = 0;
    LayerWriter *pLayer = &pWriter->aLayer[iLayer];
    int rc = SQLITE_OK;
    int nPrefix;
    int nSuffix;
    int nSpace;

    /* Figure out how much space the key will consume if it is written to
    ** the current node of layer iLayer. Due to the prefix compression, 
    ** the space required changes depending on which node the key is to
    ** be added to.  */
    nPrefix = fts3PrefixCompress(pLayer->key.a, pLayer->key.n, zTerm, nTerm);
    nSuffix = nTerm - nPrefix;
    nSpace  = sqlite3Fts3VarintLen(nPrefix);
    nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;

    if( pLayer->key.n==0 || (pLayer->block.n + nSpace)<=p->nNodeSize ){ 
      /* If the current node of layer iLayer contains zero keys, or if adding
      ** the key to it will not cause it to grow to larger than nNodeSize 
      ** bytes in size, write the key here.  */

      Blob *pBlk = &pLayer->block;
      if( pBlk->n==0 ){
        blobGrowBuffer(pBlk, p->nNodeSize, &rc);
        if( rc==SQLITE_OK ){
          pBlk->a[0] = (char)iLayer;
          pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr);
        }
      }
      blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc);
      blobGrowBuffer(&pLayer->key, nTerm, &rc);

      if( rc==SQLITE_OK ){
        if( pLayer->key.n ){
          pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix);
        }
        pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix);
        memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix);
        pBlk->n += nSuffix;

        memcpy(pLayer->key.a, zTerm, nTerm);
        pLayer->key.n = nTerm;
      }
    }else{
      /* Otherwise, flush the the current node of layer iLayer to disk.
      ** Then allocate a new, empty sibling node. The key will be written
      ** into the parent of this node. */
      rc = fts3WriteSegment(p, pLayer->iBlock, pLayer->block.a,pLayer->block.n);

      assert( pLayer->block.nAlloc>=p->nNodeSize );
      pLayer->block.a[0] = (char)iLayer;
      pLayer->block.n = 1 + sqlite3Fts3PutVarint(&pLayer->block.a[1], iPtr+1);

      iNextPtr = pLayer->iBlock;
      pLayer->iBlock++;
      pLayer->key.n = 0;
    }

    if( rc!=SQLITE_OK || iNextPtr==0 ) return rc;
    iPtr = iNextPtr;
  }

  assert( 0 );
}











































































static int fts3IncrmergeAppend(
  Fts3Table *p,                   /* Fts3 table handle */
  IncrmergeWriter *pWriter,       /* Writer object */
  Fts3MultiSegReader *pCsr        /* Cursor containing term and doclist */
){
  const char *zTerm = pCsr->zTerm;
  int nTerm = pCsr->nTerm;
  const char *aDoclist = pCsr->aDoclist;
  int nDoclist = pCsr->nDoclist;

  int rc = SQLITE_OK;           /* Return code */
  int nSpace;                   /* Total space in bytes required on leaf */
  int nPrefix;
  int nSuffix;
  LayerWriter *pLayer;
  Blob *pPg;




  pLayer = &pWriter->aLayer[0];
  nPrefix = fts3PrefixCompress(pLayer->key.a, pLayer->key.n, zTerm, nTerm);
  nSuffix = nTerm - nPrefix;

  nSpace  = sqlite3Fts3VarintLen(nPrefix);
  nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
  nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;

  /* If the current block is not empty, and if adding this term/doclist
  ** to the current block would make it larger than Fts3Table.nNodeSize
  ** bytes, write this block out to the database. */
  if( pLayer->block.n>0 && (pLayer->block.n + nSpace)>p->nNodeSize ){
    rc = fts3WriteSegment(p, pLayer->iBlock, pLayer->block.a, pLayer->block.n);
    pWriter->nWork++;

    /* Add the current term to the parent node. The term added to the 
    ** parent must:
    **
    **   a) be greater than the largest term on the leaf node just written
    **      to the database (still available in pLayer->key), and
    **
    **   b) be less than or equal to the term about to be added to the new
    **      leaf node (zTerm/nTerm).
    **
    ** In other words, it must be the prefix of zTerm 1 byte longer than
    ** the common prefix (if any) of zTerm and pWriter->zTerm.
    */
    if( rc==SQLITE_OK ){
      rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1);
    }

    /* Advance to the next output block */
    pLayer->iBlock++;
    pLayer->key.n = 0;
    pLayer->block.n = 0;

    nPrefix = 0;
    nSuffix = nTerm;
    nSpace  = 1;
    nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
    nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
  }

  blobGrowBuffer(&pLayer->key, nTerm, &rc);
  blobGrowBuffer(&pLayer->block, pLayer->block.n + nSpace, &rc);

  if( rc==SQLITE_OK ){
    /* Update the block image with the new entry */
    pPg = &pLayer->block;
    pPg->n += sqlite3Fts3PutVarint(&pPg->a[pPg->n], nPrefix);
    pPg->n += sqlite3Fts3PutVarint(&pPg->a[pPg->n], nSuffix);
    memcpy(&pPg->a[pPg->n], &zTerm[nPrefix], nSuffix);
    pPg->n += nSuffix;
    pPg->n += sqlite3Fts3PutVarint(&pPg->a[pPg->n], nDoclist);
    memcpy(&pPg->a[pPg->n], aDoclist, nDoclist);
    pPg->n += nDoclist;

    /* Take a copy of the key just written */
    memcpy(pLayer->key.a, zTerm, nTerm);
    pLayer->key.n = nTerm;
  }

  return rc;
}

static void fts3IncrmergeRelease(
  Fts3Table *p,
  IncrmergeWriter *pWriter,
  int *pRc
){
  int i;                          /* Used to iterate through non-root layers */
  int iRoot;
  LayerWriter *pRoot;
  int rc = *pRc;

  /* Find the root node */
  for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){
    LayerWriter *pLayer = &pWriter->aLayer[iRoot];
    if( pLayer->block.n>0 ) break;
    assert( *pRc || pLayer->block.nAlloc==0 );
    assert( *pRc || pLayer->key.nAlloc==0 );
    sqlite3_free(pLayer->block.a);
    sqlite3_free(pLayer->key.a);









  }

  /* Empty output segment. This is a no-op. */
  if( iRoot<0 ) return;

  /* The entire output segment fits on the root node. This is not allowed. */














  if( iRoot==0 ){
    Blob *pBlock = &pWriter->aLayer[1].block;
    blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc);
    if( rc==SQLITE_OK ){
      pBlock->a[0] = 0x01;
      pBlock->n = 1 + sqlite3Fts3PutVarint(
          &pBlock->a[1], pWriter->aLayer[0].iBlock
      );
    }
    iRoot = 1;
  }
  pRoot = &pWriter->aLayer[iRoot];


  for(i=0; i<iRoot; i++){
    LayerWriter *pLayer = &pWriter->aLayer[i];

    if( pLayer->block.n>0 && rc==SQLITE_OK ){
      rc = fts3WriteSegment(p, pLayer->iBlock, pLayer->block.a,pLayer->block.n);

    }
    sqlite3_free(pLayer->block.a);
    sqlite3_free(pLayer->key.a);
  }

  /* Write the %_segdir record. */
  if( rc==SQLITE_OK ){
    rc = fts3WriteSegdir(p, 
        pWriter->iAbsLevel+1,               /* level */
        pWriter->iIdx,                      /* idx */
        pWriter->iStart,                    /* start_block */
        pWriter->aLayer[0].iBlock,          /* leaves_end_block */
        pWriter->iEnd,                      /* end_block */
        pRoot->block.a, pRoot->block.n      /* root */
    );
  }
  sqlite3_free(pRoot->block.a);
  sqlite3_free(pRoot->key.a);

  *pRc = rc;
}










static int fts3TermCmp(
  const char *zLhs, int nLhs,     /* LHS of comparison */
  const char *zRhs, int nRhs      /* RHS of comparison */
){
  int nCmp = MIN(nLhs, nRhs);
  int res;

................................................................................
  res = memcmp(zLhs, zRhs, nCmp);
  if( res==0 ) res = nLhs - nRhs;

  return res;
}














static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){
  int bRes = 0;

  sqlite3_stmt *pCheck = 0;
  int rc;


  rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0);
  if( rc==SQLITE_OK ){
    sqlite3_bind_int64(pCheck, 1, iEnd);
    if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1;
    rc = sqlite3_reset(pCheck);
  }
  
  *pbRes = bRes;
  return rc;
}

/*




**








*/
static int fts3IncrmergeLoad(
  Fts3Table *p,                   /* Fts3 table handle */
  sqlite3_int64 iAbsLevel,        /* Absolute level of input segments */
  int iIdx,                       /* Index of candidate output segment */
  const char *zKey,               /* First key to write */
  int nKey,                       /* Number of bytes in nKey */
  IncrmergeWriter *pWriter        /* Populate this object */
){
  sqlite3_int64 iStart = 0;
  sqlite3_int64 iLeafEnd = 0;
  sqlite3_int64 iEnd = 0;
  const char *aRoot = 0;
  int nRoot = 0;
  int rc;

  sqlite3_stmt *pSelect = 0;
  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0);
  if( rc==SQLITE_OK ){
    int rc2;
    int bAppendable = 0;


    sqlite3_bind_int64(pSelect, 1, iAbsLevel+1);
    sqlite3_bind_int(pSelect, 2, iIdx);
    if( sqlite3_step(pSelect)==SQLITE_ROW ){
      iStart = sqlite3_column_int64(pSelect, 1);
      iLeafEnd = sqlite3_column_int64(pSelect, 2);
      iEnd = sqlite3_column_int64(pSelect, 3);
      nRoot = sqlite3_column_bytes(pSelect, 4);
................................................................................
    }

    if( rc==SQLITE_OK && bAppendable ){
      /* It is possible to append to this segment. Set up the IncrmergeWriter
      ** object to do so.  */
      int i;
      int nHeight = (int)aRoot[0];
      LayerWriter *pLayer;

      pWriter->nLeafEst = ((iEnd - iStart) + 1) / FTS_MAX_APPENDABLE_HEIGHT;
      pWriter->iStart = iStart;
      pWriter->iEnd = iEnd;
      pWriter->iAbsLevel = iAbsLevel;
      pWriter->iIdx = iIdx;

      for(i=nHeight+1; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
        pWriter->aLayer[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
      }

      pLayer = &pWriter->aLayer[nHeight];
      pLayer->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight;
      blobGrowBuffer(&pLayer->block, MAX(nRoot, p->nNodeSize), &rc);
      if( rc==SQLITE_OK ){
        memcpy(pLayer->block.a, aRoot, nRoot);
        pLayer->block.n = nRoot;
      }

      for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){
        pLayer = &pWriter->aLayer[i];
        NodeReader reader;

        rc = nodeReaderInit(&reader, pLayer->block.a, pLayer->block.n);
        while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader);
        blobGrowBuffer(&pLayer->key, reader.term.n, &rc);
        if( rc==SQLITE_OK ){
          memcpy(pLayer->key.a, reader.term.a, reader.term.n);
          pLayer->key.n = reader.term.n;
          if( i>0 ){
            char *aBlock = 0;
            int nBlock = 0;
            pLayer = &pWriter->aLayer[i-1];
            pLayer->iBlock = reader.iChild;
            rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0);
            blobGrowBuffer(&pLayer->block, MAX(nBlock, p->nNodeSize), &rc);
            if( rc==SQLITE_OK ){
              memcpy(pLayer->block.a, aBlock, nBlock);
              pLayer->block.n = nBlock;
            }
            sqlite3_free(aBlock);
          }
        }
        nodeReaderRelease(&reader);
      }
    }
................................................................................
  }

  return rc;
}

/* 
** Either allocate an output segment or locate an existing appendable 
** output segment to append to. And "appendable" output segment is
** slightly different to a normal one, as the required range of keys in
** the %_segments table must be allocated up front. This requires some
** assumptions:
**
**   * It is expected that due to the short-keys used, and the prefix and
**     suffix compression, the fanout of segment b-trees will be very high.
**     With a conservative assumption of 32 bytes per key and 1024 byte
**     pages, say 32 (2^5). Since SQLite database files are limited to 
**     a total of 2^31 pages in size, it seems very likely that no segment
**     b-tree will have more than ten layers of nodes (including the 
**     leaves).
**
**   * Since each interior node has a pointer to at least two child nodes,
**     each layer of interior nodes must be smaller than the layer of
**     leaf nodes.
**
** In the %_segdir table, a segment is defined by the values in three
** columns:
**
**     start_block
**     leaves_end_block
**     end_block
**
** When an appendable segment is allocated, it is estimated that the
** maximum number of leaf blocks that may be required is the sum of the
** number of leaf blocks consumed by the two input segments multiplied
** by three. If an input segment consists of a root node only, treat it
** as if it has a single leaf node for the purposes of this estimate.
** This value is stored in stack variable nLeafEst.
**
** A total of 10*nLeafEst blocks are allocated when an appendable segment
** is created ((1 + end_block - start_block)==10*nLeafEst). The contiguous
** array of leaf nodes starts at the first block allocated. The array
** of interior nodes that are parents of the leaf nodes start at block
** (start_block + (1 + end_block - start_block) / 10). And so on.
**
** In the actual code below, the value "10" is replaced with the 
** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT.
*/
static int fts3IncrmergeWriter( 
  Fts3Table *p,                   /* Fts3 table handle */
  sqlite3_int64 iAbsLevel,        /* Absolute level of input segments */
  Fts3MultiSegReader *pCsr,       /* Cursor that data will be read from */
  IncrmergeWriter *pWriter        /* Populate this object */
................................................................................
  int rc;                         /* Return Code */
  int i;                          /* Iterator variable */
  int nLeafEst;                   /* Blocks allocated for leaf nodes */
  int iIdx;                       /* Index of output segment */
  sqlite3_stmt *pLeafEst = 0;     /* SQL used to determine nLeafEst */
  sqlite3_stmt *pFirstBlock = 0;  /* SQL used to determine first block */
  sqlite3_stmt *pOutputIdx = 0;   /* SQL used to find output index */
  const char *zKey = pCsr->zTerm;
  int nKey = pCsr->nTerm;

  rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0);
  if( rc==SQLITE_OK ){
    sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1);
    sqlite3_step(pOutputIdx);
    iIdx = sqlite3_column_int(pOutputIdx, 0) - 1;
    rc = sqlite3_reset(pOutputIdx);
................................................................................
  rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0);
  if( rc!=SQLITE_OK ) return rc;

  pWriter->iAbsLevel = iAbsLevel;
  pWriter->nLeafEst = nLeafEst;
  pWriter->iIdx = iIdx;

  /* Set up the array of LayerWriter objects */
  for(i=0; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
    pWriter->aLayer[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
  }
  return SQLITE_OK;
}

/*
** Remove an entry from the %_segdir table. This involves running the 
** following two statements:
**
**   DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx
**   UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx




*/
static int fts3RemoveSegdirEntry(
  Fts3Table *p, 
  sqlite3_int64 iAbsLevel, 
  int iIdx

){
  int rc;

  sqlite3_stmt *pDelete = 0;
  sqlite3_stmt *pUpdate = 0;

  rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0);
  if( rc==SQLITE_OK ){
    sqlite3_bind_int64(pDelete, 1, iAbsLevel);
    sqlite3_bind_int(pDelete, 2, iIdx);
    sqlite3_step(pDelete);
    rc = sqlite3_reset(pDelete);
................................................................................
    sqlite3_step(pUpdate);
    rc = sqlite3_reset(pUpdate);
  }

  return rc;
}

static int fts3AppendToNode(
  Blob *pNode,
  Blob *pPrev, 
  const char *zTerm,
  int nTerm,
  const char *aDoclist,
  int nDoclist
){
  int rc = SQLITE_OK;
  int bFirst = (pPrev->n==0);
  int nPrefix;
  int nSuffix;

  blobGrowBuffer(pPrev, nTerm, &rc);
  if( rc!=SQLITE_OK ) return rc;

  nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm);
  nSuffix = nTerm - nPrefix;
  memcpy(pPrev->a, zTerm, nTerm);
  pPrev->n = nTerm;

  if( bFirst==0 ){
    pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix);
  }
  pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix);
  memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix);
  pNode->n += nSuffix;

  if( aDoclist ){
    pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist);
    memcpy(&pNode->a[pNode->n], aDoclist, nDoclist);
    pNode->n += nDoclist;
  }

  return SQLITE_OK;
}

static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){
  pNode->a[0] = (char)iHeight;
  if( iChild ){
    assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) );
    pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild);
  }else{
    assert( pNode->nAlloc>=1 );
................................................................................
  int nNode,                      /* Size of aNode in bytes */
  Blob *pNew,                     /* OUT: Write new node image here */
  const char *zTerm,              /* Omit all terms smaller than this */
  int nTerm,                      /* Size of zTerm in bytes */
  sqlite3_int64 *piBlock          /* OUT: Block number in next layer down */
){
  NodeReader reader;              /* Reader object */
  Blob prev = {0, 0, 0};
  int rc = SQLITE_OK;
  int bStarted = 0;
  int bLeaf = aNode[0]=='\0';     /* True for a leaf node */

  /* Allocate required output space */
  blobGrowBuffer(pNew, nNode, &rc);
  if( rc!=SQLITE_OK ) return rc;
  pNew->n = 0;

  /* Populate new node buffer */
  for(rc = nodeReaderInit(&reader, aNode, nNode); 
      rc==SQLITE_OK && reader.aNode; 
      rc = nodeReaderNext(&reader)
  ){
    if( bStarted==0 ){
      int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm);
      if( res<0 || (bLeaf==0 && res==0) ) continue;
      pNew->a[0] = aNode[0];
      fts3StartNode(pNew, (int)aNode[0], reader.iChild);
      *piBlock = reader.iChild;
      bStarted = 1;
    }
    rc = fts3AppendToNode(
        pNew, &prev, reader.term.a, reader.term.n,
        reader.aDoclist, reader.nDoclist
    );
    if( rc!=SQLITE_OK ) break;
  }
  if( bStarted==0 ){
    fts3StartNode(pNew, (int)aNode[0], reader.iChild);
    *piBlock = reader.iChild;
  }
  assert( pNew->n<=pNew->nAlloc );

  nodeReaderRelease(&reader);
  sqlite3_free(prev.a);
  return rc;
}










static int fts3TruncateSegment(
  Fts3Table *p,                   /* FTS3 table handle */
  sqlite3_int64 iAbsLevel,        /* Absolute level of segment to modify */
  int iIdx,                       /* Index within level of segment to modify */
  const char *zTerm,              /* Remove terms smaller than this */
  int nTerm                       /* Number of bytes in buffer zTerm */
){
................................................................................
**
** Each input segment is either removed from the db completely (if all of
** its data was copied to the output segment by the incrmerge operation)
** or modified in place so that it no longer contains those entries that
** have been duplicated in the output segment.
*/
static int fts3IncrmergeChomp(
  Fts3Table *p,
  sqlite3_int64 iAbsLevel,
  Fts3MultiSegReader *pCsr
){
  int i;
  int rc = SQLITE_OK;

  for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){
    Fts3SegReader *pSeg = 0;
    int j;
................................................................................
    }
  }

  return rc;
}

/*
** Attempt an incremental merge that writes nMerge leaf pages.
**
** Incremental merges happen two segments at a time. The two
** segments to be merged are the two oldest segments (the ones with
** the smallest index) in the highest level that has at least
** nMin segments. Multiple segment pair merges might occur in
** an attempt to write the quota of nMerge leaf pages.
*/
static int fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
  int rc = SQLITE_OK;             /* Return code */
  int nRem = nMerge;              /* Number of leaf pages yet to  be written */

  assert( nMin>=2 );

................................................................................
    pFilter = (Fts3SegFilter *)&pWriter[1];
    pCsr = (Fts3MultiSegReader *)&pFilter[1];

    /* Open a cursor to iterate through the contents of indexes 0 and 1 of
    ** the selected absolute level. */
    pFilter->flags = FTS3_SEGMENT_REQUIRE_POS;
    rc = fts3IncrmergeCsr(p, iAbsLevel, nMin, pCsr);
sqlite3_log(SQLITE_OK, "%d-way merge from level=%d to level=%d", 
    nMin, (int)iAbsLevel, (int)iAbsLevel+1
);
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter);
    }
    if( rc==SQLITE_OK ){
      if( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) ){
        rc = fts3IncrmergeWriter(p, iAbsLevel, pCsr, pWriter);
        if( rc==SQLITE_OK ){







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|
<
<







 







>
|











>



<

>







 







|







 







>
>
>
>
|





>
>
>
>







|







 







>
>
>
>
>
>
>
>
>













>
>
>
>
>
>
>
>
>
>







 







>
>
>





|
>
>
>
>







 







|







|





|









|




|




|








|


|






|
|





|

|
|
|

|
|
|









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>









<


<
<
<
<
>
>
>

|
|









|
|






|












|
|
|








|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>





|
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|




|




|

>

<
>
|
<
>

|
|








|










<
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>

<
>
|
<
>













>
>
>
>

>
>
>
>
>
>
>
>









|
|
|
|
|
|
|
|
|
|
|
|
>
>







 







|








|


|
|
|

|
|



|


|

|

|
|



|
|

|

|
|







 







|

|
<
<
<
<
<
<
<
<
<
<
<
<
<










|
|
<
|

|
|


|

|







 







|
|







 







|

|










>
>
>
>


|
|
<
>

<
>
|
|







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|
|
<












|


<


<







|










>
>
>
>
>
>
>
>
>







 







|
|
|







 







|

|
|
|
|
|







 







|
<
<







61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
...
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
...
531
532
533
534
535
536
537



















538
539
540
541
542
543
544
....
1097
1098
1099
1100
1101
1102
1103
1104


1105
1106
1107
1108
1109
1110
1111
....
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273

3274
3275
3276
3277
3278
3279
3280
3281
3282
....
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
....
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
....
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
....
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
....
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630

3631
3632




3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763

3764
3765

3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788

3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
....
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823

3824
3825

3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
....
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
....
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977













3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989

3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
....
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
....
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086

4087
4088

4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
....
4107
4108
4109
4110
4111
4112
4113





































4114
4115
4116
4117
4118
4119
4120
....
4135
4136
4137
4138
4139
4140
4141
4142
4143

4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158

4159
4160

4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
....
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
....
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
....
4352
4353
4354
4355
4356
4357
4358
4359


4360
4361
4362
4363
4364
4365
4366
int test_fts3_node_chunk_threshold = (4*1024)*4;
# define FTS3_NODE_CHUNKSIZE       test_fts3_node_chunksize
# define FTS3_NODE_CHUNK_THRESHOLD test_fts3_node_chunk_threshold
#else
# define FTS3_NODE_CHUNKSIZE (4*1024) 
# define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4)
#endif


/*
** If FTS_LOG_MERGES is defined, call sqlite3_log() to report each automatic
** and incremental merge operation that takes place. This is used for 
** debugging FTS only, it should not usually be turned on in production
** systems.
*/
#ifdef FTS3_LOG_MERGES
static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){
  sqlite3_log(SQLITE_OK, "%d-way merge from level %d", nMerge, (int)iAbsLevel);
}
#else
#define fts3LogMerge(x, y)
#endif


typedef struct PendingList PendingList;
typedef struct SegmentNode SegmentNode;
typedef struct SegmentWriter SegmentWriter;

/*
** An instance of the following data structure is used to build doclists
................................................................................
    for(i=0; rc==SQLITE_OK && i<nParam; i++){
      rc = sqlite3_bind_value(pStmt, i+1, apVal[i]);
    }
  }
  *pp = pStmt;
  return rc;
}


static int fts3SelectDocsize(
  Fts3Table *pTab,                /* FTS3 table handle */
  int eStmt,                      /* Either SQL_SELECT_DOCSIZE or DOCTOTAL */
  sqlite3_int64 iDocid,           /* Docid to bind for SQL_SELECT_DOCSIZE */
  sqlite3_stmt **ppStmt           /* OUT: Statement handle */
){
................................................................................
  assert( iLangid>=0 );
  assert( p->nIndex>0 );
  assert( iIndex>=0 && iIndex<p->nIndex );

  iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL;
  return iBase + iLevel;
}




















/*
** Set *ppStmt to a statement handle that may be used to iterate through
** all rows in the %_segdir table, from oldest to newest. If successful,
** return SQLITE_OK. If an error occurs while preparing the statement, 
** return an SQLite error code.
**
................................................................................
  if( rc==SQLITE_OK ){
    /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already
    ** full, merge all segments in level iLevel into a single iLevel+1
    ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise,
    ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
    */
    if( iNext>=FTS3_MERGE_COUNT ){
      fts3LogMerge(16, getAbsoluteLevel(iLevel, iLangid, iIndex));


      rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
      *piIdx = 0;
    }else{
      *piIdx = iNext;
    }
  }

................................................................................
  return rc;
}


/*
** This function opens a cursor used to read the input data for an 
** incremental merge operation. Specifically, it opens a cursor to scan
** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute 
** level iAbsLevel.
*/
static int fts3IncrmergeCsr(
  Fts3Table *p,                   /* FTS3 table handle */
  sqlite3_int64 iAbsLevel,        /* Absolute level to open */
  int nSeg,                       /* Number of segments to merge */
  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
){
  int rc;                         /* Return Code */
  sqlite3_stmt *pStmt = 0;        /* Statement used to read %_segdir entry */  
  int nByte;                      /* Bytes allocated at pCsr->apSegment[] */

  /* Allocate space for the Fts3MultiSegReader.aCsr[] array */
  assert( nSeg>=2 );
  memset(pCsr, 0, sizeof(*pCsr));
  nByte = sizeof(Fts3SegReader *) * nSeg;

  pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte);

  if( pCsr->apSegment==0 ){
    rc = SQLITE_NOMEM;
  }else{
    memset(pCsr->apSegment, 0, nByte);
    pCsr->nSegment = nSeg;
    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
  }
................................................................................
    if( rc==SQLITE_OK ) rc = rc2;
  }

  return rc;
}

typedef struct IncrmergeWriter IncrmergeWriter;
typedef struct NodeWriter NodeWriter;
typedef struct Blob Blob;
typedef struct NodeReader NodeReader;

/*
** An instance of the following structure is used as a dynamic buffer
** to build up nodes or other blobs of data in.
**
................................................................................
*/
struct Blob {
  char *a;                        /* Pointer to allocation */
  int n;                          /* Number of valid bytes of data in a[] */
  int nAlloc;                     /* Allocated size of a[] (nAlloc>=n) */
};

/*
** This structure is used to build up buffers containing segment b-tree 
** nodes (blocks).
*/
struct NodeWriter {
  sqlite3_int64 iBlock;           /* Current block id */
  Blob key;                       /* Last key written to the current block */
  Blob block;                     /* Current block image */
};

/*
** An object of this type contains the state required to create or append
** to an appendable b-tree segment.
*/
struct IncrmergeWriter {
  int nLeafEst;                   /* Space allocated for leaf blocks */
  int nWork;                      /* Number of leaf pages flushed */
  sqlite3_int64 iAbsLevel;        /* Absolute level of input segments */
  int iIdx;                       /* Index of *output* segment in iAbsLevel+1 */
  sqlite3_int64 iStart;           /* Block number of first allocated block */
  sqlite3_int64 iEnd;             /* Block number of last allocated block */
  NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT];
};

/*
** An object of the following type is used to read data from a single
** FTS segment node. See the following functions:
**
**     nodeReaderInit()
................................................................................
  /* Output variables. Containing the current node entry. */
  sqlite3_int64 iChild;           /* Pointer to child node */
  Blob term;                      /* Current term */
  const char *aDoclist;           /* Pointer to doclist */
  int nDoclist;                   /* Size of doclist in bytes */
};

/*
** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
** Otherwise, if the allocation at pBlob->a is not already at least nMin
** bytes in size, extend (realloc) it to be so.
**
** If an OOM error occurs, set *pRc to SQLITE_NOMEM and leave pBlob->a
** unmodified. Otherwise, if the allocation succeeds, update pBlob->nAlloc
** to reflect the new size of the pBlob->a[] buffer.
*/
static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){
  if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){
    int nAlloc = nMin;
    char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc);
    if( a ){
      pBlob->nAlloc = nAlloc;
      pBlob->a = a;
    }else{
      *pRc = SQLITE_NOMEM;
    }
  }
}

/*
** Attempt to advance the node-reader object passed as the first argument to
** the next entry on the node. 
**
** Return an error code if an error occurs (SQLITE_NOMEM is possible). 
** Otherwise return SQLITE_OK. If there is no next entry on the node
** (e.g. because the current entry is the last) set NodeReader->aNode to
** NULL to indicate EOF. Otherwise, populate the NodeReader structure output 
** variables for the new entry.
*/
static int nodeReaderNext(NodeReader *p){
  int bFirst = (p->term.n==0);    /* True for first term on the node */
  int nPrefix = 0;                /* Bytes to copy from previous term */
  int nSuffix = 0;                /* Bytes to append to the prefix */
  int rc = SQLITE_OK;             /* Return code */

  assert( p->aNode );
................................................................................
  }

  assert( p->iOff<=p->nNode );

  return rc;
}

/*
** Release all dynamic resources held by node-reader object *p.
*/
static void nodeReaderRelease(NodeReader *p){
  sqlite3_free(p->term.a);
}

/*
** Initialize a node-reader object to read the node in buffer aNode/nNode.
**
** If successful, SQLITE_OK is returned and the NodeReader object set to 
** point to the first entry on the node (if any). Otherwise, an SQLite
** error code is returned.
*/
static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){
  memset(p, 0, sizeof(NodeReader));
  p->aNode = aNode;
  p->nNode = nNode;

  /* Figure out if this is a leaf or an internal node. */
................................................................................
** This function is called while writing an FTS segment each time a leaf o
** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed
** to be greater than the largest key on the node just written, but smaller
** than or equal to the first key that will be written to the next leaf
** node.
**
** The block id of the leaf node just written to disk may be found in
** (pWriter->aNodeWriter[0].iBlock) when this function is called.
*/
static int fts3IncrmergePush(
  Fts3Table *p,                   /* Fts3 table handle */
  IncrmergeWriter *pWriter,       /* Writer object */
  const char *zTerm,              /* Term to write to internal node */
  int nTerm                       /* Bytes at zTerm */
){
  sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock;
  int iLayer;

  assert( nTerm>0 );
  for(iLayer=1; ALWAYS(iLayer<FTS_MAX_APPENDABLE_HEIGHT); iLayer++){
    sqlite3_int64 iNextPtr = 0;
    NodeWriter *pNode = &pWriter->aNodeWriter[iLayer];
    int rc = SQLITE_OK;
    int nPrefix;
    int nSuffix;
    int nSpace;

    /* Figure out how much space the key will consume if it is written to
    ** the current node of layer iLayer. Due to the prefix compression, 
    ** the space required changes depending on which node the key is to
    ** be added to.  */
    nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm);
    nSuffix = nTerm - nPrefix;
    nSpace  = sqlite3Fts3VarintLen(nPrefix);
    nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;

    if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){ 
      /* If the current node of layer iLayer contains zero keys, or if adding
      ** the key to it will not cause it to grow to larger than nNodeSize 
      ** bytes in size, write the key here.  */

      Blob *pBlk = &pNode->block;
      if( pBlk->n==0 ){
        blobGrowBuffer(pBlk, p->nNodeSize, &rc);
        if( rc==SQLITE_OK ){
          pBlk->a[0] = (char)iLayer;
          pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr);
        }
      }
      blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc);
      blobGrowBuffer(&pNode->key, nTerm, &rc);

      if( rc==SQLITE_OK ){
        if( pNode->key.n ){
          pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix);
        }
        pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix);
        memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix);
        pBlk->n += nSuffix;

        memcpy(pNode->key.a, zTerm, nTerm);
        pNode->key.n = nTerm;
      }
    }else{
      /* Otherwise, flush the the current node of layer iLayer to disk.
      ** Then allocate a new, empty sibling node. The key will be written
      ** into the parent of this node. */
      rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n);

      assert( pNode->block.nAlloc>=p->nNodeSize );
      pNode->block.a[0] = (char)iLayer;
      pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1);

      iNextPtr = pNode->iBlock;
      pNode->iBlock++;
      pNode->key.n = 0;
    }

    if( rc!=SQLITE_OK || iNextPtr==0 ) return rc;
    iPtr = iNextPtr;
  }

  assert( 0 );
}

/*
** Append a term and (optionally) doclist to the FTS segment node currently
** stored in blob *pNode. The node need not contain any terms, but the
** header must be written before this function is called.
**
** A node header is a single 0x00 byte for a leaf node, or a height varint
** followed by the left-hand-child varint for an internal node.
**
** The term to be appended is passed via arguments zTerm/nTerm. For a 
** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal
** node, both aDoclist and nDoclist must be passed 0.
**
** If the size of the value in blob pPrev is zero, then this is the first
** term written to the node. Otherwise, pPrev contains a copy of the 
** previous term. Before this function returns, it is updated to contain a
** copy of zTerm/nTerm.
**
** It is assumed that the buffer associated with pNode is already large
** enough to accommodate the new entry. The buffer associated with pPrev
** is extended by this function if requrired.
**
** If an error (i.e. OOM condition) occurs, an SQLite error code is
** returned. Otherwise, SQLITE_OK.
*/
static int fts3AppendToNode(
  Blob *pNode,                    /* Current node image to append to */
  Blob *pPrev,                    /* Buffer containing previous term written */
  const char *zTerm,              /* New term to write */
  int nTerm,                      /* Size of zTerm in bytes */
  const char *aDoclist,           /* Doclist (or NULL) to write */
  int nDoclist                    /* Size of aDoclist in bytes */ 
){
  int rc = SQLITE_OK;             /* Return code */
  int bFirst = (pPrev->n==0);     /* True if this is the first term written */
  int nPrefix;                    /* Size of term prefix in bytes */
  int nSuffix;                    /* Size of term suffix in bytes */

  /* Node must have already been started. There must be a doclist for a
  ** leaf node, and there must not be a doclist for an internal node.  */
  assert( pNode->n>0 );
  assert( (pNode->a[0]=='\0')==(aDoclist!=0) );

  blobGrowBuffer(pPrev, nTerm, &rc);
  if( rc!=SQLITE_OK ) return rc;

  nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm);
  nSuffix = nTerm - nPrefix;
  memcpy(pPrev->a, zTerm, nTerm);
  pPrev->n = nTerm;

  if( bFirst==0 ){
    pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix);
  }
  pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix);
  memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix);
  pNode->n += nSuffix;

  if( aDoclist ){
    pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist);
    memcpy(&pNode->a[pNode->n], aDoclist, nDoclist);
    pNode->n += nDoclist;
  }

  assert( pNode->n<=pNode->nAlloc );

  return SQLITE_OK;
}

/*
** Append the current term and doclist pointed to by cursor pCsr to the
** appendable b-tree segment opened for writing by pWriter.
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise.
*/
static int fts3IncrmergeAppend(
  Fts3Table *p,                   /* Fts3 table handle */
  IncrmergeWriter *pWriter,       /* Writer object */
  Fts3MultiSegReader *pCsr        /* Cursor containing term and doclist */
){
  const char *zTerm = pCsr->zTerm;
  int nTerm = pCsr->nTerm;
  const char *aDoclist = pCsr->aDoclist;
  int nDoclist = pCsr->nDoclist;

  int rc = SQLITE_OK;           /* Return code */
  int nSpace;                   /* Total space in bytes required on leaf */




  int nPrefix;                  /* Size of prefix shared with previous term */
  int nSuffix;                  /* Size of suffix (nTerm - nPrefix) */
  NodeWriter *pLeaf;            /* Object used to write leaf nodes */

  pLeaf = &pWriter->aNodeWriter[0];
  nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm);
  nSuffix = nTerm - nPrefix;

  nSpace  = sqlite3Fts3VarintLen(nPrefix);
  nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
  nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;

  /* If the current block is not empty, and if adding this term/doclist
  ** to the current block would make it larger than Fts3Table.nNodeSize
  ** bytes, write this block out to the database. */
  if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){
    rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n);
    pWriter->nWork++;

    /* Add the current term to the parent node. The term added to the 
    ** parent must:
    **
    **   a) be greater than the largest term on the leaf node just written
    **      to the database (still available in pLeaf->key), and
    **
    **   b) be less than or equal to the term about to be added to the new
    **      leaf node (zTerm/nTerm).
    **
    ** In other words, it must be the prefix of zTerm 1 byte longer than
    ** the common prefix (if any) of zTerm and pWriter->zTerm.
    */
    if( rc==SQLITE_OK ){
      rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1);
    }

    /* Advance to the next output block */
    pLeaf->iBlock++;
    pLeaf->key.n = 0;
    pLeaf->block.n = 0;

    nPrefix = 0;
    nSuffix = nTerm;
    nSpace  = 1;
    nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
    nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
  }

  blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc);

  if( rc==SQLITE_OK ){
    if( pLeaf->block.n==0 ){
      pLeaf->block.n = 1;
      pLeaf->block.a[0] = '\0';
    }
    rc = fts3AppendToNode(
        &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist
    );
  }

  return rc;
}

/*
** This function is called to release all dynamic resources held by the
** merge-writer object pWriter, and if no error has occurred, to flush
** all outstanding node buffers held by pWriter to disk.
**
** If *pRc is not SQLITE_OK when this function is called, then no attempt
** is made to write any data to disk. Instead, this function serves only
** to release outstanding resources.
**
** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while
** flushing buffers to disk, *pRc is set to an SQLite error code before
** returning.
*/
static void fts3IncrmergeRelease(
  Fts3Table *p,                   /* FTS3 table handle */
  IncrmergeWriter *pWriter,       /* Merge-writer object */
  int *pRc                        /* IN/OUT: Error code */
){
  int i;                          /* Used to iterate through non-root layers */
  int iRoot;                      /* Index of root in pWriter->aNodeWriter */
  NodeWriter *pRoot;              /* NodeWriter for root node */
  int rc = *pRc;                  /* Error code */

  /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment 
  ** root node. If the segment fits entirely on a single leaf node, iRoot
  ** will be set to 0. If the root node is the parent of the leaves, iRoot
  ** will be 1. And so on.  */
  for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){
    NodeWriter *pNode = &pWriter->aNodeWriter[iRoot];
    if( pNode->block.n>0 ) break;
    assert( *pRc || pNode->block.nAlloc==0 );
    assert( *pRc || pNode->key.nAlloc==0 );
    sqlite3_free(pNode->block.a);
    sqlite3_free(pNode->key.a);
  }

  /* Empty output segment. This is a no-op. */
  if( iRoot<0 ) return;

  /* The entire output segment fits on a single node. Normally, this means
  ** the node would be stored as a blob in the "root" column of the %_segdir
  ** table. However, this is not permitted in this case. The problem is that 
  ** space has already been reserved in the %_segments table, and so the 
  ** start_block and end_block fields of the %_segdir table must be populated. 
  ** And, by design or by accident, released versions of FTS cannot handle 
  ** segments that fit entirely on the root node with start_block!=0.
  **
  ** Instead, create a synthetic root node that contains nothing but a 
  ** pointer to the single content node. So that the segment consists of a
  ** single leaf and a single interior (root) node.
  **
  ** Todo: Better might be to defer allocating space in the %_segments 
  ** table until we are sure it is needed.
  */
  if( iRoot==0 ){
    Blob *pBlock = &pWriter->aNodeWriter[1].block;
    blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc);
    if( rc==SQLITE_OK ){
      pBlock->a[0] = 0x01;
      pBlock->n = 1 + sqlite3Fts3PutVarint(
          &pBlock->a[1], pWriter->aNodeWriter[0].iBlock
      );
    }
    iRoot = 1;
  }
  pRoot = &pWriter->aNodeWriter[iRoot];

  /* Flush all currently outstanding nodes to disk. */
  for(i=0; i<iRoot; i++){

    NodeWriter *pNode = &pWriter->aNodeWriter[i];
    if( pNode->block.n>0 && rc==SQLITE_OK ){

      rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n);
    }
    sqlite3_free(pNode->block.a);
    sqlite3_free(pNode->key.a);
  }

  /* Write the %_segdir record. */
  if( rc==SQLITE_OK ){
    rc = fts3WriteSegdir(p, 
        pWriter->iAbsLevel+1,               /* level */
        pWriter->iIdx,                      /* idx */
        pWriter->iStart,                    /* start_block */
        pWriter->aNodeWriter[0].iBlock,     /* leaves_end_block */
        pWriter->iEnd,                      /* end_block */
        pRoot->block.a, pRoot->block.n      /* root */
    );
  }
  sqlite3_free(pRoot->block.a);
  sqlite3_free(pRoot->key.a);

  *pRc = rc;
}


/*
** Compare the term in buffer zLhs (size in bytes nLhs) with that in
** zRhs (size in bytes nRhs) using memcmp. If one term is a prefix of
** the other, it is considered to be smaller than the other.
**
** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve
** if it is greater.
*/
static int fts3TermCmp(
  const char *zLhs, int nLhs,     /* LHS of comparison */
  const char *zRhs, int nRhs      /* RHS of comparison */
){
  int nCmp = MIN(nLhs, nRhs);
  int res;

................................................................................
  res = memcmp(zLhs, zRhs, nCmp);
  if( res==0 ) res = nLhs - nRhs;

  return res;
}


/*
** Query to see if the entry in the %_segments table with blockid iEnd is 
** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before
** returning. Otherwise, set *pbRes to 0. 
**
** Or, if an error occurs while querying the database, return an SQLite 
** error code. The final value of *pbRes is undefined in this case.
**
** This is used to test if a segment is an "appendable" segment. If it
** is, then a NULL entry has been inserted into the %_segments table
** with blockid %_segdir.end_block.
*/
static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){

  int bRes = 0;                   /* Result to set *pbRes to */
  sqlite3_stmt *pCheck = 0;       /* Statement to query database with */

  int rc;                         /* Return code */

  rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0);
  if( rc==SQLITE_OK ){
    sqlite3_bind_int64(pCheck, 1, iEnd);
    if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1;
    rc = sqlite3_reset(pCheck);
  }
  
  *pbRes = bRes;
  return rc;
}

/*
** This function is called when initializing an incremental-merge operation.
** It checks if the existing segment with index value iIdx at absolute level 
** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the
** merge-writer object *pWriter is initialized to write to it.
**
** An existing segment can be appended to by an incremental merge if:
**
**   * It was initially created as an appendable segment (with all required
**     space pre-allocated), and
**
**   * The first key read from the input (arguments zKey and nKey) is 
**     greater than the largest key currently stored in the potential
**     output segment.
*/
static int fts3IncrmergeLoad(
  Fts3Table *p,                   /* Fts3 table handle */
  sqlite3_int64 iAbsLevel,        /* Absolute level of input segments */
  int iIdx,                       /* Index of candidate output segment */
  const char *zKey,               /* First key to write */
  int nKey,                       /* Number of bytes in nKey */
  IncrmergeWriter *pWriter        /* Populate this object */
){
  int rc;                         /* Return code */
  sqlite3_stmt *pSelect = 0;      /* SELECT to read %_segdir entry */

  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0);
  if( rc==SQLITE_OK ){
    sqlite3_int64 iStart = 0;     /* Value of %_segdir.start_block */
    sqlite3_int64 iLeafEnd = 0;   /* Value of %_segdir.leaves_end_block */
    sqlite3_int64 iEnd = 0;       /* Value of %_segdir.end_block */
    const char *aRoot = 0;        /* Pointer to %_segdir.root buffer */
    int nRoot = 0;                /* Size of aRoot[] in bytes */
    int rc2;                      /* Return code from sqlite3_reset() */
    int bAppendable = 0;          /* Set to true if segment is appendable */

    /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */
    sqlite3_bind_int64(pSelect, 1, iAbsLevel+1);
    sqlite3_bind_int(pSelect, 2, iIdx);
    if( sqlite3_step(pSelect)==SQLITE_ROW ){
      iStart = sqlite3_column_int64(pSelect, 1);
      iLeafEnd = sqlite3_column_int64(pSelect, 2);
      iEnd = sqlite3_column_int64(pSelect, 3);
      nRoot = sqlite3_column_bytes(pSelect, 4);
................................................................................
    }

    if( rc==SQLITE_OK && bAppendable ){
      /* It is possible to append to this segment. Set up the IncrmergeWriter
      ** object to do so.  */
      int i;
      int nHeight = (int)aRoot[0];
      NodeWriter *pNode;

      pWriter->nLeafEst = ((iEnd - iStart) + 1) / FTS_MAX_APPENDABLE_HEIGHT;
      pWriter->iStart = iStart;
      pWriter->iEnd = iEnd;
      pWriter->iAbsLevel = iAbsLevel;
      pWriter->iIdx = iIdx;

      for(i=nHeight+1; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
        pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
      }

      pNode = &pWriter->aNodeWriter[nHeight];
      pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight;
      blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc);
      if( rc==SQLITE_OK ){
        memcpy(pNode->block.a, aRoot, nRoot);
        pNode->block.n = nRoot;
      }

      for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){
        pNode = &pWriter->aNodeWriter[i];
        NodeReader reader;

        rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n);
        while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader);
        blobGrowBuffer(&pNode->key, reader.term.n, &rc);
        if( rc==SQLITE_OK ){
          memcpy(pNode->key.a, reader.term.a, reader.term.n);
          pNode->key.n = reader.term.n;
          if( i>0 ){
            char *aBlock = 0;
            int nBlock = 0;
            pNode = &pWriter->aNodeWriter[i-1];
            pNode->iBlock = reader.iChild;
            rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0);
            blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc);
            if( rc==SQLITE_OK ){
              memcpy(pNode->block.a, aBlock, nBlock);
              pNode->block.n = nBlock;
            }
            sqlite3_free(aBlock);
          }
        }
        nodeReaderRelease(&reader);
      }
    }
................................................................................
  }

  return rc;
}

/* 
** Either allocate an output segment or locate an existing appendable 
** output segment to append to. An "appendable" output segment is
** slightly different to a normal one, as the required range of keys in
** the %_segments table must be allocated up front.













**
** In the %_segdir table, a segment is defined by the values in three
** columns:
**
**     start_block
**     leaves_end_block
**     end_block
**
** When an appendable segment is allocated, it is estimated that the
** maximum number of leaf blocks that may be required is the sum of the
** number of leaf blocks consumed by the input segments, plus the number
** of input segments, multiplied by two. This value is stored in stack 

** variable nLeafEst.
**
** A total of 16*nLeafEst blocks are allocated when an appendable segment
** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous
** array of leaf nodes starts at the first block allocated. The array
** of interior nodes that are parents of the leaf nodes start at block
** (start_block + (1 + end_block - start_block) / 16). And so on.
**
** In the actual code below, the value "16" is replaced with the 
** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT.
*/
static int fts3IncrmergeWriter( 
  Fts3Table *p,                   /* Fts3 table handle */
  sqlite3_int64 iAbsLevel,        /* Absolute level of input segments */
  Fts3MultiSegReader *pCsr,       /* Cursor that data will be read from */
  IncrmergeWriter *pWriter        /* Populate this object */
................................................................................
  int rc;                         /* Return Code */
  int i;                          /* Iterator variable */
  int nLeafEst;                   /* Blocks allocated for leaf nodes */
  int iIdx;                       /* Index of output segment */
  sqlite3_stmt *pLeafEst = 0;     /* SQL used to determine nLeafEst */
  sqlite3_stmt *pFirstBlock = 0;  /* SQL used to determine first block */
  sqlite3_stmt *pOutputIdx = 0;   /* SQL used to find output index */
  const char *zKey = pCsr->zTerm; /* First key to be appended to output */
  int nKey = pCsr->nTerm;         /* Size of zKey in bytes */

  rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0);
  if( rc==SQLITE_OK ){
    sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1);
    sqlite3_step(pOutputIdx);
    iIdx = sqlite3_column_int(pOutputIdx, 0) - 1;
    rc = sqlite3_reset(pOutputIdx);
................................................................................
  rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0);
  if( rc!=SQLITE_OK ) return rc;

  pWriter->iAbsLevel = iAbsLevel;
  pWriter->nLeafEst = nLeafEst;
  pWriter->iIdx = iIdx;

  /* Set up the array of NodeWriter objects */
  for(i=0; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
    pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
  }
  return SQLITE_OK;
}

/*
** Remove an entry from the %_segdir table. This involves running the 
** following two statements:
**
**   DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx
**   UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx
**
** The DELETE statement removes the specific %_segdir level. The UPDATE 
** statement ensures that the remaining segments have contiguously allocated
** idx values.
*/
static int fts3RemoveSegdirEntry(
  Fts3Table *p,                   /* FTS3 table handle */
  sqlite3_int64 iAbsLevel,        /* Absolute level to delete from */

  int iIdx                        /* Index of %_segdir entry to delete */
){

  int rc;                         /* Return code */
  sqlite3_stmt *pDelete = 0;      /* DELETE statement */
  sqlite3_stmt *pUpdate = 0;      /* UPDATE statement */

  rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0);
  if( rc==SQLITE_OK ){
    sqlite3_bind_int64(pDelete, 1, iAbsLevel);
    sqlite3_bind_int(pDelete, 2, iIdx);
    sqlite3_step(pDelete);
    rc = sqlite3_reset(pDelete);
................................................................................
    sqlite3_step(pUpdate);
    rc = sqlite3_reset(pUpdate);
  }

  return rc;
}






































static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){
  pNode->a[0] = (char)iHeight;
  if( iChild ){
    assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) );
    pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild);
  }else{
    assert( pNode->nAlloc>=1 );
................................................................................
  int nNode,                      /* Size of aNode in bytes */
  Blob *pNew,                     /* OUT: Write new node image here */
  const char *zTerm,              /* Omit all terms smaller than this */
  int nTerm,                      /* Size of zTerm in bytes */
  sqlite3_int64 *piBlock          /* OUT: Block number in next layer down */
){
  NodeReader reader;              /* Reader object */
  Blob prev = {0, 0, 0};          /* Previous term written to new node */
  int rc = SQLITE_OK;             /* Return code */

  int bLeaf = aNode[0]=='\0';     /* True for a leaf node */

  /* Allocate required output space */
  blobGrowBuffer(pNew, nNode, &rc);
  if( rc!=SQLITE_OK ) return rc;
  pNew->n = 0;

  /* Populate new node buffer */
  for(rc = nodeReaderInit(&reader, aNode, nNode); 
      rc==SQLITE_OK && reader.aNode; 
      rc = nodeReaderNext(&reader)
  ){
    if( pNew->n==0 ){
      int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm);
      if( res<0 || (bLeaf==0 && res==0) ) continue;

      fts3StartNode(pNew, (int)aNode[0], reader.iChild);
      *piBlock = reader.iChild;

    }
    rc = fts3AppendToNode(
        pNew, &prev, reader.term.a, reader.term.n,
        reader.aDoclist, reader.nDoclist
    );
    if( rc!=SQLITE_OK ) break;
  }
  if( pNew->n==0 ){
    fts3StartNode(pNew, (int)aNode[0], reader.iChild);
    *piBlock = reader.iChild;
  }
  assert( pNew->n<=pNew->nAlloc );

  nodeReaderRelease(&reader);
  sqlite3_free(prev.a);
  return rc;
}

/*
** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute 
** level iAbsLevel. This may involve deleting entries from the %_segments
** table, and modifying existing entries in both the %_segments and %_segdir
** tables.
**
** SQLITE_OK is returned if the segment is updated successfully. Or an
** SQLite error code otherwise.
*/
static int fts3TruncateSegment(
  Fts3Table *p,                   /* FTS3 table handle */
  sqlite3_int64 iAbsLevel,        /* Absolute level of segment to modify */
  int iIdx,                       /* Index within level of segment to modify */
  const char *zTerm,              /* Remove terms smaller than this */
  int nTerm                       /* Number of bytes in buffer zTerm */
){
................................................................................
**
** Each input segment is either removed from the db completely (if all of
** its data was copied to the output segment by the incrmerge operation)
** or modified in place so that it no longer contains those entries that
** have been duplicated in the output segment.
*/
static int fts3IncrmergeChomp(
  Fts3Table *p,                   /* FTS table handle */
  sqlite3_int64 iAbsLevel,        /* Absolute level containing segments */
  Fts3MultiSegReader *pCsr        /* Chomp all segments opened by this cursor */
){
  int i;
  int rc = SQLITE_OK;

  for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){
    Fts3SegReader *pSeg = 0;
    int j;
................................................................................
    }
  }

  return rc;
}

/*
** Attempt an incremental merge that writes nMerge leaf blocks.
**
** Incremental merges happen nMin segments at a time. The two
** segments to be merged are the nMin oldest segments (the ones with
** the smallest indexes) in the highest level that contains at least
** nMin segments. Multiple merges might occur in an attempt to write the 
** quota of nMerge leaf blocks.
*/
static int fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
  int rc = SQLITE_OK;             /* Return code */
  int nRem = nMerge;              /* Number of leaf pages yet to  be written */

  assert( nMin>=2 );

................................................................................
    pFilter = (Fts3SegFilter *)&pWriter[1];
    pCsr = (Fts3MultiSegReader *)&pFilter[1];

    /* Open a cursor to iterate through the contents of indexes 0 and 1 of
    ** the selected absolute level. */
    pFilter->flags = FTS3_SEGMENT_REQUIRE_POS;
    rc = fts3IncrmergeCsr(p, iAbsLevel, nMin, pCsr);
    fts3LogMerge(nMin, iAbsLevel);


    if( rc==SQLITE_OK ){
      rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter);
    }
    if( rc==SQLITE_OK ){
      if( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) ){
        rc = fts3IncrmergeWriter(p, iAbsLevel, pCsr, pWriter);
        if( rc==SQLITE_OK ){