SQLite

Check-in [af5bfb986e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Replace the DocList and DocListReader structures. The new structures distinguish reading from a static buffer from writing to a dynamic buffer. This allows n-way doclist merging, and in-place merging of segment leaf nodes, which together cut segment merge times in half. (CVS 3486)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: af5bfb986e39248abbfc6fff2e13c6f9e634a751
User & Date: shess 2006-10-25 21:00:10.000
Context
2006-10-25
23:22
Remove unreferenced local variable. (CVS 3487) (check-in: 2d3b22197c user: shess tags: trunk)
21:00
Replace the DocList and DocListReader structures. The new structures distinguish reading from a static buffer from writing to a dynamic buffer. This allows n-way doclist merging, and in-place merging of segment leaf nodes, which together cut segment merge times in half. (CVS 3486) (check-in: af5bfb986e user: shess tags: trunk)
20:27
Test to force edge cases in query logic. Basically, exercise code to handle lack of hits correctly. (CVS 3485) (check-in: 2cb5903366 user: shess tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts2/fts2.c.
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332



333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354

355
356
357
358
359
360
361



362
363
364
365
366
367
368
369
370




371

372
373
374
375
376
377
378

#if 0
# define TRACE(A)  printf A; fflush(stdout)
#else
# define TRACE(A)
#endif

/* utility functions */

typedef struct StringBuffer {
  int len;      /* length, not including null terminator */
  int alloced;  /* Space allocated for s[] */ 
  char *s;      /* Content of the string */
} StringBuffer;

static void initStringBuffer(StringBuffer *sb){
  sb->len = 0;
  sb->alloced = 100;
  sb->s = malloc(100);
  sb->s[0] = '\0';
}

static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
  if( sb->len + nFrom >= sb->alloced ){
    sb->alloced = sb->len + nFrom + 100;
    sb->s = realloc(sb->s, sb->alloced+1);
    if( sb->s==0 ){
      initStringBuffer(sb);
      return;
    }
  }
  memcpy(sb->s + sb->len, zFrom, nFrom);
  sb->len += nFrom;
  sb->s[sb->len] = 0;
}
static void append(StringBuffer *sb, const char *zFrom){
  nappend(sb, zFrom, strlen(zFrom));
}

/* Helper functions for certain common memory-allocation idioms:



**
** data_dup() - malloc+memcpy to duplicate a buffer
** data_replace() - realloc+memcpy to dup a buffer over an existing buffer
** data_append() - realloc+memcpy to append data to an existing buffer
** data_append2() - shorthand for calling data_append() twice.
*/
/* TODO(shess) There is a "block of binary data on the heap" construct
** in here which could be shared with (at least) the StringBuffer and
** DocList constructs.
*/
static void data_replace(char **ppTarget, int *pnTarget,
                         const char *pSource, int nSource){
  *ppTarget = realloc(*ppTarget, nSource);
  memcpy(*ppTarget, pSource, nSource);
  *pnTarget = nSource;
}

static void data_dup(char **ppTarget, int *pnTarget,
                     const char *pSource, int nSource){
  *ppTarget = malloc(nSource);
  memcpy(*ppTarget, pSource, nSource);
  *pnTarget = nSource;

}

static void data_append(char **ppTarget, int *pnTarget,
                        const char *pSource, int nSource){
  *ppTarget = realloc(*ppTarget, *pnTarget+nSource);
  memcpy(*ppTarget+*pnTarget, pSource, nSource);
  *pnTarget += nSource;



}

static void data_append2(char **ppTarget, int *pnTarget,
                         const char *pSource1, int nSource1,
                         const char *pSource2, int nSource2){
  *ppTarget = realloc(*ppTarget, *pnTarget+nSource1+nSource2);
  memcpy(*ppTarget+*pnTarget, pSource1, nSource1);
  memcpy(*ppTarget+*pnTarget+nSource1, pSource2, nSource2);
  *pnTarget += nSource1+nSource2;




}


/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
#define VARINT_MAX 10

/* Write a 64-bit variable-length integer to memory starting at p[0].
 * The length of data written will be between 1 and VARINT_MAX bytes.
 * The number of bytes written is returned. */







<
|
<
|
<
|
<
|
<
<
|
<
<
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
>
>
>

|
|
<
<

<
<
|
<
<
<
<
<
<
<
|
<
<
<
<
<
>
|
|
<
<
<
<
<
>
>
>
|

<
<
<
<
<
<
<
>
>
>
>
|
>







293
294
295
296
297
298
299

300

301

302

303


304


305
306
















307

308
309
310
311
312
313


314


315







316





317
318
319





320
321
322
323
324







325
326
327
328
329
330
331
332
333
334
335
336
337

#if 0
# define TRACE(A)  printf A; fflush(stdout)
#else
# define TRACE(A)
#endif


typedef enum DocListType {

  DL_DOCIDS,              /* docids only */

  DL_POSITIONS,           /* docids + positions */

  DL_POSITIONS_OFFSETS    /* docids + positions + offsets */


} DocListType;



/*
















** By default, only positions and not offsets are stored in the doclists.

** To change this so that offsets are stored too, compile with
**
**          -DDL_DEFAULT=DL_POSITIONS_OFFSETS
**
** If DL_DEFAULT is set to DL_DOCIDS, your table can only be inserted
** into (no deletes or updates).


*/


#ifndef DL_DEFAULT







# define DL_DEFAULT DL_POSITIONS





#endif

enum {





  POS_END = 0,        /* end of this position list */
  POS_COLUMN,         /* followed by new column number */
  POS_BASE
};








/* MERGE_COUNT controls how often we merge segments (see comment at
** top of file).
*/
#define MERGE_COUNT 16

/* utility functions */

/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
#define VARINT_MAX 10

/* Write a 64-bit variable-length integer to memory starting at p[0].
 * The length of data written will be between 1 and VARINT_MAX bytes.
 * The number of bytes written is returned. */
411
412
413
414
415
416
417
418

419
420
421
422
423
424
425
426
427





428
429
430
431
432

















433
434

435
436
437





438

439
440
441
442
443
444


445
446
447
448
449
450


451
452
453
454
455
456
457
458



459
460
461
462

463




464
465
466
467


468

469


470
471



472








473
474
475
476
477
478
479

480

481
482



483

484
485
486
487



488




489






490
491




492
493
494
495
496
497
498
499

500

501
502
503

504
505
506
507

508




509



510
511
512
513
514
515









516
517
518
519
520
521
522
523

524



525
526

527
528
529
530
531
532



533



534
535
536
537
538
539
540
541
542
543
544
545







546
547
548
549





550
551
552
553
554
555




556
















557
558
559
560
561
562
563






564
565
566
567
568
569
570
571
572
573
574
575


576
577
578

579
580


581




582
583
584

585
586
587
588
589
590
591
592
593
594
595
596
597





598
599
600

601

602











603
604
605
606
607
608
609



610

611
612

613

614

615



616
617
618




619
620
621
622
623
624




625

626



627
628
629
630
631
632


633
634
635
636
637
638
639
640
641

642
643

644



645
646
647
648
649
650


651



652





653




654


655

656




657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672

673
674
675
676
677
678
679
680

681








682
683






684






685











686
687
688
689
690
691



692


693

694
695
696
697
698

699







700


701
702
703

704
705
706

707
708
709






710

711
712
713
714
715
716
717
718
719

720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735

736

737



738
739
740
741
742
743
744




















745
746
747
748
749
750




751
752
753
754
755

756
757
758
759

760
761
762
763
764

765






766


767
768


769


770

771
772
773
774
775

776




777
778
779
780

781
782

783
784
785
786
787
788
789
790
791

792



793

794



795
796
797
798
799










800

801



802
803
804
805
806
807
808
809



810
811
812




813



814


815




816
817
818


819
820
821
822
823
824
825

826
827

828
829
830
831
832


833


834


835
836
837
838
839
840
841



842















843

844


845
846
847

848
849
850
851
852
853
854
855
856

857
858
859


860

861
862
863
864
865
866
867
868
869
870
871
872
873


874
875

876
877




878
879

880

881
882
883
884
885
886
887
888
889
890
891
892
893
894


895





896

897

898






899



900
901
902
903
904
905
906
907
908


909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945


946
947
948


949

950
951
952
953

954

955


956
957
958






959



960
961
962
963
964
965
966
967
968
969
970
971
972
973

974
975
976
977
978

979

980
981
982

983
984
985
986
987
988
989
990
991
992
993
994




995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011

1012
1013
1014
1015
1016

1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028




1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042

1043



1044

1045


1046
1047
1048

1049
1050
1051

1052



1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070

1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101

1102
1103
1104
1105

1106
1107
1108
1109
1110



1111
1112
1113
1114
1115
1116
1117
 sqlite_int64 i;
 int ret = getVarint(p, &i);
 *pi = (int) i;
 assert( *pi==i );
 return ret;
}

typedef enum DocListType {

  DL_DOCIDS,              /* docids only */
  DL_POSITIONS,           /* docids + positions */
  DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
} DocListType;

/*
** By default, only positions and not offsets are stored in the doclists.
** To change this so that offsets are stored too, compile with
**





**          -DDL_DEFAULT=DL_POSITIONS_OFFSETS
**
** If DL_DEFAULT is set to DL_DOCIDS, your table can only be inserted
** into (no deletes or updates).
*/

















#ifndef DL_DEFAULT
# define DL_DEFAULT DL_POSITIONS

#endif

typedef struct DocList {





  char *pData;

  int nData;
  DocListType iType;
  int iLastColumn;    /* the last column written */
  int iLastPos;       /* the last position written */
  int iLastOffset;    /* the last start offset written */
} DocList;



enum {
  POS_END = 0,        /* end of this position list */
  POS_COLUMN,         /* followed by new column number */
  POS_BASE
};



/* TODO(shess) I think it might be time to refactor the doclist
** manipulation.  Broadly put, there are four fairly discrete clients,
** tokenization, insert-time segment merging, query-time segment
** merging and query-time analysis.  The breakdown I think might be
** reasonable would be:
**
** DocListReader - Wraps const char *pData, int nData.



**   Used to traverse doclists
** DocListWriter - Starts empty, can add complete doclist elements.
**   Used in merging doclists.
** DocBuilder - Used when tokenizing documents.

*/





static void docListCoreInit(DocList *d, DocListType iType,
                            char *pData, int nData){
  d->nData = nData;


  d->pData = pData;

  d->iType = iType;


  d->iLastColumn = 0;
  d->iLastPos = d->iLastOffset = 0;



}









/* Initialize a new DocList pointing to static data.  Don't call
** docListDestroy() to release, just free(d) (if you originally
** malloced d).
*/
static void docListStaticInit(DocList *d, DocListType iType,
                              const char *pData, int nData){

  docListCoreInit(d, iType, (char *)pData, nData);

}




/* Initialize a new DocList to hold a copy of the given data. */

static void docListInit(DocList *d, DocListType iType,
                        const char *pData, int nData){
  char *p = 0;
  if( nData>0 ){



    p = malloc(nData);




    memcpy(p, pData, nData);






  }
  docListCoreInit(d, iType, p, nData);




}

/* Create a new dynamically-allocated DocList. */
static DocList *docListNew(DocListType iType){
  DocList *d = (DocList *) malloc(sizeof(DocList));
  docListInit(d, iType, 0, 0);
  return d;
}



static void docListDestroy(DocList *d){
  free(d->pData);
#ifndef NDEBUG

  memset(d, 0x55, sizeof(*d));
#endif
}


static void docListDelete(DocList *d){




  docListDestroy(d);



  free(d);
}

static char *docListEnd(DocList *d){
  return d->pData + d->nData;
}










/* Append a varint to a DocList's data. */
static void appendVarint(DocList *d, sqlite_int64 i){
  char c[VARINT_MAX];
  int n = putVarint(c, i);
  d->pData = realloc(d->pData, d->nData + n);
  memcpy(d->pData + d->nData, c, n);
  d->nData += n;

}




static void docListAddDocid(DocList *d, sqlite_int64 iDocid){

  appendVarint(d, iDocid);
  if( d->iType>=DL_POSITIONS ){
    appendVarint(d, POS_END);  /* initially empty position list */
    d->iLastColumn = 0;
    d->iLastPos = d->iLastOffset = 0;
  }



}




/* helper function for docListAddPos and docListAddPosOffset */
static void addPos(DocList *d, int iColumn, int iPos){
  assert( d->nData>0 );
  --d->nData;  /* remove previous terminator */
  if( iColumn!=d->iLastColumn ){
    assert( iColumn>d->iLastColumn );
    appendVarint(d, POS_COLUMN);
    appendVarint(d, iColumn);
    d->iLastColumn = iColumn;
    d->iLastPos = d->iLastOffset = 0;
  }







  assert( iPos>=d->iLastPos );
  appendVarint(d, iPos-d->iLastPos+POS_BASE);
  d->iLastPos = iPos;
}






/* Add a position to the last position list in a doclist. */
static void docListAddPos(DocList *d, int iColumn, int iPos){
  assert( d->iType==DL_POSITIONS );
  addPos(d, iColumn, iPos);
  appendVarint(d, POS_END);  /* add new terminator */




}

















/*
** Add a position and starting and ending offsets to a doclist.
**
** If the doclist is setup to handle only positions, then insert
** the position only and ignore the offsets.
*/






static void docListAddPosOffset(
  DocList *d,             /* Doclist under construction */
  int iColumn,            /* Column the inserted term is part of */
  int iPos,               /* Position of the inserted term */
  int iStartOffset,       /* Starting offset of inserted term */
  int iEndOffset          /* Ending offset of inserted term */
){
  assert( d->iType>=DL_POSITIONS );
  addPos(d, iColumn, iPos);
  if( d->iType==DL_POSITIONS_OFFSETS ){
    assert( iStartOffset>=d->iLastOffset );
    appendVarint(d, iStartOffset-d->iLastOffset);


    d->iLastOffset = iStartOffset;
    assert( iEndOffset>=iStartOffset );
    appendVarint(d, iEndOffset-iStartOffset);

  }
  appendVarint(d, POS_END);  /* add new terminator */


}





/*
** A DocListReader object is a cursor into a doclist.  Initialize

** the cursor to the beginning of the doclist by calling readerInit().
** Then use routines
**
**      peekDocid()
**      readDocid()
**      readPosition()
**      skipPositionList()
**      and so forth...
**
** to read information out of the doclist.  When we reach the end
** of the doclist, atEnd() returns TRUE.
*/
typedef struct DocListReader {





  DocList *pDoclist;  /* The document list we are stepping through */
  char *p;            /* Pointer to next unread byte in the doclist */
  int iLastColumn;

  int iLastPos;  /* the last position read, or -1 when not in a position list */

} DocListReader;












/*
** Initialize the DocListReader r to point to the beginning of pDoclist.
*/
static void readerInit(DocListReader *r, DocList *pDoclist){
  r->pDoclist = pDoclist;
  if( pDoclist!=NULL ){



    r->p = pDoclist->pData;

  }
  r->iLastColumn = -1;

  r->iLastPos = -1;

}





/*
** Return TRUE if we have reached then end of pReader and there is
** nothing else left to read.




*/
static int atEnd(DocListReader *pReader){
  return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist));
}

/* Peek at the next docid without advancing the read pointer. 




*/

static sqlite_int64 peekDocid(DocListReader *pReader){



  sqlite_int64 ret;
  assert( !atEnd(pReader) );
  assert( pReader->iLastPos==-1 );
  getVarint(pReader->p, &ret);
  return ret;
}



/* Read the next docid.   See also nextDocid().
*/
static sqlite_int64 readDocid(DocListReader *pReader){
  sqlite_int64 ret;
  assert( !atEnd(pReader) );
  assert( pReader->iLastPos==-1 );
  pReader->p += getVarint(pReader->p, &ret);
  if( pReader->pDoclist->iType>=DL_POSITIONS ){

    pReader->iLastColumn = 0;
    pReader->iLastPos = 0;

  }



  return ret;
}

/* Read the next position and column index from a position list.
 * Returns the position, or -1 at the end of the list. */
static int readPosition(DocListReader *pReader, int *iColumn){


  int i;



  int iType = pReader->pDoclist->iType;










  if( pReader->iLastPos==-1 ){


    return -1;

  }




  assert( !atEnd(pReader) );

  if( iType<DL_POSITIONS ){
    return -1;
  }
  pReader->p += getVarint32(pReader->p, &i);
  if( i==POS_END ){
    pReader->iLastColumn = pReader->iLastPos = -1;
    *iColumn = -1;
    return -1;
  }
  if( i==POS_COLUMN ){
    pReader->p += getVarint32(pReader->p, &pReader->iLastColumn);
    pReader->iLastPos = 0;
    pReader->p += getVarint32(pReader->p, &i);
    assert( i>=POS_BASE );

  }
  pReader->iLastPos += ((int) i)-POS_BASE;
  if( iType>=DL_POSITIONS_OFFSETS ){
    /* Skip over offsets, ignoring them for now. */
    int iStart, iEnd;
    pReader->p += getVarint32(pReader->p, &iStart);
    pReader->p += getVarint32(pReader->p, &iEnd);
  }

  *iColumn = pReader->iLastColumn;








  return pReader->iLastPos;
}













/* Skip past the end of a position list. */











static void skipPositionList(DocListReader *pReader){
  DocList *p = pReader->pDoclist;
  if( p && p->iType>=DL_POSITIONS ){
    int iColumn;
    while( readPosition(pReader, &iColumn)!=-1 ){}
  }



}




/* Skip over a docid, including its position list if the doclist has
 * positions. */
static void skipDocument(DocListReader *pReader){
  readDocid(pReader);
  skipPositionList(pReader);

}










/* Skip past all docids which are less than [iDocid].  Returns 1 if a docid
 * matching [iDocid] was found.  */
static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){

  sqlite_int64 d = 0;
  while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){
    skipDocument(pReader);

  }
  return !atEnd(pReader) && d==iDocid;
}








#ifdef SQLITE_DEBUG
/*
** This routine is used for debugging purpose only.
**
** Write the content of a doclist to standard output.
*/
static void printDoclist(DocList *p){
  DocListReader r;
  const char *zSep = "";


  readerInit(&r, p);
  while( !atEnd(&r) ){
    sqlite_int64 docid = readDocid(&r);
    if( docid==0 ){
      skipPositionList(&r);
      continue;
    }
    printf("%s%lld", zSep, docid);
    zSep =  ",";
    if( p->iType>=DL_POSITIONS ){
      int iPos, iCol;
      const char *zDiv = "";
      printf("(");
      while( (iPos = readPosition(&r, &iCol))>=0 ){
        printf("%s%d:%d", zDiv, iCol, iPos);

        zDiv = ":";

      }



      printf(")");
    }
  }
  printf("\n");
  fflush(stdout);
}
#endif /* SQLITE_DEBUG */





















/* Trim the given doclist to contain only positions in column
 * [iRestrictColumn]. */
static void docListRestrictColumn(DocList *in, int iRestrictColumn){
  DocListReader r;
  DocList out;





  assert( in->iType>=DL_POSITIONS );
  readerInit(&r, in);
  docListInit(&out, DL_POSITIONS, NULL, 0);


  while( !atEnd(&r) ){
    sqlite_int64 iDocid = readDocid(&r);
    int iPos, iColumn;


    docListAddDocid(&out, iDocid);
    while( (iPos = readPosition(&r, &iColumn)) != -1 ){
      if( iColumn==iRestrictColumn ){
        docListAddPos(&out, iColumn, iPos);
      }

    }






  }



  docListDestroy(in);


  *in = out;


}


/* Trim the given doclist by discarding any docids without any remaining
 * positions. */
static void docListDiscardEmpty(DocList *in) {
  DocListReader r;

  DocList out;





  /* TODO: It would be nice to implement this operation in place; that
   * could save a significant amount of memory in queries with long doclists. */
  assert( in->iType>=DL_POSITIONS );

  readerInit(&r, in);
  docListInit(&out, DL_POSITIONS, NULL, 0);


  while( !atEnd(&r) ){
    sqlite_int64 iDocid = readDocid(&r);
    int match = 0;
    int iPos, iColumn;
    while( (iPos = readPosition(&r, &iColumn)) != -1 ){
      if( !match ){
        docListAddDocid(&out, iDocid);
        match = 1;

      }



      docListAddPos(&out, iColumn, iPos);

    }



  }

  docListDestroy(in);
  *in = out;
}












/* Efficiently merge left and right into out, with duplicated docids



** from right overwriting those in left (left is effectively older
** than right).  The previous code had a memmove() which introduced an
** O(N^2) into merges, while this code should be O(N).
*/
static void docListMerge(DocList *out, DocList *left, DocList *right){
  DocListReader leftReader, rightReader;
  int iData = 0;
#ifndef NDEBUG



  /* Track these to make certain that every byte is processed. */
  int nLeftProcessed = 0, nRightProcessed = 0;
#endif








  assert( left->iType==right->iType );







  /* Handle edge cases. */
  /* TODO(shess) Consider simply forbidding edge cases, in the
  ** interests of saving copies.


  */
  if( left->nData==0 ){
    docListInit(out, right->iType, right->pData, right->nData);
    return;
  }else if(right->nData==0 ){
    docListInit(out, left->iType, left->pData, left->nData);
    return;

  }
  docListInit(out, left->iType, 0, 0);


  /* At this time, the sum of the space of the inputs is a strict
  ** upper bound.  *out can end up smaller if elements of *right
  ** overwrite elements of *left, but never larger.
  */


  out->nData = left->nData+right->nData;


  out->pData = malloc(out->nData);



  readerInit(&leftReader, left);
  readerInit(&rightReader, right);

  while( !atEnd(&leftReader) && !atEnd(&rightReader) ){
    sqlite_int64 iLeftDocid = peekDocid(&leftReader);
    sqlite_int64 iRightDocid = peekDocid(&rightReader);



    const char *pStart, *pEnd;

















    if( iLeftDocid<iRightDocid ){


      /* Copy from *left where less than iRightDocid. */
      pStart = leftReader.p;
      skipToDocid(&leftReader, iRightDocid);

      pEnd = leftReader.p;
#ifndef NDEBUG
      nLeftProcessed += pEnd-pStart;
#endif
    }else{
      /* Copy from *right where less than iLeftDocid, plus the element
      ** matching iLeftDocid, if present.  Also drop the matching
      ** element from *left.
      */

      pStart = rightReader.p;
      if( skipToDocid(&rightReader, iLeftDocid) ){
#ifndef NDEBUG


        const char *pLeftStart = leftReader.p;

#endif
        skipDocument(&leftReader);
        skipDocument(&rightReader);
#ifndef NDEBUG
        nLeftProcessed += leftReader.p-pLeftStart;
#endif
      }
      pEnd = rightReader.p;
#ifndef NDEBUG
      nRightProcessed += pEnd-pStart;
#endif
    }
    assert( pEnd>pStart );


    assert( iData+pEnd-pStart<=out->nData );
    memcpy(out->pData+iData, pStart, pEnd-pStart);

    iData += pEnd-pStart;
  }





  if( !atEnd(&leftReader) ){

    int n = left->nData-(leftReader.p-left->pData);

    assert( atEnd(&rightReader) );
    memcpy(out->pData+iData, leftReader.p, n);
    iData += n;
#ifndef NDEBUG
    nLeftProcessed += n;
#endif
  }else if( !atEnd(&rightReader) ){
    int n = right->nData-(rightReader.p-right->pData);
    memcpy(out->pData+iData, rightReader.p, n);
    iData += n;
#ifndef NDEBUG
    nRightProcessed += n;
#endif
  }


  out->nData = iData;





  out->pData = realloc(out->pData, out->nData);



  assert( nLeftProcessed==left->nData );






  assert( nRightProcessed==right->nData );



}

/*
** Read the next docid off of pIn.  Return 0 if we reach the end.
*
* TODO: This assumes that docids are never 0, but they may actually be 0 since
* users can choose docids when inserting into a full-text table.  Fix this.
*/
static sqlite_int64 nextDocid(DocListReader *pIn){


  skipPositionList(pIn);
  return atEnd(pIn) ? 0 : readDocid(pIn);
}

/*
** pLeft and pRight are two DocListReaders that are pointing to
** positions lists of the same document: iDocid. 
**
** If there are no instances in pLeft or pRight where the position
** of pLeft is one less than the position of pRight, then this
** routine adds nothing to pOut.
**
** If there are one or more instances where positions from pLeft
** are exactly one less than positions from pRight, then add a new
** document record to pOut.  If pOut wants to hold positions, then
** include the positions from pRight that are one more than a
** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1.
**
** pLeft and pRight are left pointing at the next document record.
*/
static void mergePosList(
  DocListReader *pLeft,    /* Left position list */
  DocListReader *pRight,   /* Right position list */
  sqlite_int64 iDocid,     /* The docid from pLeft and pRight */
  DocList *pOut            /* Write the merged document record here */
){
  int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol);
  int iRightCol, iRightPos = readPosition(pRight, &iRightCol);
  int match = 0;

  /* Loop until we've reached the end of both position lists. */
  while( iLeftPos!=-1 && iRightPos!=-1 ){
    if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){
      if( !match ){
        docListAddDocid(pOut, iDocid);
        match = 1;
      }


      if( pOut->iType>=DL_POSITIONS ){
        docListAddPos(pOut, iRightCol, iRightPos);
      }


      iLeftPos = readPosition(pLeft, &iLeftCol);

      iRightPos = readPosition(pRight, &iRightCol);
    }else if( iRightCol<iLeftCol ||
              (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
      iRightPos = readPosition(pRight, &iRightCol);

    }else{

      iLeftPos = readPosition(pLeft, &iLeftCol);


    }
  }
  if( iLeftPos>=0 ) skipPositionList(pLeft);






  if( iRightPos>=0 ) skipPositionList(pRight);



}

/* We have two doclists:  pLeft and pRight.
** Write the phrase intersection of these two doclists into pOut.
**
** A phrase intersection means that two documents only match
** if pLeft.iPos+1==pRight.iPos.
**
** The output pOut may or may not contain positions.  If pOut
** does contain positions, they are the positions of pRight.
*/
static void docListPhraseMerge(
  DocList *pLeft,    /* Doclist resulting from the words on the left */
  DocList *pRight,   /* Doclist for the next word to the right */

  DocList *pOut      /* Write the combined doclist here */
){
  DocListReader left, right;
  sqlite_int64 docidLeft, docidRight;


  readerInit(&left, pLeft);

  readerInit(&right, pRight);
  docidLeft = nextDocid(&left);
  docidRight = nextDocid(&right);


  while( docidLeft>0 && docidRight>0 ){
    if( docidLeft<docidRight ){
      docidLeft = nextDocid(&left);
    }else if( docidRight<docidLeft ){
      docidRight = nextDocid(&right);
    }else{
      mergePosList(&left, &right, docidLeft, pOut);
      docidLeft = nextDocid(&left);
      docidRight = nextDocid(&right);
    }
  }




}

/* We have two doclists:  pLeft and pRight.
** Write the intersection of these two doclists into pOut.
** Only docids are matched.  Position information is ignored.
**
** The output pOut never holds positions.
*/
static void docListAndMerge(
  DocList *pLeft,    /* Doclist resulting from the words on the left */
  DocList *pRight,   /* Doclist for the next word to the right */
  DocList *pOut      /* Write the combined doclist here */
){
  DocListReader left, right;
  sqlite_int64 docidLeft, docidRight;

  assert( pOut->iType<DL_POSITIONS );


  readerInit(&left, pLeft);
  readerInit(&right, pRight);
  docidLeft = nextDocid(&left);
  docidRight = nextDocid(&right);


  while( docidLeft>0 && docidRight>0 ){
    if( docidLeft<docidRight ){
      docidLeft = nextDocid(&left);
    }else if( docidRight<docidLeft ){
      docidRight = nextDocid(&right);
    }else{
      docListAddDocid(pOut, docidLeft);
      docidLeft = nextDocid(&left);
      docidRight = nextDocid(&right);
    }
  }




}

/* We have two doclists:  pLeft and pRight.
** Write the union of these two doclists into pOut.
** Only docids are matched.  Position information is ignored.
**
** The output pOut never holds positions.
*/
static void docListOrMerge(
  DocList *pLeft,    /* Doclist resulting from the words on the left */
  DocList *pRight,   /* Doclist for the next word to the right */
  DocList *pOut      /* Write the combined doclist here */
){
  DocListReader left, right;

  sqlite_int64 docidLeft, docidRight, priorLeft;





  readerInit(&left, pLeft);


  readerInit(&right, pRight);
  docidLeft = nextDocid(&left);
  docidRight = nextDocid(&right);


  while( docidLeft>0 && docidRight>0 ){
    if( docidLeft<=docidRight ){

      docListAddDocid(pOut, docidLeft);



    }else{
      docListAddDocid(pOut, docidRight);
    }
    priorLeft = docidLeft;
    if( docidLeft<=docidRight ){
      docidLeft = nextDocid(&left);
    }
    if( docidRight>0 && docidRight<=priorLeft ){
      docidRight = nextDocid(&right);
    }
  }
  while( docidLeft>0 ){
    docListAddDocid(pOut, docidLeft);
    docidLeft = nextDocid(&left);
  }
  while( docidRight>0 ){
    docListAddDocid(pOut, docidRight);
    docidRight = nextDocid(&right);

  }
}

/* We have two doclists:  pLeft and pRight.
** Write into pOut all documents that occur in pLeft but not
** in pRight.
**
** Only docids are matched.  Position information is ignored.
**
** The output pOut never holds positions.
*/
static void docListExceptMerge(
  DocList *pLeft,    /* Doclist resulting from the words on the left */
  DocList *pRight,   /* Doclist for the next word to the right */
  DocList *pOut      /* Write the combined doclist here */
){
  DocListReader left, right;
  sqlite_int64 docidLeft, docidRight, priorLeft;

  readerInit(&left, pLeft);
  readerInit(&right, pRight);
  docidLeft = nextDocid(&left);
  docidRight = nextDocid(&right);

  while( docidLeft>0 && docidRight>0 ){
    priorLeft = docidLeft;
    if( docidLeft<docidRight ){
      docListAddDocid(pOut, docidLeft);
    }
    if( docidLeft<=docidRight ){
      docidLeft = nextDocid(&left);

    }
    if( docidRight>0 && docidRight<=priorLeft ){
      docidRight = nextDocid(&right);
    }

  }
  while( docidLeft>0 ){
    docListAddDocid(pOut, docidLeft);
    docidLeft = nextDocid(&left);
  }



}

static char *string_dup_n(const char *s, int n){
  char *str = malloc(n + 1);
  memcpy(str, s, n);
  str[n] = '\0';
  return str;







<
>
|
<
<
<
|
<
<
|

>
>
>
>
>
|
|
|
<

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<
>

|
<
>
>
>
>
>
|
>
|
<
<
<
<
<
>
>
|
|
|
|
|
<
>
>
|
<
|
|
<
<
<
|
>
>
>
|
|
|
<
>
|
>
>
>
>
|
|
|
|
>
>
|
>
|
>
>
|
<
>
>
>

>
>
>
>
>
>
>
>
|
|
<
<
<
|
|
>
|
>
|
|
>
>
>
|
>
|
|
|
|
>
>
>
|
>
>
>
>
|
>
>
>
>
>
>
|
|
>
>
>
>
|
|
<
<
<
<
<
|
>
|
>
|
<
<
>
|
|
<
|
>
|
>
>
>
>
|
>
>
>
|
<
|
|
<
<
>
>
>
>
>
>
>
>
>

<
|
<
|
<
<
<
>
|
>
>
>
|
|
>
|
<
<
<
<
|
>
>
>

>
>
>
|
<
|
|
|
<
<
<
<
<
<
|
>
>
>
>
>
>
>
|
<
|

>
>
>
>
>
|
<
|
|
|
<
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<
<
<
<
<
<
>
>
>
>
>
>
|
|
<
<
<
<
<
|
<
|
<
<
>
>
|
<
<
>
|
<
>
>

>
>
>
>
|
|
<
>
|
<
<
<
|
<
<
<
|
<
<

|
>
>
>
>
>
|
<
|
>
|
>
|
>
>
>
>
>
>
>
>
>
>
>
|
<
<
<
<
<
|
>
>
>
|
>

<
>
|
>

>

>
>
>
|
<
<
>
>
>
>

<
<
<
|
<
>
>
>
>

>
|
>
>
>
|
<
<
<
<
<
>
>

<
<
|
|
<
<
<
|
>
|
|
>
|
>
>
>
|

|
<
<
|
>
>
|
>
>
>
|
>
>
>
>
>
|
>
>
>
>
|
>
>
|
>
|
>
>
>
>
|
|
<
<
|
<
<
<
<
<

|
<
|
|
<
>
|
<
<
<
<
<
<
|
>
|
>
>
>
>
>
>
>
>
|
<
>
>
>
>
>
>

>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
|
|
<
<
|
|
>
>
>

>
>

>
|
<
<
|
|
>
|
>
>
>
>
>
>
>
|
>
>
|
<
<
>
|
<
|
>

<
|
>
>
>
>
>
>
|
>
|
<
<
<
<
<
<
|
<
>

|
|
|
<
<
<
<
<
|
|
|
<
<
|
<
>
|
>
|
>
>
>
|
|
|
<
<
<
<
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

<
<
<
|
|
>
>
>
>

|
|
<
|
>
|
<
|
|
>
|
|
<
<
|
>
|
>
>
>
>
>
>

>
>
|
|
>
>
|
>
>
|
>
|
<
<
|
|
>
|
>
>
>
>
|
<
<
<
>
|
<
>
|
<
|
|
<
<
<
|
<
>
|
>
>
>
|
>
|
>
>
>
|

<
<
|
>
>
>
>
>
>
>
>
>
>

>
|
>
>
>
|
<
|
<
<
|
|
|
>
>
>
|
<
|
>
>
>
>
|
>
>
>
|
>
>
|
>
>
>
>
|
<
<
>
>
|
<
<
|
<
<
<
>
|
|
>
|
<
<
<

>
>
|
>
>
|
>
>
|
|
<

|
|
|
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
|
>
>
|
|
<
>
|
<
<
<
<
<
<
|
|
>
|
|
<
>
>
|
>
|
|
|
<
|
|
|
<
<
<
<
|
|
>
>
|
|
>
|

>
>
>
>

|
>
|
>
|
|
<
<
<
<
|
<
|
<
<
<
<
|
>
>
|
>
>
>
>
>
|
>
|
>
|
>
>
>
>
>
>
|
>
>
>
|
|
<
<
|
<
<
<
<
>
>
|
<


<
|
<










<
<

|
|
<
<
|
<
<
<


<
|
<
<
|
<
|
>
>
|
<
|
>
>
|
>
|
|
|
|
>

>
|
>
>


|
>
>
>
>
>
>
|
>
>
>


|





|
|


|
|
>
|

|
|

>
|
>
|
|
|
>

|
|
|
|
|

|
|
|


>
>
>
>


|
|
<
|
<


|
|
|

|
|

<
>

|
|
<
<
>

|
|
|
|
|

|
|
|


>
>
>
>


|
|
<
|
<


|
|
|

|
>
|
>
>
>
|
>
|
>
>
|
|
|
>

|
|
>
|
>
>
>

|
<
<
<
|
<
<
|


<
<
<
|
|
<
|
>
|
|
<
|
|
|
<
<
<
<


|
|
|

|
|

|
|
|
|
|
|
|
|
|
|
|
|
>

|
|

>

<
<
<
|
>
>
>







370
371
372
373
374
375
376

377
378



379


380
381
382
383
384
385
386
387
388
389

390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408

409
410
411

412
413
414
415
416
417
418
419





420
421
422
423
424
425
426

427
428
429

430
431



432
433
434
435
436
437
438

439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456

457
458
459
460
461
462
463
464
465
466
467
468
469
470



471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509





510
511
512
513
514


515
516
517

518
519
520
521
522
523
524
525
526
527
528
529

530
531


532
533
534
535
536
537
538
539
540
541

542

543



544
545
546
547
548
549
550
551
552




553
554
555
556
557
558
559
560
561

562
563
564






565
566
567
568
569
570
571
572
573

574
575
576
577
578
579
580
581

582
583
584

585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606






607
608
609
610
611
612
613
614





615

616


617
618
619


620
621

622
623
624
625
626
627
628
629
630

631
632



633



634


635
636
637
638
639
640
641
642

643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659





660
661
662
663
664
665
666

667
668
669
670
671
672
673
674
675
676


677
678
679
680
681



682

683
684
685
686
687
688
689
690
691
692
693





694
695
696


697
698



699
700
701
702
703
704
705
706
707
708
709
710


711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740


741





742
743

744
745

746
747






748
749
750
751
752
753
754
755
756
757
758
759

760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786


787
788
789
790
791
792
793
794
795
796
797


798
799
800
801
802
803
804
805
806
807
808
809
810
811
812


813
814

815
816
817

818
819
820
821
822
823
824
825
826
827






828

829
830
831
832
833





834
835
836


837

838
839
840
841
842
843
844
845
846
847




848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868



869
870
871
872
873
874
875
876
877

878
879
880

881
882
883
884
885


886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907


908
909
910
911
912
913
914
915
916



917
918

919
920

921
922



923

924
925
926
927
928
929
930
931
932
933
934
935
936


937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954

955


956
957
958
959
960
961
962

963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980


981
982
983


984



985
986
987
988
989



990
991
992
993
994
995
996
997
998
999
1000

1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030

1031
1032






1033
1034
1035
1036
1037

1038
1039
1040
1041
1042
1043
1044

1045
1046
1047




1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067




1068

1069




1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095


1096




1097
1098
1099

1100
1101

1102

1103
1104
1105
1106
1107
1108
1109
1110
1111
1112


1113
1114
1115


1116



1117
1118

1119


1120

1121
1122
1123
1124

1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199

1200

1201
1202
1203
1204
1205
1206
1207
1208
1209

1210
1211
1212
1213


1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234

1235

1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266



1267


1268
1269
1270



1271
1272

1273
1274
1275
1276

1277
1278
1279




1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307



1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
 sqlite_int64 i;
 int ret = getVarint(p, &i);
 *pi = (int) i;
 assert( *pi==i );
 return ret;
}


/*******************************************************************/
/* DataBuffer is used to collect data into a buffer in piecemeal



** fashion.  It implements the usual distinction between amount of


** data currently stored (nData) and buffer capacity (nCapacity).
**
** dataBufferInit - create a buffer with given initial capacity.
** dataBufferReset - forget buffer's data, retaining capacity.
** dataBufferDestroy - free buffer's data.
** dataBufferExpand - expand capacity without adding data.
** dataBufferAppend - append data.
** dataBufferAppend2 - append two pieces of data at once.
** dataBufferAppendLenData - append a varint-encoded length plus data.
** dataBufferReplace - replace buffer's data.

*/
typedef struct DataBuffer {
  char *pData;          /* Pointer to malloc'ed buffer. */
  int nCapacity;        /* Size of pData buffer. */
  int nData;            /* End of data loaded into pData. */
} DataBuffer;

static void dataBufferInit(DataBuffer *pBuffer, int nCapacity){
  assert( nCapacity>=0 );
  pBuffer->nData = 0;
  pBuffer->nCapacity = nCapacity;
  pBuffer->pData = nCapacity==0 ? NULL : malloc(nCapacity);
}
static void dataBufferReset(DataBuffer *pBuffer){
  pBuffer->nData = 0;
}
static void dataBufferDestroy(DataBuffer *pBuffer){
  if( pBuffer->pData!=NULL ) free(pBuffer->pData);
#ifndef NDEBUG

  memset(pBuffer, 0x55, sizeof(*pBuffer));
#endif
}

static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){
  assert( nAddCapacity>0 );
  /* TODO(shess) Consider expanding more aggressively.  Note that the
  ** underlying malloc implementation may take care of such things for
  ** us already.
  */
  if( pBuffer->nData+nAddCapacity>pBuffer->nCapacity ){
    pBuffer->nCapacity = pBuffer->nData+nAddCapacity;





    pBuffer->pData = realloc(pBuffer->pData, pBuffer->nCapacity);
  }
}
static void dataBufferAppend(DataBuffer *pBuffer,
                             const char *pSource, int nSource){
  assert( nSource>0 && pSource!=NULL );
  dataBufferExpand(pBuffer, nSource);

  memcpy(pBuffer->pData+pBuffer->nData, pSource, nSource);
  pBuffer->nData += nSource;
}

static void dataBufferAppend2(DataBuffer *pBuffer,
                              const char *pSource1, int nSource1,



                              const char *pSource2, int nSource2){
  assert( nSource1>0 && pSource1!=NULL );
  assert( nSource2>0 && pSource2!=NULL );
  dataBufferExpand(pBuffer, nSource1+nSource2);
  memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1);
  memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2);
  pBuffer->nData += nSource1+nSource2;

}
static void dataBufferAppendLenData(DataBuffer *pBuffer,
                                    const char *pSource, int nSource){
  char c[VARINT_MAX];
  int n = putVarint(c, nSource);
  dataBufferAppend2(pBuffer, c, n, pSource, nSource);
}
static void dataBufferReplace(DataBuffer *pBuffer,
                              const char *pSource, int nSource){
  dataBufferReset(pBuffer);
  dataBufferAppend(pBuffer, pSource, nSource);
}

/* StringBuffer is a null-terminated version of DataBuffer. */
typedef struct StringBuffer {
  DataBuffer b;            /* Includes null terminator. */
} StringBuffer;


static void initStringBuffer(StringBuffer *sb){
  dataBufferInit(&sb->b, 100);
  dataBufferReplace(&sb->b, "", 1);
}
static int stringBufferLength(StringBuffer *sb){
  return sb->b.nData-1;
}
static char *stringBufferData(StringBuffer *sb){
  return sb->b.pData;
}
static void stringBufferDestroy(StringBuffer *sb){
  dataBufferDestroy(&sb->b);
}




static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
  assert( sb->b.nData>0 );
  if( nFrom>0 ){
    sb->b.nData--;
    dataBufferAppend2(&sb->b, zFrom, nFrom, "", 1);
  }
}
static void append(StringBuffer *sb, const char *zFrom){
  nappend(sb, zFrom, strlen(zFrom));
}

/* Append a list of strings separated by commas. */
static void appendList(StringBuffer *sb, int nString, char **azString){
  int i;
  for(i=0; i<nString; ++i){
    if( i>0 ) append(sb, ", ");
    append(sb, azString[i]);
  }
}

static int endsInWhiteSpace(StringBuffer *p){
  return stringBufferLength(p)>0 &&
    isspace(stringBufferData(p)[stringBufferLength(p)-1]);
}

/* If the StringBuffer ends in something other than white space, add a
** single space character to the end.
*/
static void appendWhiteSpace(StringBuffer *p){
  if( stringBufferLength(p)==0 ) return;
  if( !endsInWhiteSpace(p) ) append(p, " ");
}

/* Remove white space from the end of the StringBuffer */
static void trimWhiteSpace(StringBuffer *p){
  while( endsInWhiteSpace(p) ){
    p->b.pData[--p->b.nData-1] = '\0';
  }
}






/*******************************************************************/
/* DLReader is used to read document elements from a doclist.  The
** current docid is cached, so dlrDocid() is fast.  DLReader does not
** own the doclist buffer.


**
** dlrAtEnd - true if there's no more data to read.
** dlrDocid - docid of current document.

** dlrDocData - doclist data for current document (including docid).
** dlrDocDataBytes - length of same.
** dlrAllDataBytes - length of all remaining data.
** dlrPosData - position data for current document.
** dlrPosDataLen - length of pos data for current document (incl POS_END).
** dlrStep - step to current document.
** dlrInit - initial for doclist of given type against given data.
** dlrDestroy - clean up.
**
** Expected usage is something like:
**
**   DLReader reader;

**   dlrInit(&reader, pData, nData);
**   while( !dlrAtEnd(&reader) ){


**     // calls to dlrDocid() and kin.
**     dlrStep(&reader);
**   }
**   dlrDestroy(&reader);
*/
typedef struct DLReader {
  DocListType iType;
  const char *pData;
  int nData;


  sqlite_int64 iDocid;

  int nElement;



} DLReader;

static int dlrAtEnd(DLReader *pReader){
  assert( pReader->nData>=0 );
  return pReader->nData==0;
}
static sqlite_int64 dlrDocid(DLReader *pReader){
  assert( !dlrAtEnd(pReader) );
  return pReader->iDocid;




}
static const char *dlrDocData(DLReader *pReader){
  assert( !dlrAtEnd(pReader) );
  return pReader->pData;
}
static int dlrDocDataBytes(DLReader *pReader){
  assert( !dlrAtEnd(pReader) );
  return pReader->nElement;
}

static int dlrAllDataBytes(DLReader *pReader){
  assert( !dlrAtEnd(pReader) );
  return pReader->nData;






}
/* TODO(shess) Consider adding a field to track iDocid varint length
** to make these two functions faster.  This might matter (a tiny bit)
** for queries.
*/
static const char *dlrPosData(DLReader *pReader){
  sqlite_int64 iDummy;
  int n = getVarint(pReader->pData, &iDummy);
  assert( !dlrAtEnd(pReader) );

  return pReader->pData+n;
}
static int dlrPosDataLen(DLReader *pReader){
  sqlite_int64 iDummy;
  int n = getVarint(pReader->pData, &iDummy);
  assert( !dlrAtEnd(pReader) );
  return pReader->nElement-n;
}

static void dlrStep(DLReader *pReader){
  assert( !dlrAtEnd(pReader) );


  /* Skip past current doclist element. */
  assert( pReader->nElement<=pReader->nData );
  pReader->pData += pReader->nElement;
  pReader->nData -= pReader->nElement;

  /* If there is more data, read the next doclist element. */
  if( pReader->nData!=0 ){
    int iDummy, n = getVarint(pReader->pData, &pReader->iDocid);
    if( pReader->iType>=DL_POSITIONS ){
      assert( n<pReader->nData );
      while( 1 ){
        n += getVarint32(pReader->pData+n, &iDummy);
        assert( n<=pReader->nData );
        if( iDummy==POS_END ) break;
        if( iDummy==POS_COLUMN ){
          n += getVarint32(pReader->pData+n, &iDummy);
          assert( n<pReader->nData );
        }else if( pReader->iType==DL_POSITIONS_OFFSETS ){
          n += getVarint32(pReader->pData+n, &iDummy);
          n += getVarint32(pReader->pData+n, &iDummy);
          assert( n<pReader->nData );
        }






      }
    }
    pReader->nElement = n;
    assert( pReader->nElement<=pReader->nData );
  }
}
static void dlrInit(DLReader *pReader, DocListType iType,
                    const char *pData, int nData){





  assert( pData!=NULL && nData!=0 );

  pReader->iType = iType;


  pReader->pData = pData;
  pReader->nData = nData;
  pReader->nElement = 0;


  pReader->iDocid = 0;


  /* Load the first element's data.  There must be a first element. */
  dlrStep(pReader);
}
static void dlrDestroy(DLReader *pReader){
#ifndef NDEBUG
  memset(pReader, 0x55, sizeof(pReader));
#endif
}


#ifndef NDEBUG
/* Verify that the doclist can be validly decoded.  Also returns the



** last docid found because it's convenient in other assertions for



** DLWriter.


*/
static int docListValidate(DocListType iType, const char *pData, int nData,
                           sqlite_int64 *pLastDocid){
  int has_prevDocid = 0;
  sqlite_int64 iPrevDocid;
  assert( pData!=0 );
  assert( nData!=0 );
  while( nData!=0 ){

    int n;
    sqlite_int64 iDocid;
    n = getVarint(pData, &iDocid);
    assert( !has_prevDocid || iPrevDocid<iDocid );
    has_prevDocid = 1;
    iPrevDocid = iDocid;
    if( iType>DL_DOCIDS ){
      int iDummy;
      while( 1 ){
        n += getVarint32(pData+n, &iDummy);
        if( iDummy==POS_END ) break;
        if( iDummy==POS_COLUMN ){
          n += getVarint32(pData+n, &iDummy);
        }else if( iType>DL_POSITIONS ){
          n += getVarint32(pData+n, &iDummy);
          n += getVarint32(pData+n, &iDummy);
        }





        assert( n<=nData );
      }
    }
    assert( n<=nData );
    pData += n;
    nData -= n;
  }

  assert( has_prevDocid );
  if( pLastDocid ) *pLastDocid = iPrevDocid;
  return 1;
}
#endif

/*******************************************************************/
/* DLWriter is used to write doclist data to a DataBuffer.  DLWriter
** always appends to the buffer and does not own it.
**


** dlwInit - initialize to write a given type doclistto a buffer.
** dlwDestroy - clear the writer's memory.  Does not free buffer.
** dlwAppend - append raw doclist data to buffer.
** dlwAdd - construct doclist element and append to buffer.
*/



/* TODO(shess) Modify to handle delta-encoding docids.  This should be

** fairly simple.  The changes to dlwAdd() are obvious.  dlwAppend()
** would need to decode the leading docid, rencode as a delta, and
** copy the rest of the data (which would already be delta-encoded).
** Note that this will require a change to pass the trailing docid.
*/
typedef struct DLWriter {
  DocListType iType;
  DataBuffer *b;
#ifndef NDEBUG
  int has_prevDocid;
  sqlite_int64 iPrevDocid;





#endif
} DLWriter;



static void dlwInit(DLWriter *pWriter, DocListType iType, DataBuffer *b){
  pWriter->b = b;



  pWriter->iType = iType;
#ifndef NDEBUG
  pWriter->has_prevDocid = 0;
  pWriter->iPrevDocid = 0;
#endif
}
static void dlwDestroy(DLWriter *pWriter){
#ifndef NDEBUG
  memset(pWriter, 0x55, sizeof(pWriter));
#endif
}
static void dlwAppend(DLWriter *pWriter,


                      const char *pData, int nData){
#ifndef NDEBUG
  sqlite_int64 iDocid;
  int n;
  n = getVarint(pData, &iDocid);
  assert( n<=nData );
  assert( !pWriter->has_prevDocid || pWriter->iPrevDocid<iDocid );
  assert( n<nData || pWriter->iType>DL_DOCIDS );
  assert( docListValidate(pWriter->iType, pData, nData, &iDocid) );
  pWriter->has_prevDocid = 1;
  pWriter->iPrevDocid = iDocid;
#endif
  dataBufferAppend(pWriter->b, pData, nData);
}
static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid,
                   const char *pPosList, int nPosList){
  char c[VARINT_MAX];
  int n = putVarint(c, iDocid);

  assert( !pWriter->has_prevDocid || pWriter->iPrevDocid<iDocid );
  assert( pPosList==0 || pWriter->iType>DL_DOCIDS );

  dataBufferAppend(pWriter->b, c, n);

  if( pWriter->iType>DL_DOCIDS ){
    n = putVarint(c, 0);
    if( nPosList>0 ){
      dataBufferAppend2(pWriter->b, pPosList, nPosList, c, n);
    }else{
      dataBufferAppend(pWriter->b, c, n);


    }





  }
#ifndef NDEBUG

  pWriter->has_prevDocid = 1;
  pWriter->iPrevDocid = iDocid;

#endif
}







/*******************************************************************/
/* PLReader is used to read data from a document's position list.  As
** the caller steps through the list, data is cached so that varints
** only need to be decoded once.
**
** plrInit, plrDestroy - create/destroy a reader.
** plrColumn, plrPosition, plrStartOffset, plrEndOffset - accessors
** plrAtEnd - at end of stream, only call plrDestroy once true.
** plrStep - step to the next element.
*/
typedef struct PLReader {

  /* These refer to the next position's data.  nData will reach 0 when
  ** reading the last position, so plrStep() signals EOF by setting
  ** pData to NULL.
  */
  const char *pData;
  int nData;

  DocListType iType;
  int iColumn;         /* the last column read */
  int iPosition;       /* the last position read */
  int iStartOffset;    /* the last start offset read */
  int iEndOffset;      /* the last end offset read */
} PLReader;

static int plrAtEnd(PLReader *pReader){
  return pReader->pData==NULL;
}
static int plrColumn(PLReader *pReader){
  assert( !plrAtEnd(pReader) );
  return pReader->iColumn;
}
static int plrPosition(PLReader *pReader){
  assert( !plrAtEnd(pReader) );
  return pReader->iPosition;
}
static int plrStartOffset(PLReader *pReader){
  assert( !plrAtEnd(pReader) );


  return pReader->iStartOffset;
}
static int plrEndOffset(PLReader *pReader){
  assert( !plrAtEnd(pReader) );
  return pReader->iEndOffset;
}
static void plrStep(PLReader *pReader){
  int i, n;

  assert( !plrAtEnd(pReader) );



  if( pReader->nData==0 ){
    pReader->pData = NULL;
    return;
  }

  n = getVarint32(pReader->pData, &i);
  if( i==POS_COLUMN ){
    n += getVarint32(pReader->pData+n, &pReader->iColumn);
    pReader->iPosition = 0;
    pReader->iStartOffset = 0;
    n += getVarint32(pReader->pData+n, &i);
  }
  /* Should never see adjacent column changes. */
  assert( i!=POS_COLUMN );



  if( i==POS_END ){
    pReader->nData = 0;

    pReader->pData = NULL;
    return;
  }


  pReader->iPosition += i-POS_BASE;
  if( pReader->iType==DL_POSITIONS_OFFSETS ){
    n += getVarint32(pReader->pData+n, &i);
    pReader->iStartOffset += i;
    n += getVarint32(pReader->pData+n, &i);
    pReader->iEndOffset = pReader->iStartOffset+i;
  }
  assert( n<=pReader->nData );
  pReader->pData += n;






  pReader->nData -= n;

}

static void plrInit(PLReader *pReader, DocListType iType,
                    const char *pData, int nData){
  pReader->pData = pData;





  pReader->nData = nData;
  pReader->iType = iType;
  pReader->iColumn = 0;


  pReader->iPosition = 0;

  pReader->iStartOffset = 0;
  pReader->iEndOffset = 0;
  plrStep(pReader);
}
static void plrDestroy(PLReader *pReader){
#ifndef NDEBUG
  memset(pReader, 0x55, sizeof(pReader));
#endif
}





/*******************************************************************/
/* PLWriter is used in constructing a document's position list.  As a
** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op.
**
** plwInit - init for writing a document's poslist.
** plwReset - reset the writer for a new document.
** plwDestroy - clear a writer.
** plwNew - malloc storage and initialize it.
** plwDelete - clear and free storage.
** plwDlwAdd - append the docid and poslist to a doclist writer.
** plwAdd - append position and offset information.
*/
/* TODO(shess) PLWriter is used in two ways.  fulltextUpdate() uses it
** in construction of a new doclist.  docListTrim() and mergePosList()
** use it when trimming.  In the former case, it wants to own the
** DataBuffer, in the latter it's possible it could encode into a
** pre-existing DataBuffer.
*/
typedef struct PLWriter {
  DataBuffer b;




  sqlite_int64 iDocid;
  DocListType iType;
  int iColumn;    /* the last column written */
  int iPos;       /* the last position written */
  int iOffset;    /* the last start offset written */
} PLWriter;

static void plwDlwAdd(PLWriter *pWriter, DLWriter *dlWriter){
  dlwAdd(dlWriter, pWriter->iDocid, pWriter->b.pData, pWriter->b.nData);

}
static void plwAdd(PLWriter *pWriter, int iColumn, int iPos,
                   int iStartOffset, int iEndOffset){

  /* Worst-case space for POS_COLUMN, iColumn, iPosDelta,
  ** iStartOffsetDelta, and iEndOffsetDelta.
  */
  char c[5*VARINT_MAX];
  int n = 0;



  if( pWriter->iType==DL_DOCIDS ) return;

  if( iColumn!=pWriter->iColumn ){
    n += putVarint(c+n, POS_COLUMN);
    n += putVarint(c+n, iColumn);
    pWriter->iColumn = iColumn;
    pWriter->iPos = 0;
    pWriter->iOffset = 0;
  }
  assert( iPos>=pWriter->iPos );
  n += putVarint(c+n, POS_BASE+(iPos-pWriter->iPos));
  pWriter->iPos = iPos;
  if( pWriter->iType==DL_POSITIONS_OFFSETS ){
    assert( iStartOffset>=pWriter->iOffset );
    n += putVarint(c+n, iStartOffset-pWriter->iOffset);
    pWriter->iOffset = iStartOffset;
    assert( iEndOffset>=iStartOffset );
    n += putVarint(c+n, iEndOffset-iStartOffset);
  }
  dataBufferAppend(&pWriter->b, c, n);
}


static void plwReset(PLWriter *pWriter,
                     sqlite_int64 iDocid, DocListType iType){
  dataBufferReset(&pWriter->b);
  pWriter->iDocid = iDocid;
  pWriter->iType = iType;
  pWriter->iColumn = 0;
  pWriter->iPos = 0;
  pWriter->iOffset = 0;
}



static void plwInit(PLWriter *pWriter, sqlite_int64 iDocid, DocListType iType){
  dataBufferInit(&pWriter->b, 0);

  plwReset(pWriter, iDocid, iType);
}

static PLWriter *plwNew(sqlite_int64 iDocid, DocListType iType){
  PLWriter *pWriter = malloc(sizeof(PLWriter));



  plwInit(pWriter, iDocid, iType);

  return pWriter;
}
static void plwDestroy(PLWriter *pWriter){
  dataBufferDestroy(&pWriter->b);
#ifndef NDEBUG
  memset(pWriter, 0x55, sizeof(pWriter));
#endif
}
static void plwDelete(PLWriter *pWriter){
  plwDestroy(pWriter);
  free(pWriter);
}




/* Copy the doclist data of iType in pData/nData into *out, trimming
** unnecessary data as we go.  Only columns matching iColumn are
** copied, all columns copied if iColimn is -1.  Elements with no
** matching columns are dropped.  The output is an iOutType doclist.
*/
static void docListTrim(DocListType iType, const char *pData, int nData,
                        int iColumn, DocListType iOutType, DataBuffer *out){
  DLReader dlReader;
  DLWriter dlWriter;
  PLWriter plWriter;

  assert( iOutType<=iType );

  dlrInit(&dlReader, iType, pData, nData);
  dlwInit(&dlWriter, iOutType, out);
  plwInit(&plWriter, 0, iOutType);


  while( !dlrAtEnd(&dlReader) ){


    PLReader plReader;
    int match = 0;

    plrInit(&plReader, dlReader.iType,
            dlrPosData(&dlReader), dlrPosDataLen(&dlReader));
    plwReset(&plWriter, dlrDocid(&dlReader), iOutType);


    while( !plrAtEnd(&plReader) ){
      if( iColumn==-1 || plrColumn(&plReader)==iColumn ){
        match = 1;
        plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader),
               plrStartOffset(&plReader), plrEndOffset(&plReader));
      }
      plrStep(&plReader);
    }
    if( match ) plwDlwAdd(&plWriter, &dlWriter);

    plrDestroy(&plReader);
    dlrStep(&dlReader);
  }
  plwDestroy(&plWriter);
  dlwDestroy(&dlWriter);
  dlrDestroy(&dlReader);
}



/* Used by docListMerge() to keep doclists in the ascending order by
** docid, then ascending order by age (so the newest comes first).
*/


typedef struct OrderedDLReader {



  DLReader *pReader;

  /* TODO(shess) If we assume that docListMerge pReaders is ordered by
  ** age (which we do), then we could use pReader comparisons to break
  ** ties.



  */
  int idx;
} OrderedDLReader;

/* Order eof to end, then by docid asc, idx desc. */
static int orderedDLReaderCmp(OrderedDLReader *r1, OrderedDLReader *r2){
  if( dlrAtEnd(r1->pReader) ){
    if( dlrAtEnd(r2->pReader) ) return 0;  /* Both atEnd(). */
    return 1;                              /* Only r1 atEnd(). */
  }
  if( dlrAtEnd(r2->pReader) ) return -1;   /* Only r2 atEnd(). */


  if( dlrDocid(r1->pReader)<dlrDocid(r2->pReader) ) return -1;
  if( dlrDocid(r1->pReader)>dlrDocid(r2->pReader) ) return 1;

  /* Descending on idx. */
  return r2->idx-r1->idx;
}

/* Bubble p[0] to appropriate place in p[1..n-1].  Assumes that
** p[1..n-1] is already sorted.
*/
/* TODO(shess) Is this frequent enough to warrant a binary search?
** Before implementing that, instrument the code to check.  In most
** current usage, I expect that p[0] will be less than p[1] a very
** high proportion of the time.
*/
static void orderedDLReaderReorder(OrderedDLReader *p, int n){
  while( n>1 && orderedDLReaderCmp(p, p+1)>0 ){
    OrderedDLReader tmp = p[0];
    p[0] = p[1];
    p[1] = tmp;
    n--;
    p++;
  }
}

/* Given an array of doclist readers, merge their doclist elements
** into out in sorted order (by docid), dropping elements from older
** readers when there is a duplicate docid.  pReaders is assumed to be
** ordered by age, oldest first.

*/
/* TODO(shess) nReaders must be <= MERGE_COUNT.  This should probably






** be fixed.
*/
static void docListMerge(DataBuffer *out,
                         DLReader *pReaders, int nReaders){
  OrderedDLReader readers[MERGE_COUNT];

  DLWriter writer;
  int i, n;
  const char *pStart = 0;
  int nStart = 0;

  assert( nReaders>0 );
  if( nReaders==1 ){

    dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders));
    return;
  }





  assert( nReaders<=MERGE_COUNT );
  n = 0;
  for(i=0; i<nReaders; i++){
    assert( pReaders[i].iType==pReaders[0].iType );
    readers[i].pReader = pReaders+i;
    readers[i].idx = i;
    n += dlrAllDataBytes(&pReaders[i]);
  }
  /* Conservatively size output to sum of inputs.  Output should end
  ** up strictly smaller than input.
  */
  dataBufferExpand(out, n);

  /* Get the readers into sorted order. */
  while( i-->0 ){
    orderedDLReaderReorder(readers+i, nReaders-i);
  }

  dlwInit(&writer, pReaders[0].iType, out);




  while( !dlrAtEnd(readers[0].pReader) ){

    sqlite_int64 iDocid = dlrDocid(readers[0].pReader);





    /* If this is a continuation of the current buffer to copy, extend
    ** that buffer.  memcpy() seems to be more efficient if it has a
    ** lots of data to copy.
    */
    if( dlrDocData(readers[0].pReader)==pStart+nStart ){
      nStart += dlrDocDataBytes(readers[0].pReader);
    }else{
      if( pStart!=0 ) dlwAppend(&writer, pStart, nStart);
      pStart = dlrDocData(readers[0].pReader);
      nStart = dlrDocDataBytes(readers[0].pReader);
    }
    dlrStep(readers[0].pReader);

    /* Drop all of the older elements with the same docid. */
    for(i=1; i<nReaders &&
             !dlrAtEnd(readers[i].pReader) &&
             dlrDocid(readers[i].pReader)==iDocid; i++){
      dlrStep(readers[i].pReader);
    }

    /* Get the readers back into order. */
    while( i-->0 ){
      orderedDLReaderReorder(readers+i, nReaders-i);
    }
  }







  /* Copy over any remaining elements. */
  if( nStart>0 ) dlwAppend(&writer, pStart, nStart);
  dlwDestroy(&writer);

}


/* pLeft and pRight are DLReaders positioned to the same docid.

**
** If there are no instances in pLeft or pRight where the position
** of pLeft is one less than the position of pRight, then this
** routine adds nothing to pOut.
**
** If there are one or more instances where positions from pLeft
** are exactly one less than positions from pRight, then add a new
** document record to pOut.  If pOut wants to hold positions, then
** include the positions from pRight that are one more than a
** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1.


*/
static void mergePosList(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){
  PLReader left, right;


  PLWriter writer;



  int match = 0;


  assert( dlrDocid(pLeft)==dlrDocid(pRight) );


  assert( pOut->iType!=DL_POSITIONS_OFFSETS );


  plrInit(&left, pLeft->iType, dlrPosData(pLeft), dlrPosDataLen(pLeft));
  plrInit(&right, pRight->iType, dlrPosData(pRight), dlrPosDataLen(pRight));
  plwInit(&writer, dlrDocid(pLeft), pOut->iType);


  while( !plrAtEnd(&left) && !plrAtEnd(&right) ){
    if( plrColumn(&left)<plrColumn(&right) ){
      plrStep(&left);
    }else if( plrColumn(&left)>plrColumn(&right) ){
      plrStep(&right);
    }else if( plrPosition(&left)+1<plrPosition(&right) ){
      plrStep(&left);
    }else if( plrPosition(&left)+1>plrPosition(&right) ){
      plrStep(&right);
    }else{
      match = 1;
      plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0);
      plrStep(&left);
      plrStep(&right);
    }
  }

  /* TODO(shess) We could remember the output position, encode the
  ** docid, then encode the poslist directly into the output.  If no
  ** match, we back out to the stored output position.  This would
  ** also reduce the malloc count.
  */
  if( match ) plwDlwAdd(&writer, pOut);

  plrDestroy(&left);
  plrDestroy(&right);
  plwDestroy(&writer);
}

/* We have two doclists with positions:  pLeft and pRight.
** Write the phrase intersection of these two doclists into pOut.
**
** A phrase intersection means that two documents only match
** if pLeft.iPos+1==pRight.iPos.
**
** iType controls the type of data written to pOut.  If iType is
** DL_POSITIONS, the positions are those from pRight.
*/
static void docListPhraseMerge(
  const char *pLeft, int nLeft,
  const char *pRight, int nRight,
  DocListType iType,
  DataBuffer *pOut      /* Write the combined doclist here */
){
  DLReader left, right;
  DLWriter writer;

  if( nLeft==0 || nRight==0 ) return;

  assert( iType!=DL_POSITIONS_OFFSETS );

  dlrInit(&left, DL_POSITIONS, pLeft, nLeft);
  dlrInit(&right, DL_POSITIONS, pRight, nRight);
  dlwInit(&writer, iType, pOut);

  while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
    if( dlrDocid(&left)<dlrDocid(&right) ){
      dlrStep(&left);
    }else if( dlrDocid(&right)<dlrDocid(&left) ){
      dlrStep(&right);
    }else{
      mergePosList(&left, &right, &writer);
      dlrStep(&left);
      dlrStep(&right);
    }
  }

  dlrDestroy(&left);
  dlrDestroy(&right);
  dlwDestroy(&writer);
}

/* We have two DL_DOCIDS doclists:  pLeft and pRight.
** Write the intersection of these two doclists into pOut as a

** DL_DOCIDS doclist.

*/
static void docListAndMerge(
  const char *pLeft, int nLeft,
  const char *pRight, int nRight,
  DataBuffer *pOut      /* Write the combined doclist here */
){
  DLReader left, right;
  DLWriter writer;


  if( nLeft==0 || nRight==0 ) return;

  dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
  dlrInit(&right, DL_DOCIDS, pRight, nRight);


  dlwInit(&writer, DL_DOCIDS, pOut);

  while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
    if( dlrDocid(&left)<dlrDocid(&right) ){
      dlrStep(&left);
    }else if( dlrDocid(&right)<dlrDocid(&left) ){
      dlrStep(&right);
    }else{
      dlwAdd(&writer, dlrDocid(&left), 0, 0);
      dlrStep(&left);
      dlrStep(&right);
    }
  }

  dlrDestroy(&left);
  dlrDestroy(&right);
  dlwDestroy(&writer);
}

/* We have two DL_DOCIDS doclists:  pLeft and pRight.
** Write the union of these two doclists into pOut as a

** DL_DOCIDS doclist.

*/
static void docListOrMerge(
  const char *pLeft, int nLeft,
  const char *pRight, int nRight,
  DataBuffer *pOut      /* Write the combined doclist here */
){
  DLReader left, right;
  DLWriter writer;

  if( nLeft==0 ){
    dataBufferAppend(pOut, pRight, nRight);
    return;
  }
  if( nRight==0 ){
    dataBufferAppend(pOut, pLeft, nLeft);
    return;
  }

  dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
  dlrInit(&right, DL_DOCIDS, pRight, nRight);
  dlwInit(&writer, DL_DOCIDS, pOut);

  while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
    if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){
      dlwAdd(&writer, dlrDocid(&left), 0, 0);
      dlrStep(&left);
    }else if( dlrAtEnd(&left) || dlrDocid(&right)<dlrDocid(&left) ){
      dlwAdd(&writer, dlrDocid(&right), 0, 0);
      dlrStep(&right);
    }else{
      dlwAdd(&writer, dlrDocid(&left), 0, 0);



      dlrStep(&left);


      dlrStep(&right);
    }
  }




  dlrDestroy(&left);

  dlrDestroy(&right);
  dlwDestroy(&writer);
}


/* We have two DL_DOCIDS doclists:  pLeft and pRight.
** Write into pOut as DL_DOCIDS doclist containing all documents that
** occur in pLeft but not in pRight.




*/
static void docListExceptMerge(
  const char *pLeft, int nLeft,
  const char *pRight, int nRight,
  DataBuffer *pOut      /* Write the combined doclist here */
){
  DLReader left, right;
  DLWriter writer;

  if( nLeft==0 ) return;
  if( nRight==0 ){
    dataBufferAppend(pOut, pLeft, nLeft);
    return;
  }

  dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
  dlrInit(&right, DL_DOCIDS, pRight, nRight);
  dlwInit(&writer, DL_DOCIDS, pOut);

  while( !dlrAtEnd(&left) ){
    while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){
      dlrStep(&right);
    }
    if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){
      dlwAdd(&writer, dlrDocid(&left), 0, 0);
    }
    dlrStep(&left);
  }




  dlrDestroy(&left);
  dlrDestroy(&right);
  dlwDestroy(&writer);
}

static char *string_dup_n(const char *s, int n){
  char *str = malloc(n + 1);
  memcpy(str, s, n);
  str[n] = '\0';
  return str;
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
  /* SEGDIR_SPAN */
  "select min(start_block), max(end_block) from %_segdir "
  " where level = ? and start_block <> 0",
  /* SEGDIR_DELETE */ "delete from %_segdir where level = ?",
  /* SEGDIR_SELECT_ALL */ "select root from %_segdir order by level desc, idx",
};

/* MERGE_COUNT controls how often we merge segments (see comment at
** top of file).
*/
#define MERGE_COUNT 16

/*
** A connection to a fulltext index is an instance of the following
** structure.  The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
** arguments.
*/







<
<
<
<
<







1502
1503
1504
1505
1506
1507
1508





1509
1510
1511
1512
1513
1514
1515
  /* SEGDIR_SPAN */
  "select min(start_block), max(end_block) from %_segdir "
  " where level = ? and start_block <> 0",
  /* SEGDIR_DELETE */ "delete from %_segdir where level = ?",
  /* SEGDIR_SELECT_ALL */ "select root from %_segdir order by level desc, idx",
};






/*
** A connection to a fulltext index is an instance of the following
** structure.  The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
** arguments.
*/
1349
1350
1351
1352
1353
1354
1355
1356

1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
  sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
  QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
  sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
  int eof;                         /* True if at End Of Results */
  Query q;                         /* Parsed query string */
  Snippet snippet;                 /* Cached snippet for the current row */
  int iColumn;                     /* Column being searched */
  DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */ 

} fulltext_cursor;

static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}

static const sqlite3_module fulltextModule;   /* forward declaration */

/* Append a list of strings separated by commas to a StringBuffer. */
static void appendList(StringBuffer *sb, int nString, char **azString){
  int i;
  for(i=0; i<nString; ++i){
    if( i>0 ) append(sb, ", ");
    append(sb, azString[i]);
  }
}

/* Return a dynamically generated statement of the form
 *   insert into %_content (rowid, ...) values (?, ...)
 */
static const char *contentInsertStatement(fulltext_vtab *v){
  StringBuffer sb;
  int i;

  initStringBuffer(&sb);
  append(&sb, "insert into %_content (rowid, ");
  appendList(&sb, v->nColumn, v->azContentColumn);
  append(&sb, ") values (?");
  for(i=0; i<v->nColumn; ++i)
    append(&sb, ", ?");
  append(&sb, ")");
  return sb.s;
}

/* Return a dynamically generated statement of the form
 *   update %_content set [col_0] = ?, [col_1] = ?, ...
 *                    where rowid = ?
 */
static const char *contentUpdateStatement(fulltext_vtab *v){
  StringBuffer sb;
  int i;

  initStringBuffer(&sb);
  append(&sb, "update %_content set ");
  for(i=0; i<v->nColumn; ++i) {
    if( i>0 ){
      append(&sb, ", ");
    }
    append(&sb, v->azContentColumn[i]);
    append(&sb, " = ?");
  }
  append(&sb, " where rowid = ?");
  return sb.s;
}

/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
** If the indicated statement has never been prepared, it is prepared
** and cached, otherwise the cached version is reset.
*/
static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,







|
>








<
<
<
<
<
<
<
<
<














|




















|







1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561









1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
  sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
  QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
  sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
  int eof;                         /* True if at End Of Results */
  Query q;                         /* Parsed query string */
  Snippet snippet;                 /* Cached snippet for the current row */
  int iColumn;                     /* Column being searched */
  DataBuffer result;               /* Doclist results from fulltextQuery */
  DLReader reader;                 /* Result reader if result not empty */
} fulltext_cursor;

static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}

static const sqlite3_module fulltextModule;   /* forward declaration */










/* Return a dynamically generated statement of the form
 *   insert into %_content (rowid, ...) values (?, ...)
 */
static const char *contentInsertStatement(fulltext_vtab *v){
  StringBuffer sb;
  int i;

  initStringBuffer(&sb);
  append(&sb, "insert into %_content (rowid, ");
  appendList(&sb, v->nColumn, v->azContentColumn);
  append(&sb, ") values (?");
  for(i=0; i<v->nColumn; ++i)
    append(&sb, ", ?");
  append(&sb, ")");
  return stringBufferData(&sb);
}

/* Return a dynamically generated statement of the form
 *   update %_content set [col_0] = ?, [col_1] = ?, ...
 *                    where rowid = ?
 */
static const char *contentUpdateStatement(fulltext_vtab *v){
  StringBuffer sb;
  int i;

  initStringBuffer(&sb);
  append(&sb, "update %_content set ");
  for(i=0; i<v->nColumn; ++i) {
    if( i>0 ){
      append(&sb, ", ");
    }
    append(&sb, v->azContentColumn[i]);
    append(&sb, " = ?");
  }
  append(&sb, " where rowid = ?");
  return stringBufferData(&sb);
}

/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
** If the indicated statement has never been prepared, it is prepared
** and cached, otherwise the cached version is reset.
*/
static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
  rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc!=SQLITE_OK ) return rc;

  initStringBuffer(&schema);
  append(&schema, "CREATE TABLE %_content(");
  appendList(&schema, spec.nColumn, spec.azContentColumn);
  append(&schema, ")");
  rc = sql_exec(db, spec.zName, schema.s);
  free(schema.s);
  if( rc!=SQLITE_OK ) goto out;

  rc = sql_exec(db, spec.zName, "create table %_segments(block blob);");
  if( rc!=SQLITE_OK ) goto out;

  rc = sql_exec(db, spec.zName,
                "create table %_segdir("







|
|







2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
  rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc!=SQLITE_OK ) return rc;

  initStringBuffer(&schema);
  append(&schema, "CREATE TABLE %_content(");
  appendList(&schema, spec.nColumn, spec.azContentColumn);
  append(&schema, ")");
  rc = sql_exec(db, spec.zName, stringBufferData(&schema));
  stringBufferDestroy(&schema);
  if( rc!=SQLITE_OK ) goto out;

  rc = sql_exec(db, spec.zName, "create table %_segments(block blob);");
  if( rc!=SQLITE_OK ) goto out;

  rc = sql_exec(db, spec.zName,
                "create table %_segdir("
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
    struct snippetMatch *pMatch = &p->aMatch[i];
    zBuf[0] = ' ';
    sprintf(&zBuf[cnt>0], "%d %d %d %d", pMatch->iCol,
        pMatch->iTerm, pMatch->iStart, pMatch->nByte);
    append(&sb, zBuf);
    cnt++;
  }
  p->zOffset = sb.s;
  p->nOffset = sb.len;
}

/*
** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set
** of matching words some of which might be in zDoc.  zDoc is column
** number iCol.
**







|
|







2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
    struct snippetMatch *pMatch = &p->aMatch[i];
    zBuf[0] = ' ';
    sprintf(&zBuf[cnt>0], "%d %d %d %d", pMatch->iCol,
        pMatch->iTerm, pMatch->iStart, pMatch->nByte);
    append(&sb, zBuf);
    cnt++;
  }
  p->zOffset = stringBufferData(&sb);
  p->nOffset = stringBufferLength(&sb);
}

/*
** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set
** of matching words some of which might be in zDoc.  zDoc is column
** number iCol.
**
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
    if( isspace(zDoc[iBreak+i]) ){
      return iBreak + i + 1;
    }
  }
  return iBreak;
}

/*
** If the StringBuffer does not end in white space, add a single
** space character to the end.
*/
static void appendWhiteSpace(StringBuffer *p){
  if( p->len==0 ) return;
  if( isspace(p->s[p->len-1]) ) return;
  append(p, " ");
}

/*
** Remove white space from teh end of the StringBuffer
*/
static void trimWhiteSpace(StringBuffer *p){
  while( p->len>0 && isspace(p->s[p->len-1]) ){
    p->len--;
  }
}



/*
** Allowed values for Snippet.aMatch[].snStatus
*/
#define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
#define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







2871
2872
2873
2874
2875
2876
2877



















2878
2879
2880
2881
2882
2883
2884
    if( isspace(zDoc[iBreak+i]) ){
      return iBreak + i + 1;
    }
  }
  return iBreak;
}






















/*
** Allowed values for Snippet.aMatch[].snStatus
*/
#define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
#define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841

2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
    tailOffset = iEnd;
  }
  trimWhiteSpace(&sb);
  if( tailEllipsis ){
    appendWhiteSpace(&sb);
    append(&sb, zEllipsis);
  }
  pCursor->snippet.zSnippet = sb.s;
  pCursor->snippet.nSnippet = sb.len;  
}


/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  TRACE(("FTS2 Close %p\n", c));
  sqlite3_finalize(c->pStmt);
  queryClear(&c->q);
  snippetClear(&c->snippet);
  if( c->result.pDoclist!=NULL ){

    docListDelete(c->result.pDoclist);
  }
  free(c);
  return SQLITE_OK;
}

static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  sqlite_int64 iDocid;
  int rc;

  TRACE(("FTS2 Next %p\n", pCursor));
  snippetClear(&c->snippet);
  if( c->iCursorType < QUERY_FULLTEXT ){
    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
    rc = sqlite3_step(c->pStmt);







|
|













|
>
|







<







2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019

3020
3021
3022
3023
3024
3025
3026
    tailOffset = iEnd;
  }
  trimWhiteSpace(&sb);
  if( tailEllipsis ){
    appendWhiteSpace(&sb);
    append(&sb, zEllipsis);
  }
  pCursor->snippet.zSnippet = stringBufferData(&sb);
  pCursor->snippet.nSnippet = stringBufferLength(&sb);
}


/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  TRACE(("FTS2 Close %p\n", c));
  sqlite3_finalize(c->pStmt);
  queryClear(&c->q);
  snippetClear(&c->snippet);
  if( c->result.nData!=0 ){
    dlrDestroy(&c->reader);
    dataBufferDestroy(&c->result);
  }
  free(c);
  return SQLITE_OK;
}

static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;

  int rc;

  TRACE(("FTS2 Next %p\n", pCursor));
  snippetClear(&c->snippet);
  if( c->iCursorType < QUERY_FULLTEXT ){
    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
    rc = sqlite3_step(c->pStmt);
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878

2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897

2898
2899
2900
2901
2902
2903

2904
2905
2906
2907
2908
2909
2910
2911
2912
2913


2914

2915

2916
2917
2918
2919

2920
2921

2922
2923


2924
2925
2926

2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
        c->eof = 1;
        return rc;
    }
  } else {  /* full-text query */
    rc = sqlite3_reset(c->pStmt);
    if( rc!=SQLITE_OK ) return rc;

    iDocid = nextDocid(&c->result);
    if( iDocid==0 ){
      c->eof = 1;
      return SQLITE_OK;
    }
    rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);

    if( rc!=SQLITE_OK ) return rc;
    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
    rc = sqlite3_step(c->pStmt);
    if( rc==SQLITE_ROW ){   /* the case we expect */
      c->eof = 0;
      return SQLITE_OK;
    }
    /* an error occurred; abort */
    return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
  }
}


/* TODO(shess) If we pushed LeafReader to the top of the file, or to
** another file, term_select() could be pushed above
** docListOfTerm().
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm, DocList *out);


/* Return a DocList corresponding to the query term *pTerm.  If *pTerm
** is the first term of a phrase query, go ahead and evaluate the phrase
** query and return the doclist for the entire phrase query.
**
** The result is stored in pTerm->doclist.

*/
static int docListOfTerm(
  fulltext_vtab *v,     /* The full text index */
  int iColumn,          /* column to restrict to.  No restrition if >=nColumn */
  QueryTerm *pQTerm,    /* Term we are looking for, or 1st term of a phrase */
  DocList **ppResult    /* Write the result here */
){
  DocList *pLeft, *pRight, *pNew;
  int i, rc;



  pLeft = docListNew(DL_POSITIONS);

  rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft);

  if( rc ) return rc;
  for(i=1; i<=pQTerm->nPhrase; i++){
    pRight = docListNew(DL_POSITIONS);
    rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight);

    if( rc ){
      docListDelete(pLeft);

      return rc;
    }


    pNew = docListNew(i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS);
    docListPhraseMerge(pLeft, pRight, pNew);
    docListDelete(pLeft);

    docListDelete(pRight);
    pLeft = pNew;
  }
  *ppResult = pLeft;
  return SQLITE_OK;
}

/* Add a new term pTerm[0..nTerm-1] to the query *q.
*/
static void queryAdd(Query *q, const char *pTerm, int nTerm){
  QueryTerm *t;







|
<



|
>


















|
>





|
>


|
|
|
|

|


>
>
|
>
|
>

|
|
|
>

<
>


>
>
|
<
<
>
|
|

|







3035
3036
3037
3038
3039
3040
3041
3042

3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096

3097
3098
3099
3100
3101
3102


3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
        c->eof = 1;
        return rc;
    }
  } else {  /* full-text query */
    rc = sqlite3_reset(c->pStmt);
    if( rc!=SQLITE_OK ) return rc;

    if( c->result.nData==0 || dlrAtEnd(&c->reader) ){

      c->eof = 1;
      return SQLITE_OK;
    }
    rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader));
    dlrStep(&c->reader);
    if( rc!=SQLITE_OK ) return rc;
    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
    rc = sqlite3_step(c->pStmt);
    if( rc==SQLITE_ROW ){   /* the case we expect */
      c->eof = 0;
      return SQLITE_OK;
    }
    /* an error occurred; abort */
    return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
  }
}


/* TODO(shess) If we pushed LeafReader to the top of the file, or to
** another file, term_select() could be pushed above
** docListOfTerm().
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm,
                      DocListType iType, DataBuffer *out);

/* Return a DocList corresponding to the query term *pTerm.  If *pTerm
** is the first term of a phrase query, go ahead and evaluate the phrase
** query and return the doclist for the entire phrase query.
**
** The resulting DL_DOCIDS doclist is stored in pResult, which is
** overwritten.
*/
static int docListOfTerm(
  fulltext_vtab *v,   /* The full text index */
  int iColumn,        /* column to restrict to.  No restriction if >=nColumn */
  QueryTerm *pQTerm,  /* Term we are looking for, or 1st term of a phrase */
  DataBuffer *pResult /* Write the result here */
){
  DataBuffer left, right, new;
  int i, rc;

  /* No phrase search if no position info. */
  assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS );

  dataBufferInit(&left, 0);
  rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm,
                  0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &left);
  if( rc ) return rc;
  for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){
    dataBufferInit(&right, 0);
    rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
                    DL_POSITIONS, &right);
    if( rc ){

      dataBufferDestroy(&left);
      return rc;
    }
    dataBufferInit(&new, 0);
    docListPhraseMerge(left.pData, left.nData, right.pData, right.nData,
                       i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new);


    dataBufferDestroy(&left);
    dataBufferDestroy(&right);
    left = new;
  }
  *pResult = left;
  return SQLITE_OK;
}

/* Add a new term pTerm[0..nTerm-1] to the query *q.
*/
static void queryAdd(Query *q, const char *pTerm, int nTerm){
  QueryTerm *t;
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103



3104
3105
3106
3107

3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118

3119
3120
3121
3122
3123
3124
3125


3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
** they are allowed to match against any column.
*/
static int fulltextQuery(
  fulltext_vtab *v,      /* The full text index */
  int iColumn,           /* Match against this column by default */
  const char *zInput,    /* The query string */
  int nInput,            /* Number of bytes in zInput[] */
  DocList **pResult,     /* Write the result doclist here */
  Query *pQuery          /* Put parsed query string here */
){
  int i, iNext, rc;
  DocList *pLeft = NULL;
  DocList *pRight, *pNew, *pOr;
  int nNot = 0;
  QueryTerm *aTerm;




  rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
  if( rc!=SQLITE_OK ) return rc;

  /* Merge AND terms. */

  aTerm = pQuery->pTerms;
  for(i = 0; i<pQuery->nTerms; i=iNext){
    if( aTerm[i].isNot ){
      /* Handle all NOT terms in a separate pass */
      nNot++;
      iNext = i + aTerm[i].nPhrase+1;
      continue;
    }
    iNext = i + aTerm[i].nPhrase + 1;
    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
    if( rc ){

      queryClear(pQuery);
      return rc;
    }
    while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
      rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr);
      iNext += aTerm[iNext].nPhrase + 1;
      if( rc ){


        queryClear(pQuery);
        return rc;
      }
      pNew = docListNew(DL_DOCIDS);
      docListOrMerge(pRight, pOr, pNew);
      docListDelete(pRight);
      docListDelete(pOr);
      pRight = pNew;
    }
    if( pLeft==0 ){
      pLeft = pRight;
    }else{
      pNew = docListNew(DL_DOCIDS);
      docListAndMerge(pLeft, pRight, pNew);
      docListDelete(pRight);
      docListDelete(pLeft);
      pLeft = pNew;
    }
  }

  if( nNot && pLeft==0 ){
    /* We do not yet know how to handle a query of only NOT terms */
    return SQLITE_ERROR;
  }

  /* Do the EXCEPT terms */
  for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){
    if( !aTerm[i].isNot ) continue;
    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
    if( rc ){
      queryClear(pQuery);
      docListDelete(pLeft);
      return rc;
    }
    pNew = docListNew(DL_DOCIDS);
    docListExceptMerge(pLeft, pRight, pNew);
    docListDelete(pRight);
    docListDelete(pLeft);
    pLeft = pNew;
  }

  *pResult = pLeft;
  return rc;
}

/*
** This is the xFilter interface for the virtual table.  See
** the virtual table xFilter method documentation for additional
** information.







|



<
|



>
>
>




>









|

>




|


>
>



|
|
|
|
|

|
|

|
|
|
|
|



|







|


|


|
|
|
|
|


|







3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275

3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
** they are allowed to match against any column.
*/
static int fulltextQuery(
  fulltext_vtab *v,      /* The full text index */
  int iColumn,           /* Match against this column by default */
  const char *zInput,    /* The query string */
  int nInput,            /* Number of bytes in zInput[] */
  DataBuffer *pResult,   /* Write the result doclist here */
  Query *pQuery          /* Put parsed query string here */
){
  int i, iNext, rc;

  DataBuffer left, right, or, new;
  int nNot = 0;
  QueryTerm *aTerm;

  /* TODO(shess) I think that the queryClear() calls below are not
  ** necessary, because fulltextClose() already clears the query.
  */
  rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
  if( rc!=SQLITE_OK ) return rc;

  /* Merge AND terms. */
  /* TODO(shess) I think we can early-exit if( i>nNot && left.nData==0 ). */
  aTerm = pQuery->pTerms;
  for(i = 0; i<pQuery->nTerms; i=iNext){
    if( aTerm[i].isNot ){
      /* Handle all NOT terms in a separate pass */
      nNot++;
      iNext = i + aTerm[i].nPhrase+1;
      continue;
    }
    iNext = i + aTerm[i].nPhrase + 1;
    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right);
    if( rc ){
      if( i!=nNot ) dataBufferDestroy(&left);
      queryClear(pQuery);
      return rc;
    }
    while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
      rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or);
      iNext += aTerm[iNext].nPhrase + 1;
      if( rc ){
        if( i!=nNot ) dataBufferDestroy(&left);
        dataBufferDestroy(&right);
        queryClear(pQuery);
        return rc;
      }
      dataBufferInit(&new, 0);
      docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new);
      dataBufferDestroy(&right);
      dataBufferDestroy(&or);
      right = new;
    }
    if( i==nNot ){           /* first term processed. */
      left = right;
    }else{
      dataBufferInit(&new, 0);
      docListAndMerge(left.pData, left.nData, right.pData, right.nData, &new);
      dataBufferDestroy(&right);
      dataBufferDestroy(&left);
      left = new;
    }
  }

  if( nNot==pQuery->nTerms ){
    /* We do not yet know how to handle a query of only NOT terms */
    return SQLITE_ERROR;
  }

  /* Do the EXCEPT terms */
  for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){
    if( !aTerm[i].isNot ) continue;
    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right);
    if( rc ){
      queryClear(pQuery);
      dataBufferDestroy(&left);
      return rc;
    }
    dataBufferInit(&new, 0);
    docListExceptMerge(left.pData, left.nData, right.pData, right.nData, &new);
    dataBufferDestroy(&right);
    dataBufferDestroy(&left);
    left = new;
  }

  *pResult = left;
  return rc;
}

/*
** This is the xFilter interface for the virtual table.  See
** the virtual table xFilter method documentation for additional
** information.
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221

3222
3223
3224


3225
3226
3227
3228
3229
3230
3231
      rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
      if( rc!=SQLITE_OK ) goto out;
      break;

    default:   /* full-text search */
    {
      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
      DocList *pResult;
      assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
      assert( argc==1 );
      queryClear(&c->q);

      rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q);
      if( rc!=SQLITE_OK ) goto out;
      readerInit(&c->result, pResult);


      break;
    }
  }

  rc = fulltextNext(pCursor);

out:







<



>
|
|
|
>
>







3394
3395
3396
3397
3398
3399
3400

3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
      rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
      if( rc!=SQLITE_OK ) goto out;
      break;

    default:   /* full-text search */
    {
      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);

      assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
      assert( argc==1 );
      queryClear(&c->q);
      dataBufferInit(&c->result, 0);
      rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q);
      if( rc!=SQLITE_OK ) return rc;
      if( c->result.nData!=0 ){
        dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData);
      }
      break;
    }
  }

  rc = fulltextNext(pCursor);

out:
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
  if( rc!=SQLITE_OK ) return rc;

  pCursor->pTokenizer = pTokenizer;
  while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
                                               &pToken, &nTokenBytes,
                                               &iStartOffset, &iEndOffset,
                                               &iPosition) ){
    DocList *p;

    /* Positions can't be negative; we use -1 as a terminator internally. */
    if( iPosition<0 ){
      pTokenizer->pModule->xClose(pCursor);
      return SQLITE_ERROR;
    }

    p = fts2HashFind(terms, pToken, nTokenBytes);
    if( p==NULL ){
      p = docListNew(DL_DEFAULT);
      docListAddDocid(p, iDocid);
      fts2HashInsert(terms, pToken, nTokenBytes, p);
    }
    if( iColumn>=0 ){
      docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
    }
  }

  /* TODO(shess) Check return?  Should this be able to cause errors at
  ** this point?  Actually, same question about sqlite3_finalize(),
  ** though one could argue that failure there means that the data is
  ** not durable.  *ponder*







|









|
<



|







3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493

3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
  if( rc!=SQLITE_OK ) return rc;

  pCursor->pTokenizer = pTokenizer;
  while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
                                               &pToken, &nTokenBytes,
                                               &iStartOffset, &iEndOffset,
                                               &iPosition) ){
    PLWriter *p;

    /* Positions can't be negative; we use -1 as a terminator internally. */
    if( iPosition<0 ){
      pTokenizer->pModule->xClose(pCursor);
      return SQLITE_ERROR;
    }

    p = fts2HashFind(terms, pToken, nTokenBytes);
    if( p==NULL ){
      p = plwNew(iDocid, DL_DEFAULT);

      fts2HashInsert(terms, pToken, nTokenBytes, p);
    }
    if( iColumn>=0 ){
      plwAdd(p, iColumn, iPosition, iStartOffset, iEndOffset);
    }
  }

  /* TODO(shess) Check return?  Should this be able to cause errors at
  ** this point?  Actually, same question about sqlite3_finalize(),
  ** though one could argue that failure there means that the data is
  ** not durable.  *ponder*
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436

3437
3438
3439
3440

3441
3442
3443
3444
3445
3446
3447
3448
# error ROOT_MAX must have enough space for a header.
#endif

/* InteriorBlock stores a linked-list of interior blocks while a lower
** layer is being constructed.
*/
typedef struct InteriorBlock {
  char *pTerm;               /* Leftmost term in block's subtree. */
  int nTerm;

  char *pData;
  int nData;

  struct InteriorBlock *next;
} InteriorBlock;

static InteriorBlock *interiorBlockNew(int iHeight, sqlite_int64 iChildBlock,
                                       const char *pTerm, int nTerm){
  InteriorBlock *block = calloc(1, sizeof(InteriorBlock));
  char c[VARINT_MAX+VARINT_MAX];
  int n;


  data_dup(&block->pTerm, &block->nTerm, pTerm, nTerm);

  n = putVarint(c, iHeight);
  n += putVarint(c+n, iChildBlock);

  data_dup(&block->pData, &block->nData, c, n);

  return block;
}

typedef struct InteriorWriter {
  int iHeight;                   /* from 0 at leaves. */
  InteriorBlock *first, *last;







|
<
|
<
<
<









>
|



>
|







3599
3600
3601
3602
3603
3604
3605
3606

3607



3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
# error ROOT_MAX must have enough space for a header.
#endif

/* InteriorBlock stores a linked-list of interior blocks while a lower
** layer is being constructed.
*/
typedef struct InteriorBlock {
  DataBuffer term;           /* Leftmost term in block's subtree. */

  DataBuffer data;           /* Accumulated data for the block. */



  struct InteriorBlock *next;
} InteriorBlock;

static InteriorBlock *interiorBlockNew(int iHeight, sqlite_int64 iChildBlock,
                                       const char *pTerm, int nTerm){
  InteriorBlock *block = calloc(1, sizeof(InteriorBlock));
  char c[VARINT_MAX+VARINT_MAX];
  int n;

  dataBufferInit(&block->term, 0);
  dataBufferReplace(&block->term, pTerm, nTerm);

  n = putVarint(c, iHeight);
  n += putVarint(c+n, iChildBlock);
  dataBufferInit(&block->data, INTERIOR_MAX);
  dataBufferReplace(&block->data, c, n);

  return block;
}

typedef struct InteriorWriter {
  int iHeight;                   /* from 0 at leaves. */
  InteriorBlock *first, *last;
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
  int n = putVarint(c, nTerm);

#ifndef NDEBUG
  pWriter->iLastChildBlock++;
#endif
  assert( pWriter->iLastChildBlock==iChildBlock );

  if( pWriter->last->nData+n+nTerm>INTERIOR_MAX ){
    /* Overflow to a new block. */
    pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock,
                                           pTerm, nTerm);
    pWriter->last = pWriter->last->next;
  }else{
    InteriorBlock *last = pWriter->last;
    data_append2(&last->pData, &last->nData, c, n, pTerm, nTerm);
  }
}

/* Free the space used by pWriter, including the linked-list of
** InteriorBlocks.
*/
static int interiorWriterDestroy(InteriorWriter *pWriter){
  InteriorBlock *block = pWriter->first;

  while( block!=NULL ){
    InteriorBlock *b = block;
    block = block->next;
    free(b->pData);
    free(b->pTerm);
    free(b);
  }
#ifndef NDEBUG
  memset(pWriter, 0x55, sizeof(pWriter));
#endif
  return SQLITE_OK;
}







|





<
|












|
|







3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676

3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
  int n = putVarint(c, nTerm);

#ifndef NDEBUG
  pWriter->iLastChildBlock++;
#endif
  assert( pWriter->iLastChildBlock==iChildBlock );

  if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX ){
    /* Overflow to a new block. */
    pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock,
                                           pTerm, nTerm);
    pWriter->last = pWriter->last->next;
  }else{

    dataBufferAppend2(&pWriter->last->data, c, n, pTerm, nTerm);
  }
}

/* Free the space used by pWriter, including the linked-list of
** InteriorBlocks.
*/
static int interiorWriterDestroy(InteriorWriter *pWriter){
  InteriorBlock *block = pWriter->first;

  while( block!=NULL ){
    InteriorBlock *b = block;
    block = block->next;
    dataBufferDestroy(&b->term);
    dataBufferDestroy(&b->data);
    free(b);
  }
#ifndef NDEBUG
  memset(pWriter, 0x55, sizeof(pWriter));
#endif
  return SQLITE_OK;
}
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
                                  char **ppRootInfo, int *pnRootInfo,
                                  sqlite_int64 *piEndBlockid){
  InteriorBlock *block = pWriter->first;
  sqlite_int64 iBlockid = 0;
  int rc;

  /* If we can fit the segment inline */
  if( block==pWriter->last && block->nData<ROOT_MAX ){
    *ppRootInfo = block->pData;
    *pnRootInfo = block->nData;
    return SQLITE_OK;
  }

  /* Flush the first block to %_segments, and create a new level of
  ** interior node.
  */
  rc = block_insert(v, block->pData, block->nData, &iBlockid);
  if( rc!=SQLITE_OK ) return rc;
  *piEndBlockid = iBlockid;

  pWriter->parentWriter = malloc(sizeof(*pWriter->parentWriter));
  interiorWriterInit(pWriter->iHeight+1,
                     block->pTerm, block->nTerm,
                     iBlockid, pWriter->parentWriter);

  /* Flush additional blocks and append to the higher interior
  ** node.
  */
  for(block=block->next; block!=NULL; block=block->next){
    rc = block_insert(v, block->pData, block->nData, &iBlockid);
    if( rc!=SQLITE_OK ) return rc;
    *piEndBlockid = iBlockid;

    interiorWriterAppend(pWriter->parentWriter,
                         block->pTerm, block->nTerm, iBlockid);
  }

  /* Parent node gets the chance to be the root. */
  return interiorWriterRootInfo(v, pWriter->parentWriter,
                                ppRootInfo, pnRootInfo, piEndBlockid);
}








|
|
|






|





|






|




|







3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
                                  char **ppRootInfo, int *pnRootInfo,
                                  sqlite_int64 *piEndBlockid){
  InteriorBlock *block = pWriter->first;
  sqlite_int64 iBlockid = 0;
  int rc;

  /* If we can fit the segment inline */
  if( block==pWriter->last && block->data.nData<ROOT_MAX ){
    *ppRootInfo = block->data.pData;
    *pnRootInfo = block->data.nData;
    return SQLITE_OK;
  }

  /* Flush the first block to %_segments, and create a new level of
  ** interior node.
  */
  rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid);
  if( rc!=SQLITE_OK ) return rc;
  *piEndBlockid = iBlockid;

  pWriter->parentWriter = malloc(sizeof(*pWriter->parentWriter));
  interiorWriterInit(pWriter->iHeight+1,
                     block->term.pData, block->term.nData,
                     iBlockid, pWriter->parentWriter);

  /* Flush additional blocks and append to the higher interior
  ** node.
  */
  for(block=block->next; block!=NULL; block=block->next){
    rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid);
    if( rc!=SQLITE_OK ) return rc;
    *piEndBlockid = iBlockid;

    interiorWriterAppend(pWriter->parentWriter,
                         block->term.pData, block->term.nData, iBlockid);
  }

  /* Parent node gets the chance to be the root. */
  return interiorWriterRootInfo(v, pWriter->parentWriter,
                                ppRootInfo, pnRootInfo, piEndBlockid);
}

3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681



3682
3683
3684
3685


3686




3687




3688


3689





































3690
3691
3692
3693
3694
3695

3696
3697
3698
3699
3700


3701



3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730






3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765

typedef struct LeafWriter {
  int iLevel;
  int idx;
  sqlite_int64 iStartBlockid;     /* needed to create the root info */
  sqlite_int64 iEndBlockid;       /* when we're done writing. */

  char *pTerm;                    /* previous encoded term */
  int nTerm;

  char *pData;                    /* encoding buffer */
  int nData;

  InteriorWriter parentWriter;    /* if we overflow */
  int has_parent;
} LeafWriter;

static void leafWriterInit(int iLevel, int idx, LeafWriter *pWriter){



  memset(pWriter, 0, sizeof(*pWriter));
  pWriter->iLevel = iLevel;
  pWriter->idx = idx;



  /* Start out with a reasonably sized block, though it can grow. */




  pWriter->pData = malloc(LEAF_MAX);




  pWriter->nData = putVarint(pWriter->pData, 0);


}






































/* Flush the current leaf node to %_segments, and adding the resulting
** blockid and the starting term to the interior node which will
** contain it.
*/
static int leafWriterInternalFlush(fulltext_vtab *v, LeafWriter *pWriter){

  sqlite_int64 iBlockid = 0;
  const char *pStartingTerm;
  int nStartingTerm, rc, n;

  /* Must have the leading varint(0) flag, plus at least some data. */


  assert( pWriter->nData>2 );




  rc = block_insert(v, pWriter->pData, pWriter->nData, &iBlockid);
  if( rc!=SQLITE_OK ) return rc;
  assert( iBlockid!=0 );

  /* Reconstruct the first term in the leaf for purposes of building
  ** the interior node.
  */
  n = getVarint32(pWriter->pData+1, &nStartingTerm);
  pStartingTerm = pWriter->pData+1+n;
  assert( pWriter->nData>1+n+nStartingTerm );

  if( pWriter->has_parent ){
    interiorWriterAppend(&pWriter->parentWriter,
                         pStartingTerm, nStartingTerm, iBlockid);
  }else{
    interiorWriterInit(1, pStartingTerm, nStartingTerm, iBlockid,
                       &pWriter->parentWriter);
    pWriter->has_parent = 1;
  }

  /* Track the span of this segment's leaf nodes. */
  if( pWriter->iEndBlockid==0 ){
    pWriter->iEndBlockid = pWriter->iStartBlockid = iBlockid;
  }else{
    pWriter->iEndBlockid++;
    assert( iBlockid==pWriter->iEndBlockid );
  }







  /* Re-initialize the output buffer. */
  pWriter->nData = putVarint(pWriter->pData, 0);
  pWriter->nTerm = 0;

  return SQLITE_OK;
}

/* Fetch the root info for the segment.  If the entire leaf fits
** within ROOT_MAX, then it will be returned directly, otherwise it
** will be flushed and the root info will be returned from the
** interior node.  *piEndBlockid is set to the blockid of the last
** interior or leaf node written to disk (0 if none are written at
** all).
*/
static int leafWriterRootInfo(fulltext_vtab *v, LeafWriter *pWriter,
                              char **ppRootInfo, int *pnRootInfo,
                              sqlite_int64 *piEndBlockid){
  /* we can fit the segment entirely inline */
  if( !pWriter->has_parent && pWriter->nData<ROOT_MAX ){
    *ppRootInfo = pWriter->pData;
    *pnRootInfo = pWriter->nData;
    *piEndBlockid = 0;
    return SQLITE_OK;
  }

  /* Flush remaining leaf data. */
  if( pWriter->nData>1 ){
    int rc = leafWriterInternalFlush(v, pWriter);
    if( rc!=SQLITE_OK ) return rc;
  }

  /* We must have flushed a leaf at some point. */
  assert( pWriter->has_parent );

  /* Tenatively set the end leaf blockid as the end blockid.  If the







|
<
<
|
<






>
>
>




>
>

>
>
>
>
|
>
>
>
>
|
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|
>




|
>
>
|
>
>
>

|






|
|
|


















>
>
>
>
>
>

|
|















|
|
|





|
|







3845
3846
3847
3848
3849
3850
3851
3852


3853

3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007

typedef struct LeafWriter {
  int iLevel;
  int idx;
  sqlite_int64 iStartBlockid;     /* needed to create the root info */
  sqlite_int64 iEndBlockid;       /* when we're done writing. */

  DataBuffer term;                /* previous encoded term */


  DataBuffer data;                /* encoding buffer */


  InteriorWriter parentWriter;    /* if we overflow */
  int has_parent;
} LeafWriter;

static void leafWriterInit(int iLevel, int idx, LeafWriter *pWriter){
  char c[VARINT_MAX];
  int n;

  memset(pWriter, 0, sizeof(*pWriter));
  pWriter->iLevel = iLevel;
  pWriter->idx = idx;

  dataBufferInit(&pWriter->term, 32);

  /* Start out with a reasonably sized block, though it can grow. */
  dataBufferInit(&pWriter->data, LEAF_MAX);
  n = putVarint(c, 0);
  dataBufferReplace(&pWriter->data, c, n);
}

#ifndef NDEBUG
/* Verify that the data is readable as a leaf node. */
static int leafNodeValidate(const char *pData, int nData){
  int n, iDummy;

  assert( pData!=0 );
  assert( nData!=0 );

  /* Must lead with a varint(0) */
  n = getVarint32(pData, &iDummy);
  assert( iDummy==0 );
  if( nData==n ) return 1;
  pData += n;
  nData -= n;

  /* Leading term length and data must fit in buffer. */
  n = getVarint32(pData, &iDummy);
  assert( n+iDummy<nData );
  pData += n+iDummy;
  nData -= n+iDummy;

  /* Leading term's doclist length and data must fit. */
  n = getVarint32(pData, &iDummy);
  assert( n+iDummy<=nData );
  assert( docListValidate(DL_DEFAULT, pData+n, iDummy, NULL) );
  pData += n+iDummy;
  nData -= n+iDummy;

  /* Verify that trailing terms and doclists also are readable. */
  while( nData!=0 ){
    n = getVarint32(pData, &iDummy);
    n += getVarint32(pData+n, &iDummy);
    assert( n+iDummy<nData );
    pData += n+iDummy;
    nData -= n+iDummy;

    n = getVarint32(pData, &iDummy);
    assert( n+iDummy<=nData );
    assert( docListValidate(DL_DEFAULT, pData+n, iDummy, NULL) );
    pData += n+iDummy;
    nData -= n+iDummy;
  }
  return 1;
}
#endif

/* Flush the current leaf node to %_segments, and adding the resulting
** blockid and the starting term to the interior node which will
** contain it.
*/
static int leafWriterInternalFlush(fulltext_vtab *v, LeafWriter *pWriter,
                                   int iData, int nData){
  sqlite_int64 iBlockid = 0;
  const char *pStartingTerm;
  int nStartingTerm, rc, n;

  /* Must have the leading varint(0) flag, plus at least some
  ** valid-looking data.
  */
  assert( nData>2 );
  assert( iData>=0 );
  assert( iData+nData<=pWriter->data.nData );
  assert( leafNodeValidate(pWriter->data.pData+iData, nData) );

  rc = block_insert(v, pWriter->data.pData+iData, nData, &iBlockid);
  if( rc!=SQLITE_OK ) return rc;
  assert( iBlockid!=0 );

  /* Reconstruct the first term in the leaf for purposes of building
  ** the interior node.
  */
  n = getVarint32(pWriter->data.pData+iData+1, &nStartingTerm);
  pStartingTerm = pWriter->data.pData+iData+1+n;
  assert( pWriter->data.nData>iData+1+n+nStartingTerm );

  if( pWriter->has_parent ){
    interiorWriterAppend(&pWriter->parentWriter,
                         pStartingTerm, nStartingTerm, iBlockid);
  }else{
    interiorWriterInit(1, pStartingTerm, nStartingTerm, iBlockid,
                       &pWriter->parentWriter);
    pWriter->has_parent = 1;
  }

  /* Track the span of this segment's leaf nodes. */
  if( pWriter->iEndBlockid==0 ){
    pWriter->iEndBlockid = pWriter->iStartBlockid = iBlockid;
  }else{
    pWriter->iEndBlockid++;
    assert( iBlockid==pWriter->iEndBlockid );
  }

  return SQLITE_OK;
}
static int leafWriterFlush(fulltext_vtab *v, LeafWriter *pWriter){
  int rc = leafWriterInternalFlush(v, pWriter, 0, pWriter->data.nData);
  if( rc!=SQLITE_OK ) return rc;

  /* Re-initialize the output buffer. */
  pWriter->data.nData = putVarint(pWriter->data.pData, 0);
  dataBufferReset(&pWriter->term);

  return SQLITE_OK;
}

/* Fetch the root info for the segment.  If the entire leaf fits
** within ROOT_MAX, then it will be returned directly, otherwise it
** will be flushed and the root info will be returned from the
** interior node.  *piEndBlockid is set to the blockid of the last
** interior or leaf node written to disk (0 if none are written at
** all).
*/
static int leafWriterRootInfo(fulltext_vtab *v, LeafWriter *pWriter,
                              char **ppRootInfo, int *pnRootInfo,
                              sqlite_int64 *piEndBlockid){
  /* we can fit the segment entirely inline */
  if( !pWriter->has_parent && pWriter->data.nData<ROOT_MAX ){
    *ppRootInfo = pWriter->data.pData;
    *pnRootInfo = pWriter->data.nData;
    *piEndBlockid = 0;
    return SQLITE_OK;
  }

  /* Flush remaining leaf data. */
  if( pWriter->data.nData>1 ){
    int rc = leafWriterFlush(v, pWriter);
    if( rc!=SQLITE_OK ) return rc;
  }

  /* We must have flushed a leaf at some point. */
  assert( pWriter->has_parent );

  /* Tenatively set the end leaf blockid as the end blockid.  If the
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832

3833
3834
3835
3836
3837
3838
3839
3840
3841

3842


3843











3844






3845
3846

3847
3848
3849
3850
3851


























































3852
3853
3854




3855









3856









3857


3858
3859



























































3860

3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920

3921
3922
3923
3924
3925
3926
3927
3928
                                ppRootInfo, pnRootInfo, piEndBlockid);
}

/* Collect the rootInfo data and store it into the segment directory.
** This has the effect of flushing the segment's leaf data to
** %_segments, and also flushing any interior nodes to %_segments.
*/
static int leafWriterFlush(fulltext_vtab *v, LeafWriter *pWriter){
  sqlite_int64 iEndBlockid;
  char *pRootInfo;
  int rc, nRootInfo;

  rc = leafWriterRootInfo(v, pWriter, &pRootInfo, &nRootInfo, &iEndBlockid);
  if( rc!=SQLITE_OK ) return rc;

  /* Don't bother storing an entirely empty segment. */
  if( iEndBlockid==0 && nRootInfo==1 ) return SQLITE_OK;

  return segdir_set(v, pWriter->iLevel, pWriter->idx,
                    pWriter->iStartBlockid, pWriter->iEndBlockid,
                    iEndBlockid, pRootInfo, nRootInfo);
}

static void leafWriterDestroy(LeafWriter *pWriter){
  if( pWriter->has_parent ) interiorWriterDestroy(&pWriter->parentWriter);
  free(pWriter->pTerm);
  free(pWriter->pData);
}

/* Push pTerm[nTerm] along with the doclist data to the leaf layer of
** %_segments.
*/
static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter,
                          const char *pTerm, int nTerm, DocList *doclist){
  char c[VARINT_MAX+VARINT_MAX];
  int rc, n;

  /* Flush existing data if this item won't fit well. */
  if( pWriter->nData>1 &&
      (doclist->nData+nTerm>STANDALONE_MIN ||
       pWriter->nData+doclist->nData+nTerm>LEAF_MAX) ){
    rc = leafWriterInternalFlush(v, pWriter);
    if( rc!=SQLITE_OK ) return rc;
  }

  if( pWriter->nTerm==0 ){
    /* Encode the entire leading term as:
    **  varint(nTerm)
    **  char pTerm[nTerm]
    */
    n = putVarint(c, nTerm);
    assert( pWriter->nData==1 );
    data_append2(&pWriter->pData, &pWriter->nData,
                 c, n, pTerm, nTerm);
  }else{
    /* Delta-encode the term as:
    **  varint(nPrefix)
    **  varint(nSuffix)
    **  char pTermSuffix[nSuffix]
    */

    int nPrefix = 0;

    while( nPrefix<nTerm && nPrefix<pWriter->nTerm &&
           pTerm[nPrefix]==pWriter->pTerm[nPrefix] ){
      nPrefix++;
    }

    n = putVarint(c, nPrefix);
    n += putVarint(c+n, nTerm-nPrefix);




    data_append2(&pWriter->pData, &pWriter->nData,











                 c, n, pTerm+nPrefix, nTerm-nPrefix);






  }
  data_replace(&pWriter->pTerm, &pWriter->nTerm, pTerm, nTerm);


  /* Encode the doclist as:
  **  varint(nDoclist)
  **  char pDoclist[nDoclist]
  */


























































  n = putVarint(c, doclist->nData);
  data_append2(&pWriter->pData, &pWriter->nData,
               c, n, doclist->pData, doclist->nData);














  /* Flush standalone blocks right out */









  if( doclist->nData+nTerm>STANDALONE_MIN ){


    rc = leafWriterInternalFlush(v, pWriter);
    if( rc!=SQLITE_OK ) return rc;



























































  }


  return SQLITE_OK;
}


/****************************************************************/
/* LeafReader is used to iterate over an individual leaf node. */
typedef struct LeafReader {
  char *pTerm;              /* copy of current term. */
  int nTerm;

  const char *pData;        /* data for current term. */
  int nData;
} LeafReader;

static void leafReaderDestroy(LeafReader *pReader){
  free(pReader->pTerm);
#ifndef NDEBUG
  memset(pReader, 0x55, sizeof(pReader));
#endif
}

static int leafReaderAtEnd(LeafReader *pReader){
  return pReader->nData<=0;
}

/* Access the current term. */
static int leafReaderTermBytes(LeafReader *pReader){
  return pReader->nTerm;
}
static const char *leafReaderTerm(LeafReader *pReader){
  assert( pReader->nTerm>0 );
  return pReader->pTerm;
}

/* Access the doclist data for the current term. */
static int leafReaderDataBytes(LeafReader *pReader){
  int nData;
  assert( pReader->nTerm>0 );
  getVarint32(pReader->pData, &nData);
  return nData;
}
static const char *leafReaderData(LeafReader *pReader){
  int n, nData;
  assert( pReader->nTerm>0 );
  n = getVarint32(pReader->pData, &nData);
  return pReader->pData+n;
}

static void leafReaderInit(const char *pData, int nData,
                           LeafReader *pReader){
  int nTerm, n;

  assert( nData>0 );
  assert( pData[0]=='\0' );

  memset(pReader, '\0', sizeof(pReader));

  /* Read the first term, skipping the header byte. */
  n = getVarint32(pData+1, &nTerm);

  data_dup(&pReader->pTerm, &pReader->nTerm, pData+1+n, nTerm);

  /* Position after the first term. */
  assert( 1+n+nTerm<nData );
  pReader->pData = pData+1+n+nTerm;
  pReader->nData = nData-1-n-nTerm;
}








|

















|
|


|
<
<
|
|
<
<
<
<
|
<
<
<
<
<
<
<




<
|
<
|






>
|

|
|





>
|
>
>
|
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>

|
>





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
>
>
>
>

>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
|
>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>








|
<






|











|


|
|





|





|















>
|







4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044


4045
4046




4047







4048
4049
4050
4051

4052

4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260

4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
                                ppRootInfo, pnRootInfo, piEndBlockid);
}

/* Collect the rootInfo data and store it into the segment directory.
** This has the effect of flushing the segment's leaf data to
** %_segments, and also flushing any interior nodes to %_segments.
*/
static int leafWriterFinalize(fulltext_vtab *v, LeafWriter *pWriter){
  sqlite_int64 iEndBlockid;
  char *pRootInfo;
  int rc, nRootInfo;

  rc = leafWriterRootInfo(v, pWriter, &pRootInfo, &nRootInfo, &iEndBlockid);
  if( rc!=SQLITE_OK ) return rc;

  /* Don't bother storing an entirely empty segment. */
  if( iEndBlockid==0 && nRootInfo==1 ) return SQLITE_OK;

  return segdir_set(v, pWriter->iLevel, pWriter->idx,
                    pWriter->iStartBlockid, pWriter->iEndBlockid,
                    iEndBlockid, pRootInfo, nRootInfo);
}

static void leafWriterDestroy(LeafWriter *pWriter){
  if( pWriter->has_parent ) interiorWriterDestroy(&pWriter->parentWriter);
  dataBufferDestroy(&pWriter->term);
  dataBufferDestroy(&pWriter->data);
}

/* Encode a term into the leafWriter, delta-encoding as appropriate. */


static void leafWriterEncodeTerm(LeafWriter *pWriter,
                                 const char *pTerm, int nTerm){




  if( pWriter->term.nData==0 ){







    /* Encode the entire leading term as:
    **  varint(nTerm)
    **  char pTerm[nTerm]
    */

    assert( pWriter->data.nData==1 );

    dataBufferAppendLenData(&pWriter->data, pTerm, nTerm);
  }else{
    /* Delta-encode the term as:
    **  varint(nPrefix)
    **  varint(nSuffix)
    **  char pTermSuffix[nSuffix]
    */
    char c[VARINT_MAX+VARINT_MAX];
    int n, nPrefix = 0;

    while( nPrefix<nTerm && nPrefix<pWriter->term.nData &&
           pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
      nPrefix++;
    }

    n = putVarint(c, nPrefix);
    n += putVarint(c+n, nTerm-nPrefix);
    dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix);
  }
  dataBufferReplace(&pWriter->term, pTerm, nTerm);
}

/* Push pTerm[nTerm] along with the doclist data to the leaf layer of
** %_segments.
*/
/* TODO(shess) Revise writeZeroSegment() so that doclists are
** constructed directly in pWriter->data.  That implies refactoring
** leafWriterStep() and leafWriterStepMerge() to share more code.
*/
static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter,
                          const char *pTerm, int nTerm,
                          const char *pData, int nData){
  int rc;

  /* Flush existing data if this item won't fit well. */
  if( pWriter->data.nData>1 &&
      (nData+nTerm>STANDALONE_MIN ||
       pWriter->data.nData+nData+nTerm>LEAF_MAX) ){
    rc = leafWriterFlush(v, pWriter);
    if( rc!=SQLITE_OK ) return rc;
  }

  leafWriterEncodeTerm(pWriter, pTerm, nTerm);

  /* Encode the doclist as:
  **  varint(nDoclist)
  **  char pDoclist[nDoclist]
  */
  dataBufferAppendLenData(&pWriter->data, pData, nData);

  /* Flush standalone blocks right out */
  if( nData+nTerm>STANDALONE_MIN ){
    rc = leafWriterFlush(v, pWriter);
    if( rc!=SQLITE_OK ) return rc;
  }

  return SQLITE_OK;
}

/* Used to avoid a memmove when a large amount of doclist data is in
** the buffer.  This constructs a node and term header before
** iDoclistData and flushes the resulting complete node using
** leafWriterInternalFlush().
*/
static int leafWriterInlineFlush(fulltext_vtab *v, LeafWriter *pWriter,
                                 const char *pTerm, int nTerm,
                                 int iDoclistData){
  char c[VARINT_MAX+VARINT_MAX];
  int iData, n = putVarint(c, 0);
  n += putVarint(c+n, nTerm);

  /* There should always be room for the header.  Even if pTerm shared
  ** a substantial prefix with the previous term, the entire prefix
  ** could be constructed from earlier data in the doclist, so there
  ** should be room.
  */
  assert( iDoclistData>=n+nTerm );

  iData = iDoclistData-(n+nTerm);
  memcpy(pWriter->data.pData+iData, c, n);
  memcpy(pWriter->data.pData+iData+n, pTerm, nTerm);

  return leafWriterInternalFlush(v, pWriter, iData, pWriter->data.nData-iData);
}

/* Push pTerm[nTerm] along with the doclist data to the leaf layer of
** %_segments.
*/
static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter,
                               const char *pTerm, int nTerm,
                               DLReader *pReaders, int nReaders){
  char c[VARINT_MAX+VARINT_MAX];
  int iTermData = pWriter->data.nData, iDoclistData;
  int i, nData, n, nActualData, nActual, rc;

  assert( leafNodeValidate(pWriter->data.pData, pWriter->data.nData) );
  leafWriterEncodeTerm(pWriter, pTerm, nTerm);

  iDoclistData = pWriter->data.nData;

  /* Estimate the length of the merged doclist so we can leave space
  ** to encode it.
  */
  for(i=0, nData=0; i<nReaders; i++){
    nData += dlrAllDataBytes(&pReaders[i]);
  }
  n = putVarint(c, nData);
  dataBufferAppend(&pWriter->data, c, n);

  docListMerge(&pWriter->data, pReaders, nReaders);
  assert( docListValidate(DL_DEFAULT,
                          pWriter->data.pData+iDoclistData+n,
                          pWriter->data.nData-iDoclistData-n, NULL) );

  /* The actual amount of doclist data at this point could be smaller
  ** than the length we encoded.  Additionally, the space required to
  ** encode this length could be smaller.  For small doclists, this is
  ** not a big deal, we can just use memmove() to adjust things.
  */
  nActualData = pWriter->data.nData-(iDoclistData+n);
  nActual = putVarint(c, nActualData);
  assert( nActualData<=nData );
  assert( nActual<=n );

  /* If the new doclist is big enough for force a standalone leaf
  ** node, we can immediately flush it inline without doing the
  ** memmove().
  */
  /* TODO(shess) This test matches leafWriterStep(), which does this
  ** test before it knows the cost to varint-encode the term and
  ** doclist lengths.  At some point, change to
  ** pWriter->data.nData-iTermData>STANDALONE_MIN.
  */
  if( nTerm+nActualData>STANDALONE_MIN ){
    /* Push leaf node from before this term. */
    if( iTermData>1 ){
      rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
      if( rc!=SQLITE_OK ) return rc;
    }

    /* Fix the encoded doclist length. */
    iDoclistData += n - nActual;
    memcpy(pWriter->data.pData+iDoclistData, c, nActual);

    /* Push the standalone leaf node. */
    rc = leafWriterInlineFlush(v, pWriter, pTerm, nTerm, iDoclistData);
    if( rc!=SQLITE_OK ) return rc;

    /* Leave the node empty. */
    pWriter->data.nData = putVarint(pWriter->data.pData, 0);
    dataBufferReset(&pWriter->term);
    return rc;
  }

  /* At this point, we know that the doclist was small, so do the
  ** memmove if indicated.
  */
  if( nActual<n ){
    memmove(pWriter->data.pData+iDoclistData+nActual,
            pWriter->data.pData+iDoclistData+n,
            pWriter->data.nData-(iDoclistData+n));
    pWriter->data.nData -= n-nActual;
  }

  /* Replace written length with actual length. */
  memcpy(pWriter->data.pData+iDoclistData, c, nActual);

  /* If the node is too large, break things up. */
  /* TODO(shess) This test matches leafWriterStep(), which does this
  ** test before it knows the cost to varint-encode the term and
  ** doclist lengths.  At some point, change to
  ** pWriter->data.nData>LEAF_MAX.
  */
  if( iTermData+nTerm+nActualData>LEAF_MAX ){
    /* Flush out the leading data as a node */
    rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
    if( rc!=SQLITE_OK ) return rc;

    /* Rebuild header using the current term */
    n = putVarint(pWriter->data.pData, 0);
    n += putVarint(pWriter->data.pData+n, nTerm);
    memcpy(pWriter->data.pData+n, pTerm, nTerm);
    n += nTerm;

    /* There should always be room, because the previous encoding
    ** included all data necessary to construct the term.
    */
    assert( n<iDoclistData );
    /* So long as STANDALONE_MIN is half or less of LEAF_MAX, the
    ** following memcpy() is safe (as opposed to needing a memmove).
    */
    assert( 2*STANDALONE_MIN<=LEAF_MAX );
    assert( n+pWriter->data.nData-iDoclistData<iDoclistData );
    memcpy(pWriter->data.pData+n,
           pWriter->data.pData+iDoclistData,
           pWriter->data.nData-iDoclistData);
    pWriter->data.nData -= iDoclistData-n;
  }
  assert( leafNodeValidate(pWriter->data.pData, pWriter->data.nData) );

  return SQLITE_OK;
}


/****************************************************************/
/* LeafReader is used to iterate over an individual leaf node. */
typedef struct LeafReader {
  DataBuffer term;          /* copy of current term. */


  const char *pData;        /* data for current term. */
  int nData;
} LeafReader;

static void leafReaderDestroy(LeafReader *pReader){
  dataBufferDestroy(&pReader->term);
#ifndef NDEBUG
  memset(pReader, 0x55, sizeof(pReader));
#endif
}

static int leafReaderAtEnd(LeafReader *pReader){
  return pReader->nData<=0;
}

/* Access the current term. */
static int leafReaderTermBytes(LeafReader *pReader){
  return pReader->term.nData;
}
static const char *leafReaderTerm(LeafReader *pReader){
  assert( pReader->term.nData>0 );
  return pReader->term.pData;
}

/* Access the doclist data for the current term. */
static int leafReaderDataBytes(LeafReader *pReader){
  int nData;
  assert( pReader->term.nData>0 );
  getVarint32(pReader->pData, &nData);
  return nData;
}
static const char *leafReaderData(LeafReader *pReader){
  int n, nData;
  assert( pReader->term.nData>0 );
  n = getVarint32(pReader->pData, &nData);
  return pReader->pData+n;
}

static void leafReaderInit(const char *pData, int nData,
                           LeafReader *pReader){
  int nTerm, n;

  assert( nData>0 );
  assert( pData[0]=='\0' );

  memset(pReader, '\0', sizeof(pReader));

  /* Read the first term, skipping the header byte. */
  n = getVarint32(pData+1, &nTerm);
  dataBufferInit(&pReader->term, nTerm);
  dataBufferReplace(&pReader->term, pData+1+n, nTerm);

  /* Position after the first term. */
  assert( 1+n+nTerm<nData );
  pReader->pData = pData+1+n+nTerm;
  pReader->nData = nData-1-n-nTerm;
}

3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
  if( !leafReaderAtEnd(pReader) ){
    /* Construct the new term using a prefix from the old term plus a
    ** suffix from the leaf data.
    */
    n = getVarint32(pReader->pData, &nPrefix);
    n += getVarint32(pReader->pData+n, &nSuffix);
    assert( n+nSuffix<pReader->nData );
    pReader->nTerm = nPrefix;
    data_append(&pReader->pTerm, &pReader->nTerm, pReader->pData+n, nSuffix);

    pReader->pData += n+nSuffix;
    pReader->nData -= n+nSuffix;
  }
}

/* strcmp-style comparison of pReader's current term against pTerm. */
static int leafReaderTermCmp(LeafReader *pReader,
                             const char *pTerm, int nTerm){
  int c, n = pReader->nTerm<nTerm ? pReader->nTerm : nTerm;
  if( n==0 ){
    if( pReader->nTerm>0 ) return -1;
    if(nTerm>0 ) return 1;
    return 0;
  }

  c = memcmp(pReader->pTerm, pTerm, n);
  if( c!=0 ) return c;
  return pReader->nTerm - nTerm;
}


/****************************************************************/
/* LeavesReader wraps LeafReader to allow iterating over the entire
** leaf layer of the tree.
*/
typedef struct LeavesReader {
  int idx;                  /* Index within the segment. */

  sqlite3_stmt *pStmt;      /* Statement we're streaming leaves from. */
  int eof;                  /* we've seen SQLITE_DONE from pStmt. */

  LeafReader leafReader;    /* reader for the current leaf. */
  char *pRootData;          /* root data for inline. */
} LeavesReader;

/* Access the current term. */
static int leavesReaderTermBytes(LeavesReader *pReader){
  assert( !pReader->eof );
  return leafReaderTermBytes(&pReader->leafReader);
}







|
|









|

|




|

|














|







4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
  if( !leafReaderAtEnd(pReader) ){
    /* Construct the new term using a prefix from the old term plus a
    ** suffix from the leaf data.
    */
    n = getVarint32(pReader->pData, &nPrefix);
    n += getVarint32(pReader->pData+n, &nSuffix);
    assert( n+nSuffix<pReader->nData );
    pReader->term.nData = nPrefix;
    dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix);

    pReader->pData += n+nSuffix;
    pReader->nData -= n+nSuffix;
  }
}

/* strcmp-style comparison of pReader's current term against pTerm. */
static int leafReaderTermCmp(LeafReader *pReader,
                             const char *pTerm, int nTerm){
  int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm;
  if( n==0 ){
    if( pReader->term.nData>0 ) return -1;
    if(nTerm>0 ) return 1;
    return 0;
  }

  c = memcmp(pReader->term.pData, pTerm, n);
  if( c!=0 ) return c;
  return pReader->term.nData - nTerm;
}


/****************************************************************/
/* LeavesReader wraps LeafReader to allow iterating over the entire
** leaf layer of the tree.
*/
typedef struct LeavesReader {
  int idx;                  /* Index within the segment. */

  sqlite3_stmt *pStmt;      /* Statement we're streaming leaves from. */
  int eof;                  /* we've seen SQLITE_DONE from pStmt. */

  LeafReader leafReader;    /* reader for the current leaf. */
  DataBuffer rootData;      /* root data for inline. */
} LeavesReader;

/* Access the current term. */
static int leavesReaderTermBytes(LeavesReader *pReader){
  assert( !pReader->eof );
  return leafReaderTermBytes(&pReader->leafReader);
}
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029

4030
4031
4032
4033

4034
4035
4036
4037
4038
4039
4040
4041

static int leavesReaderAtEnd(LeavesReader *pReader){
  return pReader->eof;
}

static void leavesReaderDestroy(LeavesReader *pReader){
  leafReaderDestroy(&pReader->leafReader);
  if( pReader->pRootData!=0 ) free(pReader->pRootData);
#ifndef NDEBUG
  memset(pReader, 0x55, sizeof(pReader));
#endif
}

/* Initialize pReader with the given root data (if iStartBlockid==0
** the leaf data was entirely contained in the root), or from the
** stream of blocks between iStartBlockid and iEndBlockid, inclusive.
*/
static int leavesReaderInit(fulltext_vtab *v,
                            int idx,
                            sqlite_int64 iStartBlockid,
                            sqlite_int64 iEndBlockid,
                            const char *pRootData, int nRootData,
                            LeavesReader *pReader){
  memset(pReader, 0, sizeof(*pReader));
  pReader->idx = idx;


  if( iStartBlockid==0 ){
    /* Entire leaf level fit in root data. */
    int n;
    data_dup(&pReader->pRootData, &n, pRootData, nRootData);

    leafReaderInit(pReader->pRootData, nRootData, &pReader->leafReader);
  }else{
    sqlite3_stmt *s;
    int rc = sql_get_leaf_statement(v, idx, &s);
    if( rc!=SQLITE_OK ) return rc;

    rc = sqlite3_bind_int64(s, 1, iStartBlockid);
    if( rc!=SQLITE_OK ) return rc;







|


















>


<
|
>
|







4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423

4424
4425
4426
4427
4428
4429
4430
4431
4432
4433

static int leavesReaderAtEnd(LeavesReader *pReader){
  return pReader->eof;
}

static void leavesReaderDestroy(LeavesReader *pReader){
  leafReaderDestroy(&pReader->leafReader);
  dataBufferDestroy(&pReader->rootData);
#ifndef NDEBUG
  memset(pReader, 0x55, sizeof(pReader));
#endif
}

/* Initialize pReader with the given root data (if iStartBlockid==0
** the leaf data was entirely contained in the root), or from the
** stream of blocks between iStartBlockid and iEndBlockid, inclusive.
*/
static int leavesReaderInit(fulltext_vtab *v,
                            int idx,
                            sqlite_int64 iStartBlockid,
                            sqlite_int64 iEndBlockid,
                            const char *pRootData, int nRootData,
                            LeavesReader *pReader){
  memset(pReader, 0, sizeof(*pReader));
  pReader->idx = idx;

  dataBufferInit(&pReader->rootData, 0);
  if( iStartBlockid==0 ){
    /* Entire leaf level fit in root data. */

    dataBufferReplace(&pReader->rootData, pRootData, nRootData);
    leafReaderInit(pReader->rootData.pData, pReader->rootData.nData,
                   &pReader->leafReader);
  }else{
    sqlite3_stmt *s;
    int rc = sql_get_leaf_statement(v, idx, &s);
    if( rc!=SQLITE_OK ) return rc;

    rc = sqlite3_bind_int64(s, 1, iStartBlockid);
    if( rc!=SQLITE_OK ) return rc;
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
*/
static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){
  assert( !leavesReaderAtEnd(pReader) );
  leafReaderStep(&pReader->leafReader);

  if( leafReaderAtEnd(&pReader->leafReader) ){
    int rc;
    if( pReader->pRootData ){
      pReader->eof = 1;
      return SQLITE_OK;
    }
    rc = sql_step_leaf_statement(v, pReader->idx, &pReader->pStmt);
    if( rc!=SQLITE_ROW ){
      pReader->eof = 1;
      return rc==SQLITE_DONE ? SQLITE_OK : rc;







|







4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
*/
static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){
  assert( !leavesReaderAtEnd(pReader) );
  leafReaderStep(&pReader->leafReader);

  if( leafReaderAtEnd(&pReader->leafReader) ){
    int rc;
    if( pReader->rootData.pData ){
      pReader->eof = 1;
      return SQLITE_OK;
    }
    rc = sql_step_leaf_statement(v, pReader->idx, &pReader->pStmt);
    if( rc!=SQLITE_ROW ){
      pReader->eof = 1;
      return rc==SQLITE_DONE ? SQLITE_OK : rc;
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177

4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
  return SQLITE_OK;
}

/* Merge doclists from pReaders[nReaders] into a single doclist, which
** is written to pWriter.  Assumes pReaders is ordered oldest to
** newest.
*/
/* TODO(shess) I have a version of this that merges the doclists
** pairwise, and is thus much faster, but is also more intricate.  So
** I'll throw that in as a standalone change.  N-way merge would be
** even faster.
*/
static int leavesReadersMerge(fulltext_vtab *v,
                              LeavesReader *pReaders, int nReaders,
                              LeafWriter *pWriter){

  const char *pTerm = leavesReaderTerm(pReaders);
  int i, rc, nTerm = leavesReaderTermBytes(pReaders);
  DocList doclist;

  /* No need to merge, insert directly. */
  if( nReaders==1 ){
    docListStaticInit(&doclist, DL_DEFAULT,
                      leavesReaderData(pReaders),
                      leavesReaderDataBytes(pReaders));
  }else{
    docListInit(&doclist, DL_DEFAULT,
                leavesReaderData(pReaders),
                leavesReaderDataBytes(pReaders));

    for(i=1; i<nReaders; i++){
      DocList new, merged;
      docListStaticInit(&new, DL_DEFAULT,
                        leavesReaderData(pReaders+i),
                        leavesReaderDataBytes(pReaders+i));
      docListMerge(&merged, &doclist, &new);
      docListDestroy(&doclist);
      doclist = merged;
    }
  }

  /* Insert the new doclist */
  rc = leafWriterStep(v, pWriter, pTerm, nTerm, &doclist);
  if( nReaders>1 ) docListDestroy(&doclist);
  return rc;
}

/* Forward ref due to mutual recursion with segdirNextIndex(). */
static int segmentMerge(fulltext_vtab *v, int iLevel);

/* Put the next available index at iLevel into *pidx.  If iLevel
** already has MERGE_COUNT segments, they are merged to a higher







|
<
<
<
<



>

|
<

<
|
<
<
<
<
<
<
<

|
<
|
|
|
<
<
<
|
|
<
<
|
<
<







4555
4556
4557
4558
4559
4560
4561
4562




4563
4564
4565
4566
4567
4568

4569

4570







4571
4572

4573
4574
4575



4576
4577


4578


4579
4580
4581
4582
4583
4584
4585
  return SQLITE_OK;
}

/* Merge doclists from pReaders[nReaders] into a single doclist, which
** is written to pWriter.  Assumes pReaders is ordered oldest to
** newest.
*/
/* TODO(shess) Consider putting this inline in segmentMerge(). */




static int leavesReadersMerge(fulltext_vtab *v,
                              LeavesReader *pReaders, int nReaders,
                              LeafWriter *pWriter){
  DLReader dlReaders[MERGE_COUNT];
  const char *pTerm = leavesReaderTerm(pReaders);
  int i, nTerm = leavesReaderTermBytes(pReaders);



  assert( nReaders<=MERGE_COUNT );








  for(i=0; i<nReaders; i++){

    dlrInit(&dlReaders[i], DL_DEFAULT,
            leavesReaderData(pReaders+i),
            leavesReaderDataBytes(pReaders+i));



  }



  return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders);


}

/* Forward ref due to mutual recursion with segdirNextIndex(). */
static int segmentMerge(fulltext_vtab *v, int iLevel);

/* Put the next available index at iLevel into *pidx.  If iLevel
** already has MERGE_COUNT segments, they are merged to a higher
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316






4317
4318

4319
4320
4321

4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
    }
  }

  for(i=0; i<MERGE_COUNT; i++){
    leavesReaderDestroy(&lrs[i]);
  }

  rc = leafWriterFlush(v, &writer);
  leafWriterDestroy(&writer);
  if( rc!=SQLITE_OK ) return rc;

  /* Delete the merged segment data. */
  return segdir_delete(v, iLevel);

 err:
  for(i=0; i<MERGE_COUNT; i++){
    leavesReaderDestroy(&lrs[i]);
  }
  leafWriterDestroy(&writer);
  return rc;
}

/* Read pData[nData] as a leaf node, and if the doclist for
** pTerm[nTerm] is present, merge it over *out (any duplicate doclists
** read from pData will overwrite those in *out).
*/
static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData,
                           const char *pTerm, int nTerm, DocList *out){
  LeafReader reader;
  assert( nData>1 );
  assert( *pData=='\0' );

  leafReaderInit(pData, nData, &reader);
  while( !leafReaderAtEnd(&reader) ){
    int c = leafReaderTermCmp(&reader, pTerm, nTerm);
    if( c==0 ){
      DocList new, doclist;






      docListStaticInit(&new, DL_DEFAULT,
                        leafReaderData(&reader), leafReaderDataBytes(&reader));

      docListMerge(&doclist, out, &new);
      docListDestroy(out);
      *out = doclist;

    }
    if( c>=0 ) break;
    leafReaderStep(&reader);
  }
  leafReaderDestroy(&reader);
  return SQLITE_OK;
}

/* Traverse the tree represented by pData[nData] looking for
** pTerm[nTerm], merging its doclist over *out if found (any duplicate
** doclists read from the segment rooted at pData will overwrite those
** in *out).
*/
static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
                       const char *pTerm, int nTerm, DocList *out){
  int rc;
  sqlite3_stmt *s = NULL;

  assert( nData>1 );

  /* Process data as an interior node until we reach a leaf. */
  while( *pData!='\0' ){







|



















|








|
>
>
>
>
>
>
|
|
>
|
|
|
>














|







4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
    }
  }

  for(i=0; i<MERGE_COUNT; i++){
    leavesReaderDestroy(&lrs[i]);
  }

  rc = leafWriterFinalize(v, &writer);
  leafWriterDestroy(&writer);
  if( rc!=SQLITE_OK ) return rc;

  /* Delete the merged segment data. */
  return segdir_delete(v, iLevel);

 err:
  for(i=0; i<MERGE_COUNT; i++){
    leavesReaderDestroy(&lrs[i]);
  }
  leafWriterDestroy(&writer);
  return rc;
}

/* Read pData[nData] as a leaf node, and if the doclist for
** pTerm[nTerm] is present, merge it over *out (any duplicate doclists
** read from pData will overwrite those in *out).
*/
static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData,
                           const char *pTerm, int nTerm, DataBuffer *out){
  LeafReader reader;
  assert( nData>1 );
  assert( *pData=='\0' );

  leafReaderInit(pData, nData, &reader);
  while( !leafReaderAtEnd(&reader) ){
    int c = leafReaderTermCmp(&reader, pTerm, nTerm);
    if( c==0 ){
      if( out->nData==0 ){
        dataBufferReplace(out,
                          leafReaderData(&reader), leafReaderDataBytes(&reader));
      }else{
        DLReader readers[2];
        DataBuffer result;
        dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData);
        dlrInit(&readers[1], DL_DEFAULT,
                leafReaderData(&reader), leafReaderDataBytes(&reader));
        dataBufferInit(&result, out->nData+leafReaderDataBytes(&reader));
        docListMerge(&result, readers, 2);
        dataBufferDestroy(out);
        *out = result;
      }
    }
    if( c>=0 ) break;
    leafReaderStep(&reader);
  }
  leafReaderDestroy(&reader);
  return SQLITE_OK;
}

/* Traverse the tree represented by pData[nData] looking for
** pTerm[nTerm], merging its doclist over *out if found (any duplicate
** doclists read from the segment rooted at pData will overwrite those
** in *out).
*/
static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
                       const char *pTerm, int nTerm, DataBuffer *out){
  int rc;
  sqlite3_stmt *s = NULL;

  assert( nData>1 );

  /* Process data as an interior node until we reach a leaf. */
  while( *pData!='\0' ){
4390
4391
4392
4393
4394
4395
4396
4397

4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421

4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
  return SQLITE_OK;
}

/* Scan the database and merge together the posting lists for the term
** into *out.
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm, DocList *out){

  DocList doclist;
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  docListInit(&doclist, DL_DEFAULT, 0, 0);

  /* Traverse the segments from oldest to newest so that newer doclist
  ** elements for given docids overwrite older elements.
  */
  while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
    rc = loadSegment(v, sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),
                     pTerm, nTerm, &doclist);
    if( rc!=SQLITE_OK ) goto err;
  }
  if( rc==SQLITE_DONE ){
    *out = doclist;

    /* TODO(shess) The old term_select_all() code applied the column
    ** restrict as we merged segments, leading to smaller buffers.
    ** This is probably worthwhile to bring back, once the new storage
    ** system is checked in.
    */
    if( iColumn<v->nColumn ){   /* querying a single column */

      docListRestrictColumn(out, iColumn);
    }
    docListDiscardEmpty(out);
    return SQLITE_OK;
  }

 err:
  docListDestroy(&doclist);
  return rc;
}

/****************************************************************/
/* Used to hold hashtable data for sorting. */
typedef struct TermData {
  const char *pTerm;
  int nTerm;
  DocList *pDoclist;
} TermData;

/* Orders TermData elements in strcmp fashion ( <0 for less-than, 0
** for equal, >0 for greater-than).
*/
static int termDataCmp(const void *av, const void *bv){
  const TermData *a = (const TermData *)av;







|
>
|




|










|
<
|
|
|
|
|
|
>
|

<
|



|








|







4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795

4796
4797
4798
4799
4800
4801
4802
4803
4804

4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
  return SQLITE_OK;
}

/* Scan the database and merge together the posting lists for the term
** into *out.
*/
static int termSelect(fulltext_vtab *v, int iColumn,
                      const char *pTerm, int nTerm,
                      DocListType iType, DataBuffer *out){
  DataBuffer doclist;
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  dataBufferInit(&doclist, 0);

  /* Traverse the segments from oldest to newest so that newer doclist
  ** elements for given docids overwrite older elements.
  */
  while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
    rc = loadSegment(v, sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),
                     pTerm, nTerm, &doclist);
    if( rc!=SQLITE_OK ) goto err;
  }
  if( rc==SQLITE_DONE ){
    if( doclist.nData!=0 ){

      /* TODO(shess) The old term_select_all() code applied the column
      ** restrict as we merged segments, leading to smaller buffers.
      ** This is probably worthwhile to bring back, once the new storage
      ** system is checked in.
      */
      if( iColumn==v->nColumn) iColumn = -1;
      docListTrim(DL_DEFAULT, doclist.pData, doclist.nData,
                  iColumn, iType, out);
    }

    rc = SQLITE_OK;
  }

 err:
  dataBufferDestroy(&doclist);
  return rc;
}

/****************************************************************/
/* Used to hold hashtable data for sorting. */
typedef struct TermData {
  const char *pTerm;
  int nTerm;
  PLWriter *pWriter;
} TermData;

/* Orders TermData elements in strcmp fashion ( <0 for less-than, 0
** for equal, >0 for greater-than).
*/
static int termDataCmp(const void *av, const void *bv){
  const TermData *a = (const TermData *)av;
4454
4455
4456
4457
4458
4459
4460

4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481



4482

4483




4484
4485

4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
** LeafWriter.
*/
static int writeZeroSegment(fulltext_vtab *v, fts2Hash *pTerms){
  fts2HashElem *e;
  int idx, rc, i, n;
  TermData *pData;
  LeafWriter writer;


  /* Determine the next index at level 0, merging as necessary. */
  rc = segdirNextIndex(v, 0, &idx);
  if( rc!=SQLITE_OK ) return rc;

  n = fts2HashCount(pTerms);
  pData = malloc(n*sizeof(TermData));

  for(i = 0, e = fts2HashFirst(pTerms); e; i++, e = fts2HashNext(e)){
    assert( i<n );
    pData[i].pTerm = fts2HashKey(e);
    pData[i].nTerm = fts2HashKeysize(e);
    pData[i].pDoclist = fts2HashData(e);
  }
  assert( i==n );

  /* TODO(shess) Should we allow user-defined collation sequences,
  ** here?  I think we only need that once we support prefix searches.
  */
  if( n>1 ) qsort(pData, n, sizeof(*pData), termDataCmp);




  leafWriterInit(0, idx, &writer);

  for(i=0; i<n; i++){




    rc = leafWriterStep(v, &writer,
                        pData[i].pTerm, pData[i].nTerm, pData[i].pDoclist);

    if( rc!=SQLITE_OK ) goto err;
  }
  rc = leafWriterFlush(v, &writer);

 err:
  free(pData);
  leafWriterDestroy(&writer);
  return rc;
}








>












|








>
>
>

>

>
>
>
>

|
>


|







4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
** LeafWriter.
*/
static int writeZeroSegment(fulltext_vtab *v, fts2Hash *pTerms){
  fts2HashElem *e;
  int idx, rc, i, n;
  TermData *pData;
  LeafWriter writer;
  DataBuffer dl;

  /* Determine the next index at level 0, merging as necessary. */
  rc = segdirNextIndex(v, 0, &idx);
  if( rc!=SQLITE_OK ) return rc;

  n = fts2HashCount(pTerms);
  pData = malloc(n*sizeof(TermData));

  for(i = 0, e = fts2HashFirst(pTerms); e; i++, e = fts2HashNext(e)){
    assert( i<n );
    pData[i].pTerm = fts2HashKey(e);
    pData[i].nTerm = fts2HashKeysize(e);
    pData[i].pWriter = fts2HashData(e);
  }
  assert( i==n );

  /* TODO(shess) Should we allow user-defined collation sequences,
  ** here?  I think we only need that once we support prefix searches.
  */
  if( n>1 ) qsort(pData, n, sizeof(*pData), termDataCmp);

  /* TODO(shess) Refactor so that we can write directly to the segment
  ** DataBuffer, as happens for segment merges.
  */
  leafWriterInit(0, idx, &writer);
  dataBufferInit(&dl, 0);
  for(i=0; i<n; i++){
    DLWriter dlw;
    dataBufferReset(&dl);
    dlwInit(&dlw, DL_DEFAULT, &dl);
    plwDlwAdd(pData[i].pWriter, &dlw);
    rc = leafWriterStep(v, &writer,
                        pData[i].pTerm, pData[i].nTerm, dl.pData, dl.nData);
    dlwDestroy(&dlw);
    if( rc!=SQLITE_OK ) goto err;
  }
  rc = leafWriterFinalize(v, &writer);

 err:
  free(pData);
  leafWriterDestroy(&writer);
  return rc;
}

4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
    rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
  }

  if( rc==SQLITE_OK ) rc = writeZeroSegment(v, &terms);

  /* clean up */
  for(e=fts2HashFirst(&terms); e; e=fts2HashNext(e)){
    DocList *p = fts2HashData(e);
    docListDelete(p);
  }
  fts2HashClear(&terms);

  return rc;
}

/*







|
<







4923
4924
4925
4926
4927
4928
4929
4930

4931
4932
4933
4934
4935
4936
4937
    rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
  }

  if( rc==SQLITE_OK ) rc = writeZeroSegment(v, &terms);

  /* clean up */
  for(e=fts2HashFirst(&terms); e; e=fts2HashNext(e)){
    plwDelete(fts2HashData(e));

  }
  fts2HashClear(&terms);

  return rc;
}

/*