/ Check-in [02069782]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Avoid redundant loads from the %_data table in the fts5 code.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1:02069782f8b7896a582582c79185b50418622736
User & Date: dan 2015-05-25 11:46:33
Context
2015-05-26
18:22
Simplifications and minor optimizations to fts5 prefix queries that cannot use a prefix index. check-in: aef89d9f user: dan tags: fts5
2015-05-25
11:46
Avoid redundant loads from the %_data table in the fts5 code. check-in: 02069782 user: dan tags: fts5
2015-05-23
15:43
Avoid making redundant copies of position-lists within the fts5 code. check-in: 5165de54 user: dan tags: fts5
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
int sqlite3Fts5IterNext(Fts5IndexIter*);
int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
i64 sqlite3Fts5IterRowid(Fts5IndexIter*);
int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn);
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);








|







280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
int sqlite3Fts5IterNext(Fts5IndexIter*);
int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
i64 sqlite3Fts5IterRowid(Fts5IndexIter*);
int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn, i64 *pi);
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);

Changes to ext/fts5/fts5_expr.c.

353
354
355
356
357
358
359

360
361
362
363
364
365
366
367
368
369
...
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    if( !aIter ) return SQLITE_NOMEM;
  }

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){

    int n;
    const u8 *a;
    rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n);
    if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
      goto ismatch_out;
    }
  }

  while( 1 ){
    int bMatch;
................................................................................
    /* If this "NEAR" object is actually a single phrase that consists of
    ** a single term only, then the row that it currently points to must
    ** be a match. All that is required is to populate pPhrase->poslist
    ** with the position-list data for the only term.  */
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
    Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
    assert( pPhrase->poslist.nSpace==0 );
    pNode->iRowid = sqlite3Fts5IterRowid(pIter);
    return sqlite3Fts5IterPoslist(pIter, 
        (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n
    );
  }else{
    int rc = SQLITE_OK;

    while( 1 ){
      int i;








>


|







 







<

|







353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
...
682
683
684
685
686
687
688

689
690
691
692
693
694
695
696
697
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    if( !aIter ) return SQLITE_NOMEM;
  }

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){
    i64 dummy;
    int n;
    const u8 *a;
    rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy);
    if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
      goto ismatch_out;
    }
  }

  while( 1 ){
    int bMatch;
................................................................................
    /* If this "NEAR" object is actually a single phrase that consists of
    ** a single term only, then the row that it currently points to must
    ** be a match. All that is required is to populate pPhrase->poslist
    ** with the position-list data for the only term.  */
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
    Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
    assert( pPhrase->poslist.nSpace==0 );

    return sqlite3Fts5IterPoslist(pIter, 
        (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n, &pNode->iRowid
    );
  }else{
    int rc = SQLITE_OK;

    while( 1 ){
      int i;

Changes to ext/fts5/fts5_index.c.

305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
...
512
513
514
515
516
517
518

519
520
521
522
523
524
525
...
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
....
1709
1710
1711
1712
1713
1714
1715




1716
1717
1718
1719
1720
1721
1722
1723
....
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
....
2333
2334
2335
2336
2337
2338
2339

2340
2341
2342
2343
2344
2345
2346
....
2479
2480
2481
2482
2483
2484
2485

2486
2487
2488


2489
2490
2491
2492
2493
2494
2495
....
2533
2534
2535
2536
2537
2538
2539

2540
2541
2542
2543
2544
2545
2546
....
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
....
3507
3508
3509
3510
3511
3512
3513









3514
3515
3516
3517
3518
3519
3520
....
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
....
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
....
4052
4053
4054
4055
4056
4057
4058








4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
....
4688
4689
4690
4691
4692
4693
4694
4695






4696
4697
4698
4699

4700
4701
4702

4703
4704
4705
4706
4707
4708
4709
....
4979
4980
4981
4982
4983
4984
4985

4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
*/
#define FTS5_DATA_ZERO_PADDING 8

typedef struct Fts5BtreeIter Fts5BtreeIter;
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
typedef struct Fts5ChunkIter Fts5ChunkIter;
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5PosIter Fts5PosIter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;

................................................................................
**     start of the "position-list-size" field within the page.
*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */

  int iLeafOffset;                /* Byte offset within current leaf */

  /* The page and offset from which the current term was read. The offset 
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

................................................................................
  int bDel;                       /* True if the delete flag is set */
};

#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02


/*
** Object for iterating through paginated data.
*/
struct Fts5ChunkIter {
  Fts5Data *pLeaf;                /* Current leaf data. NULL -> EOF. */
  i64 iLeafRowid;                 /* Absolute rowid of current leaf */
  int nRem;                       /* Remaining bytes of data to read */

  /* Output parameters */
  u8 *p;                          /* Pointer to chunk of data */
  int n;                          /* Size of buffer p in bytes */
};

/*
** Object for iterating through a single position list on disk.
*/
struct Fts5PosIter {
  Fts5ChunkIter chunk;            /* Current chunk of data */
  int iOff;                       /* Offset within chunk data */

  int iCol;
  int iPos;
};

/*
** Object for iterating through the conents of a single internal node in 
** memory.
*/
struct Fts5NodeIter {
  /* Internal. Set and managed by fts5NodeIterXXX() functions. Except, 
  ** the EOF test for the iterator is (Fts5NodeIter.aData==0).  */
................................................................................
static void fts5SegIterNextPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance to next page */
){
  Fts5StructureSegment *pSeg = pIter->pSeg;
  fts5DataRelease(pIter->pLeaf);
  pIter->iLeafPgno++;




  if( pIter->iLeafPgno<=pSeg->pgnoLast ){
    pIter->pLeaf = fts5DataRead(p, 
        FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno)
    );
  }else{
    pIter->pLeaf = 0;
  }
}
................................................................................
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  assert( pbNewTerm==0 || *pbNewTerm==0 );
  if( p->rc==SQLITE_OK ){
    if( pIter->flags & FTS5_SEGITER_REVERSE ){

      if( pIter->iRowidOffset>0 ){
        u8 *a = pIter->pLeaf->p;
        int iOff;
        int nPos;
        int bDummy;
        i64 iDelta;

................................................................................

/*
** Zero the iterator passed as the only argument.
*/
static void fts5SegIterClear(Fts5SegIter *pIter){
  fts5BufferFree(&pIter->term);
  fts5DataRelease(pIter->pLeaf);

  fts5DlidxIterFree(pIter->pDlidx);
  sqlite3_free(pIter->aRowidOffset);
  memset(pIter, 0, sizeof(Fts5SegIter));
}

#ifdef SQLITE_DEBUG

................................................................................
*/
static void fts5SegIterGotoPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int iLeafPgno
){
  assert( iLeafPgno>pIter->iLeafPgno );

  if( iLeafPgno>pIter->pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }else{


    pIter->iLeafPgno = iLeafPgno-1;
    fts5SegIterNextPage(p, pIter);
    assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );

    if( p->rc==SQLITE_OK ){
      int iOff;
      u8 *a = pIter->pLeaf->p;
................................................................................
    }
    assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{

    assert( iMatch<pIter->iRowid );
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
      fts5DlidxIterPrev(p, pDlidx);
    }
    iLeafPgno = fts5DlidxIterPgno(pDlidx);

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
................................................................................
*/
static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){
  Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  *pn = p->term.n;
  return p->term.p;
}

/*
** Return true if the chunk iterator passed as the second argument is
** at EOF. Or if an error has already occurred. Otherwise, return false.
*/
static int fts5ChunkIterEof(Fts5Index *p, Fts5ChunkIter *pIter){
  return (p->rc || pIter->pLeaf==0);
}

/*
** Advance the chunk-iterator to the next chunk of data to read.
*/
static void fts5ChunkIterNext(Fts5Index *p, Fts5ChunkIter *pIter){
  assert( pIter->nRem>=pIter->n );
  pIter->nRem -= pIter->n;
  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
  pIter->p = 0;
  if( pIter->nRem>0 ){
    Fts5Data *pLeaf;
    pIter->iLeafRowid++;
    pLeaf = pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid);
    if( pLeaf ){
      pIter->n = MIN(pIter->nRem, pLeaf->n-4);
      pIter->p = pLeaf->p+4;
    }
  }
}

/*
** Intialize the chunk iterator to read the position list data for which 
** the size field is at offset iOff of leaf pLeaf. 
*/
static void fts5ChunkIterInit(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pSeg,              /* Segment iterator to read poslist from */
  Fts5ChunkIter *pIter            /* Initialize this object */
){
  Fts5Data *pLeaf = pSeg->pLeaf;
  int iOff = pSeg->iLeafOffset;

  memset(pIter, 0, sizeof(*pIter));
  /* If Fts5SegIter.pSeg is NULL, then this iterator iterates through data
  ** currently stored in a hash table. In this case there is no leaf-rowid
  ** to calculate.  */
  if( pSeg->pSeg ){
    i64 rowid = FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pSeg->iLeafPgno);
    pIter->iLeafRowid = rowid;
  }

  fts5DataReference(pLeaf);
  pIter->pLeaf = pLeaf;
  pIter->nRem = pSeg->nPos;
  pIter->n = MIN(pLeaf->n - iOff, pIter->nRem);
  pIter->p = pLeaf->p + iOff;
  if( pIter->n==0 ){
    fts5ChunkIterNext(p, pIter);
  }
}

static void fts5ChunkIterRelease(Fts5ChunkIter *pIter){
  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
}


/*
** Allocate a new segment-id for the structure pStruct. The new segment
** id must be between 1 and 65335 inclusive, and must not be used by 
** any currently existing segment. If a free segment id cannot be found,
** SQLITE_FULL is returned.
................................................................................
        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1),iLeafRowid);
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
  }
  fts5BufferFree(&buf);
}










/*
**
*/
static void fts5IndexMergeLevel(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Stucture of index */
................................................................................

  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    Fts5ChunkIter sPos;           /* Used to iterate through position list */
    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;

    /* Check for key annihilation. */
    if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue;

    fts5ChunkIterInit(p, pSeg, &sPos);

    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
      if( pnRem && writer.nLeafWritten>nRem ){
        fts5ChunkIterRelease(&sPos);
        break;
      }

      /* This is a new term. Append a term to the output segment. */
      if( bRequireDoclistTerm ){
        fts5WriteAppendZerobyte(p, &writer);
      }
................................................................................
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    nPos = pSeg->nPos*2 + pSeg->bDel;
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);

    for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
      fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n);
    }

    fts5ChunkIterRelease(&sPos);
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){
................................................................................
    fts5IndexMerge(p, &pStruct, nMerge);
    fts5StructureWrite(p, pStruct);
  }
  fts5StructureRelease(pStruct);

  return fts5IndexReturn(p);
}









/*
** Iterator pIter currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
*/
static void fts5SegiterPoslist(
  Fts5Index *p,
  Fts5SegIter *pSeg,
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){
    Fts5ChunkIter iter;
    fts5ChunkIterInit(p, pSeg, &iter);
    while( fts5ChunkIterEof(p, &iter)==0 ){
      fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p);
      fts5ChunkIterNext(p, &iter);
    }
    fts5ChunkIterRelease(&iter);
  }
}

/*
** Iterator pMulti currently points to a valid entry (not EOF). This
** function appends a copy of the position-list of the entry pMulti 
** currently points to to buffer pBuf.
**
................................................................................
** Return a pointer to a buffer containing a copy of the position list for
** the current entry. Output variable *pn is set to the size of the buffer 
** in bytes before returning.
**
** The returned position list does not include the "number of bytes" varint
** field that starts the position list on disk.
*/
int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){






  assert( pIter->pIndex->rc==SQLITE_OK );
  if( pIter->pDoclist ){
    *pn = pIter->pDoclist->nPoslist;
    *pp = pIter->pDoclist->aPoslist;

  }else{
    Fts5MultiSegIter *pMulti = pIter->pMulti;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];

    *pn = pSeg->nPos;
    if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){
      *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
    }else{
      fts5BufferZero(&pIter->poslist);
      fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist);
      *pp = pIter->poslist.p;
................................................................................
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  u64 cksum = *pCksum;
  Fts5IndexIter *pIdxIter = 0;
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){

    const u8 *pPos;
    int nPos;
    i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
    rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos);
    if( rc==SQLITE_OK ){
      Fts5PoslistReader sReader;
      for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
          sReader.bEof==0;
          sqlite3Fts5PoslistReaderNext(&sReader)
      ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);







<







<







 







>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







>
>
>
>
|







 







|







 







>







 







>



>
>







 







>







 







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







>
>
>
>
>
>
>
>
>







 







<







<
<



<







 







|
|
<
<
<







 







>
>
>
>
>
>
>
>












|
<
<
<
<
<
<
<
<







 







|
>
>
>
>
>
>

|
|
|
>



>







 







>



|







305
306
307
308
309
310
311

312
313
314
315
316
317
318

319
320
321
322
323
324
325
...
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
...
536
537
538
539
540
541
542
























543
544
545
546
547
548
549
....
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
....
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
....
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
....
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
....
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
....
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834

























2835
2836
2837
2838
2839
2840
2841
....
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
....
3547
3548
3549
3550
3551
3552
3553

3554
3555
3556
3557
3558
3559
3560


3561
3562
3563

3564
3565
3566
3567
3568
3569
3570
....
3574
3575
3576
3577
3578
3579
3580
3581
3582



3583
3584
3585
3586
3587
3588
3589
....
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040








4041
4042
4043
4044
4045
4046
4047
....
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
....
4948
4949
4950
4951
4952
4953
4954
4955
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
*/
#define FTS5_DATA_ZERO_PADDING 8

typedef struct Fts5BtreeIter Fts5BtreeIter;
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;

typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;

typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;

................................................................................
**     start of the "position-list-size" field within the page.
*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  int iLeafOffset;                /* Byte offset within current leaf */

  /* The page and offset from which the current term was read. The offset 
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

................................................................................
  int bDel;                       /* True if the delete flag is set */
};

#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02


























/*
** Object for iterating through the conents of a single internal node in 
** memory.
*/
struct Fts5NodeIter {
  /* Internal. Set and managed by fts5NodeIterXXX() functions. Except, 
  ** the EOF test for the iterator is (Fts5NodeIter.aData==0).  */
................................................................................
static void fts5SegIterNextPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance to next page */
){
  Fts5StructureSegment *pSeg = pIter->pSeg;
  fts5DataRelease(pIter->pLeaf);
  pIter->iLeafPgno++;
  if( pIter->pNextLeaf ){
    assert( pIter->iLeafPgno<=pSeg->pgnoLast );
    pIter->pLeaf = pIter->pNextLeaf;
    pIter->pNextLeaf = 0;
  }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
    pIter->pLeaf = fts5DataRead(p, 
        FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno)
    );
  }else{
    pIter->pLeaf = 0;
  }
}
................................................................................
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  assert( pbNewTerm==0 || *pbNewTerm==0 );
  if( p->rc==SQLITE_OK ){
    if( pIter->flags & FTS5_SEGITER_REVERSE ){
      assert( pIter->pNextLeaf==0 );
      if( pIter->iRowidOffset>0 ){
        u8 *a = pIter->pLeaf->p;
        int iOff;
        int nPos;
        int bDummy;
        i64 iDelta;

................................................................................

/*
** Zero the iterator passed as the only argument.
*/
static void fts5SegIterClear(Fts5SegIter *pIter){
  fts5BufferFree(&pIter->term);
  fts5DataRelease(pIter->pLeaf);
  fts5DataRelease(pIter->pNextLeaf);
  fts5DlidxIterFree(pIter->pDlidx);
  sqlite3_free(pIter->aRowidOffset);
  memset(pIter, 0, sizeof(Fts5SegIter));
}

#ifdef SQLITE_DEBUG

................................................................................
*/
static void fts5SegIterGotoPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int iLeafPgno
){
  assert( iLeafPgno>pIter->iLeafPgno );

  if( iLeafPgno>pIter->pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }else{
    fts5DataRelease(pIter->pNextLeaf);
    pIter->pNextLeaf = 0;
    pIter->iLeafPgno = iLeafPgno-1;
    fts5SegIterNextPage(p, pIter);
    assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );

    if( p->rc==SQLITE_OK ){
      int iOff;
      u8 *a = pIter->pLeaf->p;
................................................................................
    }
    assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{
    assert( pIter->pNextLeaf==0 );
    assert( iMatch<pIter->iRowid );
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
      fts5DlidxIterPrev(p, pDlidx);
    }
    iLeafPgno = fts5DlidxIterPgno(pDlidx);

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
................................................................................
*/
static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){
  Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  *pn = p->term.n;
  return p->term.p;
}

static void fts5ChunkIterate(
  Fts5Index *p,                   /* Index object */
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = 0;

  if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
    pgnoSave = pgno+1;
  }

  while( 1 ){
    xChunk(p, pCtx, pChunk, nChunk);
    nRem -= nChunk;
    fts5DataRelease(pData);
    if( nRem<=0 ){
      break;
    }else{
      pgno++;
      pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno));
      if( pData==0 ) break;
      pChunk = &pData->p[4];
      nChunk = MIN(nRem, pData->n - 4);
      if( pgno==pgnoSave ){
        assert( pSeg->pNextLeaf==0 );
        pSeg->pNextLeaf = pData;
        pData = 0;
      }
    }
  }
}




























/*
** Allocate a new segment-id for the structure pStruct. The new segment
** id must be between 1 and 65335 inclusive, and must not be used by 
** any currently existing segment. If a free segment id cannot be found,
** SQLITE_FULL is returned.
................................................................................
        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1),iLeafRowid);
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
  }
  fts5BufferFree(&buf);
}

static void fts5MergeChunkCallback(
  Fts5Index *p, 
  void *pCtx, 
  const u8 *pChunk, int nChunk
){
  Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
  fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
}

/*
**
*/
static void fts5IndexMergeLevel(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Stucture of index */
................................................................................

  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];

    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;

    /* Check for key annihilation. */
    if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue;



    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
      if( pnRem && writer.nLeafWritten>nRem ){

        break;
      }

      /* This is a new term. Append a term to the output segment. */
      if( bRequireDoclistTerm ){
        fts5WriteAppendZerobyte(p, &writer);
      }
................................................................................
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    nPos = pSeg->nPos*2 + pSeg->bDel;
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);

    /* Append the position-list data to the output */
    fts5ChunkIterate(p, pSeg, (void*)&writer, fts5MergeChunkCallback);



  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){
................................................................................
    fts5IndexMerge(p, &pStruct, nMerge);
    fts5StructureWrite(p, pStruct);
  }
  fts5StructureRelease(pStruct);

  return fts5IndexReturn(p);
}

static void fts5PoslistCallback(
  Fts5Index *p, 
  void *pCtx, 
  const u8 *pChunk, int nChunk
){
  fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk);
}

/*
** Iterator pIter currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
*/
static void fts5SegiterPoslist(
  Fts5Index *p,
  Fts5SegIter *pSeg,
  Fts5Buffer *pBuf
){
  fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);








}

/*
** Iterator pMulti currently points to a valid entry (not EOF). This
** function appends a copy of the position-list of the entry pMulti 
** currently points to to buffer pBuf.
**
................................................................................
** Return a pointer to a buffer containing a copy of the position list for
** the current entry. Output variable *pn is set to the size of the buffer 
** in bytes before returning.
**
** The returned position list does not include the "number of bytes" varint
** field that starts the position list on disk.
*/
int sqlite3Fts5IterPoslist(
  Fts5IndexIter *pIter, 
  const u8 **pp,                  /* OUT: Pointer to position-list data */
  int *pn,                        /* OUT: Size of position-list in bytes */
  i64 *piRowid                    /* OUT: Current rowid */
){
  Fts5DoclistIter *pDoclist = pIter->pDoclist;
  assert( pIter->pIndex->rc==SQLITE_OK );
  if( pDoclist ){
    *pn = pDoclist->nPoslist;
    *pp = pDoclist->aPoslist;
    *piRowid = pDoclist->iRowid;
  }else{
    Fts5MultiSegIter *pMulti = pIter->pMulti;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    *piRowid = pSeg->iRowid;
    *pn = pSeg->nPos;
    if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){
      *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
    }else{
      fts5BufferZero(&pIter->poslist);
      fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist);
      *pp = pIter->poslist.p;
................................................................................
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  u64 cksum = *pCksum;
  Fts5IndexIter *pIdxIter = 0;
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
    i64 dummy;
    const u8 *pPos;
    int nPos;
    i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
    rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos, &dummy);
    if( rc==SQLITE_OK ){
      Fts5PoslistReader sReader;
      for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
          sReader.bEof==0;
          sqlite3Fts5PoslistReaderNext(&sReader)
      ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);

Changes to ext/fts5/fts5_vocab.c.

343
344
345
346
347
348
349

350
351
352
353
354
355
356
357
358
359
360
361
      memset(pCsr->aVal, 0, sizeof(pCsr->aVal));
      memset(pCsr->aCnt, 0, pCsr->nCol * sizeof(i64));
      memset(pCsr->aDoc, 0, pCsr->nCol * sizeof(i64));
      pCsr->iCol = 0;

      assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
      while( rc==SQLITE_OK ){

        const u8 *pPos; int nPos;   /* Position list */
        i64 iPos = 0;               /* 64-bit position read from poslist */
        int iOff = 0;               /* Current offset within position list */

        rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos);
        if( rc==SQLITE_OK ){
          if( pTab->eType==FTS5_VOCAB_ROW ){
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              pCsr->aVal[1]++;
            }
            pCsr->aVal[0]++;
          }else{







>




|







343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
      memset(pCsr->aVal, 0, sizeof(pCsr->aVal));
      memset(pCsr->aCnt, 0, pCsr->nCol * sizeof(i64));
      memset(pCsr->aDoc, 0, pCsr->nCol * sizeof(i64));
      pCsr->iCol = 0;

      assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
      while( rc==SQLITE_OK ){
        i64 dummy;
        const u8 *pPos; int nPos;   /* Position list */
        i64 iPos = 0;               /* 64-bit position read from poslist */
        int iOff = 0;               /* Current offset within position list */

        rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos, &dummy);
        if( rc==SQLITE_OK ){
          if( pTab->eType==FTS5_VOCAB_ROW ){
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              pCsr->aVal[1]++;
            }
            pCsr->aVal[0]++;
          }else{

Changes to ext/fts5/test/fts5ah.test.

86
87
88
89
90
91
92

93
94
95
96
97

98
99
100
101
102
103
104
105
106
107
108
  2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w'   }  [list $W]
  3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' }  [list $W]
  4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' }  [list $Y]
" {

  do_test 1.6.$tn.1 {
    set n [execsql_reads $q]

    expr {$n < ($nReadX / 10)}
  } {1}

  do_test 1.6.$tn.2 {
    set n [execsql_reads "$q ORDER BY rowid DESC"]

    expr {$n < ($nReadX / 10)}
  } {1}

  do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res]
  do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid DESC" [lsort -int -decr $res]
}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

finish_test








>
|




>
|










86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
  2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w'   }  [list $W]
  3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' }  [list $W]
  4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' }  [list $Y]
" {

  do_test 1.6.$tn.1 {
    set n [execsql_reads $q]
    puts -nonewline "(n=$n nReadX=$nReadX)"
    expr {$n < ($nReadX / 8)}
  } {1}

  do_test 1.6.$tn.2 {
    set n [execsql_reads "$q ORDER BY rowid DESC"]
    puts -nonewline "(n=$n nReadX=$nReadX)"
    expr {$n < ($nReadX / 8)}
  } {1}

  do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res]
  do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid DESC" [lsort -int -decr $res]
}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

finish_test