/ Check-in [fab245be]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the array of 16-bit offsets at the end of each page to an array of varints.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5-incompatible
Files: files | file ages | folders
SHA1: fab245bea4f283714c17bca22428d5eb4db5935a
User & Date: dan 2015-09-10 05:40:17
Context
2015-09-10
10:01
Fix an fts5 problem that could occur if a term and the first associated rowid are on different leaf pages. check-in: ffe2796a user: dan tags: fts5-incompatible
05:40
Change the array of 16-bit offsets at the end of each page to an array of varints. check-in: fab245be user: dan tags: fts5-incompatible
2015-09-09
08:15
Fix a bug in preprocessor macros within fts5_main.c. check-in: 0eb2b952 user: dan tags: fts5-incompatible
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_index.c.

374
375
376
377
378
379
380

381
382
383
384
385
386
387
...
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
...
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
...
644
645
646
647
648
649
650





651
652
653
654
655
656
657
....
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
....
1579
1580
1581
1582
1583
1584
1585
1586







1587
1588
1589
1590
1591
1592
1593
....
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
....
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
....
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840









1841

1842
1843
1844
1845
1846
1847
1848
1849
1850
....
1852
1853
1854
1855
1856
1857
1858

1859
1860
1861
1862
1863
1864
1865
....
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
....
2006
2007
2008
2009
2010
2011
2012

2013
2014
2015
2016
2017
2018
2019


2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042


2043
2044
2045
2046
2047
2048
2049
....
2052
2053
2054
2055
2056
2057
2058
2059
2060

2061
2062
2063
2064
2065


2066

2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106








2107
2108
2109
2110
2111
2112
2113
....
3169
3170
3171
3172
3173
3174
3175

3176
3177
3178
3179
3180
3181
3182
....
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214





3215
3216

3217
3218
3219
3220
3221
3222
3223
....
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
....
3491
3492
3493
3494
3495
3496
3497
3498









3499
3500
3501
3502
3503
3504
3505
....
4938
4939
4940
4941
4942
4943
4944

4945

4946
4947
4948

4949
4950
4951







4952
4953
4954
4955
4956
4957
4958
4959
4960
4961
....
5422
5423
5424
5425
5426
5427
5428
5429



5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453
5454
5455
....
5460
5461
5462
5463
5464
5465
5466
5467





5468
5469
5470
5471










5472
5473
5474
5475
5476
5477

5478
5479
5480
5481
5482
5483
5484
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */

  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
................................................................................
  int iLeafOffset;                /* Byte offset within current leaf */

  /* The page and offset from which the current term was read. The offset 
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iTermIdx;
  int iEndofDoclist;

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

................................................................................
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))

#define fts5LeafFirstTermOff(x) fts5LeafTermOff(x, 0)

/*
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
*/
struct Fts5IndexIter {
  Fts5Index *pIndex;              /* Index that owns this iterator */
................................................................................
){
  int nCmp = MIN(nLeft, nRight);
  int res = memcmp(pLeft, pRight, nCmp);
  return (res==0 ? (nLeft - nRight) : res);
}
#endif







/*
** Close the read-only blob handle, if it is open.
*/
static void fts5CloseReader(Fts5Index *p){
  if( p->pReader ){
    sqlite3_blob *pReader = p->pReader;
................................................................................
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}

static void fts5SegIterLoadEod(Fts5Index *p, Fts5SegIter *pIter){
  Fts5Data *pLeaf = pIter->pLeaf;
  int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2;

  assert( pIter->iLeafPgno==pIter->iTermLeafPgno );
  if( (pIter->iTermIdx+1)<nPg ){
    int iRead = pLeaf->szLeaf + (pIter->iTermIdx + 1) * 2;
    pIter->iEndofDoclist = fts5GetU16(&pLeaf->p[iRead]);
  }else{
    pIter->iEndofDoclist = pLeaf->nn+1;
  }

}

static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  if( iOff>=pIter->pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);
................................................................................
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  pIter->iLeafOffset = iOff;

  fts5SegIterLoadEod(p, pIter);







  fts5SegIterLoadRowid(p, pIter);
}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when 
** this function returns.
................................................................................
    fts5SegIterNextPage(p, pIter);
  }

  if( p->rc==SQLITE_OK ){
    pIter->iLeafOffset = 4;
    assert_nc( pIter->pLeaf->nn>4 );
    assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
    pIter->iTermIdx = 0;
    fts5SegIterLoadTerm(p, pIter, 0);
    fts5SegIterLoadNPos(p, pIter);
  }
}

/*
** This function is only ever called on iterators created by calls to
................................................................................
      iOff = pIter->iLeafOffset + pIter->nPos;

      if( iOff<n ){
        /* The next entry is on the current page. */
        assert_nc( iOff<=pIter->iEndofDoclist );
        if( iOff>=pIter->iEndofDoclist ){
          bNewTerm = 1;
          if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
            pIter->iTermIdx++;
          }else{
            pIter->iTermIdx = 0;
          }
          if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
            iOff += fts5GetVarint32(&a[iOff], nKeep);
          }
        }else{
          u64 iDelta;
          iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
          pIter->iRowid += iDelta;
................................................................................
          fts5SegIterNextPage(p, pIter);
          pLeaf = pIter->pLeaf;
          if( pLeaf==0 ) break;
          ASSERT_SZLEAF_OK(pLeaf);
          if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
            iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
            pIter->iLeafOffset = iOff;
          }
          else if( pLeaf->nn>pLeaf->szLeaf ){
            iOff = fts5LeafFirstTermOff(pLeaf);









            pIter->iLeafOffset = iOff;

            bNewTerm = 1;
            pIter->iTermIdx = 0;
          }
          if( iOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
            return;
          }
        }
      }
................................................................................
      /* Check if the iterator is now at EOF. If so, return early. */
      if( pIter->pLeaf ){
        if( bNewTerm ){
          if( pIter->flags & FTS5_SEGITER_ONETERM ){
            fts5DataRelease(pIter->pLeaf);
            pIter->pLeaf = 0;
          }else{

            fts5SegIterLoadTerm(p, pIter, nKeep);
            fts5SegIterLoadNPos(p, pIter);
            if( pbNewTerm ) *pbNewTerm = 1;
          }
        }else{
          fts5SegIterLoadNPos(p, pIter);
        }
................................................................................
    iOff = fts5LeafFirstRowidOff(pLast);
    iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;

    if( fts5LeafIsTermless(pLast) ){
      pIter->iEndofDoclist = pLast->nn+1;
    }else{
      pIter->iEndofDoclist = fts5LeafTermOff(pLast, 0);
    }

  }

  fts5SegIterReverseInitPage(p, pIter);
}

................................................................................
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  int iOff;
  const u8 *a = pIter->pLeaf->p;

  int n = pIter->pLeaf->szLeaf;

  int nMatch = 0;
  int nKeep = 0;
  int nNew = 0;
  int iTerm = 0;
  int nPgTerm = (pIter->pLeaf->nn - pIter->pLeaf->szLeaf) >> 1;



  assert( p->rc==SQLITE_OK );
  assert( pIter->pLeaf );

  iOff = fts5LeafFirstTermOff(pIter->pLeaf);
  if( iOff<4 || iOff>=n ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  while( 1 ){
    int i;
    int nCmp;

    /* Figure out how many new bytes are in this term */
    fts5IndexGetVarint32(a, iOff, nNew);

    if( nKeep<nMatch ){
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){


      nCmp = MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( nTerm==nMatch ){
................................................................................
        }else{
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        goto search_failed;
      }
    }
    iOff += nNew;


    iTerm++;
    if( iTerm>=nPgTerm ){
      iOff = n;
      break;
    }


    iOff = fts5GetU16(&a[n + iTerm*2]);


    /* Read the nKeep field of the next term. */
    fts5IndexGetVarint32(a, iOff, nKeep);
  }

 search_failed:
  if( bGe==0 ){
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( iOff>=n ){
    do {
      iTerm = 0;
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
        iOff = fts5LeafFirstTermOff(pIter->pLeaf);
        if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
        }else{
          nKeep = 0;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

 search_success:
  pIter->iTermIdx = iTerm;

  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);

  fts5SegIterLoadEod(p, pIter);








  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
................................................................................
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);
  fts5BufferZero(&pPage->pgidx);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);

  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;
................................................................................
  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;

  if( p->rc ) return;
  assert( pPage->buf.n>=4 );
  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );

  /* If the current leaf page is full, flush it to disk. */
  if( (pPage->buf.n + pPage->pgidx.n + nTerm + 2)>=p->pConfig->pgsz ){
    if( pPage->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
    }
    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
  }
  
  /* TODO1: Updating pgidx here. */





  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  pPgidx->n += 2;


  if( pWriter->bFirstTermInPage ){
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term 
................................................................................
    /* Bind the current output segment id to the index-writer. This is an
    ** optimization over binding the same value over and over as rows are
    ** inserted into %_idx by the current writer.  */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}

/*
** The buffer passed as the second argument contains a leaf page that is
** missing its page-idx array. The first term is guaranteed to start at
** byte offset 4 of the buffer. The szLeaf field of the leaf page header
** is already populated.
**
** This function appends a page-index to the buffer. The buffer is 
** guaranteed to be large enough to fit the page-index.
*/
static void fts5MakePageidx(
  Fts5Index *p,                   /* Fts index object to store any error in */
  Fts5Data *pOld,                 /* Original page data */
  int iOldidx,                    /* Index of term in pOld pgidx */
  Fts5Buffer *pBuf                /* Buffer containing new page (no pgidx) */
){
  if( p->rc==SQLITE_OK ){
    int iOff;
    int iEnd;
    int nByte;
    int iEndOld;                  /* Byte after term iOldIdx on old page */
    int iEndNew;                  /* Byte after term iOldIdx on new page */
    int ii;
    int nPgIdx = (pOld->nn - pOld->szLeaf) / 2;

    /* Determine end of term on old page */
    iEndOld = fts5LeafTermOff(pOld, iOldidx);
    if( iOldidx>0 ){
      iEndOld += fts5GetVarint32(&pOld->p[iEndOld], nByte);
    }
    iEndOld += fts5GetVarint32(&pOld->p[iEndOld], nByte);
    iEndOld += nByte;

    /* Determine end of term on new page */
    iEndNew = 4 + fts5GetVarint32(&pBuf->p[4], nByte);
    iEndNew += nByte;

    fts5PutU16(&pBuf->p[pBuf->n], 4);
    pBuf->n += 2;
    for(ii=iOldidx+1; ii<nPgIdx; ii++){
      int iVal = fts5LeafTermOff(pOld, ii);
      fts5PutU16(&pBuf->p[pBuf->n], iVal + (iEndNew - iEndOld));
      pBuf->n += 2;
    }
  }
}

/*
** Iterator pIter was used to iterate through the input segments of on an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){
  int i;
................................................................................
        fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
        if( p->rc==SQLITE_OK ){
          /* Set the szLeaf field */
          fts5PutU16(&buf.p[2], buf.n);
        }

        /* Set up the new page-index array */
        fts5MakePageidx(p, pData, pSeg->iTermIdx, &buf);










        fts5DataRelease(pData);
        pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid);
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
................................................................................
    fts5DataRelease(pLeaf);
    if( p->rc ) break;
  }
}

static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2;

  int ii;

  Fts5Buffer buf1 = {0,0,0};
  Fts5Buffer buf2 = {0,0,0};


  for(ii=0; p->rc==SQLITE_OK && ii<nPg; ii++){
    int res;
    int iOff = fts5LeafTermOff(pLeaf, ii);







    if( iOff>=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else if( ii==0 ){
      int nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }
................................................................................
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{
    Fts5Buffer term;



    int iTermOff = 0;
    int szLeaf = 0;
    int iRowidOff = 0;
    int iOff;
    int nPgTerm = 0;
    int nDoclist;
    int i;

    memset(&term, 0, sizeof(Fts5Buffer));

    if( n<4 ){
      sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
      goto decode_out;
    }else{
      iRowidOff = fts5GetU16(&a[0]);
      szLeaf = fts5GetU16(&a[2]);
      nPgTerm = (n - szLeaf) / 2;
      if( nPgTerm ){
        iTermOff = fts5GetU16(&a[szLeaf]);
      }
    }

    /* Decode the position list tail at the start of the page */
    if( iRowidOff!=0 ){
      iOff = iRowidOff;
    }else if( iTermOff!=0 ){
................................................................................
    fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

    /* Decode any more doclist data that appears on the page before the
    ** first term. */
    nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
    fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);

    for(i=0; i<nPgTerm; i++){





      int nByte;
      int iEnd = (i+1)<nPgTerm ? fts5GetU16(&a[szLeaf+i*2+2]) : szLeaf;
      iOff = fts5GetU16(&a[szLeaf + i*2]);
      if( i>0 ){










        iOff += fts5GetVarint32(&a[iOff], nByte);
        term.n = nByte;
      }
      iOff += fts5GetVarint32(&a[iOff], nByte);
      fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
      iOff += nByte;

      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
      );
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
    }

    fts5BufferFree(&term);







>







 







|







 







<
<







 







>
>
>
>
>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|
>
>
>
>
>
>
>







 







|







 







<
<
<
<
<







 







|
|
|
>
>
>
>
>
>
>
>
>

>

<







 







>







 







|







 







>
|





|
>
>


<

<
|
|
|
|
<

<
<



<






>
>







 







<

>
|
<
<


>
>
|
>










|






|












<








|
>
>
>
>
>
>
>
>







 







>







 







|







>
>
>
>
>


>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|
>
>
>
>
>
>
>
>
>







 







>

>



>
|

<
>
>
>
>
>
>
>


|







 







|
>
>
>

<


<

<








|
|
|
<







 







|
>
>
>
>
>
|
<
<
<
>
>
>
>
>
>
>
>
>
>






>







374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
...
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
...
529
530
531
532
533
534
535


536
537
538
539
540
541
542
...
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
....
1523
1524
1525
1526
1527
1528
1529














1530
1531
1532
1533
1534
1535
1536
....
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
....
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
....
1782
1783
1784
1785
1786
1787
1788





1789
1790
1791
1792
1793
1794
1795
....
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844

1845
1846
1847
1848
1849
1850
1851
....
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
....
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
....
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026

2027

2028
2029
2030
2031

2032


2033
2034
2035

2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
....
2053
2054
2055
2056
2057
2058
2059

2060
2061
2062


2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098

2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
....
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
....
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
....
3420
3421
3422
3423
3424
3425
3426














































3427
3428
3429
3430
3431
3432
3433
....
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
....
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932

4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
....
5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
5420
5421

5422
5423

5424

5425
5426
5427
5428
5429
5430
5431
5432
5433
5434
5435

5436
5437
5438
5439
5440
5441
5442
....
5447
5448
5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460



5461
5462
5463
5464
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481
5482
5483
5484
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
................................................................................
  int iLeafOffset;                /* Byte offset within current leaf */

  /* The page and offset from which the current term was read. The offset 
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff;                  /* Next offset in pgidx */
  int iEndofDoclist;

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

................................................................................
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))



/*
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
*/
struct Fts5IndexIter {
  Fts5Index *pIndex;              /* Index that owns this iterator */
................................................................................
){
  int nCmp = MIN(nLeft, nRight);
  int res = memcmp(pLeft, pRight, nCmp);
  return (res==0 ? (nLeft - nRight) : res);
}
#endif

static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
  int ret;
  fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
  return ret;
}

/*
** Close the read-only blob handle, if it is open.
*/
static void fts5CloseReader(Fts5Index *p){
  if( p->pReader ){
    sqlite3_blob *pReader = p->pReader;
................................................................................
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}















static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  if( iOff>=pIter->pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);
................................................................................
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  pIter->iLeafOffset = iOff;

  if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    int nExtra;
    pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
    pIter->iEndofDoclist += nExtra;
  }

  fts5SegIterLoadRowid(p, pIter);
}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when 
** this function returns.
................................................................................
    fts5SegIterNextPage(p, pIter);
  }

  if( p->rc==SQLITE_OK ){
    pIter->iLeafOffset = 4;
    assert_nc( pIter->pLeaf->nn>4 );
    assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
    pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
    fts5SegIterLoadTerm(p, pIter, 0);
    fts5SegIterLoadNPos(p, pIter);
  }
}

/*
** This function is only ever called on iterators created by calls to
................................................................................
      iOff = pIter->iLeafOffset + pIter->nPos;

      if( iOff<n ){
        /* The next entry is on the current page. */
        assert_nc( iOff<=pIter->iEndofDoclist );
        if( iOff>=pIter->iEndofDoclist ){
          bNewTerm = 1;





          if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
            iOff += fts5GetVarint32(&a[iOff], nKeep);
          }
        }else{
          u64 iDelta;
          iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
          pIter->iRowid += iDelta;
................................................................................
          fts5SegIterNextPage(p, pIter);
          pLeaf = pIter->pLeaf;
          if( pLeaf==0 ) break;
          ASSERT_SZLEAF_OK(pLeaf);
          if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
            iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
            pIter->iLeafOffset = iOff;

            if( pLeaf->nn>pLeaf->szLeaf ){
              pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
                  &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
              );
            }

          }
          else if( pLeaf->nn>pLeaf->szLeaf ){
            pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
                &pLeaf->p[pLeaf->szLeaf], iOff
            );
            pIter->iLeafOffset = iOff;
            pIter->iEndofDoclist = iOff;
            bNewTerm = 1;

          }
          if( iOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
            return;
          }
        }
      }
................................................................................
      /* Check if the iterator is now at EOF. If so, return early. */
      if( pIter->pLeaf ){
        if( bNewTerm ){
          if( pIter->flags & FTS5_SEGITER_ONETERM ){
            fts5DataRelease(pIter->pLeaf);
            pIter->pLeaf = 0;
          }else{
            int nExtra;
            fts5SegIterLoadTerm(p, pIter, nKeep);
            fts5SegIterLoadNPos(p, pIter);
            if( pbNewTerm ) *pbNewTerm = 1;
          }
        }else{
          fts5SegIterLoadNPos(p, pIter);
        }
................................................................................
    iOff = fts5LeafFirstRowidOff(pLast);
    iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;

    if( fts5LeafIsTermless(pLast) ){
      pIter->iEndofDoclist = pLast->nn+1;
    }else{
      pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
    }

  }

  fts5SegIterReverseInitPage(p, pIter);
}

................................................................................
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  int iOff;
  const u8 *a = pIter->pLeaf->p;
  int szLeaf = pIter->pLeaf->szLeaf;
  int n = pIter->pLeaf->nn;

  int nMatch = 0;
  int nKeep = 0;
  int nNew = 0;
  int iTerm = 0;
  int iTermOff;
  int iPgidx;                     /* Current offset in pgidx */
  int bEndOfPage = 0;

  assert( p->rc==SQLITE_OK );



  iPgidx = szLeaf;
  iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
  iOff = iTermOff;


  while( 1 ){



    /* Figure out how many new bytes are in this term */
    fts5IndexGetVarint32(a, iOff, nNew);

    if( nKeep<nMatch ){
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){
      int nCmp;
      int i;
      nCmp = MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( nTerm==nMatch ){
................................................................................
        }else{
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        goto search_failed;
      }
    }


    if( iPgidx>=n ){
      bEndOfPage = 1;


      break;
    }

    iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
    iTermOff += nKeep;
    iOff = iTermOff;

    /* Read the nKeep field of the next term. */
    fts5IndexGetVarint32(a, iOff, nKeep);
  }

 search_failed:
  if( bGe==0 ){
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( bEndOfPage ){
    do {
      iTerm = 0;
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
        fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff);
        if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
        }else{
          nKeep = 0;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

 search_success:


  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);

  if( iPgidx>=n ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    int nExtra;
    iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
    pIter->iEndofDoclist = iTermOff + nExtra;
  }
  pIter->iPgidxOff = iPgidx;

  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
................................................................................
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);
  fts5BufferZero(&pPage->pgidx);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->iPrevPgidx = 0;
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;
................................................................................
  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;

  if( p->rc ) return;
  assert( pPage->buf.n>=4 );
  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );

  /* If the current leaf page is full, flush it to disk. */
  if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
    if( pPage->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
    }
    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
  }
  
  /* TODO1: Updating pgidx here. */
  pPgidx->n += sqlite3Fts5PutVarint(
      &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
  );
  pPage->iPrevPgidx = pPage->buf.n;
#if 0
  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  pPgidx->n += 2;
#endif

  if( pWriter->bFirstTermInPage ){
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term 
................................................................................
    /* Bind the current output segment id to the index-writer. This is an
    ** optimization over binding the same value over and over as rows are
    ** inserted into %_idx by the current writer.  */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}















































/*
** Iterator pIter was used to iterate through the input segments of on an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){
  int i;
................................................................................
        fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
        if( p->rc==SQLITE_OK ){
          /* Set the szLeaf field */
          fts5PutU16(&buf.p[2], buf.n);
        }

        /* Set up the new page-index array */
        fts5BufferAppendVarint(&p->rc, &buf, 4);
        if( pSeg->iLeafPgno==pSeg->iTermLeafPgno 
         && pSeg->iEndofDoclist<pData->szLeaf 
        ){
          int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
          fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
          fts5BufferAppendBlob(&p->rc, &buf, 
              pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
          );
        }

        fts5DataRelease(pData);
        pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid);
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
................................................................................
    fts5DataRelease(pLeaf);
    if( p->rc ) break;
  }
}

static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2;
  int iTermOff = 0;
  int ii;

  Fts5Buffer buf1 = {0,0,0};
  Fts5Buffer buf2 = {0,0,0};

  ii = pLeaf->szLeaf;
  while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
    int res;

    int iOff;
    int nIncr;

    ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
    iTermOff += nIncr;
    iOff = iTermOff;

    if( iOff>=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else if( iTermOff==nIncr ){
      int nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }
................................................................................
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{
    Fts5Buffer term;              /* Current term read from page */
    int szLeaf;                   /* Offset of pgidx in a[] */
    int iPgidxOff;
    int iPgidxPrev = 0;           /* Previous value read from pgidx */
    int iTermOff = 0;

    int iRowidOff = 0;
    int iOff;

    int nDoclist;


    memset(&term, 0, sizeof(Fts5Buffer));

    if( n<4 ){
      sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
      goto decode_out;
    }else{
      iRowidOff = fts5GetU16(&a[0]);
      iPgidxOff = szLeaf = fts5GetU16(&a[2]);
      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], iTermOff);

      }
    }

    /* Decode the position list tail at the start of the page */
    if( iRowidOff!=0 ){
      iOff = iRowidOff;
    }else if( iTermOff!=0 ){
................................................................................
    fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

    /* Decode any more doclist data that appears on the page before the
    ** first term. */
    nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
    fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);

    while( iPgidxOff<n ){
      int bFirst = (iPgidxOff==szLeaf);     /* True for first term on page */
      int nByte;                            /* Bytes of data */
      int iEnd;
      
      iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
      iPgidxPrev += nByte;



      iOff = iPgidxPrev;

      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], nByte);
        iEnd = iPgidxPrev + nByte;
      }else{
        iEnd = szLeaf;
      }

      if( bFirst==0 ){
        iOff += fts5GetVarint32(&a[iOff], nByte);
        term.n = nByte;
      }
      iOff += fts5GetVarint32(&a[iOff], nByte);
      fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
      iOff += nByte;

      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
      );
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
    }

    fts5BufferFree(&term);

Changes to ext/fts5/test/fts5simple.test.

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
...
110
111
112
113
114
115
116
117
























































118
119
  SELECT * FROM t1 WHERE t1 MATCH 'o*';
} {one}

do_execsql_test 3.1 {
  INSERT INTO t1(t1) VALUES('integrity-check');
} {}

}

#-------------------------------------------------------------------------
reset_db
do_execsql_test 4.1 {
  CREATE VIRTUAL TABLE t11 USING fts5(content);
  INSERT INTO t11(t11, rank) VALUES('pgsz', 32);
  INSERT INTO t11 VALUES('another');
  INSERT INTO t11 VALUES('string');
................................................................................
} {1 2 3 4 5 6 7 8}

do_execsql_test 5.3 {
  SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC
} {8 7 6 5 4 3 2 1}

#db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x }

























































finish_test








<
<







 








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


62
63
64
65
66
67
68


69
70
71
72
73
74
75
...
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
  SELECT * FROM t1 WHERE t1 MATCH 'o*';
} {one}

do_execsql_test 3.1 {
  INSERT INTO t1(t1) VALUES('integrity-check');
} {}



#-------------------------------------------------------------------------
reset_db
do_execsql_test 4.1 {
  CREATE VIRTUAL TABLE t11 USING fts5(content);
  INSERT INTO t11(t11, rank) VALUES('pgsz', 32);
  INSERT INTO t11 VALUES('another');
  INSERT INTO t11 VALUES('string');
................................................................................
} {1 2 3 4 5 6 7 8}

do_execsql_test 5.3 {
  SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC
} {8 7 6 5 4 3 2 1}

#db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x }

#-------------------------------------------------------------------------
reset_db
do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE tt USING fts5(content);
  INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
  INSERT INTO tt VALUES('aa');
}

do_execsql_test 5.2 {
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1}

do_execsql_test 5.3 {
  DELETE FROM tt;
  BEGIN;
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
  COMMIT;
} {}

do_execsql_test 5.4 {
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1 2}

}

do_execsql_test 5.5 {
  DELETE FROM tt;
  BEGIN;
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
  COMMIT;
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1 2 3 4 5 6 7 8}

do_execsql_test 5.6 {
  INSERT INTO tt(tt) VALUES('integrity-check');
}

reset_db
do_execsql_test 5.7 {
  CREATE VIRTUAL TABLE tt USING fts5(content);
  INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
  INSERT INTO tt VALUES('aa ab ac ad ae af');
}

do_execsql_test 5.8 {
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1}

finish_test

Changes to ext/fts5/tool/loadfts5.tcl.

14
15
16
17
18
19
20






21
22
23
24
25
26
27
..
37
38
39
40
41
42
43

44
45
46
47
48
49
50
51
52
53

54
55
56
57
58
59
60
..
73
74
75
76
77
78
79





80
81
82
83
84
85
86
...
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
...
123
124
125
126
127
128
129
130
131
132
133
  foreach f [glob -nocomplain -dir $dir *] {
    if {$::O(limit) && $::nRow>=$::O(limit)} break
    if {[file isdir $f]} {
      load_hierachy $f
    } else {
      db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
      incr ::nRow







      if {($::nRow % $::nRowPerDot)==0} {
        puts -nonewline .
        if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
        flush stdout
      }

................................................................................
  puts stderr "  -fts5        (use fts5)"
  puts stderr "  -porter      (use porter tokenizer)"
  puts stderr "  -delete      (delete the database file before starting)"
  puts stderr "  -limit N     (load no more than N documents)"
  puts stderr "  -automerge N (set the automerge parameter to N)"
  puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
  puts stderr "  -prefix PREFIX (comma separated prefix= argument)"

  exit 1
}

set O(vtab)       fts5
set O(tok)        ""
set O(limit)      0
set O(delete)     0
set O(automerge)  -1
set O(crisismerge)  -1
set O(prefix)     ""


if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- [lindex $argv $i] {
    -fts4 {
................................................................................
      set O(delete) 1
    }

    -limit {
      if { [incr i]>=$nOpt } usage
      set O(limit) [lindex $argv $i]
    }





    
    -automerge {
      if { [incr i]>=$nOpt } usage
      set O(automerge) [lindex $argv $i]
    }

    -crisismerge {
................................................................................
set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
catch { load_static_extension db fts5 }
db func loadfile loadfile
db eval "PRAGMA page_size=4096"

db transaction {
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }
  if {$O(automerge)>=0} {
................................................................................
  if {$O(crisismerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
    } else {
    }
  }
  load_hierachy [lindex $argv end]
}










>
>
>
>
>
>







 







>










>







 







>
>
>
>
>







 







|







 







|



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
..
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
..
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
...
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
...
136
137
138
139
140
141
142
143
144
145
146
  foreach f [glob -nocomplain -dir $dir *] {
    if {$::O(limit) && $::nRow>=$::O(limit)} break
    if {[file isdir $f]} {
      load_hierachy $f
    } else {
      db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
      incr ::nRow

      if {$::O(trans) && ($::nRow % $::O(trans))==0} {
        db eval { COMMIT }
        db eval { INSERT INTO t1(t1) VALUES('integrity-check') }
        db eval { BEGIN }
      }

      if {($::nRow % $::nRowPerDot)==0} {
        puts -nonewline .
        if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
        flush stdout
      }

................................................................................
  puts stderr "  -fts5        (use fts5)"
  puts stderr "  -porter      (use porter tokenizer)"
  puts stderr "  -delete      (delete the database file before starting)"
  puts stderr "  -limit N     (load no more than N documents)"
  puts stderr "  -automerge N (set the automerge parameter to N)"
  puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
  puts stderr "  -prefix PREFIX (comma separated prefix= argument)"
  puts stderr "  -trans N     (commit after N inserts - 0 == never)"
  exit 1
}

set O(vtab)       fts5
set O(tok)        ""
set O(limit)      0
set O(delete)     0
set O(automerge)  -1
set O(crisismerge)  -1
set O(prefix)     ""
set O(trans)      0

if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- [lindex $argv $i] {
    -fts4 {
................................................................................
      set O(delete) 1
    }

    -limit {
      if { [incr i]>=$nOpt } usage
      set O(limit) [lindex $argv $i]
    }

    -trans {
      if { [incr i]>=$nOpt } usage
      set O(trans) [lindex $argv $i]
    }
    
    -automerge {
      if { [incr i]>=$nOpt } usage
      set O(automerge) [lindex $argv $i]
    }

    -crisismerge {
................................................................................
set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
catch { load_static_extension db fts5 }
db func loadfile loadfile
db eval "PRAGMA page_size=4096"

db eval BEGIN
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }
  if {$O(automerge)>=0} {
................................................................................
  if {$O(crisismerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
    } else {
    }
  }
  load_hierachy [lindex $argv end]
db eval COMMIT