SQLite

Check-in [fab245bea4]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the array of 16-bit offsets at the end of each page to an array of varints.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5-incompatible
Files: files | file ages | folders
SHA1: fab245bea4f283714c17bca22428d5eb4db5935a
User & Date: dan 2015-09-10 05:40:17.756
Context
2015-09-10
10:01
Fix an fts5 problem that could occur if a term and the first associated rowid are on different leaf pages. (check-in: ffe2796ac9 user: dan tags: fts5-incompatible)
05:40
Change the array of 16-bit offsets at the end of each page to an array of varints. (check-in: fab245bea4 user: dan tags: fts5-incompatible)
2015-09-09
08:15
Fix a bug in preprocessor macros within fts5_main.c. (check-in: 0eb2b9521f user: dan tags: fts5-incompatible)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_index.c.
374
375
376
377
378
379
380

381
382
383
384
385
386
387
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */

  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */







>







374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
  int iLeafOffset;                /* Byte offset within current leaf */

  /* The page and offset from which the current term was read. The offset 
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iTermIdx;
  int iEndofDoclist;

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */








|







489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
  int iLeafOffset;                /* Byte offset within current leaf */

  /* The page and offset from which the current term was read. The offset 
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff;                  /* Next offset in pgidx */
  int iEndofDoclist;

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))

#define fts5LeafFirstTermOff(x) fts5LeafTermOff(x, 0)

/*
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
*/
struct Fts5IndexIter {
  Fts5Index *pIndex;              /* Index that owns this iterator */







<
<







529
530
531
532
533
534
535


536
537
538
539
540
541
542
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))



/*
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
*/
struct Fts5IndexIter {
  Fts5Index *pIndex;              /* Index that owns this iterator */
644
645
646
647
648
649
650





651
652
653
654
655
656
657
){
  int nCmp = MIN(nLeft, nRight);
  int res = memcmp(pLeft, pRight, nCmp);
  return (res==0 ? (nLeft - nRight) : res);
}
#endif







/*
** Close the read-only blob handle, if it is open.
*/
static void fts5CloseReader(Fts5Index *p){
  if( p->pReader ){
    sqlite3_blob *pReader = p->pReader;







>
>
>
>
>







643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
){
  int nCmp = MIN(nLeft, nRight);
  int res = memcmp(pLeft, pRight, nCmp);
  return (res==0 ? (nLeft - nRight) : res);
}
#endif

static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
  int ret;
  fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
  return ret;
}

/*
** Close the read-only blob handle, if it is open.
*/
static void fts5CloseReader(Fts5Index *p){
  if( p->pReader ){
    sqlite3_blob *pReader = p->pReader;
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}

static void fts5SegIterLoadEod(Fts5Index *p, Fts5SegIter *pIter){
  Fts5Data *pLeaf = pIter->pLeaf;
  int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2;

  assert( pIter->iLeafPgno==pIter->iTermLeafPgno );
  if( (pIter->iTermIdx+1)<nPg ){
    int iRead = pLeaf->szLeaf + (pIter->iTermIdx + 1) * 2;
    pIter->iEndofDoclist = fts5GetU16(&pLeaf->p[iRead]);
  }else{
    pIter->iEndofDoclist = pLeaf->nn+1;
  }

}

static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  if( iOff>=pIter->pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);







<
<
<
<
<
<
<
<
<
<
<
<
<
<







1523
1524
1525
1526
1527
1528
1529














1530
1531
1532
1533
1534
1535
1536
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}















static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  if( iOff>=pIter->pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);
1579
1580
1581
1582
1583
1584
1585







1586
1587
1588
1589
1590
1591
1592
1593
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  pIter->iLeafOffset = iOff;








  fts5SegIterLoadEod(p, pIter);
  fts5SegIterLoadRowid(p, pIter);
}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when 
** this function returns.







>
>
>
>
>
>
>
|







1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  pIter->iLeafOffset = iOff;

  if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    int nExtra;
    pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
    pIter->iEndofDoclist += nExtra;
  }

  fts5SegIterLoadRowid(p, pIter);
}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when 
** this function returns.
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
    fts5SegIterNextPage(p, pIter);
  }

  if( p->rc==SQLITE_OK ){
    pIter->iLeafOffset = 4;
    assert_nc( pIter->pLeaf->nn>4 );
    assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
    pIter->iTermIdx = 0;
    fts5SegIterLoadTerm(p, pIter, 0);
    fts5SegIterLoadNPos(p, pIter);
  }
}

/*
** This function is only ever called on iterators created by calls to







|







1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
    fts5SegIterNextPage(p, pIter);
  }

  if( p->rc==SQLITE_OK ){
    pIter->iLeafOffset = 4;
    assert_nc( pIter->pLeaf->nn>4 );
    assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
    pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
    fts5SegIterLoadTerm(p, pIter, 0);
    fts5SegIterLoadNPos(p, pIter);
  }
}

/*
** This function is only ever called on iterators created by calls to
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
      iOff = pIter->iLeafOffset + pIter->nPos;

      if( iOff<n ){
        /* The next entry is on the current page. */
        assert_nc( iOff<=pIter->iEndofDoclist );
        if( iOff>=pIter->iEndofDoclist ){
          bNewTerm = 1;
          if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
            pIter->iTermIdx++;
          }else{
            pIter->iTermIdx = 0;
          }
          if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
            iOff += fts5GetVarint32(&a[iOff], nKeep);
          }
        }else{
          u64 iDelta;
          iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
          pIter->iRowid += iDelta;







<
<
<
<
<







1782
1783
1784
1785
1786
1787
1788





1789
1790
1791
1792
1793
1794
1795
      iOff = pIter->iLeafOffset + pIter->nPos;

      if( iOff<n ){
        /* The next entry is on the current page. */
        assert_nc( iOff<=pIter->iEndofDoclist );
        if( iOff>=pIter->iEndofDoclist ){
          bNewTerm = 1;





          if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
            iOff += fts5GetVarint32(&a[iOff], nKeep);
          }
        }else{
          u64 iDelta;
          iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
          pIter->iRowid += iDelta;
1831
1832
1833
1834
1835
1836
1837
1838
1839




1840





1841

1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858

1859
1860
1861
1862
1863
1864
1865
          fts5SegIterNextPage(p, pIter);
          pLeaf = pIter->pLeaf;
          if( pLeaf==0 ) break;
          ASSERT_SZLEAF_OK(pLeaf);
          if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
            iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
            pIter->iLeafOffset = iOff;
          }
          else if( pLeaf->nn>pLeaf->szLeaf ){




            iOff = fts5LeafFirstTermOff(pLeaf);





            pIter->iLeafOffset = iOff;

            bNewTerm = 1;
            pIter->iTermIdx = 0;
          }
          if( iOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
            return;
          }
        }
      }

      /* Check if the iterator is now at EOF. If so, return early. */
      if( pIter->pLeaf ){
        if( bNewTerm ){
          if( pIter->flags & FTS5_SEGITER_ONETERM ){
            fts5DataRelease(pIter->pLeaf);
            pIter->pLeaf = 0;
          }else{

            fts5SegIterLoadTerm(p, pIter, nKeep);
            fts5SegIterLoadNPos(p, pIter);
            if( pbNewTerm ) *pbNewTerm = 1;
          }
        }else{
          fts5SegIterLoadNPos(p, pIter);
        }







|
|
>
>
>
>
|
>
>
>
>
>

>

<















>







1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844

1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
          fts5SegIterNextPage(p, pIter);
          pLeaf = pIter->pLeaf;
          if( pLeaf==0 ) break;
          ASSERT_SZLEAF_OK(pLeaf);
          if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
            iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
            pIter->iLeafOffset = iOff;

            if( pLeaf->nn>pLeaf->szLeaf ){
              pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
                  &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
              );
            }

          }
          else if( pLeaf->nn>pLeaf->szLeaf ){
            pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
                &pLeaf->p[pLeaf->szLeaf], iOff
            );
            pIter->iLeafOffset = iOff;
            pIter->iEndofDoclist = iOff;
            bNewTerm = 1;

          }
          if( iOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
            return;
          }
        }
      }

      /* Check if the iterator is now at EOF. If so, return early. */
      if( pIter->pLeaf ){
        if( bNewTerm ){
          if( pIter->flags & FTS5_SEGITER_ONETERM ){
            fts5DataRelease(pIter->pLeaf);
            pIter->pLeaf = 0;
          }else{
            int nExtra;
            fts5SegIterLoadTerm(p, pIter, nKeep);
            fts5SegIterLoadNPos(p, pIter);
            if( pbNewTerm ) *pbNewTerm = 1;
          }
        }else{
          fts5SegIterLoadNPos(p, pIter);
        }
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
    iOff = fts5LeafFirstRowidOff(pLast);
    iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;

    if( fts5LeafIsTermless(pLast) ){
      pIter->iEndofDoclist = pLast->nn+1;
    }else{
      pIter->iEndofDoclist = fts5LeafTermOff(pLast, 0);
    }

  }

  fts5SegIterReverseInitPage(p, pIter);
}








|







1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
    iOff = fts5LeafFirstRowidOff(pLast);
    iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;

    if( fts5LeafIsTermless(pLast) ){
      pIter->iEndofDoclist = pLast->nn+1;
    }else{
      pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
    }

  }

  fts5SegIterReverseInitPage(p, pIter);
}

2006
2007
2008
2009
2010
2011
2012

2013
2014
2015
2016
2017
2018
2019


2020
2021
2022
2023

2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042


2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066



2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105








2106
2107
2108
2109
2110
2111
2112
2113
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  int iOff;
  const u8 *a = pIter->pLeaf->p;

  int n = pIter->pLeaf->szLeaf;

  int nMatch = 0;
  int nKeep = 0;
  int nNew = 0;
  int iTerm = 0;
  int nPgTerm = (pIter->pLeaf->nn - pIter->pLeaf->szLeaf) >> 1;



  assert( p->rc==SQLITE_OK );
  assert( pIter->pLeaf );


  iOff = fts5LeafFirstTermOff(pIter->pLeaf);
  if( iOff<4 || iOff>=n ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  while( 1 ){
    int i;
    int nCmp;

    /* Figure out how many new bytes are in this term */
    fts5IndexGetVarint32(a, iOff, nNew);

    if( nKeep<nMatch ){
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){


      nCmp = MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( nTerm==nMatch ){
        if( i==nNew ){
          goto search_success;
        }else{
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        goto search_failed;
      }
    }
    iOff += nNew;

    iTerm++;
    if( iTerm>=nPgTerm ){
      iOff = n;
      break;
    }
    iOff = fts5GetU16(&a[n + iTerm*2]);




    /* Read the nKeep field of the next term. */
    fts5IndexGetVarint32(a, iOff, nKeep);
  }

 search_failed:
  if( bGe==0 ){
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( iOff>=n ){
    do {
      iTerm = 0;
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
        iOff = fts5LeafFirstTermOff(pIter->pLeaf);
        if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
        }else{
          nKeep = 0;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

 search_success:
  pIter->iTermIdx = iTerm;

  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);









  fts5SegIterLoadEod(p, pIter);
  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.







>
|





|
>
>


<

>
|
|
<
<
|
<

<
<



<






>
>
















<

<
|
|


|
>
>
>










|






|












<








>
>
>
>
>
>
>
>
|







2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026

2027
2028
2029
2030


2031

2032


2033
2034
2035

2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059

2060

2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098

2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  int iOff;
  const u8 *a = pIter->pLeaf->p;
  int szLeaf = pIter->pLeaf->szLeaf;
  int n = pIter->pLeaf->nn;

  int nMatch = 0;
  int nKeep = 0;
  int nNew = 0;
  int iTerm = 0;
  int iTermOff;
  int iPgidx;                     /* Current offset in pgidx */
  int bEndOfPage = 0;

  assert( p->rc==SQLITE_OK );


  iPgidx = szLeaf;
  iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
  iOff = iTermOff;




  while( 1 ){



    /* Figure out how many new bytes are in this term */
    fts5IndexGetVarint32(a, iOff, nNew);

    if( nKeep<nMatch ){
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){
      int nCmp;
      int i;
      nCmp = MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( nTerm==nMatch ){
        if( i==nNew ){
          goto search_success;
        }else{
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        goto search_failed;
      }
    }



    if( iPgidx>=n ){
      bEndOfPage = 1;
      break;
    }

    iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
    iTermOff += nKeep;
    iOff = iTermOff;

    /* Read the nKeep field of the next term. */
    fts5IndexGetVarint32(a, iOff, nKeep);
  }

 search_failed:
  if( bGe==0 ){
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( bEndOfPage ){
    do {
      iTerm = 0;
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
        fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff);
        if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
        }else{
          nKeep = 0;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

 search_success:


  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);

  if( iPgidx>=n ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    int nExtra;
    iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
    pIter->iEndofDoclist = iTermOff + nExtra;
  }
  pIter->iPgidxOff = iPgidx;

  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
3169
3170
3171
3172
3173
3174
3175

3176
3177
3178
3179
3180
3181
3182
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);
  fts5BufferZero(&pPage->pgidx);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);

  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;







>







3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);
  fts5BufferZero(&pPage->pgidx);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->iPrevPgidx = 0;
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214





3215
3216

3217
3218
3219
3220
3221
3222
3223
  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;

  if( p->rc ) return;
  assert( pPage->buf.n>=4 );
  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );

  /* If the current leaf page is full, flush it to disk. */
  if( (pPage->buf.n + pPage->pgidx.n + nTerm + 2)>=p->pConfig->pgsz ){
    if( pPage->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
    }
    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
  }
  
  /* TODO1: Updating pgidx here. */





  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  pPgidx->n += 2;


  if( pWriter->bFirstTermInPage ){
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term 







|







>
>
>
>
>


>







3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;

  if( p->rc ) return;
  assert( pPage->buf.n>=4 );
  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );

  /* If the current leaf page is full, flush it to disk. */
  if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
    if( pPage->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
    }
    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
  }
  
  /* TODO1: Updating pgidx here. */
  pPgidx->n += sqlite3Fts5PutVarint(
      &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
  );
  pPage->iPrevPgidx = pPage->buf.n;
#if 0
  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  pPgidx->n += 2;
#endif

  if( pWriter->bFirstTermInPage ){
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term 
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
    /* Bind the current output segment id to the index-writer. This is an
    ** optimization over binding the same value over and over as rows are
    ** inserted into %_idx by the current writer.  */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}

/*
** The buffer passed as the second argument contains a leaf page that is
** missing its page-idx array. The first term is guaranteed to start at
** byte offset 4 of the buffer. The szLeaf field of the leaf page header
** is already populated.
**
** This function appends a page-index to the buffer. The buffer is 
** guaranteed to be large enough to fit the page-index.
*/
static void fts5MakePageidx(
  Fts5Index *p,                   /* Fts index object to store any error in */
  Fts5Data *pOld,                 /* Original page data */
  int iOldidx,                    /* Index of term in pOld pgidx */
  Fts5Buffer *pBuf                /* Buffer containing new page (no pgidx) */
){
  if( p->rc==SQLITE_OK ){
    int iOff;
    int iEnd;
    int nByte;
    int iEndOld;                  /* Byte after term iOldIdx on old page */
    int iEndNew;                  /* Byte after term iOldIdx on new page */
    int ii;
    int nPgIdx = (pOld->nn - pOld->szLeaf) / 2;

    /* Determine end of term on old page */
    iEndOld = fts5LeafTermOff(pOld, iOldidx);
    if( iOldidx>0 ){
      iEndOld += fts5GetVarint32(&pOld->p[iEndOld], nByte);
    }
    iEndOld += fts5GetVarint32(&pOld->p[iEndOld], nByte);
    iEndOld += nByte;

    /* Determine end of term on new page */
    iEndNew = 4 + fts5GetVarint32(&pBuf->p[4], nByte);
    iEndNew += nByte;

    fts5PutU16(&pBuf->p[pBuf->n], 4);
    pBuf->n += 2;
    for(ii=iOldidx+1; ii<nPgIdx; ii++){
      int iVal = fts5LeafTermOff(pOld, ii);
      fts5PutU16(&pBuf->p[pBuf->n], iVal + (iEndNew - iEndOld));
      pBuf->n += 2;
    }
  }
}

/*
** Iterator pIter was used to iterate through the input segments of on an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){
  int i;







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







3420
3421
3422
3423
3424
3425
3426














































3427
3428
3429
3430
3431
3432
3433
    /* Bind the current output segment id to the index-writer. This is an
    ** optimization over binding the same value over and over as rows are
    ** inserted into %_idx by the current writer.  */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}















































/*
** Iterator pIter was used to iterate through the input segments of on an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){
  int i;
3491
3492
3493
3494
3495
3496
3497


3498







3499
3500
3501
3502
3503
3504
3505
        fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
        if( p->rc==SQLITE_OK ){
          /* Set the szLeaf field */
          fts5PutU16(&buf.p[2], buf.n);
        }

        /* Set up the new page-index array */


        fts5MakePageidx(p, pData, pSeg->iTermIdx, &buf);








        fts5DataRelease(pData);
        pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid);
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }







>
>
|
>
>
>
>
>
>
>







3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
        fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
        if( p->rc==SQLITE_OK ){
          /* Set the szLeaf field */
          fts5PutU16(&buf.p[2], buf.n);
        }

        /* Set up the new page-index array */
        fts5BufferAppendVarint(&p->rc, &buf, 4);
        if( pSeg->iLeafPgno==pSeg->iTermLeafPgno 
         && pSeg->iEndofDoclist<pData->szLeaf 
        ){
          int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
          fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
          fts5BufferAppendBlob(&p->rc, &buf, 
              pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
          );
        }

        fts5DataRelease(pData);
        pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid);
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
4938
4939
4940
4941
4942
4943
4944

4945

4946
4947
4948

4949
4950
4951






4952
4953
4954
4955
4956
4957
4958
4959
4960
4961
    fts5DataRelease(pLeaf);
    if( p->rc ) break;
  }
}

static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2;

  int ii;

  Fts5Buffer buf1 = {0,0,0};
  Fts5Buffer buf2 = {0,0,0};


  for(ii=0; p->rc==SQLITE_OK && ii<nPg; ii++){
    int res;
    int iOff = fts5LeafTermOff(pLeaf, ii);






    if( iOff>=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else if( ii==0 ){
      int nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }







>

>



>
|

|
>
>
>
>
>
>


|







4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
    fts5DataRelease(pLeaf);
    if( p->rc ) break;
  }
}

static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2;
  int iTermOff = 0;
  int ii;

  Fts5Buffer buf1 = {0,0,0};
  Fts5Buffer buf2 = {0,0,0};

  ii = pLeaf->szLeaf;
  while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
    int res;
    int iOff;
    int nIncr;

    ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
    iTermOff += nIncr;
    iOff = iTermOff;

    if( iOff>=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else if( iTermOff==nIncr ){
      int nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }
5422
5423
5424
5425
5426
5427
5428
5429



5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464
5465
5466




5467

5468

5469






5470
5471
5472
5473
5474
5475
5476
5477

5478
5479
5480
5481
5482
5483
5484
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{
    Fts5Buffer term;



    int iTermOff = 0;
    int szLeaf = 0;
    int iRowidOff = 0;
    int iOff;
    int nPgTerm = 0;
    int nDoclist;
    int i;

    memset(&term, 0, sizeof(Fts5Buffer));

    if( n<4 ){
      sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
      goto decode_out;
    }else{
      iRowidOff = fts5GetU16(&a[0]);
      szLeaf = fts5GetU16(&a[2]);
      nPgTerm = (n - szLeaf) / 2;
      if( nPgTerm ){
        iTermOff = fts5GetU16(&a[szLeaf]);
      }
    }

    /* Decode the position list tail at the start of the page */
    if( iRowidOff!=0 ){
      iOff = iRowidOff;
    }else if( iTermOff!=0 ){
      iOff = iTermOff;
    }else{
      iOff = szLeaf;
    }
    fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

    /* Decode any more doclist data that appears on the page before the
    ** first term. */
    nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
    fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);





    for(i=0; i<nPgTerm; i++){

      int nByte;

      int iEnd = (i+1)<nPgTerm ? fts5GetU16(&a[szLeaf+i*2+2]) : szLeaf;






      iOff = fts5GetU16(&a[szLeaf + i*2]);
      if( i>0 ){
        iOff += fts5GetVarint32(&a[iOff], nByte);
        term.n = nByte;
      }
      iOff += fts5GetVarint32(&a[iOff], nByte);
      fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
      iOff += nByte;

      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
      );
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
    }

    fts5BufferFree(&term);







|
>
>
>

<


<

<








|
<
|
|


















>
>
>
>
|
>
|
>
|
>
>
>
>
>
>
|
|






>







5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
5420
5421

5422
5423

5424

5425
5426
5427
5428
5429
5430
5431
5432
5433

5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481
5482
5483
5484
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{
    Fts5Buffer term;              /* Current term read from page */
    int szLeaf;                   /* Offset of pgidx in a[] */
    int iPgidxOff;
    int iPgidxPrev = 0;           /* Previous value read from pgidx */
    int iTermOff = 0;

    int iRowidOff = 0;
    int iOff;

    int nDoclist;


    memset(&term, 0, sizeof(Fts5Buffer));

    if( n<4 ){
      sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
      goto decode_out;
    }else{
      iRowidOff = fts5GetU16(&a[0]);
      iPgidxOff = szLeaf = fts5GetU16(&a[2]);

      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], iTermOff);
      }
    }

    /* Decode the position list tail at the start of the page */
    if( iRowidOff!=0 ){
      iOff = iRowidOff;
    }else if( iTermOff!=0 ){
      iOff = iTermOff;
    }else{
      iOff = szLeaf;
    }
    fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

    /* Decode any more doclist data that appears on the page before the
    ** first term. */
    nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
    fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);

    while( iPgidxOff<n ){
      int bFirst = (iPgidxOff==szLeaf);     /* True for first term on page */
      int nByte;                            /* Bytes of data */
      int iEnd;
      
      iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
      iPgidxPrev += nByte;
      iOff = iPgidxPrev;

      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], nByte);
        iEnd = iPgidxPrev + nByte;
      }else{
        iEnd = szLeaf;
      }

      if( bFirst==0 ){
        iOff += fts5GetVarint32(&a[iOff], nByte);
        term.n = nByte;
      }
      iOff += fts5GetVarint32(&a[iOff], nByte);
      fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
      iOff += nByte;

      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
      );
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
    }

    fts5BufferFree(&term);
Changes to ext/fts5/test/fts5simple.test.
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
  SELECT * FROM t1 WHERE t1 MATCH 'o*';
} {one}

do_execsql_test 3.1 {
  INSERT INTO t1(t1) VALUES('integrity-check');
} {}

}

#-------------------------------------------------------------------------
reset_db
do_execsql_test 4.1 {
  CREATE VIRTUAL TABLE t11 USING fts5(content);
  INSERT INTO t11(t11, rank) VALUES('pgsz', 32);
  INSERT INTO t11 VALUES('another');
  INSERT INTO t11 VALUES('string');







<
<







62
63
64
65
66
67
68


69
70
71
72
73
74
75
  SELECT * FROM t1 WHERE t1 MATCH 'o*';
} {one}

do_execsql_test 3.1 {
  INSERT INTO t1(t1) VALUES('integrity-check');
} {}



#-------------------------------------------------------------------------
reset_db
do_execsql_test 4.1 {
  CREATE VIRTUAL TABLE t11 USING fts5(content);
  INSERT INTO t11(t11, rank) VALUES('pgsz', 32);
  INSERT INTO t11 VALUES('another');
  INSERT INTO t11 VALUES('string');
110
111
112
113
114
115
116
117
























































118
119
} {1 2 3 4 5 6 7 8}

do_execsql_test 5.3 {
  SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC
} {8 7 6 5 4 3 2 1}

#db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x }

























































finish_test









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
} {1 2 3 4 5 6 7 8}

do_execsql_test 5.3 {
  SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC
} {8 7 6 5 4 3 2 1}

#db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x }

#-------------------------------------------------------------------------
reset_db
do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE tt USING fts5(content);
  INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
  INSERT INTO tt VALUES('aa');
}

do_execsql_test 5.2 {
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1}

do_execsql_test 5.3 {
  DELETE FROM tt;
  BEGIN;
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
  COMMIT;
} {}

do_execsql_test 5.4 {
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1 2}

}

do_execsql_test 5.5 {
  DELETE FROM tt;
  BEGIN;
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
  COMMIT;
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1 2 3 4 5 6 7 8}

do_execsql_test 5.6 {
  INSERT INTO tt(tt) VALUES('integrity-check');
}

reset_db
do_execsql_test 5.7 {
  CREATE VIRTUAL TABLE tt USING fts5(content);
  INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
  INSERT INTO tt VALUES('aa ab ac ad ae af');
}

do_execsql_test 5.8 {
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1}

finish_test

Changes to ext/fts5/tool/loadfts5.tcl.
14
15
16
17
18
19
20






21
22
23
24
25
26
27
  foreach f [glob -nocomplain -dir $dir *] {
    if {$::O(limit) && $::nRow>=$::O(limit)} break
    if {[file isdir $f]} {
      load_hierachy $f
    } else {
      db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
      incr ::nRow







      if {($::nRow % $::nRowPerDot)==0} {
        puts -nonewline .
        if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
        flush stdout
      }








>
>
>
>
>
>







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
  foreach f [glob -nocomplain -dir $dir *] {
    if {$::O(limit) && $::nRow>=$::O(limit)} break
    if {[file isdir $f]} {
      load_hierachy $f
    } else {
      db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
      incr ::nRow

      if {$::O(trans) && ($::nRow % $::O(trans))==0} {
        db eval { COMMIT }
        db eval { INSERT INTO t1(t1) VALUES('integrity-check') }
        db eval { BEGIN }
      }

      if {($::nRow % $::nRowPerDot)==0} {
        puts -nonewline .
        if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
        flush stdout
      }

37
38
39
40
41
42
43

44
45
46
47
48
49
50
51
52
53

54
55
56
57
58
59
60
  puts stderr "  -fts5        (use fts5)"
  puts stderr "  -porter      (use porter tokenizer)"
  puts stderr "  -delete      (delete the database file before starting)"
  puts stderr "  -limit N     (load no more than N documents)"
  puts stderr "  -automerge N (set the automerge parameter to N)"
  puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
  puts stderr "  -prefix PREFIX (comma separated prefix= argument)"

  exit 1
}

set O(vtab)       fts5
set O(tok)        ""
set O(limit)      0
set O(delete)     0
set O(automerge)  -1
set O(crisismerge)  -1
set O(prefix)     ""


if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- [lindex $argv $i] {
    -fts4 {







>










>







43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
  puts stderr "  -fts5        (use fts5)"
  puts stderr "  -porter      (use porter tokenizer)"
  puts stderr "  -delete      (delete the database file before starting)"
  puts stderr "  -limit N     (load no more than N documents)"
  puts stderr "  -automerge N (set the automerge parameter to N)"
  puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
  puts stderr "  -prefix PREFIX (comma separated prefix= argument)"
  puts stderr "  -trans N     (commit after N inserts - 0 == never)"
  exit 1
}

set O(vtab)       fts5
set O(tok)        ""
set O(limit)      0
set O(delete)     0
set O(automerge)  -1
set O(crisismerge)  -1
set O(prefix)     ""
set O(trans)      0

if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- [lindex $argv $i] {
    -fts4 {
73
74
75
76
77
78
79





80
81
82
83
84
85
86
      set O(delete) 1
    }

    -limit {
      if { [incr i]>=$nOpt } usage
      set O(limit) [lindex $argv $i]
    }





    
    -automerge {
      if { [incr i]>=$nOpt } usage
      set O(automerge) [lindex $argv $i]
    }

    -crisismerge {







>
>
>
>
>







81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
      set O(delete) 1
    }

    -limit {
      if { [incr i]>=$nOpt } usage
      set O(limit) [lindex $argv $i]
    }

    -trans {
      if { [incr i]>=$nOpt } usage
      set O(trans) [lindex $argv $i]
    }
    
    -automerge {
      if { [incr i]>=$nOpt } usage
      set O(automerge) [lindex $argv $i]
    }

    -crisismerge {
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130

131
132
133
set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
catch { load_static_extension db fts5 }
db func loadfile loadfile
db eval "PRAGMA page_size=4096"

db transaction {
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }
  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }
  }
  if {$O(crisismerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
    } else {
    }
  }
  load_hierachy [lindex $argv end]
}











|




















<
>



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

143
144
145
146
set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
catch { load_static_extension db fts5 }
db func loadfile loadfile
db eval "PRAGMA page_size=4096"

db eval BEGIN
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }
  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }
  }
  if {$O(crisismerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
    } else {
    }
  }
  load_hierachy [lindex $argv end]

db eval COMMIT