/ Check-in [3b5758c6]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix problems with prefix queries in fts5.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:3b5758c647530bd5c2e68d0ee3e9f58a96347ca4
User & Date: dan 2015-10-27 17:48:57
Context
2015-10-27
20:04
Have contentless and external content fts5 tables ignore "OR REPLACE" conflict handling. check-in: a85c2a47 user: dan tags: trunk
17:48
Fix problems with prefix queries in fts5. check-in: 3b5758c6 user: dan tags: trunk
13:35
Provide hints to the storage engine using the sqlite3BtreeCursorHint() interface when compiling with SQLITE_ENABLE_CURSOR_HINTS. check-in: 45d3539e user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_index.c.

1495
1496
1497
1498
1499
1500
1501

1502
1503
1504
1505
1506
1507
1508
....
1668
1669
1670
1671
1672
1673
1674
1675
1676

1677
1678
1679
1680

1681
1682
1683
1684
1685
1686
1687
....
1847
1848
1849
1850
1851
1852
1853

1854
1855
1856
1857
1858
1859
1860
....
2055
2056
2057
2058
2059
2060
2061

2062
2063
2064
2065
2066


2067
2068
2069
2070
2071
2072
2073
....
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
....
5578
5579
5580
5581
5582
5583
5584
5585
5586

5587
5588
5589
5590
5591
5592
5593
    int iOff = pIter->iLeafOffset;  /* Offset to read at */
    int nSz;
    ASSERT_SZLEAF_OK(pIter->pLeaf);
    fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
    pIter->bDel = (nSz & 0x0001);
    pIter->nPos = nSz>>1;
    pIter->iLeafOffset = iOff;

  }
}

static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;

................................................................................
    pIter->iLeafPgno--;
    pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
          pIter->pSeg->iSegid, pIter->iLeafPgno
    ));
    if( pNew ){
      /* iTermLeafOffset may be equal to szLeaf if the term is the last
      ** thing on the page - i.e. the first rowid is on the following page.
      ** In this case leaf pIter->pLeaf==0, this iterator is at EOF. */
      if( pIter->iLeafPgno==pIter->iTermLeafPgno 

       && pIter->iTermLeafOffset<pNew->szLeaf 
      ){
        pIter->pLeaf = pNew;
        pIter->iLeafOffset = pIter->iTermLeafOffset;

      }else{
        int iRowidOff;
        iRowidOff = fts5LeafFirstRowidOff(pNew);
        if( iRowidOff ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = iRowidOff;
        }
................................................................................
          ** this block is particularly performance critical, so equivalent
          ** code is inlined. */
          int nSz;
          assert( p->rc==SQLITE_OK );
          fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
          pIter->bDel = (nSz & 0x0001);
          pIter->nPos = nSz>>1;

        }
      }
    }
  }
}

#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
................................................................................
    return;
  }else if( bEndOfPage ){
    do {
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){

        fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff);
        if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
        }else{
          nKeep = 0;


          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

................................................................................
  if( aBuf && pStruct ){
    const int flags = FTS5INDEX_QUERY_SCAN;
    int i;
    i64 iLastRowid = 0;
    Fts5IndexIter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Data *pData;
    Fts5Buffer doclist;
    int bNewTerm = 0;

    memset(&doclist, 0, sizeof(doclist));
    for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1);
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext2(p, p1, &bNewTerm)
    ){
      i64 iRowid = fts5MultiIterRowid(p1);
................................................................................

  if( n>0 ){
    iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
  }
  while( iOff<n ){
    int nPos;
    int bDummy;
    iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);

    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
    if( iOff<n ){
      i64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
      iDocid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
    }







>







 







|
|
>
|
<
|
|
>







 







>







 







>
|




>
>







 







|







 







|
|
>







1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
....
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679

1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
....
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
....
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
....
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
....
5584
5585
5586
5587
5588
5589
5590
5591
5592
5593
5594
5595
5596
5597
5598
5599
5600
    int iOff = pIter->iLeafOffset;  /* Offset to read at */
    int nSz;
    ASSERT_SZLEAF_OK(pIter->pLeaf);
    fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
    pIter->bDel = (nSz & 0x0001);
    pIter->nPos = nSz>>1;
    pIter->iLeafOffset = iOff;
    assert_nc( pIter->nPos>=0 );
  }
}

static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;

................................................................................
    pIter->iLeafPgno--;
    pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
          pIter->pSeg->iSegid, pIter->iLeafPgno
    ));
    if( pNew ){
      /* iTermLeafOffset may be equal to szLeaf if the term is the last
      ** thing on the page - i.e. the first rowid is on the following page.
      ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
      if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
        assert( pIter->pLeaf==0 );
        if( pIter->iTermLeafOffset<pNew->szLeaf ){

          pIter->pLeaf = pNew;
          pIter->iLeafOffset = pIter->iTermLeafOffset;
        }
      }else{
        int iRowidOff;
        iRowidOff = fts5LeafFirstRowidOff(pNew);
        if( iRowidOff ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = iRowidOff;
        }
................................................................................
          ** this block is particularly performance critical, so equivalent
          ** code is inlined. */
          int nSz;
          assert( p->rc==SQLITE_OK );
          fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
          pIter->bDel = (nSz & 0x0001);
          pIter->nPos = nSz>>1;
          assert_nc( pIter->nPos>=0 );
        }
      }
    }
  }
}

#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
................................................................................
    return;
  }else if( bEndOfPage ){
    do {
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
        iPgidx = pIter->pLeaf->szLeaf;
        iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
        if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
        }else{
          nKeep = 0;
          iTermOff = iOff;
          n = pIter->pLeaf->nn;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

................................................................................
  if( aBuf && pStruct ){
    const int flags = FTS5INDEX_QUERY_SCAN;
    int i;
    i64 iLastRowid = 0;
    Fts5IndexIter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Data *pData;
    Fts5Buffer doclist;
    int bNewTerm = 1;

    memset(&doclist, 0, sizeof(doclist));
    for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1);
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext2(p, p1, &bNewTerm)
    ){
      i64 iRowid = fts5MultiIterRowid(p1);
................................................................................

  if( n>0 ){
    iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
  }
  while( iOff<n ){
    int nPos;
    int bDel;
    iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
    if( iOff<n ){
      i64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
      iDocid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
    }

Changes to ext/fts5/test/fts5integrity.test.

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
..
98
99
100
101
102
103
104

105














































106
107
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file containst tests focused on the integrity-check procedure.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5integrity

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
................................................................................
    INSERT INTO aa_content VALUES(23, '');
    INSERT INTO aa(aa) VALUES('integrity-check'); 
} {1 {database disk image is malformed}}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r}
#exit

















































finish_test








|







 







>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
..
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file contains tests focused on the integrity-check procedure.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5integrity

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
................................................................................
    INSERT INTO aa_content VALUES(23, '');
    INSERT INTO aa(aa) VALUES('integrity-check'); 
} {1 {database disk image is malformed}}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r}
#exit

execsql { ROLLBACK }


#-------------------------------------------------------------------------
# Test that integrity-check works on a reasonably large db with many
# different terms.

# Document generator command.
proc rnddoc {n} {
  set doc [list]
  for {set i 0} {$i<$n} {incr i} {
    lappend doc [format %.5d [expr int(rand()*10000)]]
  }
  return $doc
}
db func rnddoc rnddoc

expr srand(0)
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE gg USING fts5(a, prefix="1,2,3");
  INSERT INTO gg(gg, rank) VALUES('pgsz', 256);
  INSERT INTO gg VALUES(rnddoc(20));
  INSERT INTO gg SELECT rnddoc(20) FROM gg;
  INSERT INTO gg SELECT rnddoc(20) FROM gg;
  INSERT INTO gg SELECT rnddoc(20) FROM gg;
  INSERT INTO gg SELECT rnddoc(20) FROM gg;
  INSERT INTO gg SELECT rnddoc(20) FROM gg;
  INSERT INTO gg SELECT rnddoc(20) FROM gg;
  INSERT INTO gg SELECT rnddoc(20) FROM gg;
  INSERT INTO gg SELECT rnddoc(20) FROM gg;
  INSERT INTO gg SELECT rnddoc(20) FROM gg;
  INSERT INTO gg SELECT rnddoc(20) FROM gg;
  INSERT INTO gg SELECT rnddoc(20) FROM gg;
}

do_execsql_test 5.1 {
  INSERT INTO gg(gg) VALUES('integrity-check');
}

do_execsql_test 5.2 {
  INSERT INTO gg(gg) VALUES('optimize');
}

breakpoint
do_execsql_test 5.3 {
  INSERT INTO gg(gg) VALUES('integrity-check');
}

finish_test

Added ext/fts5/test/fts5query.test.







































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# 2015 October 27
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5query

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

for {set tn 1 ; set pgsz 64} {$tn<32} {incr tn; incr pgsz 16} {
  reset_db
  do_test 1.$tn.1 {
    execsql {
      CREATE VIRTUAL TABLE t1 USING fts5(x);
      INSERT INTO t1(t1, rank) VALUES('pgsz', $pgsz);
      BEGIN;
    }
    foreach x [list aaa bbb ccc ddd eee fff ggg hhh iii jjj] {
      set doc [string repeat "$x " 30]
      execsql { INSERT INTO t1 VALUES($doc) }
    }
    execsql COMMIT
  } {}

  do_execsql_test 1.$tn.2 {
    INSERT INTO t1(t1) VALUES('integrity-check');
  }

  set ret 1
  foreach x [list a b c d e f g h i j] {
    do_execsql_test 1.$tn.3.$ret {
      SELECT rowid FROM t1 WHERE t1 MATCH $x || '*';
    } $ret
    incr ret
  }
}

for {set tn 1 ; set pgsz 64} {$tn<32} {incr tn; incr pgsz 16} {
  reset_db
  do_test 2.$tn.1 {
    execsql {
      CREATE VIRTUAL TABLE t1 USING fts5(x);
      INSERT INTO t1(t1, rank) VALUES('pgsz', $pgsz);
      BEGIN;
    }
    foreach x [list bbb ddd fff hhh jjj lll nnn ppp rrr ttt] {
      set doc [string repeat "$x " 30]
      execsql { INSERT INTO t1 VALUES($doc) }
    }
    execsql COMMIT
  } {}

  do_execsql_test 1.$tn.2 {
    INSERT INTO t1(t1) VALUES('integrity-check');
  }

  set ret 1
  foreach x [list a c e g i k m o q s u] {
    do_execsql_test 2.$tn.3.$ret {
      SELECT rowid FROM t1 WHERE t1 MATCH $x || '*';
    } {}
    incr ret
  }
}


finish_test


Changes to ext/fts5/tool/showfts5.tcl.

4
5
6
7
8
9
10

11
12
13
14
15

16
17
18
19
20
21
22




23
24
25
26
27
28
29
..
73
74
75
76
77
78
79







80
81
82
83
84
#-------------------------------------------------------------------------
# Process command line arguments.
#
proc usage {} {
  puts stderr "usage: $::argv0 ?OPTIONS? database table"
  puts stderr ""
  puts stderr "  -nterm                (count number of terms in each segment)"

  puts stderr ""
  exit 1
}

set O(nterm) 0


if {[llength $argv]<2} usage
foreach a [lrange $argv 0 end-2] {
  switch -- $a {
    -nterm {
      set O(nterm) 1
    }





    default {
      usage
    }
  }
}

................................................................................
        puts [format "        % -28s    nTerm=%d" $seg $nTerm]
      } else {
        puts [format "        % -28s" $seg]
      }
    }
  }
}



















>





>







>
>
>
>







 







>
>
>
>
>
>
>





4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
..
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#-------------------------------------------------------------------------
# Process command line arguments.
#
proc usage {} {
  puts stderr "usage: $::argv0 ?OPTIONS? database table"
  puts stderr ""
  puts stderr "  -nterm                (count number of terms in each segment)"
  puts stderr "  -segments             (output segment contents)"
  puts stderr ""
  exit 1
}

set O(nterm) 0
set O(segments) 0

if {[llength $argv]<2} usage
foreach a [lrange $argv 0 end-2] {
  switch -- $a {
    -nterm {
      set O(nterm) 1
    }

    -segments {
      set O(segments) 1
    }

    default {
      usage
    }
  }
}

................................................................................
        puts [format "        % -28s    nTerm=%d" $seg $nTerm]
      } else {
        puts [format "        % -28s" $seg]
      }
    }
  }
}

if {$O(segments)} {
  puts ""
  db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data WHERE id>10" {
    puts $d
  }
}