/ Check-in [3b5758c6]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix problems with prefix queries in fts5.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:3b5758c647530bd5c2e68d0ee3e9f58a96347ca4
User & Date: dan 2015-10-27 17:48:57
Context
2015-10-27
20:04
Have contentless and external content fts5 tables ignore "OR REPLACE" conflict handling. check-in: a85c2a47 user: dan tags: trunk
17:48
Fix problems with prefix queries in fts5. check-in: 3b5758c6 user: dan tags: trunk
13:35
Provide hints to the storage engine using the sqlite3BtreeCursorHint() interface when compiling with SQLITE_ENABLE_CURSOR_HINTS. check-in: 45d3539e user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_index.c.

  1495   1495       int iOff = pIter->iLeafOffset;  /* Offset to read at */
  1496   1496       int nSz;
  1497   1497       ASSERT_SZLEAF_OK(pIter->pLeaf);
  1498   1498       fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
  1499   1499       pIter->bDel = (nSz & 0x0001);
  1500   1500       pIter->nPos = nSz>>1;
  1501   1501       pIter->iLeafOffset = iOff;
         1502  +    assert_nc( pIter->nPos>=0 );
  1502   1503     }
  1503   1504   }
  1504   1505   
  1505   1506   static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  1506   1507     u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  1507   1508     int iOff = pIter->iLeafOffset;
  1508   1509   
................................................................................
  1668   1669       pIter->iLeafPgno--;
  1669   1670       pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
  1670   1671             pIter->pSeg->iSegid, pIter->iLeafPgno
  1671   1672       ));
  1672   1673       if( pNew ){
  1673   1674         /* iTermLeafOffset may be equal to szLeaf if the term is the last
  1674   1675         ** thing on the page - i.e. the first rowid is on the following page.
  1675         -      ** In this case leaf pIter->pLeaf==0, this iterator is at EOF. */
  1676         -      if( pIter->iLeafPgno==pIter->iTermLeafPgno 
  1677         -       && pIter->iTermLeafOffset<pNew->szLeaf 
  1678         -      ){
  1679         -        pIter->pLeaf = pNew;
  1680         -        pIter->iLeafOffset = pIter->iTermLeafOffset;
         1676  +      ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
         1677  +      if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
         1678  +        assert( pIter->pLeaf==0 );
         1679  +        if( pIter->iTermLeafOffset<pNew->szLeaf ){
         1680  +          pIter->pLeaf = pNew;
         1681  +          pIter->iLeafOffset = pIter->iTermLeafOffset;
         1682  +        }
  1681   1683         }else{
  1682   1684           int iRowidOff;
  1683   1685           iRowidOff = fts5LeafFirstRowidOff(pNew);
  1684   1686           if( iRowidOff ){
  1685   1687             pIter->pLeaf = pNew;
  1686   1688             pIter->iLeafOffset = iRowidOff;
  1687   1689           }
................................................................................
  1847   1849             ** this block is particularly performance critical, so equivalent
  1848   1850             ** code is inlined. */
  1849   1851             int nSz;
  1850   1852             assert( p->rc==SQLITE_OK );
  1851   1853             fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
  1852   1854             pIter->bDel = (nSz & 0x0001);
  1853   1855             pIter->nPos = nSz>>1;
         1856  +          assert_nc( pIter->nPos>=0 );
  1854   1857           }
  1855   1858         }
  1856   1859       }
  1857   1860     }
  1858   1861   }
  1859   1862   
  1860   1863   #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
................................................................................
  2055   2058       return;
  2056   2059     }else if( bEndOfPage ){
  2057   2060       do {
  2058   2061         fts5SegIterNextPage(p, pIter);
  2059   2062         if( pIter->pLeaf==0 ) return;
  2060   2063         a = pIter->pLeaf->p;
  2061   2064         if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
  2062         -        fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff);
         2065  +        iPgidx = pIter->pLeaf->szLeaf;
         2066  +        iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
  2063   2067           if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
  2064   2068             p->rc = FTS5_CORRUPT;
  2065   2069           }else{
  2066   2070             nKeep = 0;
         2071  +          iTermOff = iOff;
         2072  +          n = pIter->pLeaf->nn;
  2067   2073             iOff += fts5GetVarint32(&a[iOff], nNew);
  2068   2074             break;
  2069   2075           }
  2070   2076         }
  2071   2077       }while( 1 );
  2072   2078     }
  2073   2079   
................................................................................
  4371   4377     if( aBuf && pStruct ){
  4372   4378       const int flags = FTS5INDEX_QUERY_SCAN;
  4373   4379       int i;
  4374   4380       i64 iLastRowid = 0;
  4375   4381       Fts5IndexIter *p1 = 0;     /* Iterator used to gather data from index */
  4376   4382       Fts5Data *pData;
  4377   4383       Fts5Buffer doclist;
  4378         -    int bNewTerm = 0;
         4384  +    int bNewTerm = 1;
  4379   4385   
  4380   4386       memset(&doclist, 0, sizeof(doclist));
  4381   4387       for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1);
  4382   4388           fts5MultiIterEof(p, p1)==0;
  4383   4389           fts5MultiIterNext2(p, p1, &bNewTerm)
  4384   4390       ){
  4385   4391         i64 iRowid = fts5MultiIterRowid(p1);
................................................................................
  5578   5584   
  5579   5585     if( n>0 ){
  5580   5586       iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
  5581   5587       sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
  5582   5588     }
  5583   5589     while( iOff<n ){
  5584   5590       int nPos;
  5585         -    int bDummy;
  5586         -    iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
         5591  +    int bDel;
         5592  +    iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
         5593  +    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
  5587   5594       iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
  5588   5595       if( iOff<n ){
  5589   5596         i64 iDelta;
  5590   5597         iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
  5591   5598         iDocid += iDelta;
  5592   5599         sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
  5593   5600       }

Changes to ext/fts5/test/fts5integrity.test.

     5      5   #
     6      6   #    May you do good and not evil.
     7      7   #    May you find forgiveness for yourself and forgive others.
     8      8   #    May you share freely, never taking more than you give.
     9      9   #
    10     10   #***********************************************************************
    11     11   #
    12         -# This file containst tests focused on the integrity-check procedure.
           12  +# This file contains tests focused on the integrity-check procedure.
    13     13   #
    14     14   
    15     15   source [file join [file dirname [info script]] fts5_common.tcl]
    16     16   set testprefix fts5integrity
    17     17   
    18     18   # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
    19     19   ifcapable !fts5 {
................................................................................
    98     98       INSERT INTO aa_content VALUES(23, '');
    99     99       INSERT INTO aa(aa) VALUES('integrity-check'); 
   100    100   } {1 {database disk image is malformed}}
   101    101   
   102    102   #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r}
   103    103   #exit
   104    104   
          105  +execsql { ROLLBACK }
          106  +
          107  +
          108  +#-------------------------------------------------------------------------
          109  +# Test that integrity-check works on a reasonably large db with many
          110  +# different terms.
          111  +
          112  +# Document generator command.
          113  +proc rnddoc {n} {
          114  +  set doc [list]
          115  +  for {set i 0} {$i<$n} {incr i} {
          116  +    lappend doc [format %.5d [expr int(rand()*10000)]]
          117  +  }
          118  +  return $doc
          119  +}
          120  +db func rnddoc rnddoc
          121  +
          122  +expr srand(0)
          123  +do_execsql_test 5.0 {
          124  +  CREATE VIRTUAL TABLE gg USING fts5(a, prefix="1,2,3");
          125  +  INSERT INTO gg(gg, rank) VALUES('pgsz', 256);
          126  +  INSERT INTO gg VALUES(rnddoc(20));
          127  +  INSERT INTO gg SELECT rnddoc(20) FROM gg;
          128  +  INSERT INTO gg SELECT rnddoc(20) FROM gg;
          129  +  INSERT INTO gg SELECT rnddoc(20) FROM gg;
          130  +  INSERT INTO gg SELECT rnddoc(20) FROM gg;
          131  +  INSERT INTO gg SELECT rnddoc(20) FROM gg;
          132  +  INSERT INTO gg SELECT rnddoc(20) FROM gg;
          133  +  INSERT INTO gg SELECT rnddoc(20) FROM gg;
          134  +  INSERT INTO gg SELECT rnddoc(20) FROM gg;
          135  +  INSERT INTO gg SELECT rnddoc(20) FROM gg;
          136  +  INSERT INTO gg SELECT rnddoc(20) FROM gg;
          137  +  INSERT INTO gg SELECT rnddoc(20) FROM gg;
          138  +}
          139  +
          140  +do_execsql_test 5.1 {
          141  +  INSERT INTO gg(gg) VALUES('integrity-check');
          142  +}
          143  +
          144  +do_execsql_test 5.2 {
          145  +  INSERT INTO gg(gg) VALUES('optimize');
          146  +}
          147  +
          148  +breakpoint
          149  +do_execsql_test 5.3 {
          150  +  INSERT INTO gg(gg) VALUES('integrity-check');
          151  +}
   105    152   
   106    153   finish_test
   107    154   

Added ext/fts5/test/fts5query.test.

            1  +# 2015 October 27
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +# This file implements regression tests for SQLite library.  The
           12  +# focus of this script is testing the FTS5 module.
           13  +#
           14  +
           15  +source [file join [file dirname [info script]] fts5_common.tcl]
           16  +set testprefix fts5query
           17  +
           18  +# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
           19  +ifcapable !fts5 {
           20  +  finish_test
           21  +  return
           22  +}
           23  +
           24  +for {set tn 1 ; set pgsz 64} {$tn<32} {incr tn; incr pgsz 16} {
           25  +  reset_db
           26  +  do_test 1.$tn.1 {
           27  +    execsql {
           28  +      CREATE VIRTUAL TABLE t1 USING fts5(x);
           29  +      INSERT INTO t1(t1, rank) VALUES('pgsz', $pgsz);
           30  +      BEGIN;
           31  +    }
           32  +    foreach x [list aaa bbb ccc ddd eee fff ggg hhh iii jjj] {
           33  +      set doc [string repeat "$x " 30]
           34  +      execsql { INSERT INTO t1 VALUES($doc) }
           35  +    }
           36  +    execsql COMMIT
           37  +  } {}
           38  +
           39  +  do_execsql_test 1.$tn.2 {
           40  +    INSERT INTO t1(t1) VALUES('integrity-check');
           41  +  }
           42  +
           43  +  set ret 1
           44  +  foreach x [list a b c d e f g h i j] {
           45  +    do_execsql_test 1.$tn.3.$ret {
           46  +      SELECT rowid FROM t1 WHERE t1 MATCH $x || '*';
           47  +    } $ret
           48  +    incr ret
           49  +  }
           50  +}
           51  +
           52  +for {set tn 1 ; set pgsz 64} {$tn<32} {incr tn; incr pgsz 16} {
           53  +  reset_db
           54  +  do_test 2.$tn.1 {
           55  +    execsql {
           56  +      CREATE VIRTUAL TABLE t1 USING fts5(x);
           57  +      INSERT INTO t1(t1, rank) VALUES('pgsz', $pgsz);
           58  +      BEGIN;
           59  +    }
           60  +    foreach x [list bbb ddd fff hhh jjj lll nnn ppp rrr ttt] {
           61  +      set doc [string repeat "$x " 30]
           62  +      execsql { INSERT INTO t1 VALUES($doc) }
           63  +    }
           64  +    execsql COMMIT
           65  +  } {}
           66  +
           67  +  do_execsql_test 2.$tn.2 {
           68  +    INSERT INTO t1(t1) VALUES('integrity-check');
           69  +  }
           70  +
           71  +  set ret 1
           72  +  foreach x [list a c e g i k m o q s u] {
           73  +    do_execsql_test 2.$tn.3.$ret {
           74  +      SELECT rowid FROM t1 WHERE t1 MATCH $x || '*';
           75  +    } {}
           76  +    incr ret
           77  +  }
           78  +}
           79  +
           80  +
           81  +finish_test
           82  +
           83  +

Changes to ext/fts5/tool/showfts5.tcl.

     4      4   #-------------------------------------------------------------------------
     5      5   # Process command line arguments.
     6      6   #
     7      7   proc usage {} {
     8      8     puts stderr "usage: $::argv0 ?OPTIONS? database table"
     9      9     puts stderr ""
    10     10     puts stderr "  -nterm                (count number of terms in each segment)"
           11  +  puts stderr "  -segments             (output segment contents)"
    11     12     puts stderr ""
    12     13     exit 1
    13     14   }
    14     15   
    15     16   set O(nterm) 0
           17  +set O(segments) 0
    16     18   
    17     19   if {[llength $argv]<2} usage
    18     20   foreach a [lrange $argv 0 end-2] {
    19     21     switch -- $a {
    20     22       -nterm {
    21     23         set O(nterm) 1
    22     24       }
           25  +
           26  +    -segments {
           27  +      set O(segments) 1
           28  +    }
    23     29   
    24     30       default {
    25     31         usage
    26     32       }
    27     33     }
    28     34   }
    29     35   
................................................................................
    73     79           puts [format "        % -28s    nTerm=%d" $seg $nTerm]
    74     80         } else {
    75     81           puts [format "        % -28s" $seg]
    76     82         }
    77     83       }
    78     84     }
    79     85   }
           86  +
           87  +if {$O(segments)} {
           88  +  puts ""
           89  +  db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data WHERE id>10" {
           90  +    puts $d
           91  +  }
           92  +}
    80     93   
    81     94   
    82     95   
    83     96   
    84     97