/ Check-in [d692434b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Re-introduce the prefix-search optimization of [feef1b15d6], which was lost in a reorganization of FTS3 code.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:d692434b4935e8e7858230af1c126b0be8203077
User & Date: dan 2010-07-19 11:16:37
Context
2010-07-19
12:05
Changes to stat.test so that it works with file-format 4. check-in: f87bb283 user: dan tags: trunk
11:16
Re-introduce the prefix-search optimization of [feef1b15d6], which was lost in a reorganization of FTS3 code. check-in: d692434b user: dan tags: trunk
05:27
Enable previously failing tests in e_expr.test that pass following [3e5975aa3b]. check-in: 3d59c54a user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
















































1627
1628
1629
1630
1631
1632
1633
....
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657



1658
1659
1660
1661
1662
1663



1664






















1665
1666

1667
1668



1669
1670
1671
1672



1673


1674
1675
1676
1677
1678
1679
1680
....
1790
1791
1792
1793
1794
1795
1796


1797

1798
1799
1800
1801

1802

1803
1804
1805
1806
1807
1808
1809
/* 
** A pointer to an instance of this structure is used as the context 
** argument to sqlite3Fts3SegReaderIterate()
*/
typedef struct TermSelect TermSelect;
struct TermSelect {
  int isReqPos;
  char *aOutput;                  /* Malloc'd output buffer */
  int nOutput;                    /* Size of output in bytes */
};

















































/*
** This function is used as the sqlite3Fts3SegReaderIterate() callback when
** querying the full-text index for a doclist associated with a term or
** term-prefix.
*/
static int fts3TermSelectCb(
................................................................................
  void *pContext,                 /* Pointer to TermSelect structure */
  char *zTerm,
  int nTerm,
  char *aDoclist,
  int nDoclist
){
  TermSelect *pTS = (TermSelect *)pContext;
  int nNew = pTS->nOutput + nDoclist;
  char *aNew = sqlite3_malloc(nNew);

  UNUSED_PARAMETER(p);
  UNUSED_PARAMETER(zTerm);
  UNUSED_PARAMETER(nTerm);

  if( !aNew ){
    return SQLITE_NOMEM;
  }

  if( pTS->nOutput==0 ){
    /* If this is the first term selected, copy the doclist to the output
    ** buffer using memcpy(). TODO: Add a way to transfer control of the
    ** aDoclist buffer from the caller so as to avoid the memcpy().
    */



    memcpy(aNew, aDoclist, nDoclist);
  }else{
    /* The output buffer is not empty. Merge doclist aDoclist with the
    ** existing output. This can only happen with prefix-searches (as
    ** searches for exact terms return exactly one doclist).
    */



    int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);






















    fts3DoclistMerge(mergetype, 0, 0,
        aNew, &nNew, pTS->aOutput, pTS->nOutput, aDoclist, nDoclist

    );
  }




  sqlite3_free(pTS->aOutput);
  pTS->aOutput = aNew;
  pTS->nOutput = nNew;






  return SQLITE_OK;
}

/*
** This function retreives the doclist for the specified term (or term
** prefix) from the database. 
**
................................................................................
  filter.iCol = iColumn;
  filter.zTerm = zTerm;
  filter.nTerm = nTerm;

  rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter,
      fts3TermSelectCb, (void *)&tsc
  );




  if( rc==SQLITE_OK ){
    *ppOut = tsc.aOutput;
    *pnOut = tsc.nOutput;
  }else{

    sqlite3_free(tsc.aOutput);

  }

finished:
  sqlite3_reset(pStmt);
  for(i=0; i<nSegment; i++){
    sqlite3Fts3SegReaderFree(p, apSegment[i]);
  }







|
|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







<
<





<
<
<
<
|




>
>
>
|
|
<
<
<
<
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<
>
|
|
>
>
>

<
|
|
>
>
>
|
>
>







 







>
>
|
>

|
|

>
|
>







1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
....
1683
1684
1685
1686
1687
1688
1689


1690
1691
1692
1693
1694




1695
1696
1697
1698
1699
1700
1701
1702
1703
1704




1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731

1732
1733
1734
1735
1736
1737
1738

1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
....
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
/* 
** A pointer to an instance of this structure is used as the context 
** argument to sqlite3Fts3SegReaderIterate()
*/
typedef struct TermSelect TermSelect;
struct TermSelect {
  int isReqPos;
  char *aaOutput[16];             /* Malloc'd output buffer */
  int anOutput[16];               /* Size of output in bytes */
};

/*
** Merge all doclists in the TermSelect.aaOutput[] array into a single
** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
**
** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
** the responsibility of the caller to free any doclists left in the
** TermSelect.aaOutput[] array.
*/
static int fts3TermSelectMerge(TermSelect *pTS){
  int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);
  char *aOut = 0;
  int nOut = 0;
  int i;

  /* Loop through the doclists in the aaOutput[] array. Merge them all
  ** into a single doclist.
  */
  for(i=0; i<SizeofArray(pTS->aaOutput); i++){
    if( pTS->aaOutput[i] ){
      if( !aOut ){
        aOut = pTS->aaOutput[i];
        nOut = pTS->anOutput[i];
        pTS->aaOutput[0] = 0;
      }else{
        int nNew = nOut + pTS->anOutput[i];
        char *aNew = sqlite3_malloc(nNew);
        if( !aNew ){
          sqlite3_free(aOut);
          return SQLITE_NOMEM;
        }
        fts3DoclistMerge(mergetype, 0, 0,
            aNew, &nNew, pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut
        );
        sqlite3_free(pTS->aaOutput[i]);
        sqlite3_free(aOut);
        pTS->aaOutput[i] = 0;
        aOut = aNew;
        nOut = nNew;
      }
    }
  }

  pTS->aaOutput[0] = aOut;
  pTS->anOutput[0] = nOut;
  return SQLITE_OK;
}

/*
** This function is used as the sqlite3Fts3SegReaderIterate() callback when
** querying the full-text index for a doclist associated with a term or
** term-prefix.
*/
static int fts3TermSelectCb(
................................................................................
  void *pContext,                 /* Pointer to TermSelect structure */
  char *zTerm,
  int nTerm,
  char *aDoclist,
  int nDoclist
){
  TermSelect *pTS = (TermSelect *)pContext;



  UNUSED_PARAMETER(p);
  UNUSED_PARAMETER(zTerm);
  UNUSED_PARAMETER(nTerm);





  if( pTS->aaOutput[0]==0 ){
    /* If this is the first term selected, copy the doclist to the output
    ** buffer using memcpy(). TODO: Add a way to transfer control of the
    ** aDoclist buffer from the caller so as to avoid the memcpy().
    */
    pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
    pTS->anOutput[0] = nDoclist;
    if( pTS->aaOutput[0] ){
      memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
    }else{




      return SQLITE_NOMEM;
    }
  }else{
    int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);
    char *aMerge = aDoclist;
    int nMerge = nDoclist;
    int iOut;

    for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){
      char *aNew;
      int nNew;
      if( pTS->aaOutput[iOut]==0 ){
        assert( iOut>0 );
        pTS->aaOutput[iOut] = aMerge;
        pTS->anOutput[iOut] = nMerge;
        break;
      }

      nNew = nMerge + pTS->anOutput[iOut];
      aNew = sqlite3_malloc(nNew);
      if( !aNew ){
        if( aMerge!=aDoclist ){
          sqlite3_free(aMerge);
        }
        return SQLITE_NOMEM;
      }
      fts3DoclistMerge(mergetype, 0, 0,

          aNew, &nNew, pTS->aaOutput[iOut], pTS->anOutput[iOut], aMerge, nMerge
      );

      if( iOut>0 ) sqlite3_free(aMerge);
      sqlite3_free(pTS->aaOutput[iOut]);
      pTS->aaOutput[iOut] = 0;


      aMerge = aNew;
      nMerge = nNew;
      if( (iOut+1)==SizeofArray(pTS->aaOutput) ){
        pTS->aaOutput[iOut] = aMerge;
        pTS->anOutput[iOut] = nMerge;
      }
    }
  }
  return SQLITE_OK;
}

/*
** This function retreives the doclist for the specified term (or term
** prefix) from the database. 
**
................................................................................
  filter.iCol = iColumn;
  filter.zTerm = zTerm;
  filter.nTerm = nTerm;

  rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter,
      fts3TermSelectCb, (void *)&tsc
  );
  if( rc==SQLITE_OK ){
    rc = fts3TermSelectMerge(&tsc);
  }

  if( rc==SQLITE_OK ){
    *ppOut = tsc.aaOutput[0];
    *pnOut = tsc.anOutput[0];
  }else{
    for(i=0; i<SizeofArray(tsc.aaOutput); i++){
      sqlite3_free(tsc.aaOutput[i]);
    }
  }

finished:
  sqlite3_reset(pStmt);
  for(i=0; i<nSegment; i++){
    sqlite3Fts3SegReaderFree(p, apSegment[i]);
  }

Changes to test/fts3.test.

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41


42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file runs all tests.
#
# $Id: fts3.test,v 1.2 2008/07/23 18:17:32 drh Exp $

proc lshift {lvar} {
  upvar $lvar l
  set ret [lindex $l 0]
  set l [lrange $l 1 end]
  return $ret
}
while {[set arg [lshift argv]] != ""} {
  switch -- $arg {
    -sharedpagercache {
      sqlite3_enable_shared_cache 1
    }
    -soak {
       set G(issoak) 1
    }
    default {
      set argv [linsert $argv 0 $arg]
      break
    }
  }
}

set testdir [file dirname $argv0]
source $testdir/tester.tcl
# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts3 {
  return
}
rename finish_test really_finish_test
proc finish_test {} {}
set G(isquick) 1



set EXCLUDE {
  fts3.test
  fts3malloc.test
  fts3rnd.test
}

# Files to include in the test.  If this list is empty then everything
# that is not in the EXCLUDE list is run.
#
set INCLUDE {
}

foreach testfile [lsort -dictionary [glob $testdir/fts3*.test]] {
  set tail [file tail $testfile]
  if {[lsearch -exact $EXCLUDE $tail]>=0} continue
  if {[llength $INCLUDE]>0 && [lsearch -exact $INCLUDE $tail]<0} continue
  source $testfile
  catch {db close}
  if {$sqlite_open_file_count>0} {
    puts "$tail did not close all files: $sqlite_open_file_count"
    fail_test $tail
    set sqlite_open_file_count 0
  }
}

set sqlite_open_file_count 0
really_finish_test







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

|
<
<
<
|
<
<
<
>
>
|
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
5
6
7
8
9
10
11





















12
13



14



15
16
17




18





















19
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file runs all tests.
#
# $Id: fts3.test,v 1.2 2008/07/23 18:17:32 drh Exp $






















set testdir [file dirname $argv0]
source $testdir/permutations.test







ifcapable fts3 {
  run_test_suite fts3
}


























finish_test

Changes to test/fts3an.test.

181
182
183
184
185
186
187
188
189
190

191
192
193
194






















195
196
  db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
    set l [llength $o]
    lappend t [expr {$l/4}]
  }
  set t
} $ret

# TODO(shess) It would be useful to test a couple edge cases, but I
# don't know if we have the precision to manage it from here at this
# time.  Prefix hits can cross leaves, which the code above _should_

# hit by virtue of size.  There are two variations on this.  If the
# tree is 2 levels high, the code will find the leaf-node extent
# directly, but if it is higher, the code will have to follow two
# separate interior branches down the tree.  Both should be tested.























finish_test







|
|
<
>
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


181
182
183
184
185
186
187
188
189

190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
  db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
    set l [llength $o]
    lappend t [expr {$l/4}]
  }
  set t
} $ret

# Test a boundary condition: More than 2^16 terms that match a searched for
# prefix in a single segment.

#
puts "This next test can take a little while (~ 30 seconds)..."
do_test fts3an-4.1 {
  execsql { CREATE VIRTUAL TABLE ft USING fts3(x) }
  execsql BEGIN
    execsql { INSERT INTO ft VALUES(NULL) }
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#      2
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#      4
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#      8
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#     16
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#     32
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#     64
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#    128
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#    256
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#    512
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#   1024
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#   2048
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#   4096
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#   8192
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#  16384
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#  32768
    execsql { INSERT INTO ft SELECT * FROM ft }     ;#  65536
    execsql { INSERT INTO ft SELECT * FROM ft }     ;# 131072
  execsql COMMIT
  execsql { UPDATE ft SET x = 'abc' || rowid }
  execsql { SELECT count(*) FROM ft WHERE x MATCH 'abc*' }
} {131072}

finish_test

Changes to test/permutations.test.

148
149
150
151
152
153
154











155
156
157
158
159
160
161

test_suite "threads" -prefix "" -description {
  All multi-threaded tests.
} -files {
  notify2.test   thread001.test thread002.test thread003.test 
  thread004.test thread005.test walthread.test
}













lappend ::testsuitelist xxx
#-------------------------------------------------------------------------
# Define the coverage related test suites:
#
#   coverage-wal







>
>
>
>
>
>
>
>
>
>
>







148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

test_suite "threads" -prefix "" -description {
  All multi-threaded tests.
} -files {
  notify2.test   thread001.test thread002.test thread003.test 
  thread004.test thread005.test walthread.test
}

test_suite "fts3" -prefix "" -description {
  All FTS3 tests except fts3malloc.test and fts3rnd.test.
} -files {
  fts3aa.test fts3ab.test fts3ac.test fts3ad.test fts3ae.test
  fts3af.test fts3ag.test fts3ah.test fts3ai.test fts3aj.test
  fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test
  fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test
  fts3e.test fts3expr.test fts3expr2.test fts3near.test 
  fts3query.test fts3snippet.test
}


lappend ::testsuitelist xxx
#-------------------------------------------------------------------------
# Define the coverage related test suites:
#
#   coverage-wal