SQLite

Check-in [249a2d070c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve the performance of fts5 column filters on detail=col tables.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5-perf
Files: files | file ages | folders
SHA1: 249a2d070c34bf884a04cb248b9691e239f2871c
User & Date: dan 2016-01-26 19:30:49.768
Context
2016-01-26
20:08
Further minor performance improvements and code-size reductions related to fts5 column filters on detail=col tables. (Leaf check-in: b4ac61aeee user: dan tags: fts5-perf)
19:30
Improve the performance of fts5 column filters on detail=col tables. (check-in: 249a2d070c user: dan tags: fts5-perf)
17:08
Enhance fts5txt2db.tcl, a script used to generate fts5/fts4 databases for performance testing. (check-in: c646e40350 user: dan tags: fts5-perf)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_index.c.
4998
4999
5000
5001
5002
5003
5004















































5005
5006
5007
5008
5009
5010
5011
    ** Fts5Iter.poslist buffer and then set the output pointer to point
    ** to this buffer.  */
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
  }
}
















































/*
** xSetOutputs callback used by detail=col when there is a column filter.
*/
static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
  Fts5Colset *pColset = pIter->pColset;
  pIter->base.iRowid = pSeg->iRowid;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
    ** Fts5Iter.poslist buffer and then set the output pointer to point
    ** to this buffer.  */
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
  }
}

/*
** xSetOutputs callback used when: detail=col when there is a column filter.
**
**   * detail=col,
**   * there is a column filter, and
**   * the table contains 32 or fewer columns.
*/
static void fts5IterSetOutputs_Col32(Fts5Iter *pIter, Fts5SegIter *pSeg){
  Fts5Colset *pColset = pIter->pColset;
  pIter->base.iRowid = pSeg->iRowid;

  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
  assert( pColset );

  fts5BufferZero(&pIter->poslist);
  if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
    int i;
    int iPrev = 0;
    u32 m = 0;
    u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
    u8 *pEnd = (u8*)&a[pSeg->nPos]; 

    while( a<pEnd ){
      iPrev += (int)a[0] - 2;
      m |= (1 << iPrev);
      a++;
    }

    iPrev = 0;
    a = pIter->poslist.p;
    for(i=0; i<pColset->nCol; i++){
      int iCol = pColset->aiCol[i];
      if( m & (1 << iCol) ){
        *a++ = (iCol - iPrev) + 2;
        iPrev = iCol;
      }
    }
    pIter->poslist.n = a - pIter->poslist.p;

  }else{
    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
  }

  pIter->base.pData = pIter->poslist.p;
  pIter->base.nData = pIter->poslist.n;
}

/*
** xSetOutputs callback used by detail=col when there is a column filter.
*/
static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
  Fts5Colset *pColset = pIter->pColset;
  pIter->base.iRowid = pSeg->iRowid;
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101




5102

5103
5104
5105
5106
5107
5108
5109
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
    pIter->base.nData = pIter->poslist.n;
  }
}

static void fts5IterSetOutputCb(Fts5Iter *pIter){
  int eDetail = pIter->pIndex->pConfig->eDetail;
  if( eDetail==FTS5_DETAIL_NONE ){
    pIter->xSetOutputs = fts5IterSetOutputs_None;
  }

  else if( pIter->pColset==0 || pIter->bFiltered ){
    pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
  }

  else if( eDetail==FTS5_DETAIL_FULL ){
    pIter->xSetOutputs = fts5IterSetOutputs_Full;
  }

  else{
    assert( eDetail==FTS5_DETAIL_COLUMNS );




    pIter->xSetOutputs = fts5IterSetOutputs_Col;

  }
}

/*
** Open a new iterator to iterate though all rowid that match the 
** specified token or token prefix.
*/







|
|
|







|




|
>
>
>
>
|
>







5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
    pIter->base.nData = pIter->poslist.n;
  }
}

static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
  Fts5Config *pConfig = pIter->pIndex->pConfig;
  if( pConfig->eDetail==FTS5_DETAIL_NONE ){
    pIter->xSetOutputs = fts5IterSetOutputs_None;
  }

  else if( pIter->pColset==0 || pIter->bFiltered ){
    pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
  }

  else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
    pIter->xSetOutputs = fts5IterSetOutputs_Full;
  }

  else{
    assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
    if( pConfig->nCol<=32 ){
      pIter->xSetOutputs = fts5IterSetOutputs_Col32;
      sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
    }else{
      pIter->xSetOutputs = fts5IterSetOutputs_Col;
    }
  }
}

/*
** Open a new iterator to iterate though all rowid that match the 
** specified token or token prefix.
*/
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
      buf.p[0] = FTS5_MAIN_PREFIX;
      fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
    }

    if( p->rc==SQLITE_OK ){
      Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
      pRet->pColset = pColset;
      fts5IterSetOutputCb(pRet);
      if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
    }
    if( p->rc ){
      sqlite3Fts5IterClose(&pRet->base);
      pRet = 0;
      fts5CloseReader(p);
    }








|
|







5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
      buf.p[0] = FTS5_MAIN_PREFIX;
      fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
    }

    if( p->rc==SQLITE_OK ){
      Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
      pRet->pColset = pColset;
      fts5IterSetOutputCb(&p->rc, pRet);
      if( p->rc==SQLITE_OK && pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
    }
    if( p->rc ){
      sqlite3Fts5IterClose(&pRet->base);
      pRet = 0;
      fts5CloseReader(p);
    }

Changes to ext/fts5/test/fts5ac.test.
154
155
156
157
158
159
160
161
162

163
164
165
166
167
168
169
  do_execsql_test 1.$tn2.integrity {
    INSERT INTO xx(xx) VALUES('integrity-check');
  }

  #-------------------------------------------------------------------------
  #
  foreach {tn expr} {
    1.2 "a   OR b"
    1.1 "a   AND b"

    1.3 "o"
    1.4 "b q"
    1.5 "e a e"
    1.6 "m d g q q b k b w f q q p p"
    1.7 "l o o l v v k"
    1.8 "a"
    1.9 "b"







<

>







154
155
156
157
158
159
160

161
162
163
164
165
166
167
168
169
  do_execsql_test 1.$tn2.integrity {
    INSERT INTO xx(xx) VALUES('integrity-check');
  }

  #-------------------------------------------------------------------------
  #
  foreach {tn expr} {

    1.1 "a   AND b"
    1.2 "a   OR b"
    1.3 "o"
    1.4 "b q"
    1.5 "e a e"
    1.6 "m d g q q b k b w f q q p p"
    1.7 "l o o l v v k"
    1.8 "a"
    1.9 "b"
Added ext/fts5/test/fts5simple3.test.
























































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# 2015 September 05
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5simple3

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

fts5_aux_test_functions db

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b, c, detail=col);
  INSERT INTO t1 VALUES('a', 'b', 'c');
  INSERT INTO t1 VALUES('x', 'x', 'x');
}

do_execsql_test 1.1 {
  SELECT rowid, fts5_test_collist(t1) FROM t1('a:a');
} {1 0.0}

do_execsql_test 1.2 {
  SELECT rowid, fts5_test_collist(t1) FROM t1('b:x');
} {2 0.1}

do_execsql_test 1.3 {
  SELECT rowid, fts5_test_collist(t1) FROM t1('b:a');
} {}


finish_test

Changes to ext/fts5/tool/fts5speed.tcl.
1
2
3
4
5
6
7
8
9
10
11
12
13


14
15
16
17
18
19
20


set Q {
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron'"}
  {25  "SELECT count(*) FROM t1 WHERE t1 MATCH 'hours'"}
  {300 "SELECT count(*) FROM t1 WHERE t1 MATCH 'acid'"}
  {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'loaned OR mobility OR popcore OR sunk'"}
  {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron AND myapps'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"}

  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"}


}

proc usage {} {
  global Q
  puts stderr "Usage: $::argv0 DATABASE QUERY"
  puts stderr ""
  for {set i 1} {$i <= [llength $Q]} {incr i} {













>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22


set Q {
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron'"}
  {25  "SELECT count(*) FROM t1 WHERE t1 MATCH 'hours'"}
  {300 "SELECT count(*) FROM t1 WHERE t1 MATCH 'acid'"}
  {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'loaned OR mobility OR popcore OR sunk'"}
  {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron AND myapps'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"}

  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"}

  {2   "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:the'"}
}

proc usage {} {
  global Q
  puts stderr "Usage: $::argv0 DATABASE QUERY"
  puts stderr ""
  for {set i 1} {$i <= [llength $Q]} {incr i} {