SQLite

Check-in [13fb4aa6a8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Performance enhancement for fts5 column filter queries on detail=full tables.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 13fb4aa6a87c5c6258979953da82eedc1a7bf14f
User & Date: dan 2016-01-30 19:16:11.820
Context
2016-01-30
21:09
Fix new test cases in stat.test so that they work with -DSQLITE_DEFAULT_AUTOVACUUM=1 builds. (check-in: a2810cf65d user: dan tags: trunk)
19:16
Performance enhancement for fts5 column filter queries on detail=full tables. (check-in: 13fb4aa6a8 user: dan tags: trunk)
16:59
Merge the implementation of OP_IdxRowid and OP_Seek so that OP_Seek no longer requires the rowid register and a separate OP_IdxRowid call. Shorter and faster prepared statements result. (check-in: 9bec50a1e7 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_index.c.
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340



4341
4342
4343
4344
4345



4346

4347
4348
4349
4350
4351
4352
4353
4354
4355

4356
4357
4358
4359
4360
4361
4362
  const u8 **pa,                  /* IN/OUT: Pointer to poslist */
  int n,                          /* IN: Size of poslist in bytes */
  int iCol                        /* Column to extract from poslist */
){
  int iCurrent = 0;               /* Anything before the first 0x01 is col 0 */
  const u8 *p = *pa;
  const u8 *pEnd = &p[n];         /* One byte past end of position list */
  u8 prev = 0;

  while( iCol>iCurrent ){
    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
    ** not part of a varint */



    while( (prev & 0x80) || *p!=0x01 ){
      prev = *p++;
      if( p==pEnd ) return 0;
    }
    *pa = p++;



    p += fts5GetVarint32(p, iCurrent);

  }
  if( iCol!=iCurrent ) return 0;

  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  ** not part of a varint */
  assert( (prev & 0x80)==0 );
  while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
    prev = *p++;
  }

  return p - (*pa);
}

static int fts5AppendRowid(
  Fts5Index *p,
  i64 iDelta,
  Fts5Iter *pMulti,







<



|
>
>
>
|
|
|


>
>
>
|
>





<
|
|

>







4329
4330
4331
4332
4333
4334
4335

4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357

4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
  const u8 **pa,                  /* IN/OUT: Pointer to poslist */
  int n,                          /* IN: Size of poslist in bytes */
  int iCol                        /* Column to extract from poslist */
){
  int iCurrent = 0;               /* Anything before the first 0x01 is col 0 */
  const u8 *p = *pa;
  const u8 *pEnd = &p[n];         /* One byte past end of position list */


  while( iCol>iCurrent ){
    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
    ** not part of a varint. Note that it is not possible for a negative
    ** or extremely large varint to occur within an uncorrupted position 
    ** list. So the last byte of each varint may be assumed to have a clear
    ** 0x80 bit.  */
    while( *p!=0x01 ){
      while( *p++ & 0x80 );
      if( p>=pEnd ) return 0;
    }
    *pa = p++;
    iCurrent = *p++;
    if( iCurrent & 0x80 ){
      p--;
      p += fts5GetVarint32(p, iCurrent);
    }
  }
  if( iCol!=iCurrent ) return 0;

  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  ** not part of a varint */

  while( p<pEnd && *p!=0x01 ){
    while( *p++ & 0x80 );
  }

  return p - (*pa);
}

static int fts5AppendRowid(
  Fts5Index *p,
  i64 iDelta,
  Fts5Iter *pMulti,
Changes to ext/fts5/tool/fts5speed.tcl.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21


set Q {
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron'"}
  {25  "SELECT count(*) FROM t1 WHERE t1 MATCH 'hours'"}
  {300 "SELECT count(*) FROM t1 WHERE t1 MATCH 'acid'"}
  {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'loaned OR mobility OR popcore OR sunk'"}
  {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron AND myapps'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"}

  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"}

  {2   "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:the'"}
}

proc usage {} {
  global Q
  puts stderr "Usage: $::argv0 DATABASE QUERY"
  puts stderr ""













<







1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20


set Q {
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron'"}
  {25  "SELECT count(*) FROM t1 WHERE t1 MATCH 'hours'"}
  {300 "SELECT count(*) FROM t1 WHERE t1 MATCH 'acid'"}
  {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'loaned OR mobility OR popcore OR sunk'"}
  {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron AND myapps'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"}

  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"}
  {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"}

  {2   "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:the'"}
}

proc usage {} {
  global Q
  puts stderr "Usage: $::argv0 DATABASE QUERY"
  puts stderr ""
Changes to ext/fts5/tool/fts5txt2db.tcl.


1


2






















3











4

5

6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110

111
112
113

114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159

160
161
162
163
164
165
166




























#-------------------------------------------------------------------------











# Command line options processor.

#

proc command_line_error {O E {msg ""}} {
  if {$msg != ""} {
    puts stderr "Error: $msg"
    puts stderr ""
  }

  set L [list]
  foreach o $O {
    if {[llength $o]==1} {
      lappend L [string toupper $o]
    }
  }

  puts stderr "Usage: $::argv0 ?SWITCHES? $L"
  puts stderr ""
  puts stderr "Switches are:"
  foreach o $O {
    if {[llength $o]==3} {
      foreach {a b c} $o {}
      puts stderr [format "    -%-15s %s (default \"%s\")" "$a VAL" $c $b]
    } elseif {[llength $o]==2} {
      foreach {a b} $o {}
      puts stderr [format "    -%-15s %s" $a $b]
    }
  }
  puts stderr ""
  puts stderr $E
  exit -1
}

proc process_command_line {avar lArgs O E} {

  upvar $avar A
  set zTrailing ""       ;# True if ... is present in $O
  set lPosargs [list]

  # Populate A() with default values. Also, for each switch in the command
  # line spec, set an entry in the idx() array as follows:
  #
  #  {tblname t1 "table name to use"}  
  #      -> [set idx(-tblname) {tblname t1 "table name to use"}  
  #
  # For each position parameter, append its name to $lPosargs. If the ...
  # specifier is present, set $zTrailing to the name of the prefix.
  #
  foreach o $O {
    set nm [lindex $o 0]
    set nArg [llength $o]
    switch -- $nArg {
      1 {
        if {[string range $nm end-2 end]=="..."} {
          set zTrailing [string range $nm 0 end-3]
        } else {
          lappend lPosargs $nm
        }
      }
      2 {
        set A($nm) 0
        set idx(-$nm) $o
      }
      3 {
        set A($nm) [lindex $o 1]
        set idx(-$nm) $o
      }
      default {
        error "Error in command line specification"
      }
    }
  }

  # Set explicitly specified option values
  #
  set nArg [llength $lArgs]
  for {set i 0} {$i < $nArg} {incr i} {
    set opt [lindex $lArgs $i]
    if {[string range $opt 0 0]!="-" || $opt=="--"} break
    set c [array names idx "${opt}*"]
    if {[llength $c]==0} { command_line_error $O $E "Unrecognized option: $opt"}
    if {[llength $c]>1}  { command_line_error $O $E "Ambiguous option: $opt"}

    if {[llength $idx($c)]==3} {
      if {$i==[llength $lArgs]-1} {
        command_line_error $O $E "Option requires argument: $c" 
      }
      incr i
      set A([lindex $idx($c) 0]) [lindex $lArgs $i]
    } else {
      set A([lindex $idx($c) 0]) 1
    }
  }

  # Deal with position arguments.
  #
  set nPosarg [llength $lPosargs]
  set nRem [expr $nArg - $i]
  if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
    command_line_error $O $E
  }
  for {set j 0} {$j < $nPosarg} {incr j} {
    set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]]
  }
  if {$zTrailing!=""} {
    set A($zTrailing) [lrange $lArgs [expr $j+$i] end]
  }
}

# End of command line options processor.
#-------------------------------------------------------------------------



process_command_line A $argv {
  {fts5                 "use fts5"}
  {fts4                 "use fts4"}
  {colsize   "10 10 10" "list of column sizes"}
  {tblname   "t1"       "table name to create"}
  {detail    "full"     "Fts5 detail mode to use"}
  {repeat    1          "Load each file this many times"}
  database
  file...
} {
This script is designed to create fts4/5 tables with more than one column.
The -colsize option should be set to a Tcl list of integer values, one for
each column in the table. Each value is the number of tokens that will be
inserted into the column value for each row. For example, setting the -colsize
option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
tokens per row in each, respectively.

Each "FILE" argument should be a text file. The contents of these text files is
split on whitespace characters to form a list of tokens. The first N1 tokens
are used for the first column of the first row, where N1 is the first element
of the -colsize list. The next N2 are used for the second column of the first
row, and so on. Rows are added to the table until the entire list of tokens
is exhausted.
}

if {$A(fts4)} {
  set A(fts) fts4
} else {
  set A(fts) fts5
}

sqlite3 db $A(database)

# Create the FTS table in the db. Return a list of the table columns.
#
proc create_table {} {
  global A
  set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z]

  set nCol [llength $A(colsize)]
  set cols [lrange $cols 0 [expr $nCol-1]]

  set sql    "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) ("
  append sql [join $cols ,]
  if {$A(fts)=="fts5"} { append sql ",detail=$A(detail));" }


  db eval $sql
  return $cols
}

# Return a list of tokens from the named file.
#
>
>
|
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
|
>

>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>

<
|
>

|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

<
<
<
<
<
<
<
|



















|
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150

151
152
153
154















155







156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
##########################################################################
# 2016 Jan 27
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
proc process_cmdline {} { 
  cmdline::process ::A $::argv {
    {fts5                 "use fts5 (this is the default)"}
    {fts4                 "use fts4"}
    {colsize   "10 10 10" "list of column sizes"}
    {tblname   "t1"       "table name to create"}
    {detail    "full"     "Fts5 detail mode to use"}
    {repeat    1          "Load each file this many times"}
    {prefix    ""         "Fts prefix= option"}
    database
    file...
  } {
  This script is designed to create fts4/5 tables with more than one column.
  The -colsize option should be set to a Tcl list of integer values, one for
  each column in the table. Each value is the number of tokens that will be
  inserted into the column value for each row. For example, setting the -colsize
  option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
  tokens per row in each, respectively.
  
  Each "FILE" argument should be a text file. The contents of these text files
  is split on whitespace characters to form a list of tokens. The first N1
  tokens are used for the first column of the first row, where N1 is the first
  element of the -colsize list. The next N2 are used for the second column of
  the first row, and so on. Rows are added to the table until the entire list
  of tokens is exhausted.
  }
}

###########################################################################
###########################################################################
# Command line options processor. This is generic code that can be copied
# between scripts.
#
namespace eval cmdline {
  proc cmdline_error {O E {msg ""}} {
    if {$msg != ""} {
      puts stderr "Error: $msg"
      puts stderr ""
    }
  
    set L [list]
    foreach o $O {
      if {[llength $o]==1} {
        lappend L [string toupper $o]
      }
    }
  
    puts stderr "Usage: $::argv0 ?SWITCHES? $L"
    puts stderr ""
    puts stderr "Switches are:"
    foreach o $O {
      if {[llength $o]==3} {
        foreach {a b c} $o {}
        puts stderr [format "    -%-15s %s (default \"%s\")" "$a VAL" $c $b]
      } elseif {[llength $o]==2} {
        foreach {a b} $o {}
        puts stderr [format "    -%-15s %s" $a $b]
      }
    }
    puts stderr ""
    puts stderr $E
    exit -1
  }
  
  proc process {avar lArgs O E} {

    upvar $avar A
    set zTrailing ""       ;# True if ... is present in $O
    set lPosargs [list]
  
    # Populate A() with default values. Also, for each switch in the command
    # line spec, set an entry in the idx() array as follows:
    #
    #  {tblname t1 "table name to use"}  
    #      -> [set idx(-tblname) {tblname t1 "table name to use"}  
    #
    # For each position parameter, append its name to $lPosargs. If the ...
    # specifier is present, set $zTrailing to the name of the prefix.
    #
    foreach o $O {
      set nm [lindex $o 0]
      set nArg [llength $o]
      switch -- $nArg {
        1 {
          if {[string range $nm end-2 end]=="..."} {
            set zTrailing [string range $nm 0 end-3]
          } else {
            lappend lPosargs $nm
          }
        }
        2 {
          set A($nm) 0
          set idx(-$nm) $o
        }
        3 {
          set A($nm) [lindex $o 1]
          set idx(-$nm) $o
        }
        default {
          error "Error in command line specification"
        }
      }
    }
  
    # Set explicitly specified option values
    #
    set nArg [llength $lArgs]
    for {set i 0} {$i < $nArg} {incr i} {
      set opt [lindex $lArgs $i]
      if {[string range $opt 0 0]!="-" || $opt=="--"} break
      set c [array names idx "${opt}*"]
      if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"}
      if {[llength $c]>1}  { cmdline_error $O $E "Ambiguous option: $opt"}
  
      if {[llength $idx($c)]==3} {
        if {$i==[llength $lArgs]-1} {
          cmdline_error $O $E "Option requires argument: $c" 
        }
        incr i
        set A([lindex $idx($c) 0]) [lindex $lArgs $i]
      } else {
        set A([lindex $idx($c) 0]) 1
      }
    }
  
    # Deal with position arguments.
    #
    set nPosarg [llength $lPosargs]
    set nRem [expr $nArg - $i]
    if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
      cmdline_error $O $E
    }
    for {set j 0} {$j < $nPosarg} {incr j} {
      set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]]
    }
    if {$zTrailing!=""} {
      set A($zTrailing) [lrange $lArgs [expr $j+$i] end]
    }
  }
} ;# namespace eval cmdline
# End of command line options processor.

###########################################################################
###########################################################################

process_cmdline























# If -fts4 was specified, use fts4. Otherwise, fts5.
if {$A(fts4)} {
  set A(fts) fts4
} else {
  set A(fts) fts5
}

sqlite3 db $A(database)

# Create the FTS table in the db. Return a list of the table columns.
#
proc create_table {} {
  global A
  set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z]

  set nCol [llength $A(colsize)]
  set cols [lrange $cols 0 [expr $nCol-1]]

  set sql    "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) ("
  append sql [join $cols ,]
  if {$A(fts)=="fts5"} { append sql ",detail=$A(detail)" }
  append sql ", prefix='$A(prefix)');"

  db eval $sql
  return $cols
}

# Return a list of tokens from the named file.
#