Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Performance enhancement for fts5 column filter queries on detail=full tables. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
13fb4aa6a87c5c6258979953da82eedc |
User & Date: | dan 2016-01-30 19:16:11.820 |
Context
2016-01-30
| ||
21:09 | Fix new test cases in stat.test so that they work with -DSQLITE_DEFAULT_AUTOVACUUM=1 builds. (check-in: a2810cf65d user: dan tags: trunk) | |
19:16 | Performance enhancement for fts5 column filter queries on detail=full tables. (check-in: 13fb4aa6a8 user: dan tags: trunk) | |
16:59 | Merge the implementation of OP_IdxRowid and OP_Seek so that OP_Seek no longer requires the rowid register and a separate OP_IdxRowid call. Shorter and faster prepared statements result. (check-in: 9bec50a1e7 user: drh tags: trunk) | |
Changes
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
4329 4330 4331 4332 4333 4334 4335 | const u8 **pa, /* IN/OUT: Pointer to poslist */ int n, /* IN: Size of poslist in bytes */ int iCol /* Column to extract from poslist */ ){ int iCurrent = 0; /* Anything before the first 0x01 is col 0 */ const u8 *p = *pa; const u8 *pEnd = &p[n]; /* One byte past end of position list */ | < | > > > | | | > > > | > < | | > | 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 | const u8 **pa, /* IN/OUT: Pointer to poslist */ int n, /* IN: Size of poslist in bytes */ int iCol /* Column to extract from poslist */ ){ int iCurrent = 0; /* Anything before the first 0x01 is col 0 */ const u8 *p = *pa; const u8 *pEnd = &p[n]; /* One byte past end of position list */ while( iCol>iCurrent ){ /* Advance pointer p until it points to pEnd or an 0x01 byte that is ** not part of a varint. Note that it is not possible for a negative ** or extremely large varint to occur within an uncorrupted position ** list. So the last byte of each varint may be assumed to have a clear ** 0x80 bit. */ while( *p!=0x01 ){ while( *p++ & 0x80 ); if( p>=pEnd ) return 0; } *pa = p++; iCurrent = *p++; if( iCurrent & 0x80 ){ p--; p += fts5GetVarint32(p, iCurrent); } } if( iCol!=iCurrent ) return 0; /* Advance pointer p until it points to pEnd or an 0x01 byte that is ** not part of a varint */ while( p<pEnd && *p!=0x01 ){ while( *p++ & 0x80 ); } return p - (*pa); } static int fts5AppendRowid( Fts5Index *p, i64 iDelta, Fts5Iter *pMulti, |
︙ | ︙ |
Changes to ext/fts5/tool/fts5speed.tcl.
1 2 3 4 5 6 7 8 9 10 11 12 13 | set Q { {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron'"} {25 "SELECT count(*) FROM t1 WHERE t1 MATCH 'hours'"} {300 "SELECT count(*) FROM t1 WHERE t1 MATCH 'acid'"} {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'loaned OR mobility OR popcore OR sunk'"} {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron AND myapps'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"} | < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | set Q { {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron'"} {25 "SELECT count(*) FROM t1 WHERE t1 MATCH 'hours'"} {300 "SELECT count(*) FROM t1 WHERE t1 MATCH 'acid'"} {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'loaned OR mobility OR popcore OR sunk'"} {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron AND myapps'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"} {2 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:the'"} } proc usage {} { global Q puts stderr "Usage: $::argv0 DATABASE QUERY" puts stderr "" |
︙ | ︙ |
Changes to ext/fts5/tool/fts5txt2db.tcl.
|
##########################################################################
# 2016 Jan 27
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#

# Parse $::argv into the global array ::A.
#
# The first brace-quoted argument is the command line specification that
# is handed to cmdline::process (see that proc for the format); the second
# is the free-form help text printed after the usage summary when the
# command line cannot be parsed.
#
proc process_cmdline {} {
  cmdline::process ::A $::argv {
    {fts5                 "use fts5 (this is the default)"}
    {fts4                 "use fts4"}
    {colsize   "10 10 10" "list of column sizes"}
    {tblname   "t1"       "table name to create"}
    {detail    "full"     "Fts5 detail mode to use"}
    {repeat    1          "Load each file this many times"}
    {prefix    ""         "Fts prefix= option"}
    database
    file...
  } {
  This script is designed to create fts4/5 tables with more than one column.
  The -colsize option should be set to a Tcl list of integer values, one for
  each column in the table. Each value is the number of tokens that will be
  inserted into the column value for each row. For example, setting the -colsize
  option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
  tokens per row in each, respectively.

  Each "FILE" argument should be a text file. The contents of these text files is
  split on whitespace characters to form a list of tokens. The first N1 tokens
  are used for the first column of the first row, where N1 is the first element
  of the -colsize list. The next N2 are used for the second column of the first
  row, and so on. Rows are added to the table until the entire list of tokens
  is exhausted.
  }
}

###########################################################################
###########################################################################
# Command line options processor. This is generic code that can be copied
# between scripts.
#
namespace eval cmdline {

  # Report a command line error and terminate the process.
  #
  #   O    Command line specification (same list passed to [process]).
  #   E    Free-form help text, printed verbatim after the switch list.
  #   msg  Optional error message. If not empty it is printed first,
  #        prefixed with "Error: ".
  #
  # Everything is written to stderr. This proc does not return - it calls
  # [exit -1].
  #
  proc cmdline_error {O E {msg ""}} {
    if {$msg != ""} {
      puts stderr "Error: $msg"
      puts stderr ""
    }

    # Single-element spec entries are positional parameters. Show them
    # upper-cased on the "Usage:" line.
    set L [list]
    foreach o $O {
      if {[llength $o]==1} {
        lappend L [string toupper $o]
      }
    }

    puts stderr "Usage: $::argv0 ?SWITCHES? $L"
    puts stderr ""
    puts stderr "Switches are:"
    foreach o $O {
      if {[llength $o]==3} {
        # {name default help}: a switch that takes a value.
        foreach {a b c} $o {}
        puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b]
      } elseif {[llength $o]==2} {
        # {name help}: a boolean switch.
        foreach {a b} $o {}
        puts stderr [format " -%-15s %s" $a $b]
      }
    }
    puts stderr ""
    puts stderr $E
    exit -1
  }

  # Parse argument list $lArgs according to specification $O, storing the
  # results in the array named by $avar in the caller's scope.
  #
  # Each element of O is a list of 1, 2 or 3 elements:
  #
  #   {name}               Positional parameter. If the name ends in "..."
  #                        it collects all remaining arguments, and the
  #                        array key is the name with the "..." removed.
  #   {name help}          Boolean switch -name. A(name) is 0 by default
  #                        and 1 if the switch is present.
  #   {name default help}  Switch -name that takes a value. A(name) is
  #                        $default unless overridden.
  #
  # Switches may be abbreviated to any unambiguous prefix. An argument of
  # "--" ends switch processing; it is consumed and everything after it is
  # treated as positional. Any error is reported through [cmdline_error]
  # using help text $E, which terminates the process.
  #
  proc process {avar lArgs O E} {
    upvar $avar A
    set zTrailing ""       ;# Name of the "..." parameter, or "" if none
    set lPosargs [list]

    # Populate A() with default values. Also, for each switch in the command
    # line spec, set an entry in the idx() array as follows:
    #
    #  {tblname t1 "table name to use"}
    #      ->  [set idx(-tblname) {tblname t1 "table name to use"}
    #
    # For each position parameter, append its name to $lPosargs. If the ...
    # specifier is present, set $zTrailing to the name of the prefix.
    #
    foreach o $O {
      set nm [lindex $o 0]
      set nArg [llength $o]
      switch -- $nArg {
        1 {
          if {[string range $nm end-2 end]=="..."} {
            set zTrailing [string range $nm 0 end-3]
          } else {
            lappend lPosargs $nm
          }
        }
        2 {
          set A($nm) 0
          set idx(-$nm) $o
        }
        3 {
          set A($nm) [lindex $o 1]
          set idx(-$nm) $o
        }
        default {
          error "Error in command line specification"
        }
      }
    }

    # Set explicitly specified option values
    #
    set nArg [llength $lArgs]
    for {set i 0} {$i < $nArg} {incr i} {
      set opt [lindex $lArgs $i]

      # "--" explicitly terminates the switches. Step over it so that it is
      # not mistaken for the first positional argument.
      if {$opt=="--"} {
        incr i
        break
      }
      if {[string range $opt 0 0]!="-"} break

      # Find all switches that $opt is a prefix of.
      set c [array names idx "${opt}*"]
      if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"}
      if {[llength $c]>1}  { cmdline_error $O $E "Ambiguous option: $opt"}

      if {[llength $idx($c)]==3} {
        # Value switch: the next argument is the value.
        if {$i==[llength $lArgs]-1} {
          cmdline_error $O $E "Option requires argument: $c"
        }
        incr i
        set A([lindex $idx($c) 0]) [lindex $lArgs $i]
      } else {
        # Boolean switch.
        set A([lindex $idx($c) 0]) 1
      }
    }

    # Deal with position arguments.
    #
    set nPosarg [llength $lPosargs]
    set nRem [expr {$nArg - $i}]
    if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
      cmdline_error $O $E
    }
    for {set j 0} {$j < $nPosarg} {incr j} {
      set A([lindex $lPosargs $j]) [lindex $lArgs [expr {$j+$i}]]
    }
    if {$zTrailing!=""} {
      # Whatever is left over belongs to the trailing "..." parameter.
      set A($zTrailing) [lrange $lArgs [expr {$j+$i}] end]
    }
  }
} ;# namespace eval cmdline
# End of command line options processor.
###########################################################################
###########################################################################

process_cmdline

# If -fts4 was specified, use fts4. Otherwise, fts5.
if {$A(fts4)} {
  set A(fts) fts4
} else {
  set A(fts) fts5
}

sqlite3 db $A(database)

# Create the FTS table in the db. Return a list of the table columns.
#
# One column is created for each element of $A(colsize), named a, b, c...
# in order. The table name, FTS module and prefix= option are taken from
# A(tblname), A(fts) and A(prefix); detail= is only added for fts5.
#
proc create_table {} {
  global A
  set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z]

  # Keep only as many column names as there are column sizes.
  set nCol [llength $A(colsize)]
  set cols [lrange $cols 0 [expr {$nCol-1}]]

  set sql    "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) ("
  append sql [join $cols ,]
  if {$A(fts)=="fts5"} { append sql ",detail=$A(detail)" }
  append sql ", prefix='$A(prefix)');"

  db eval $sql
  return $cols
}

# Return a list of tokens from the named file.
#
︙ | ︙ |