SQLite

Check-in [44f1ce30d1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add fts5txt2db.tcl, a tool for creating sample fts4/5 databases from text files.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 44f1ce30d1e446c9ee5f8bd8a62119e00356aa0e
User & Date: dan 2015-10-05 11:57:09.832
Context
2015-10-05
15:39
Update fts3 so that expressions to the left and right of a NOT operator are balanced. This prevents relatively small expressions (a dozen terms or so) that are children of NOT operators from triggering the "expression tree is too large" error. (check-in: d6b66cd7b8 user: dan tags: trunk)
11:57
Add fts5txt2db.tcl, a tool for creating sample fts4/5 databases from text files. (check-in: 44f1ce30d1 user: dan tags: trunk)
2015-10-03
15:38
Update fts5 to support the table function syntax. "... FROM fts5_tbl WHERE fts5_tbl MATCH ?1 AND rank MATCH ?1" can now be written "FROM fts5_tbl(?1, ?2)". (check-in: 41d17d9e24 user: dan tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Added ext/fts5/tool/fts5txt2db.tcl.














































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135


proc usage {} {
  puts stderr "$::argv0 ?OPTIONS? DATABASE FILE1..."
  puts stderr ""
  puts stderr "Options are"
  puts stderr "  -fts5"
  puts stderr "  -fts4"
  puts stderr "  -colsize <list of column sizes>"
  puts stderr {
This script is designed to create fts4/5 tables with more than one column.
The -colsize option should be set to a Tcl list of integer values, one for
each column in the table. Each value is the number of tokens that will be
inserted into the column value for each row. For example, setting the -colsize
option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
tokens per row in each, respectively.

Each "FILE" argument should be a text file. The contents of these text files is
split on whitespace characters to form a list of tokens. The first N1 tokens
are used for the first column of the first row, where N1 is the first element
of the -colsize list. The next N2 are used for the second column of the first
row, and so on. Rows are added to the table until the entire list of tokens
is exhausted.
}
  exit -1
}

set O(aColsize)       [list 10 10 10]
set O(tblname)        t1
set O(fts)            fts5


set options_with_values {-colsize}

for {set i 0} {$i < [llength $argv]} {incr i} {
  set opt [lindex $argv $i]
  if {[string range $opt 0 0]!="-"} break

  if {[lsearch $options_with_values $opt]>=0} {
    incr i
    if {$i==[llength $argv]} usage
    set val [lindex $argv $i]
  }

  switch -- $opt {
    -colsize {
      set O(aColSize) $val
    }

    -fts4 {
      set O(fts) fts4
    }

    -fts5 {
      set O(fts) fts5
    }
  }
}

if {$i > [llength $argv]-2} usage
set O(db) [lindex $argv $i]
set O(files) [lrange $argv [expr $i+1] end]

foreach {k v} [lrange $argv 0 end-2] {
  switch -- $k {
    -colsize {
      set O(aColSize) $v
    }

    -colsize {
      set O(aColSize) $v
    }
  }

}

sqlite3 db $O(db)
load_static_extension db fts5


# Create the FTS table in the db. Return a list of the table columns.
#
proc create_table {} {
  global O
  set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z]

  set nCol [llength $O(aColsize)]
  set cols [lrange $cols 0 [expr $nCol-1]]

  set sql    "CREATE VIRTUAL TABLE IF NOT EXISTS $O(tblname) USING $O(fts) ("
  append sql [join $cols ,]
  append sql ");"

  db eval $sql
  return $cols
}

# Return a list of tokens from the named file.
#
proc readfile {file} {
  set fd [open $file]
  set data [read $fd]
  close $fd
  split $data
}


# Load all the data into a big list of tokens.
#
set tokens [list]
foreach f $O(files) {
  set tokens [concat $tokens [readfile $f]]
}

set N [llength $tokens]
set i 0
set cols [create_table]
set sql "INSERT INTO $O(tblname) VALUES(\$[lindex $cols 0]"
foreach c [lrange $cols 1 end] {
  append sql ", \$$c"
}
append sql ")"

db eval BEGIN
  while {$i < $N} {
    foreach c $cols s $O(aColsize) {
      set $c [lrange $tokens $i [expr $i+$s-1]]
      incr i $s
    }
    db eval $sql
  }
db eval COMMIT