Documentation Source Text

Check-in [e3ae335670]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the way the fts3.html document is generated to be more similar to most of the other documents.
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: e3ae335670ce7c246524171e47bdbda64d361695
User & Date: dan 2010-01-20 18:32:01
Context
2010-01-21
06:06
Change foreignkeys.in to be more similar to other documentation files. check-in: 7889bfe143 user: dan tags: trunk
2010-01-20
18:32
Change the way the fts3.html document is generated to be more similar to most of the other documents. check-in: e3ae335670 user: dan tags: trunk
05:52
Fix typo in fts3.html. check-in: 897b36953b user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to main.mk.

51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

# This rule generates all documention files from their sources.  The
# special markup on HTML files used to identify testable statements and
# requirements are retained in the HTML and so the HTML generated by
# this rule is not suitable for publication.  This is the first step
# only.
#
base:	tclsh sqlite3.h docdir always
	rm -rf doc/images
	cp -r $(DOC)/images doc
	cp $(SRC)/art/*.gif doc/images
	mkdir doc/images/syntax
	cp $(DOC)/art/syntax/*.gif doc/images/syntax
	cp $(DOC)/rawpages/* doc
	./tclsh $(DOC)/wrap.tcl $(DOC) $(SRC) doc $(DOC)/pages/*.in







|







51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

# This rule generates all documention files from their sources.  The
# special markup on HTML files used to identify testable statements and
# requirements are retained in the HTML and so the HTML generated by
# this rule is not suitable for publication.  This is the first step
# only.
#
base:	tclsh sqlite3.h docdir always parsehtml.so
	rm -rf doc/images
	cp -r $(DOC)/images doc
	cp $(SRC)/art/*.gif doc/images
	mkdir doc/images/syntax
	cp $(DOC)/art/syntax/*.gif doc/images/syntax
	cp $(DOC)/rawpages/* doc
	./tclsh $(DOC)/wrap.tcl $(DOC) $(SRC) doc $(DOC)/pages/*.in

Changes to pages/fancyformat.tcl.

230
231
232
233
234
235
236


































































































































































































    <div style="font-size:1.5em;margin:1em;color:#80a796">Table Of Contents</div>
    <div id=toc>
      $::TOC
    </div id>
    [FixReferences $body]
  }]
}









































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
    <div style="font-size:1.5em;margin:1em;color:#80a796">Table Of Contents</div>
    <div id=toc>
      $::TOC
    </div id>
    [FixReferences $body]
  }]
}


proc addtoc_cb {tag details} {
  upvar #0 ::Addtoc G
  switch -glob -- $tag {

    "" { ;# Text node. Copy the text to the output. And the TOC, if applicable.
      if {$G(inCodeblock)} { 
        append G(codeblock) $details
      } else {
        append G(doc) $details
        if {$G(inHeading)} { append G(toc) $details }
        if {$G(inTitle)}   { append G(title) $details }
      }
    }

    h[1-6] { ;# A heading.
      array set D $details
      set level [string range $tag 1 end]

      set HN ""
      if {![info exists D(notoc)]} { 
        if {![info exists D(nonumber)]} { set HN [headingnumber $level] }

        # If the heading does not have an 'id' attribute, generate one.
        if {[info exists D(id)]==0} {
          if {$HN != ""} { 
            set D(id) "section_[string map {. _} [string range $HN 0 end-1]]" 
          } else {
            set D(id) "notoc[incr G(notoccounter)]"
          }
        }

        # Append the entry to the table-of-contents.
        append G(toc) "<div style=\"margin-left:[expr $level*6]ex\">"
        append G(toc) "<a href=\"#$D(id)\">$HN "
        set G(inHeading) 1
      }
      catch { unset D(nonumber) }
      catch { unset D(notoc) }

      # If there is a "tags" attribute, then add an [hd_fragment] command
      # to the output.
      if {[info exists D(tags)]} {
        append G(doc) "<tcl>[list set ::hd(fragment) $D(id)]</tcl>"
        foreach t [split $D(tags) ,] {
          append G(doc) "<tcl>[list hd_keywords [string trim $t]]</tcl>"
        }
        unset D(tags)
      }

      append G(doc) [formattag $tag [array get D]]
      append G(doc) "$HN "
    }

    /h[1-6] { ;# End of current heading.
      if {$::Addtoc(inHeading)} {
        append G(toc) "</a></div>"
      }
      set G(inHeading) 0
      append G(doc) [formattag $tag $details]
    }

    title  { 
      set G(inTitle) 1
      append G(doc) [formattag $tag $details]
    }
    /title { 
      set G(inTitle) 0
      append G(doc) [formattag $tag $details]
    }

    codeblock  { set G(inCodeblock) 1 }
    /codeblock { 
      append G(doc) [Code $G(codeblock)]
      set G(codeblock) "" 
      set G(inCodeblock) 0 
    }

    table {
      catch {array unset D} 
      array set D $details
      if {[info exists D(striped)]} {
        unset D(striped)
        set D(style) "margin:1em auto; width:80%; border-spacing:0"
        set G(inStripedTable) 1
      }
      append G(doc) [formattag $tag [array get D]]
    }
    /table { 
      set G(inStripedTable) 0 
      append G(doc) [formattag $tag [array get D]]
    }
    tr {
      catch {array unset D} 
      array set D $details
      switch $G(inStripedTable) {
        1 {
          set D(style) "text-align:left"
          set G(inStripedTable) 2
        }
        2 {
          set D(style) "text-align:left;background-color:#DDDDDD"
          set G(inStripedTable) 1
        }
      }
      append G(doc) [formattag $tag [array get D]]
    }

    default {
      if {$G(inCodeblock)} { 
        append G(codeblock) [formattag $tag $details]
      } else {
        append G(doc) [formattag $tag $details]
      }
    }
  }
}

proc formattag {tag details} {
  set ret "<$tag"
  foreach {key value} $details {
    append ret " $key=\"$value\""
  }
  append ret ">"
  set ret
}

proc headingnumber {level} {
  upvar #0 ::Addtoc G
  set ret ""
  incr G(heading:$level)
  for {set i 1} {$i < 6} {incr i} {
    if {$i > $level} { 
      set G(heading:$i) 0 
    } else {
      append ret "$G(heading:$i)."
    }
  }
  set ret
}

proc addtoc {zDoc} {
  # If the extension with the [parsehtml] command has not been loaded,
  # load it now.
  #
  if {[info commands parsehtml] == ""} { load ./parsehtml.so }

  # These variables are all used to store state between invocations of
  # the [parsehtml] callback used to do preprocessing.
  #
  set ::Addtoc(heading:1) 0
  set ::Addtoc(heading:2) 0
  set ::Addtoc(heading:3) 0
  set ::Addtoc(heading:4) 0
  set ::Addtoc(heading:5) 0
  set ::Addtoc(heading:6) 0
  set ::Addtoc(inHeading) 0
  set ::Addtoc(inTitle) 0
  set ::Addtoc(inCodeblock) 0
  set ::Addtoc(inStripedTable) 0
  set ::Addtoc(notoccounter) 0
  set ::Addtoc(codeblock) ""

  # The following three are set by the [parsehtml] callback. The title,
  # table-of-contents and text of the pre-processed document.
  #
  set ::Addtoc(title) ""
  set ::Addtoc(toc) ""
  set ::Addtoc(doc) ""

  parsehtml $zDoc addtoc_cb

  # Variable $toc is set to the HTML text for the table of contents. The
  # text "<table_of_contents>" in the input file will be replaced by
  # this text. The "<div class=startsearch>" tag tells the script that 
  # builds the site-search database not to index any text that occurs
  # before it. This stops the table of contents from being used for 
  # snippets on search results pages.
  #
  set toc [subst {
    <div class=fancy>
    <div style="font-size:2em;text-align:center;color:#80a796">
      $::Addtoc(title)
    </div>
    <div style="font-size:1.5em;margin:1em;color:#80a796">
      Table Of Contents</div>
    <div id=toc> $::Addtoc(toc) </div>
    <div class=startsearch></div>
  }]

  string map [list <table_of_contents> $toc] $::Addtoc(doc)
}

Changes to pages/fts3.in.

1
2
3
4
5
6
7


8
9
10
11
12
13
14
15
..
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
..
33
34
35
36
37
38
39
40
41
42
43

44
45
46
47
48
49
50
51
52
53
54

55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
..
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

130
131
132
133
134
135
136
137
138
139

140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
...
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
...
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
...
204
205
206
207
208
209
210
211
212
213
214

215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
...
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
...
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
...
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405

406
407
408
409
410
411
412
413
414

415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439

440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
...
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579

580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
...
612
613
614
615
616
617
618
619
620

621
622
623
624
625
626
627
628
...
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
...
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
...
704
705
706
707
708
709
710
711

712
713
714
715
716
717
718
719
720
721
...
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
...
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
...
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
...
874
875
876
877
878
879
880
881

882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
...
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
...
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
....
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078

1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
....
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
....
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180

1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
....
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218

1219
1220
1221
1222
1223
1224
1225
....
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
....
1285
1286
1287
1288
1289
1290
1291
1292

1293
1294
1295
1296
1297

1298
1299
1300
1301
1302
1303
1304
....
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
....
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
....
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
....
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
....
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
....
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507


1508

1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521


1522

1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
....
1546
1547
1548
1549
1550
1551
1552


1553

1554


1555

1556
1557
1558
1559
1560
1561
1562
1563

1564

1565
1566
1567
1568
1569
1570
1571
....
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602

1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
....
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
....
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
....
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
....
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817

<title>SQLite FTS3 Extension</title>
<tcl>

hd_keywords *fts3 FTS3
source [file join $::DOC pages fancyformat.tcl]
fancyformat_document "SQLite FTS3 Extension" {} {



<h2 style="margin-left:1.0em"> Overview</h2>

<p>
  FTS3 is an SQLite virtual table module that allows users to perform 
  full-text searches on a set of documents. The most common (and effective) 
  way to describe full-text searches is "what Google, Yahoo and Altavista do
  with documents placed on the World Wide Web". Users input a term, or series 
  of terms, perhaps connected by a binary operator or grouped together into a 
................................................................................
  specified. This document describes the deployment and usage of FTS3.

<p>
  Portions of the original FTS3 code were contributed to the SQLite project 
  by Scott Hess of <a href="http://www.google.com">Google</a>. It is now 
  developed and maintained as part of SQLite.

[h1 "Introduction to FTS3"]

<p>
  The FTS3 extension module allows users to create special tables with a 
  built-in full-text index (hereafter "FTS3 tables"). The full-text index
  allows the user to efficiently query the database for all rows that contain
  one or more instances specified word (hereafter a "token", even if the table
  contains many large documents.
................................................................................

<p>
  For example, if each of the 517430 documents in the 
  "<a href="http://www.cs.cmu.edu/~enron/">Enron E-Mail Dataset</a>"
  is inserted into both the FTS3 table and the ordinary SQLite table
  created using the following SQL script:

[Code {
  CREATE VIRTUAL TABLE enrondata1 USING fts3(content TEXT);     /* FTS3 table */
  CREATE TABLE enrondata2(content TEXT);                        /* Ordinary table */
}]


<p>
  Then either of the two queries below may be executed to find the number of
  documents in the database that contain the word "linux" (351). Using one
  desktop PC hardware configuration, the query on the FTS3 table returns in
  approximately 0.03 seconds, versus 22.5 for querying the ordinary table.

[Code {
  SELECT count(*) FROM enrondata1 WHERE content MATCH 'linux';  /* 0.03 seconds */
  SELECT count(*) FROM enrondata2 WHERE content LIKE '%linux%'; /* 22.5 seconds */
}]


<p>
  Of course, the two queries above are not entirely equivalent. For example
  the LIKE query matches rows that contain terms such as "linuxophobe"
  or "EnterpriseLinux" (as it happens, the Enron E-Mail Dataset does not
  actually contain any such terms), whereas the MATCH query on the FTS3 table
  selects only those rows that contain "linux" as a discrete token. Both 
  searches are case-insensitive. The FTS3 table consumes around 2006 MB on
  disk compared to just 1453 MB for the ordinary table. Using the same
  hardware configuration used to perform the SELECT queries above, the FTS3
  table took just under 31 minutes to populate, versus 25 for the ordinary
  table.

[h2 "Creating and Destroying FTS3 Tables"]

<p>
  Like other virtual table types, new FTS3 tables are created using a 
  \[CREATE VIRTUAL TABLE\] statement. The module name, which follows
  the USING keyword, is "fts3". The virtual table module arguments may
  be left empty, in which case an FTS3 table with a single user-defined 
  column named "content" is created. Alternatively, the module arguments
  may be passed a list of comma separated column names. 

<p>
  If column names are explicitly provided for the FTS3 table as part of
................................................................................
  the CREATE VIRTUAL TABLE statement, then a datatype name may be optionally 
  specified for each column. However, this is pure syntactic sugar, the
  supplied typenames are not used by FTS3 or the SQLite core for any
  purpose. The same applies to any constraints specified along with an
  FTS3 column name - they are parsed but not used or recorded by the system
  in any way.

[Code {
  <i>-- Create an FTS3 table named "data" with one column - "content":</i>
  CREATE VIRTUAL TABLE data USING fts3();

  <i>-- Create an FTS3 table named "pages" with three columns:</i>
  CREATE VIRTUAL TABLE pages USING fts3(title, keywords, body);

  <i>-- Create an FTS3 table named "mail" with two columns. Datatypes
  -- and column constraints are specified along with each column. These
  -- are completely ignored by FTS3 and SQLite. </i>
  CREATE VIRTUAL TABLE mail USING fts3(
    subject VARCHAR(256) NOT NULL,
    body TEXT CHECK(length(body)<10240)
  );
}]


<p>
  As well as a list of columns, the module arguments passed to a CREATE
  VIRTUAL TABLE statement used to create an FTS3 table may be used to specify
  a \[tokenizer\]. This is done by specifying a string of the form
  "tokenize=&lt;tokenizer name&gt; &lt;tokenizer args&gt;" in place of a column
  name, where &lt;tokenizer name&gt; is the name of the tokenizer to use and
  &lt;tokenizer args&gt; is an optional list of whitespace separated qualifiers
  to pass to the tokenizer implementation. A tokenizer specification may be
  placed anywhere in the column list, but at most one tokenizer declaration is
  allowed for each CREATE VIRTUAL TABLE statement.  The second and subsequent
  tokenizer declaration are interpreted as column names. 
  \[tokenizer|See below\] for a detailed description of using (and, if
  necessary, implementing) a tokenizer.

[Code {
  <i>-- Create an FTS3 table named "papers" with two columns that uses</i>
  <i>-- the tokenizer "porter".</i>
  CREATE VIRTUAL TABLE papers USING fts3(author, document, tokenize=porter);

  <i>-- Create an FTS3 table with a single column - "content" - that uses</i>
  <i>-- the "simple" tokenizer.</i>
  CREATE VIRTUAL TABLE data USING fts3(tokenize=simple);

  <i>-- Create an FTS3 table with two columns that uses the "icu" tokenizer.</i>
  <i>-- The qualifier "en_AU" is passed to the tokenizer implementation</i>
  CREATE VIRTUAL TABLE names USING fts3(a, b, tokenize=icu en_AU);
}]


<p>
  FTS3 tables may be dropped from the database using an ordinary \[DROP TABLE\]
  statement. For example:

[Code {
  <i>-- Create, then immediately drop, an FTS3 table.</i>
  CREATE VIRTUAL TABLE data USING fts3();
  DROP TABLE data;
}]


[h2 "Populating FTS3 Tables"]

  <p>
    FTS3 tables are populated using \[INSERT\], \[UPDATE\] and \[DELETE\]
    statements in the same way as ordinary SQLite tables are.

  <p>
    As well as the columns named by the user (or the "content" column if no
    module arguments where specified as part of the \[CREATE VIRTUAL TABLE\] 
    statement), each FTS3 table has a "rowid" column. The rowid of an FTS3
    table behaves in the same way as the rowid column of an ordinary SQLite 
    table, except that the values stored in the rowid column of an FTS3 table 
    remain unchanged if the database is rebuilt using the \[VACUUM\] command. 
    For FTS3 tables, "docid" is allowed as an alias along with the usual "rowid",
    "oid" and "_oid_" identifiers. Attempting to insert or update a row with a 
    docid value that already exists in the table is an error, just as it would 
    be with an ordinary SQLite table.

  <p>
    There is one other subtle difference between "docid" and the normal SQLite
................................................................................
    aliases for the rowid column. Normally, if an INSERT or UPDATE statement 
    assigns discreet values to two or more aliases of the rowid column, SQLite 
    writes the rightmost of such values specified in the INSERT or UPDATE
    statement to the database. However, assigning a non-NULL value to both
    the "docid" and one or more of the SQLite rowid aliases when inserting or
    updating an FTS3 table is considered an error. See below for an example.

[Code {
  <i>-- Create an FTS3 table</i>
  CREATE VIRTUAL TABLE pages USING fts3(title, body);

  <i>-- Insert a row with a specific docid value.</i>
  INSERT INTO pages(docid, title, body) VALUES(53, 'Home Page', 'SQLite is a software...');

  <i>-- Insert a row and allow FTS3 to assign a docid value using the same algorithm as</i>
................................................................................

  <i>-- Delete the entire table contents.</i>
  DELETE FROM pages;

  <i>-- The following is an error. It is not possible to assign non-NULL values to both</i>
  <i>-- the rowid and docid columns of an FTS3 table.</i>
  INSERT INTO pages(rowid, docid, title, body) VALUES(1, 2, 'A title', 'A document body');
}]

  <p>
    To support full-text queries, FTS3 maintains an inverted index that maps
    from each unique term or word that appears in the dataset to the locations
    in which it appears within the table contents. For the curious, a 
    complete description of the \[segment btree|data structure\] used to store
    this index within the database file is described below. A feature of
    this data structure is that at any time the database may contain not
    one index b-tree, but several different b-trees that are incrementally
    merged as rows are inserted, updated and deleted. This technique improves 
    performance when writing to an FTS3 table, but causes some overhead for
    full-text queries that use the index. Executing an SQL statement of the
    form "INSERT INTO &lt;fts3-table&gt;(&lt;fts3-table&gt;) VALUES('optimize')"
................................................................................
    b-tree containing the entire index. This can be an expensive operation,
    but may speed up future queries. 

  <p>
    For example, to optimize the full-text index for an FTS3 table named
    "docs":

[Code {
  <i>-- Optimize the internal structure of FTS3 table "docs".</i>
  INSERT INTO docs(docs) VALUES('optimize');
}]


  <p>
    The statement above may appear syntacticly incorrect to some. Refer to
    the section describing the \[simple fts3 queries\] for an explanation.

  <p>
    There is another, deprecated, method for invoking the optimize 
    operation using a SELECT statement. New code should use statements
    similar to the INSERT above to optimize FTS3 structures.

[h2 "Querying FTS3 Tables" {} {simple fts3 queries}]

<p>
  As for all other SQLite tables, virtual or otherwise, data is retrieved
  from FTS3 tables using a \[SELECT\] statement.

<p>
  FTS3 tables can be queried efficiently using SELECT statements of two
  different forms:

<ul>
  <li><p>
    <b>Query by rowid</b>. If the WHERE clause of the SELECT statement
    contains a sub-clause of the form "rowid = ?", where ? is an SQL expression,
    FTS3 is able to retreive the requested row directly using the equivalent 
    of an SQLite \[INTEGER PRIMARY KEY\] index.

  <li><p>
    <b>Full-text query</b>. If the WHERE clause of the SELECT statement contains
    a sub-clause of the form "&lt;column&gt; MATCH ?", FTS3 is able to use 
    the built-in full-text index to restrict the search to those documents 
    that match the full-text query string specified as the right-hand operand
    of the MATCH clause.
................................................................................
<p>
  If neither of the two query strategies enumerated above can be used, all
  queries on FTS3 tables are implemented using a linear scan of the entire
  table. If the table contains large amounts of data, this may be an 
  impractically approach (the first example on this page shows that a linear
  scan of 1.5 GB of data takes around 30 seconds using a modern PC).

[Code {
  <i>-- The examples in this block assume the following FTS3 table:</i>
  CREATE VIRTUAL TABLE mail USING fts3(subject, body);

  SELECT * FROM mail WHERE rowid = 15;                <i>-- Fast. Rowid lookup.</i>
  SELECT * FROM mail WHERE body MATCH 'sqlite';       <i>-- Fast. Full-text query.</i>
  SELECT * FROM mail WHERE mail MATCH 'search';       <i>-- Fast. Full-text query.</i>
  SELECT * FROM mail WHERE rowid BETWEEN 15 AND 20;   <i>-- Slow. Linear scan.</i>
  SELECT * FROM mail WHERE subject = 'database';      <i>-- Slow. Linear scan.</i>
  SELECT * FROM mail WHERE subject MATCH 'database';  <i>-- Fast. Full-text query.</i>
}]

<p>
  In all of the full-text queries above, the right-hand operand of the MATCH
  operator is a string consisting of a single term. In this case, the MATCH
  expression evaluates to true for all documents that contain one or more 
  instances of the specified word ("sqlite", "search" or "database", depending 
  on which example you look at). Specifying a single term as the right-hand
  operand of the MATCH operator results in the simplest (and most common) type 
  of full-text query possible. However more complicated queries are possible,
  including phrase searches, term-prefix searches and searches for documents 
  containing combinations of terms occuring within a defined proximity of each
  other. The various ways in which the full-text index may be queried are
  \[FTS3 MATCH|described below\].

<p>
  Normally, full-text queries are case-insensitive. However, this is
  is dependent on the specific \[tokenizer\] used by the FTS3 table
  being queried. Refer to the section on \[tokenizer|tokenizers\] for details.

<p>
  The paragraph above notes that a MATCH operator with a simple term as the
  right-hand operand evaluates to true for all documents that contain the
  specified term. In this context, the "document" may refer to either the 
  data stored in a single column of a row of an FTS3 table, or to the contents
  of all columns in a single row, depending on the identifier used as the
................................................................................
  the left-hand operand of the MATCH operator is an FTS3 table column name,
  then the document that the search term must be contained in is the value
  stored in the specified column. However, if the identifier is the name
  of the FTS3 <i>table</i> itself, then the MATCH operator evaluates to true
  for each row of the FTS3 table for which any column contains the search 
  term. The following example demonstrates this:

[Code {
  <i>-- Example schema</i>
  CREATE VIRTUAL TABLE mail USING fts3(subject, body);

  <i>-- Example table population</i>
  INSERT INTO mail(docid, subject, body) VALUES(1, 'software feedback', 'found it too slow');
  INSERT INTO mail(docid, subject, body) VALUES(2, 'software feedback', 'no feedback');
  INSERT INTO mail(docid, subject, body) VALUES(3, 'slow lunch order',  'was a software problem');

  <i>-- Example queries</i>
  SELECT * FROM mail WHERE subject MATCH 'software';    <i>-- Selects rows 1 and 2</i>
  SELECT * FROM mail WHERE body    MATCH 'feedback';    <i>-- Selects row 2</i>
  SELECT * FROM mail WHERE mail    MATCH 'software';    <i>-- Selects rows 1, 2 and 3</i>
  SELECT * FROM mail WHERE mail    MATCH 'slow';        <i>-- Selects rows 1 and 3</i>
}]
  
<p>
  At first glance, the final two full-text queries in the example above seem
  to be syntacticly incorrect, as there is a table name ("mail") used as
  an SQL expression. The reason this is acceptable is that each FTS3 table
  actually has a \[sqlite3_declare_vtab|HIDDEN\] column with the same name
  as the table itself (in this case, "mail"). The value stored in this
  column is not meaningful to the application, but can be used as the 
  left-hand operand to a MATCH operator. This special column may also be
  passed as an argument to the \[snippet()|FTS3 auxillary functions\].

<p>
  The following example illustrates the above. The expressions "docs", 
  "docs.docs" and "main.docs.docs" all refer to column "docs". However, the 
  expression "main.docs" does not refer to any column. It could be used to 
  refer to a table, but a table name is not allowed in the context in which
  it is used below.

[Code {
  <i>-- Example schema</i>
  CREATE VIRTUAL TABLE docs USING fts3(content);

  <i>-- Example queries</i>
  SELECT * FROM docs WHERE docs MATCH 'sqlite';              <i>-- OK.</i>
  SELECT * FROM docs WHERE docs.docs MATCH 'sqlite';         <i>-- OK.</i>
  SELECT * FROM docs WHERE main.docs.docs MATCH 'sqlite';    <i>-- OK.</i>
  SELECT * FROM docs WHERE main.docs MATCH 'sqlite';         <i>-- Error.</i>
}]
 
[h2 "Summary"]

<p>
  From the users point of view, FTS3 tables are similar to ordinary SQLite
  tables in many ways. Data may be added to, modified within and removed 
  from FTS3 tables using the INSERT, UPDATE and DELETE commands just as 
  it may be with ordinary tables. Similarly, the SELECT command may be used 
  to query data. The following list summarizes the differences between FTS3
................................................................................
    triggers attached to FTS3 tables. Nor is it possible to use the ALTER TABLE
    command to add extra columns to FTS3 tables (although it is possible to use
    ALTER TABLE to rename an FTS3 table).

  <li><p> 
    Data-types specified as part of the "CREATE VIRTUAL TABLE" statement
    used to create an FTS3 table are ignored completely. Instead of the
    normal rules for applying type \[affinity\] to inserted values, all
    values inserted into FTS3 table columns (except the special rowid
    column) are converted to type TEXT before being stored.

  <li><p> 
    FTS3 tables permit the special alias "docid" to be used to refer to the
    rowid column supported by all \[virtual tables\].

  <li><p> 
    The \[FTS3 MATCH\] operator is supported for queries based on the built-in
    full-text index. 

  <li><p> 
    The FTS3 auxillary functions, \[snippet|snippet() and offsets()\], are 
    available to support full-text queries.

  <li><p> 
    Each FTS3 table has a \[sqlite3_declare_vtab()|HIDDEN column\] with the 
    same name as the table itself. The value contained in each row for the
    special column is only useful when used on the left-hand side of a 
    \[FTS3 MATCH|MATCH\] operator, or when specified as an argument to one 
    of the \[snippet|FTS3 auxillary functions\].
    

</ol>


[h1 "Compiling and Enabling FTS3" {} {compile fts3}]

<p>
  Although FTS3 is distributed as part of the SQLite source code, it is not
  enabled by default. To build SQLite with FTS3 functionality enabled, define
  the preprocessor macro \[SQLITE_ENABLE_FTS3\] when compiling. New applications
  should also define the \[SQLITE_ENABLE_FTS3_PARENTHESIS\] macro to enable the
  \[enhanced query syntax\] (see below). Usually, this is done by adding the 
  following two switches to the compiler command line:

[Code {
  -DSQLITE_ENABLE_FTS3
  -DSQLITE_ENABLE_FTS3_PARENTHESIS
}]


<p>
  If using the amalgamation autoconf based build system, setting the CPPFLAGS
  environment variable while running the 'configure' script is an easy
  way to set these macros. For example, the following command:

[Code {
  CPPFLAGS="-DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS3_PARENTHESIS" ./configure &lt;configure options&gt;
}]


<p>
  where <i>&lt;configure options&gt;</i> are those options normally passed to
  the configure script, if any.

<p>
  Because FTS3 is a virtual table, it is incompatible with the
  \[SQLITE_OMIT_VIRTUALTABLE\] option.

<p>
  If an SQLite build does not include FTS3, then any attempt to prepare an
  SQL statement to create an FTS3 table or to drop or access an existing 
  FTS3 table in any way will fail. The error message returned will be similar 
  to "no such module: fts3".

<p>
  If the C version of the <a href=http://site.icu-project.org/>ICU library</a>
  is available, then FTS3 may also be compiled with the SQLITE_ENABLE_ICU
  pre-processor macro defined. Compiling with this macro enables an FTS3
  \[tokenizer\] that uses the ICU library to split a document into terms
  (words) using the conventions for a specified language and locale.

[Code {
  -DSQLITE_ENABLE_ICU
}]

  

[h1 "Full-text Index Queries" {} {FTS3 MATCH}]

<p>
  The most useful thing about FTS3 tables is the queries that may be 
  performed using the built-in full-text index. Full-text queries are 
  performed by specifying a clause of the form 
  "&lt;column&gt; MATCH &lt;full-text query expression&gt;" to the WHERE 
  clause of a SELECT statement that reads data from an FTS3 table. 
  \[simple fts3 queries|Simple FTS3 queries\] that return all documents that 
  contain a given term are described above. In that discussion the right-hand
  operand of the MATCH operator was assumed to be a string consisting of a
  single term. This section describes the more complex query types supported 
  by FTS3 tables, and how they may be utilized by specifying a more
  complex query expression as the right-hand operand of a MATCH operator.

<p>
  FTS3 tables support three basic query types:

<ul>
  <li><p><b>Token or token prefix queries</b>. 
    An FTS3 table may be queried for all documents that contain a specified
    term (the \[simple fts3 queries|simple case\] described above), or for
    all documents that contain a term with a specified prefix. As we have
    seen, the query expression for a specific term is simply the term itself.
    The query expression used to search for a term prefix is the prefix
    itself with a '*' character appended to it. For example:
</ul>

[Code {
  <i>-- Virtual table declaration</i>
  CREATE VIRTUAL TABLE docs USING fts3(title, body);

  <i>-- Query for all documents containing the term "linux":</i>
  SELECT * FROM docs WHERE docs MATCH 'linux';

  <i>-- Query for all documents containing a term with the prefix "lin". This will match</i>
  <i>-- all documents that contain "linux", but also those that contain terms "linear",</i>
  <i>--"linker", "linguistic" and so on.</i>
  SELECT * FROM docs WHERE docs MATCH 'lin*';
}]

<ul>
  <li style="list-style:none"><p>
    Normally, a token or token prefix query is matched against the FTS3 table 
    column specified as the right-hand side of the MATCH operator. Or, if the
    special column with the same name as the FTS3 table itself is specified,
    against all columns. This may be overridden by specifying a column-name
    followed by a ":" character before a basic term query. There may be space
    between the ":" and the term to query for, but not between the column-name
    and the ":" character. For example:
</ul>
   
[Code {
  <i>-- Query the database for documents for which the term "linux" appears in</i>
  <i>-- the document title, and the term "problems" appears in either the title</i>
  <i>-- or body of the document.</i>
  SELECT * FROM docs WHERE docs MATCH 'title:linux problems';

  <i>-- Query the database for documents for which the term "linux" appears in</i>
  <i>-- the document title, and the term "driver" appears in the body of the document</i>
  <i>-- ("driver" may also appear in the title, but this alone will not satisfy the</i>.
  <i>-- query criteria).</i>
  SELECT * FROM docs WHERE body MATCH 'title:linux driver';
}]

<ul>
  <li><p><b>Phrase queries</b>.
    A phrase query is a query that retrieves all documents that contain a
    nominated set of terms or term prefixes in a specified order with no
    intervening tokens. Phrase queries are specified by enclosing a space
    separated sequence of terms or term prefixes in double quotes (").
    For example:
</ul>

[Code {
  <i>-- Query for all documents that contain the phrase "linux applications".</i>
  SELECT * FROM docs WHERE docs MATCH '"linux applications"';

  <i>-- Query for all documents that contain a phrase that matches "lin* app*". As well as</i>
  <i>-- "linux applications", this will match common phrases such as "linoleum appliances"</i>
  <i>-- or "link apprentice".</i>
  SELECT * FROM docs WHERE docs MATCH '"lin* app*"';
}]

<ul>
  <li><p><b>NEAR queries</b>. 
    A NEAR query is a query that returns documents that contain a two or
    more nominated terms or phrases within a specified proximity of each 
    other (by default with 10 or less intervening terms). A NEAR query is 
    specified by putting the keyword "NEAR" between two phrase, term or 
    term prefix queries. To specify a proximity other than the default,
    an operator of the form "NEAR/<i>&lt;N&gt;</i>" may be used, where
    <i>&lt;N&gt;</i> is the maximum number of intervening terms allowed.
    For example:
</ul>

[Code {
  <i>-- Virtual table declaration.</i>
  CREATE VIRTUAL TABLE docs USING fts3();

  <i>-- Virtual table data.</i>
  INSERT INTO docs VALUES('SQLite is an ACID compliant embedded relational database management system');

  <i>-- Search for a document that contains the terms "sqlite" and "database" with</i>
................................................................................
  <i>-- document stored in table docs.</i>
  SELECT * FROM docs WHERE docs MATCH 'database NEAR/2 "ACID compliant"';

  <i>-- Search for a document that contains the phrase "ACID compliant" and the term</i>
  <i>-- "sqlite" with not more than 2 terms separating the two. This also matches</i>
  <i>-- the only document stored in table docs.</i>
  SELECT * FROM docs WHERE docs MATCH '"ACID compliant" NEAR/2 sqlite';
}]

<ul>
  <li style="list-style: none"><p>
    More than one NEAR operator may appear in a single query. In this case each
    pair of terms or phrases separated by a NEAR operator must appear within the
    specified proximity of each other in the document. Using the same table and
    data as in the block of examples above:
</ul>


[Code { <i>-- The following query selects documents that contains an instance of the term </i>
  <i>-- "sqlite" separated by two or fewer terms from an instance of the term "acid",</i>
  <i>-- which is in turn separated by two or fewer terms from an instance of the term</i>
  <i>-- "relational". As it happens, the only document in table docs satisfies this criteria.</i>
  SELECT * FROM docs WHERE docs MATCH 'sqlite NEAR/2 acid NEAR/2 relational';

  <i>-- This query matches no documents. There is an instance of the term "sqlite" with</i>
  <i>-- sufficient proximity to an instance of "acid" but it is not sufficiently close</i>
  <i>-- to an instance of the term "relational".</i>
  SELECT * FROM docs WHERE docs MATCH 'acid NEAR/2 sqlite NEAR/2 relational';
}]

<p>
  Phrase and NEAR queries may not span multiple columns within a row.

<p>
  The three basic query types described above may be used to query the full-text
  index for the set of documents that match the specified criteria. Using the
................................................................................
<p>
  The FTS3 module may be compiled to use one of two slightly different versions
  of the full-text query syntax, the "standard" query syntax and the "enhanced" 
  query syntax. The basic term, term-prefix, phrase and NEAR queries described 
  above are the same in both versions of the syntax. The way in which set 
  operations are specified is slightly different. The following two sub-sections 
  describe the part of the two query syntaxes that pertains to set operations. 
  Refer to the description of how to \[compile fts3\] for compilation notes.


[h2 "Set Operations Using The Enhanced Query Syntax" {} {enhanced query syntax}]

<p>
  The enhanced query syntax supports the AND, OR and NOT binary set operators.
  Each of the two operands to an operator may be a basic FTS3 query, or the
  result of another AND, OR or NOT set operation. Operators must be entered
  using capital letters. Otherwise, they are interpreted as basic term queries
  instead of set operators.
................................................................................
<p>
  The AND operator may be implicitly specified. If two basic queries appear 
  with no operator separating them in an FTS3 query string, the results are
  the same as if the two basic queries were separated by an AND operator.
  For example, the query expression "implicit operator" is a more succinct
  version of "implicit AND operator".

[Code {
  <i>-- Virtual table declaration</i>
  CREATE VIRTUAL TABLE docs USING fts3();

  <i>-- Virtual table data</i>
  INSERT INTO docs(docid, content) VALUES(1, 'a database is a software system');
  INSERT INTO docs(docid, content) VALUES(2, 'sqlite is a software system');
  INSERT INTO docs(docid, content) VALUES(3, 'sqlite is a database');
................................................................................

  <i>-- The following query matches no documents. Because "and" is in lowercase letters,</i>
  <i>-- it is interpreted as a basic term query instead of an operator. Operators must</i>
  <i>-- be specified using capital letters. In practice, this query will match any documents</i>
  <i>-- that contain each of the three terms "database", "and" and "sqlite" at least once.</i>
  <i>-- No documents in the example data above match this criteria.</i>
  SELECT * FROM docs WHERE docs MATCH 'database and sqlite';
}]

<p>
  The examples above all use basic full-text term queries as both operands of 
  the set operations demonstrated. Phrase and NEAR queries may also be used,
  as may the results of other set operations. When more than one set operation
  is present in an FTS3 query, the precedence of operators is as follows:

[Table]
  [Tr]<th>Operator<th>Enhanced Query Syntax Precedence
  [Tr]<td>NOT <td> Highest precedence (tightest grouping).
  [Tr]<td>AND <td>
  [Tr]<td>OR  <td> Lowest precedence (loosest grouping).
</table>

<p>
  When using the enhanced query syntax, parenthesis may be used to override
  the default precedence of the various operators. For example:

[Code {

  <i>-- Return the docid values associated with all documents that contain the</i>
  <i>-- two terms "sqlite" and "database", and/or contain the term "library".</i>
  SELECT docid FROM docs WHERE docs MATCH 'sqlite AND database OR library';

  <i>-- This query is equivalent to the above.</i>
  SELECT docid FROM docs WHERE docs MATCH 'sqlite AND database'
................................................................................
  SELECT docid FROM docs WHERE docs MATCH 'linux'
    INTERSECT
  SELECT docid FROM (
    SELECT docid FROM docs WHERE docs MATCH '"sqlite library"'
      UNION
    SELECT docid FROM docs WHERE docs MATCH '"sqlite database"'
  );
}]



[h2 "Set Operations Using The Standard Query Syntax"]

<p>
  FTS3 query set operations using the standard query syntax are similar, but
  not identical, to set operations with the enhanced query syntax. There
  are four differences, as follows:

<ol>
................................................................................
    may be applied to basic term and term-prefix queries (but not to phrase
    or NEAR queries). A term or term-prefix that has a unary "-" operator
    attached to it may not appear as an operand to an OR operator. An FTS3
    query may not consist entirely of terms or term-prefix queries with unary
    "-" operators attached to them.
</ol>

[Code {
  <i>-- Search for the set of documents that contain the term "sqlite" but do</i>
  <i>-- not contain the term "database".</i>
  SELECT * FROM docs WHERE docs MATCH 'sqlite -database';
}]

<ol>
  <li value=4><p> The relative precedence of the set operations is different. 
   In particular, using the standard query syntax the "OR" operator has a
   higher precedence than "AND". The precedence of operators when using the
   standard query syntax is: 
</ol>

[Table]
  [Tr]<th>Operator<th>Standard Query Syntax Precedence
  [Tr]<td>Unary "-" <td> Highest precedence (tightest grouping).
  [Tr]<td>OR  <td>
  [Tr]<td>AND <td> Lowest precedence (loosest grouping).
</table>

<ol><li style="list-style:none">
  The following example illustrates precedence of operators using the standard 
  query syntax:
</ol>

[Code {
  <i>-- Search for documents that contains at least one of the terms "database"</i>
  <i>-- and "sqlite", and also contains the term "library". Because of the differences</i>
  <i>-- in operator precedences, this query would have a different interpretation using</i>
  <i>-- the enhanced query syntax.</i>
  SELECT * FROM docs WHERE docs MATCH 'sqlite OR database library';
}]

[h1 "Auxillary functions - Snippet, Offsets and Matchinfo" {} snippet offsets]

<p>
  The FTS3 module provides three special SQL scalar functions that may be useful
  to the developers of full-text query systems: "snippet", "offsets" and
  "matchinfo". The purpose of the "snippet" and "offsets" functions is to allow
  the user to identify the location of queried terms in the returned documents.
  The "matchinfo" function provides the user with metrics that may be useful
................................................................................

<p>
  The first argument to all three special SQL scalar functions
  must be the the special hidden column of an FTS3 table that has the same
  name as the table (see above). For example, given an FTS3 table named 
  "mail":

[Code {
  SELECT offsets(mail) FROM mail WHERE mail MATCH &lt;full-text query expression&gt;;
  SELECT snippet(mail) FROM mail WHERE mail MATCH &lt;full-text query expression&gt;;
  SELECT matchinfo(mail) FROM mail WHERE mail MATCH &lt;full-text query expression&gt;;
}]

<p>
  The three auxillary functions are only useful within a SELECT statement that
  uses the FTS3 table's full-text index. If used within a SELECT that uses
  the "query by rowid" or "linear scan" strategies, then the snippet and
  offsets both return an an empty string, and the matchinfo function returns
  a blob value zero bytes in size.
................................................................................

  <li> If the matchable phrase in the FTS3 query is restricted to matching
       data in a specified FTS3 table column, then only phrase matches that 
       occur within that column are considered.
</ol>
 

[h2 "The Offsets Function"]

<p>
  For a SELECT query that uses the full-text index, the offsets() function 
  returns a text value containing a series of space-separated integers. For
  each term in each <a href=#matchable>phrase match</a> of the current row, 
  there are four integers in the returned list. Each set of four integers is 
  interpreted as follows:

[Table]
  [Tr]<th>Integer <th>Interpretation
  [Tr]<td>0 
      <td>The column number that the term instance occurs in (0 for the
          leftmost column of the FTS3 table, 1 for the next leftmost, etc.).
  [Tr]<td>1
      <td>The term number of the matching term within the full-text query
          expression. Terms within a query expression are numbered starting
          from 0 in the order that they occur.
  [Tr]<td>2
      <td>The byte offset of the matching term within the column.
  [Tr]<td>3
      <td>The size of the matching term in bytes.
</table>

<p>
  The following block contains examples that use the offsets function.

[Code {
  CREATE VIRTUAL TABLE mail USING fts3(subject, body);
  INSERT INTO mail VALUES('hello world', 'This message is a hello world message.');
  INSERT INTO mail VALUES('urgent: serious', 'This mail is seen as a more serious mail');

  <i>-- The following query returns a single row (as it matches only the first</i>
  <i>-- entry in table "mail". The text returned by the offsets function is</i>
  <i>-- "0 0 6 5 1 0 24 5".</i>
................................................................................
  SELECT offsets(mail) FROM mail WHERE mail MATCH 'message';

  <i>-- The following query matches the second row in table "mail". It returns the</i>
  <i>-- text "1 0 28 7 1 1 36 4". Only those occurences of terms "serious" and "mail"</i>
  <i>-- that are part of an instance of the phrase "serious mail" are identified; the</i>
  <i>-- other occurences of "serious" and "mail" are ignored.</i>
  SELECT offsets(mail) FROM mail WHERE mail MATCH '"serious mail"';
}]


[h2 "The Snippet Function"]

<p>
  The snippet function is used to create formatted fragments of document text
  for display as part of a full-text query results report. The snippet function 
  may be passed between one and four arguments, as follows:

[Table]
  [Tr]<th>Argument <th>Default Value <th>Description
  [Tr]<td>0 <td>N/A
      <td> The first argument to the snippet function must always be the special
           hidden column of the FTS3 table that takes the same name as the table
           itself.
  [Tr]<td>1 <td>"&lt;b&gt;"
      <td> The "start match" text.
  [Tr]<td>2 <td>"&lt;b&gt;"
      <td> The "end match" text.
  [Tr]<td>3 <td>"&lt;b&gt;...&lt;/b&gt;"
      <td> The "ellipses" text.
  [Tr]<td>4 <td>-1
      <td> The FTS3 table column number to extract the returned fragments of
           text from. Columns are numbered from left to right starting with
           zero. A negative value indicates that the text may be extracted
           from any column.
  [Tr]<td>5 <td>-15
      <td> The absolute value of this integer argument is used as the 
           (approximate) number of tokens to include in the returned text 
           value. The maximum allowable absolute value is 64. The value of
           this argument is refered to as <i>N</i> in the discussion below.
</table>

<p>
................................................................................
<p>
  After the <i>M</i> fragments have been located, where <i>M</i> is between
  two and four as described in the paragraphs above, they are joined together
  in sorted order with the "ellipses" text separating them. The three 
  modifications enumerated earlier are performed on the text before it is 
  returned.

[Code {
  <b>Note: In this block of examples, newlines and whitespace characters have
  been inserted into the document inserted into the FTS3 table, and the expected
  results described in SQL comments. This is done to enhance readability only,
  they would not be present in actual SQLite commands or output.</b>

  <i>-- Create and populate an FTS3 table.</i>
  CREATE VIRTUAL TABLE text USING fts3();
................................................................................

  <i>-- The following query returns the text value:</i>
  <i>--</i>
  <i>--   "...the upper portion, &#91;minimum&#93; &#91;temperature&#93; 14-16oC and cool elsewhere,</i>
  <i>--    &#91;minimum&#93; &#91;temperature&#93; 17-20oC. Cold..."</i>
  <i>--</i>
  SELECT snippet(text, '&#91; '&#93;', '...') FROM text WHERE text MATCH '"min* tem*"'
}]

[h2 "The Matchinfo Function" matchinfo matchinfo]

<p>
  The matchinfo function returns a blob value. If used within a query that
  uses the full-text index (not a "query by rowid" or "linear scan"), then
  the blob consists of (2 + <i>C</i> * <i>P</i> * 3) 32-bit unsigned 
  integers in machine byte-order, where <i>C</i> is the number of columns 
  in the FTS3 table being queried, and <i>P</i> is the number of 
  <a href=#matchable>matchable phrases</a> in the query. 

  <p>
    Phrases and columns are both numbered from left to right starting from 
    zero.

[Table]
  [Tr]<th>Array Element <th>Interpretation
  [Tr]<td>0 <td> 
    Number of matchable phrases in the query expression (value <i>P</i> in 
    the formula below).
  [Tr]<td>1 <td>
    Number of columns in the FTS3 table being queried (value <i>C</i> in 
    the formula below).

  [Tr]<td style="white-space:nowrap">2 + 3 * (<i>c</i> + <i>C</i>*<i>p</i>) + 0<td>
    Number of phrase matches for matchable phrase <i>p</i> in column 
    <i>c</i> of the current FTS3 table row.
  [Tr]<td style="white-space:nowrap">2 + 3 * (<i>c</i> + <i>C</i>*<i>p</i>) + 1<td>
    Sum of the number of phrase matches for matchable phrase <i>p</i> in 
    column <i>c</i> for all rows of the FTS3 table.
  [Tr]<td style="white-space:nowrap">2 + 3 * (<i>c</i> + <i>C</i>*<i>p</i>) + 2<td>
    Number of rows of the FTS3 table for which column <i>c</i> contains at 
    least one phrase match for matchable phrase <i>p</i>.
</table>

<p>
  For example:

[Code {
  <i>-- Create and populate an FTS3 table with two columns:</i>
  CREATE VIRTUAL TABLE t1 USING fts3(a, b);
  INSERT INTO t1 VALUES('transaction default models default', 'Non transaction reads');
  INSERT INTO t1 VALUES('the default transaction', 'these semantics present');
  INSERT INTO t1 VALUES('single request', 'default data');

  <i>-- The following query returns a single row consisting of a single blob</i>
................................................................................
  <i>-- 0 of any table row. The 3 hits are spread across 2 different rows.</i>
  <i>--</i>
  <i>-- The next set of three integers (0 1 1) pertain to the hits for "default"</i>
  <i>-- in column 1 of the table (0 in this row, 1 in all rows, spread across </i>
  <i>-- 1 rows).</i>
  <i>--</i>
  SELECT matchinfo(t1) FROM t1 WHERE t1 MATCH 'default transaction "these semantics"';
}]

<p>
  The matchinfo function is much faster than either the snippet or offsets
  functions. This is because the implementation of both snippet and offsets
  is required to retrieve the documents being analyzed from disk, whereas
  all data required by matchinfo is available as part of the same portions
  of the full-text index that are required to implement the full-text query
  itself. This means that of the following two queries, the first may be
  an order of magnitude faster than the second:

[Code {
  SELECT docid, matchinfo(tbl) FROM tbl WHERE tbl MATCH &lt;query expression&gt;;
  SELECT docid, offsets(tbl) FROM tbl WHERE tbl MATCH &lt;query expression&gt;;
}]


<p>
  The matchinfo function provides much of the information required to calculate
  probabalistic "bag-of-words" relevancy scores such as 
  <a href=http://en.wikipedia.org/wiki/Okapi_BM25>Okapi BM25/BM25F</a> that may
  be used to order results in a full-text search application. Also often
  used in such functions is the length or relative length of each document
  or document field. Unfortunately, this information is not made available
  by the matchinfo function as it would require loading extra data from the
  database, potentially slowing matchinfo() down by an order of magnitude.
  One solution is for the application to store the lengths of each document
  or document field in a separate table for use in calculating relevancy
  scores. Appendix A of this document, "\[search application tips\]", contains
  an example of using the matchinfo() function efficiently.

[h1 "Tokenizers" tokenizer {tokenizer}]

<p>
  An FTS3 tokenizer is a set of rules for extracting terms from a document 
  or basic FTS3 full-text query. 

<p>
  Unless a specific tokenizer is specified as part of the CREATE 
................................................................................
  against similar English language terms. For more information on the 
  Porter Stemmer algorithm, please refer to the page linked above.

<p>
  Example illustrating the difference between the "simple" and "porter"
  tokenizers:

[Code {
  <i>-- Create a table using the simple tokenizer. Insert a document into it.</i>
  CREATE VIRTUAL TABLE simple USING fts3(tokenize=simple);
  INSERT INTO simple VALUES('Right now they''re very frustrated');

  <i>-- The first of the following two queries matches the document stored in</i>
  <i>-- table "simple". The second does not.</i>
  SELECT * FROM simple WHERE simple MATCH 'Frustrated');
................................................................................
  <i>-- Create a table using the porter tokenizer. Insert the same document into it</i>
  CREATE VIRTUAL TABLE porter USING fts3(tokenize=porter);
  INSERT INTO porter VALUES('Right now they''re very frustrated');

  <i>-- Both of the following queries match the document stored in table "porter".</i>
  SELECT * FROM porter WHERE porter MATCH 'Frustrated');
  SELECT * FROM porter WHERE porter MATCH 'Frustration');
}]

<p>
  If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
  symbol defined, then there exists a built-in tokenizer named "icu"
  implemented using the ICU library. The first argument passed to the
  xCreate() method (see fts3_tokenizer.h) of this tokenizer may be
  an ICU locale identifier. For example "tr_TR" for Turkish as used
  in Turkey, or "en_AU" for English as used in Australia. For example:

[Code {
    CREATE VIRTUAL TABLE thai_text USING fts3(text, tokenize=icu th_TH)
}]


<p>
  The ICU tokenizer implementation is very simple. It splits the input
  text according to the ICU rules for finding word boundaries and discards
  any tokens that consist entirely of white-space. This may be suitable
  for some applications in some locales, but not all. If more complex
  processing is required, for example to implement stemming or
  discard punctuation, this can be done by creating a tokenizer
  implementation that uses the ICU tokenizer as part of its implementation.

[h2 "Custom (User Implemented) Tokenizers"]

<p>
  As well as the built-in "simple", "porter" and (possibly) "icu" tokenizers,
  FTS3 exports an interface that allows users to implement custom tokenizers
  using C. The interface used to create a new tokenizer is defined and 
  described in the fts3_tokenizer.h source file.

................................................................................
  FTS3 does not expose a C-function that users call to register new
  tokenizer types with a database handle. Instead, the pointer must
  be encoded as an SQL blob value and passed to FTS3 through the SQL
  engine by evaluating a special scalar function, "fts3_tokenizer()".
  The fts3_tokenizer() function may be called with one or two arguments,
  as follows:

[Code {
    SELECT fts3_tokenizer(&lt;tokenizer-name&gt;);
    SELECT fts3_tokenizer(&lt;tokenizer-name&gt;, &lt;sqlite3_tokenizer_module ptr&gt;);
}]


<p>
  Where <tokenizer-name> is a string identifying the tokenizer and
  <sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
  structure encoded as an SQL blob. If the second argument is present,
  it is registered as tokenizer <tokenizer-name> and a copy of it
  returned. If only one argument is passed, a pointer to the tokenizer
................................................................................
  encoded as a blob. Or, if no such tokenizer exists, an SQL exception
  (error) is raised.

<p>
  <b>SECURITY WARNING</b>: If the fts3 extension is used in an environment
  where potentially malicious users may execute arbitrary SQL, they should 
  be prevented from invoking the fts3_tokenizer() function, possibly using 
  the \[sqlite3_set_authorizer()|authorisation callback\].

<p>
  The following block contains an example of calling the fts3_tokenizer()
  function from C code:

[Code {
  <i>/*
  ** Register a tokenizer implementation with FTS3.
  */</i>
  int registerTokenizer(
    sqlite3 *db,
    char *zName,
    const sqlite3_tokenizer_module *p
................................................................................
      if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
        memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
      }
    }

    return sqlite3_finalize(pStmt);
  }
}]


  
  

[h1 "Data Structures" {} {segment btree}]


<p>
  This section describes at a high-level the way the FTS3 module stores its
  index and content in the database. It is <b>not necessary to read or 
  understand the material in this section in order to use FTS3</b> in an 
  application. However, it may be useful to application developers attempting 
  to analyze and understand FTS3 performance characteristics, or to developers 
................................................................................
  named "docid". Following this is one column for each column of the FTS3
  virtual table as declared by the user, named by prepending the column name
  supplied by the user with "c<i>N</i>", where <i>N</i> is the index of the 
  column within the table, numbered from left to right starting with 1. Data
  types supplied as part of the virtual table declaration are not used as
  part of the %_content table declaration. For example:

[Code {
  <i>-- Virtual table declaration</i>
  CREATE VIRTUAL TABLE abc USING FTS3(a NUMBER, b TEXT, c);

  <i>-- Corresponding %_content table declaration</i>
  CREATE TABLE abc_content(docid INTEGER PRIMARY KEY, c0a, c1b, c2c);
}]

<p>
  The %_content table contains the unadulterated data inserted by the user 
  into the FTS3 virtual table by the user. If the user does not explicitly
  supply a "docid" value when inserting records, one is selected automatically
  by the system.

................................................................................
  %_content table that contain one or more occurrences of the term. To
  retrieve all documents that contain a specified term, the FTS3 module
  queries this index to determine the set of docid values for records that
  contain the term, then retrieves the required documents from the %_content
  table. Regardless of the schema of the FTS3 virtual table, the %_segments
  and %_segdir tables are always created as follows:

[Code {
  CREATE TABLE %_segments(
    blockid INTEGER PRIMARY KEY,       <i>-- B-tree node id</i>
    block blob                         <i>-- B-tree node data</i>
  );

  CREATE TABLE %_segdir(
    level INTEGER,
................................................................................
    idx INTEGER,
    start_block INTEGER,               <i>-- Blockid of first node in %_segments</i>
    leaves_end_block INTEGER,          <i>-- Blockid of last leaf node in %_segments</i>
    end_block INTEGER,                 <i>-- Blockid of last node in %_segments</i>
    root BLOB,                         <i>-- B-tree root node</i>
    PRIMARY KEY(level, idx)
  );
}]

<p>
  The schema depicted above is not designed to store the full-text index 
  directly. Instead, it is used to one or more b-tree structures. There
  is one b-tree for each row in the %_segdir table. The %_segdir table
  row contains the root node and various meta-data associated with the
  b-tree structure, and the %_segments table contains all other (non-root)
................................................................................
  (or b-trees) later on. Merging of b-tree structures can be performed as
  a background task, or once a certain number of separate b-tree structures
  have been accumulated. Of course, this scheme makes queries more expensive
  (as the FTS3 code may have to look up individual terms in more than one
  b-tree and merge the results), but it has been found that in practice this
  overhead is often negligible.
  
[h2 "Variable Length Integer (varint) Format"]

<p>
  Integer values stored as part of segment b-tree nodes are encoded using the
  FTS3 varint format. This encoding is similar, but <b>not identical</b>, to the
  the <a href="fileformat.html#varint_format">SQLite varint format</a>.

<p>
................................................................................
  is stored in the remaining seven least signficant bits of each byte.
  The first byte of the encoded representation contains the least significant
  seven bits of the encoded integer value. The second byte of the encoded
  representation, if it is present, contains the seven next least significant
  bits of the integer value, and so on. The following table contains examples
  of encoded integer values:

[Table]
  [Tr]<th>Decimal<th>Hexadecimal<th width=100%>Encoded Representation
  [Tr]<td>43<td>0x000000000000002B<td>0x2B 
  [Tr]<td>200815<td>0x000000000003106F<td>0x9C 0xA0 0x0C
  [Tr]<td>-1<td>0xFFFFFFFFFFFFFFFF<td>0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x01
</table>
  

[h2 "Segment B-Tree Format"]

<p>
  Segment b-trees are prefix-compressed b+-trees. There is one segment b-tree
  for each row in the %_segdir table (see above). The root node of the segment
  b-tree is stored as a blob in the "root" field of the corresponding row
  of the %_segdir table. All other nodes (if any exist) are stored in the 
  "blob" column of the %_segments table. Nodes within the %_segments table are
  identified by the integer value in the blockid field of the corresponding
  row. The following table describes the fields of the %_segdir table:

[Table]
  [Tr]<th>Column           <th width=100%>Interpretion
  [Tr]<td>level            <td> 
    Between them, the contents of the "level" and "idx" fields define the
    relative age of the segment b-tree. The smaller the value stored in the
    "level" field, the more recently the segment b-tree was created. If two
    segment b-trees are of the same "level", the segment with the larger
    value stored in the "idx" column is more recent. The PRIMARY KEY constraint
    on the %_segdir table prevents any two segments from having the same value
    for both the "level" and "idx" fields.
  [Tr]<td>idx              <td> See above.
  [Tr]<td>start_block      <td>
    The blockid that corresponds to the node with the smallest blockid that 
    belongs to this segment b-tree. Or zero if the entire segment b-tree
    fits on the root node. If it exists, this node is always a leaf node.
  [Tr]<td>leaves_end_block <td>
    The blockid that corresponds to the leaf node with the largest blockid 
    that belongs to this segment b-tree. Or zero if the entire segment b-tree
    fits on the root node.
  [Tr]<td>end_block <td>
    The blockid that corresponds to the interior node with the largest 
    blockid that belongs to this segment b-tree.  Or zero if the entire segment
    b-tree fits on the root node. If it exists, this node is always an
    interior node.
  [Tr]<td>root             <td>
    Blob containing the root node of the segment b-tree.
</table>

<p>
  Apart from the root node, the nodes that make up a single segment b-tree are
  always stored using a contiguous sequence of blockids. Furthermore, the
  nodes that make up a single level of the b-tree are themselves stored as
................................................................................
  used to store the b-tree leaves are allocated starting with the blockid
  value stored in the "start_block" column of the corresponding %_segdir row,
  and finishing at the blockid value stored in the "leaves_end_block"
  field of the same row. It is therefore possible to iterate through all the
  leaves of a segment b-tree, in key order, by traversing the %_segments 
  table in blockid order from "start_block" to "leaves_end_block".  

[h3 "Segment B-Tree Leaf Nodes"]

<p>
  The following diagram depicts the format of a segment b-tree leaf node.



[Fig fts3_leaf_node.png "Segment B-Tree Leaf Node Format"]


<p>
  The first term stored on each node ("Term 1" in the figure above) is
  stored verbatim. Each subsequent term is prefix-compressed with respect
  to its predecessor. Terms are stored within a page in sorted (memcmp)
  order.

[h3 "Segment B-Tree Interior Nodes"]

<p>
  The following diagram depicts the format of a segment b-tree interior 
  (non-leaf) node.



[Fig fts3_interior_node.png "Segment B-Tree Interior Node Format"]



[h2 "Doclist Format"]

<p>
  A doclist consists of an array of 64-bit signed integers, serialized using
  the FTS3 varint format. Each doclist entry is made up of a series of two 
  or more integers, as follows:

<ol>
................................................................................
            of storing the term-offset value literally, each integer stored 
            is the difference between the current term-offset and the previous 
            one (or zero if the current term-offset is the first), plus 2.
     </ol>
  <li> Constant value 0.
</ol>



[Fig fts3_doclist2.png "FTS3 Doclist Format"]




[Fig fts3_doclist.png "FTS3 Doclist Entry Format"]


<p>
  For doclists for which the term appears in more than one column of the FTS3
  virtual table, term-offset lists within the doclist are stored in column 
  number order. This ensures that the term-offset list associated with 
  column 0 (if any) is always first, allowing the first two fields of the
  term-offset list to be omitted in this case.


[h1 "Appendix A: Search Application Tips" {} "search application tips"]


<p>
  FTS3 is primarily designed to support Boolean full-text queries - queries
  to find the set of documents that match a specified criteria. However, many 
  (most?) search applications require that results are somehow ranked in order
  of "relevance", where "relevance" is defined as the likelihood that the user
  who performed the search is interested in a specific element of the returned
................................................................................

<p>
  One very simple scheme might be to count the number of instances of the 
  users search terms in each result document. Those documents that contain
  many instances of the terms are considered more relevant than those with
  a small number of instances of each term. In an FTS3 application, the 
  number of term instances in each result could be determined by counting
  the number of integers in the return value of the \[offsets\] function.
  The following example shows a query that could be used to obtain the
  ten most relevant results for a query entered by the user:

[Code {
  <i>-- This example (and all others in this section) assumes the following schema</i>
  CREATE VIRTUAL TABLE documents USING fts3(title, content);

  <i>-- Assuming the application has supplied an SQLite user function named "countintegers"</i>
  <i>-- that returns the number of space-separated integers contained in its only argument,</i>
  <i>-- the following query could be used to return the titles of the 10 documents that contain</i>
  <i>-- the greatest number of instances of the users query terms. Hopefully, these 10</i>
  <i>-- documents will be those that the users considers more or less the most "relevant".</i>
  SELECT title FROM documents 
    WHERE documents MATCH &lt;query&gt;
    ORDER BY countintegers(offsets(document)) DESC
    OFFSET 0 LIMIT 10
}]


<p>
  The query above could be made to run faster by using the FTS3 \[matchinfo\]
  function to determine the number of query term instances that appear in each
  result. The matchinfo function is much more efficient than the offsets 
  function. Furthermore, the matchinfo function provides extra information
  regarding the overall number of occurences of each query term in the entire
  document set (not just the current row) and the number of documents in which 
  each query term appears. This may be used (for example) to attach a higher
  weight to less common terms which may increase the overall computed relevancy 
  of those results the user considers more interesting.

[Code {
  <i>-- If the application supplies an SQLite user function called "rank" that</i>
  <i>-- interprets the blob of data returned by matchinfo and returns a numeric</i>
  <i>-- relevancy based on it, then the following SQL may be used to return the</i>
  <i>-- titles of the 10 most relevant documents in the dataset for a users query.</i>
  SELECT title FROM documents 
    WHERE documents MATCH &lt;query&gt;
    ORDER BY rank(matchinfo(document)) DESC
    OFFSET 0 LIMIT 10
}]

<p>
  The SQL query in the example above uses less CPU than the first example
  in this section, but still has a non-obvious performance problem. SQLite
  satisfies this query by retreiving the value of the "title" column and
  matchinfo data from the FTS3 module for every row matched by the users
  query before it sorts and limits the results. Because of the way SQLite's
................................................................................
  This means that SQLite will load only the docid and matchinfo data for each
  row matching the users query into memory, determine the docid values
  corresponding to the ten most relevant documents, then load only the title
  and content information for those 10 documents only. Because both the matchinfo
  and docid values are gleaned entirely from the full-text index, this results
  in dramatically less data being loaded from the database into memory.

[Code {
  SELECT title FROM documents JOIN ( 
      SELECT docid, rank(matchinfo(document)) AS rank 
      FROM documents
      WHERE documents MATCH &lt;query&gt;
      ORDER BY rank DESC 
      OFFSET 0 LIMIT 10
  ) AS ranktable USING(docid)
  ORDER BY ranktable.rank DESC
}]

<p>
  The next block of SQL enhances the query with solutions to two other problems
  that may arise in developing search applications using FTS3:

<ol>
  <li> <p>
       The \[snippet\] function cannot be used with the above query. Because
       the outer query does not include a "WHERE ... MATCH" clause, the snippet 
       function may not be used with it. One solution is to duplicate the WHERE
       clause used by the sub-query in the outer query. The overhead associated
       with this is usually negligible.
  <li> <p>
       The relevancy of a document may depend on something other than just
       the data available in the return value of matchinfo. For example
................................................................................
</ol>

<p>
  This version of the query is very similar to that used by the 
  <a href="http://www.sqlite.org/search?q=fts3">sqlite.org documentation search</a> 
  application.

[Code {
  <i>-- This table stores the static weight assigned to each document in FTS3 table</i>
  <i>-- "documents". For each row in the documents table there is a corresponding row</i>
  <i>-- with the same docid value in this table.</i>
  CREATE TABLE documents_data(docid INTEGER PRIMARY KEY, weight);

  <i>-- This query is similar to the one in the block above, except that:</i>
  <i>--</i>
................................................................................
      FROM documents JOIN documents_data USING(docid)
      WHERE documents MATCH &lt;query&gt;
      ORDER BY rank DESC 
      OFFSET 0 LIMIT 10
  ) AS ranktable USING(docid)
  WHERE documents MATCH &lt;query&gt;
  ORDER BY ranktable.rank DESC
}]

<p>
  All the example queries above return the ten most relevant query results.
  By modifying the values used with the OFFSET and LIMIT clauses, a query 
  to return (say) the next ten most relevant results is easy to construct. 
  This may be used to obtain the data required for a search applications second
  and subsequent pages of results.

<p>
  The next block contains an example rank function that uses matchinfo data 
  implemented in C. Instead of a single weight, it allows a weight to be 
  externally assigned to each column of each document. It may be registered
  with SQLite like any other user function using \[sqlite3_create_function()\].

[Code [string map {[ &#x5B; ] &#x5D;} {
<i>/*</i>
<i>** SQLite user defined function to use with matchinfo() to calculate the</i>
<i>** relevancy of an FTS3 match. The value returned is the relevancy score</i>
<i>** (a real value greater than or equal to zero). A larger value indicates </i>
<i>** a more relevant document.</i>
<i>**</i>
<i>** The overall relevancy returned is the sum of the relevancies of each </i>
................................................................................
<i>  /* Check that the number of arguments passed to this function is correct.</i>
<i>  ** If not, jump to wrong_number_args. Set aMatchinfo to point to the array</i>
<i>  ** of unsigned integer values returned by FTS3 function matchinfo. Set</i>
<i>  ** nPhrase to contain the number of reportable phrases in the users full-text</i>
<i>  ** query, and nCol to the number of columns in the table.</i>
<i>  */</i>
  if( nVal&lt;1 ) goto wrong_number_args;
  aMatchinfo = (unsigned int *)sqlite3_value_blob(apVal[0]);
  nPhrase = aMatchinfo[0];
  nCol = aMatchinfo[1];
  if( nVal!=(1+nCol) ) goto wrong_number_args;

<i>  /* Iterate through each phrase in the users query. */</i>
  for(iPhrase=0; iPhrase&lt;nPhrase; iPhrase++){
    int iCol;                     <i>/* Current column */</i>

<i>    /* Now iterate through each column in the users query. For each column,</i>
<i>    ** increment the relevancy score by:</i>
<i>    **</i>
<i>    **   (&lt;hit count&gt; / &lt;global hit count&gt) * &lt;column weight&gt;</i>
<i>    **</i>
<i>    ** aPhraseinfo[] points to the start of the data for phrase iPhrase. So</i>
<i>    ** the hit count and global hit counts for each column are found in </i>
<i>    ** aPhraseinfo[iCol*3] and aPhraseinfo[iCol*3+1], respectively.</i>
<i>    */</i>
    int *aPhraseinfo = &aMatchinfo[2 + iPhrase*nCol*3];
    for(iCol=0; iCol&lt;nCol; iCol++){
      int nHitCount = aPhraseinfo[3*iCol];
      int nGlobalHitCount = aPhraseinfo[3*iCol+1];
      double weight = sqlite3_value_double(apVal[iCol+1]);
      if( nHitCount>0 ){
        score += ((double)nHitCount / (double)nGlobalHitCount) * weight;
      }
    }
  }

  sqlite3_result_double(pCtx, score);
  return;

<i>  /* Jump here if the wrong number of arguments are passed to this function */</i>
wrong_number_args:
  sqlite3_result_error(pCtx, "wrong number of arguments to function rank()", -1);
}
}]]

}


<
<

|
<
|

>
>
|







 







|







 







|


<
>







|


<
>













|



|







 







|











|

<
>




|







|


|











<
>


|


|



<
>

|


|




|



|







 







|







 







|





|







 







|


<
>



|






|



|










|







 







|









|












|



|
|







 







|













|





|



|








|








|
|
|







 







|





|


|



|



|


|
|





|




|
|
|


|


<
>






|

<
>







|











|


|

<
>


|







|












|






|










|












|










|










|







|













|







 







|









>
|









|







 







|

>
|







 







|







 







|







|
|
|
|
|






|







 







<
>


|







 







|



|








|
|
|
|
|







|





|
|
|







 







|



|







 







|








|
|
|


|



|

|






|







 







<
>

|






|
|
|



|

|

|

|




|







 







|







 







|
|
|













|
|
|


|



|


|


|







|







 







|










|


<
>












|


|







 







|







 







|









|

<
>










|







 







|


<
>







 







|





|







 







<
>


<
<
<
>







 







|





|







 







|







 







|







 







|







 







|
|
|
|
|



|










|
|
|







|
|



|



|




|







 







|




>
>
|
>







|





>
>
|
>


|







 







>
>
|
>

>
>
|
>








>
|
>







 







|



|












<
>


|









|








|







 







|








|







|







 







|







 







|












|

|







 







|
|
|











|

|

|

|
|
|













<
<
<
<
>


1
2

3
4
5
6
7
8
9
10
11
12
13
14
..
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
..
32
33
34
35
36
37
38
39
40
41

42
43
44
45
46
47
48
49
50
51
52

53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
..
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

128
129
130
131
132
133
134
135
136
137

138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
...
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
...
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
...
203
204
205
206
207
208
209
210
211
212

213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
...
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
...
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
...
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403

404
405
406
407
408
409
410
411
412

413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437

438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
...
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
...
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
...
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
...
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
...
705
706
707
708
709
710
711

712
713
714
715
716
717
718
719
720
721
722
...
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
...
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
...
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
...
875
876
877
878
879
880
881

882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
...
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
...
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
....
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078

1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
....
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
....
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180

1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
....
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218

1219
1220
1221
1222
1223
1224
1225
1226
....
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
....
1286
1287
1288
1289
1290
1291
1292

1293
1294
1295



1296
1297
1298
1299
1300
1301
1302
1303
....
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
....
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
....
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
....
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
....
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
....
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
....
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
....
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614

1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
....
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
....
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
....
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
....
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826




1827



<tcl>hd_keywords *fts3 FTS3</tcl>

<title>SQLite FTS3 Extension</title>

<table_of_contents>

<h2 style="margin-left:1.0em" notoc> Overview</h2>

<p>
  FTS3 is an SQLite virtual table module that allows users to perform 
  full-text searches on a set of documents. The most common (and effective) 
  way to describe full-text searches is "what Google, Yahoo and Altavista do
  with documents placed on the World Wide Web". Users input a term, or series 
  of terms, perhaps connected by a binary operator or grouped together into a 
................................................................................
  specified. This document describes the deployment and usage of FTS3.

<p>
  Portions of the original FTS3 code were contributed to the SQLite project 
  by Scott Hess of <a href="http://www.google.com">Google</a>. It is now 
  developed and maintained as part of SQLite.

<h1>Introduction to FTS3</h1>

<p>
  The FTS3 extension module allows users to create special tables with a 
  built-in full-text index (hereafter "FTS3 tables"). The full-text index
  allows the user to efficiently query the database for all rows that contain
  one or more instances specified word (hereafter a "token", even if the table
  contains many large documents.
................................................................................

<p>
  For example, if each of the 517430 documents in the 
  "<a href="http://www.cs.cmu.edu/~enron/">Enron E-Mail Dataset</a>"
  is inserted into both the FTS3 table and the ordinary SQLite table
  created using the following SQL script:

<codeblock>
  CREATE VIRTUAL TABLE enrondata1 USING fts3(content TEXT);     /* FTS3 table */
  CREATE TABLE enrondata2(content TEXT);                        /* Ordinary table */

</codeblock>

<p>
  Then either of the two queries below may be executed to find the number of
  documents in the database that contain the word "linux" (351). Using one
  desktop PC hardware configuration, the query on the FTS3 table returns in
  approximately 0.03 seconds, versus 22.5 for querying the ordinary table.

<codeblock>
  SELECT count(*) FROM enrondata1 WHERE content MATCH 'linux';  /* 0.03 seconds */
  SELECT count(*) FROM enrondata2 WHERE content LIKE '%linux%'; /* 22.5 seconds */

</codeblock>

<p>
  Of course, the two queries above are not entirely equivalent. For example
  the LIKE query matches rows that contain terms such as "linuxophobe"
  or "EnterpriseLinux" (as it happens, the Enron E-Mail Dataset does not
  actually contain any such terms), whereas the MATCH query on the FTS3 table
  selects only those rows that contain "linux" as a discrete token. Both 
  searches are case-insensitive. The FTS3 table consumes around 2006 MB on
  disk compared to just 1453 MB for the ordinary table. Using the same
  hardware configuration used to perform the SELECT queries above, the FTS3
  table took just under 31 minutes to populate, versus 25 for the ordinary
  table.

<h2>Creating and Destroying FTS3 Tables</h2>

<p>
  Like other virtual table types, new FTS3 tables are created using a 
  [CREATE VIRTUAL TABLE] statement. The module name, which follows
  the USING keyword, is "fts3". The virtual table module arguments may
  be left empty, in which case an FTS3 table with a single user-defined 
  column named "content" is created. Alternatively, the module arguments
  may be passed a list of comma separated column names. 

<p>
  If column names are explicitly provided for the FTS3 table as part of
................................................................................
  the CREATE VIRTUAL TABLE statement, then a datatype name may be optionally 
  specified for each column. However, this is pure syntactic sugar, the
  supplied typenames are not used by FTS3 or the SQLite core for any
  purpose. The same applies to any constraints specified along with an
  FTS3 column name - they are parsed but not used or recorded by the system
  in any way.

<codeblock>
  <i>-- Create an FTS3 table named "data" with one column - "content":</i>
  CREATE VIRTUAL TABLE data USING fts3();

  <i>-- Create an FTS3 table named "pages" with three columns:</i>
  CREATE VIRTUAL TABLE pages USING fts3(title, keywords, body);

  <i>-- Create an FTS3 table named "mail" with two columns. Datatypes
  -- and column constraints are specified along with each column. These
  -- are completely ignored by FTS3 and SQLite. </i>
  CREATE VIRTUAL TABLE mail USING fts3(
    subject VARCHAR(256) NOT NULL,
    body TEXT CHECK(length(body)&lt;10240)
  );

</codeblock>

<p>
  As well as a list of columns, the module arguments passed to a CREATE
  VIRTUAL TABLE statement used to create an FTS3 table may be used to specify
  a [tokenizer]. This is done by specifying a string of the form
  "tokenize=&lt;tokenizer name&gt; &lt;tokenizer args&gt;" in place of a column
  name, where &lt;tokenizer name&gt; is the name of the tokenizer to use and
  &lt;tokenizer args&gt; is an optional list of whitespace separated qualifiers
  to pass to the tokenizer implementation. A tokenizer specification may be
  placed anywhere in the column list, but at most one tokenizer declaration is
  allowed for each CREATE VIRTUAL TABLE statement.  The second and subsequent
  tokenizer declaration are interpreted as column names. 
  [tokenizer|See below] for a detailed description of using (and, if
  necessary, implementing) a tokenizer.

<codeblock>
  <i>-- Create an FTS3 table named "papers" with two columns that uses</i>
  <i>-- the tokenizer "porter".</i>
  CREATE VIRTUAL TABLE papers USING fts3(author, document, tokenize=porter);

  <i>-- Create an FTS3 table with a single column - "content" - that uses</i>
  <i>-- the "simple" tokenizer.</i>
  CREATE VIRTUAL TABLE data USING fts3(tokenize=simple);

  <i>-- Create an FTS3 table with two columns that uses the "icu" tokenizer.</i>
  <i>-- The qualifier "en_AU" is passed to the tokenizer implementation</i>
  CREATE VIRTUAL TABLE names USING fts3(a, b, tokenize=icu en_AU);

</codeblock>

<p>
  FTS3 tables may be dropped from the database using an ordinary [DROP TABLE]
  statement. For example:

<codeblock>
  <i>-- Create, then immediately drop, an FTS3 table.</i>
  CREATE VIRTUAL TABLE data USING fts3();
  DROP TABLE data;

</codeblock>

<h2>Populating FTS3 Tables</h2>

  <p>
    FTS3 tables are populated using [INSERT], [UPDATE] and [DELETE]
    statements in the same way as ordinary SQLite tables are.

  <p>
    As well as the columns named by the user (or the "content" column if no
    module arguments where specified as part of the [CREATE VIRTUAL TABLE] 
    statement), each FTS3 table has a "rowid" column. The rowid of an FTS3
    table behaves in the same way as the rowid column of an ordinary SQLite 
    table, except that the values stored in the rowid column of an FTS3 table 
    remain unchanged if the database is rebuilt using the [VACUUM] command. 
    For FTS3 tables, "docid" is allowed as an alias along with the usual "rowid",
    "oid" and "_oid_" identifiers. Attempting to insert or update a row with a 
    docid value that already exists in the table is an error, just as it would 
    be with an ordinary SQLite table.

  <p>
    There is one other subtle difference between "docid" and the normal SQLite
................................................................................
    aliases for the rowid column. Normally, if an INSERT or UPDATE statement 
    assigns discreet values to two or more aliases of the rowid column, SQLite 
    writes the rightmost of such values specified in the INSERT or UPDATE
    statement to the database. However, assigning a non-NULL value to both
    the "docid" and one or more of the SQLite rowid aliases when inserting or
    updating an FTS3 table is considered an error. See below for an example.

<codeblock>
  <i>-- Create an FTS3 table</i>
  CREATE VIRTUAL TABLE pages USING fts3(title, body);

  <i>-- Insert a row with a specific docid value.</i>
  INSERT INTO pages(docid, title, body) VALUES(53, 'Home Page', 'SQLite is a software...');

  <i>-- Insert a row and allow FTS3 to assign a docid value using the same algorithm as</i>
................................................................................

  <i>-- Delete the entire table contents.</i>
  DELETE FROM pages;

  <i>-- The following is an error. It is not possible to assign non-NULL values to both</i>
  <i>-- the rowid and docid columns of an FTS3 table.</i>
  INSERT INTO pages(rowid, docid, title, body) VALUES(1, 2, 'A title', 'A document body');
</codeblock>

  <p>
    To support full-text queries, FTS3 maintains an inverted index that maps
    from each unique term or word that appears in the dataset to the locations
    in which it appears within the table contents. For the curious, a 
    complete description of the [segment btree|data structure] used to store
    this index within the database file is described below. A feature of
    this data structure is that at any time the database may contain not
    one index b-tree, but several different b-trees that are incrementally
    merged as rows are inserted, updated and deleted. This technique improves 
    performance when writing to an FTS3 table, but causes some overhead for
    full-text queries that use the index. Executing an SQL statement of the
    form "INSERT INTO &lt;fts3-table&gt;(&lt;fts3-table&gt;) VALUES('optimize')"
................................................................................
    b-tree containing the entire index. This can be an expensive operation,
    but may speed up future queries. 

  <p>
    For example, to optimize the full-text index for an FTS3 table named
    "docs":

<codeblock>
  <i>-- Optimize the internal structure of FTS3 table "docs".</i>
  INSERT INTO docs(docs) VALUES('optimize');

</codeblock>

  <p>
    The statement above may appear syntacticly incorrect to some. Refer to
    the section describing the [simple fts3 queries] for an explanation.

  <p>
    There is another, deprecated, method for invoking the optimize 
    operation using a SELECT statement. New code should use statements
    similar to the INSERT above to optimize FTS3 structures.

<h2 tags="simple fts3 queries">Simple FTS3 Queries</h2>

<p>
  As for all other SQLite tables, virtual or otherwise, data is retrieved
  from FTS3 tables using a [SELECT] statement.

<p>
  FTS3 tables can be queried efficiently using SELECT statements of two
  different forms:

<ul>
  <li><p>
    <b>Query by rowid</b>. If the WHERE clause of the SELECT statement
    contains a sub-clause of the form "rowid = ?", where ? is an SQL expression,
    FTS3 is able to retreive the requested row directly using the equivalent 
    of an SQLite [INTEGER PRIMARY KEY] index.

  <li><p>
    <b>Full-text query</b>. If the WHERE clause of the SELECT statement contains
    a sub-clause of the form "&lt;column&gt; MATCH ?", FTS3 is able to use 
    the built-in full-text index to restrict the search to those documents 
    that match the full-text query string specified as the right-hand operand
    of the MATCH clause.
................................................................................
<p>
  If neither of the two query strategies enumerated above can be used, all
  queries on FTS3 tables are implemented using a linear scan of the entire
  table. If the table contains large amounts of data, this may be an 
  impractically approach (the first example on this page shows that a linear
  scan of 1.5 GB of data takes around 30 seconds using a modern PC).

<codeblock>
  <i>-- The examples in this block assume the following FTS3 table:</i>
  CREATE VIRTUAL TABLE mail USING fts3(subject, body);

  SELECT * FROM mail WHERE rowid = 15;                <i>-- Fast. Rowid lookup.</i>
  SELECT * FROM mail WHERE body MATCH 'sqlite';       <i>-- Fast. Full-text query.</i>
  SELECT * FROM mail WHERE mail MATCH 'search';       <i>-- Fast. Full-text query.</i>
  SELECT * FROM mail WHERE rowid BETWEEN 15 AND 20;   <i>-- Slow. Linear scan.</i>
  SELECT * FROM mail WHERE subject = 'database';      <i>-- Slow. Linear scan.</i>
  SELECT * FROM mail WHERE subject MATCH 'database';  <i>-- Fast. Full-text query.</i>
</codeblock>

<p>
  In all of the full-text queries above, the right-hand operand of the MATCH
  operator is a string consisting of a single term. In this case, the MATCH
  expression evaluates to true for all documents that contain one or more 
  instances of the specified word ("sqlite", "search" or "database", depending 
  on which example you look at). Specifying a single term as the right-hand
  operand of the MATCH operator results in the simplest (and most common) type 
  of full-text query possible. However more complicated queries are possible,
  including phrase searches, term-prefix searches and searches for documents 
  containing combinations of terms occuring within a defined proximity of each
  other. The various ways in which the full-text index may be queried are
  [FTS3 MATCH|described below].

<p>
  Normally, full-text queries are case-insensitive. However, this is
  is dependent on the specific [tokenizer] used by the FTS3 table
  being queried. Refer to the section on [tokenizer|tokenizers] for details.

<p>
  The paragraph above notes that a MATCH operator with a simple term as the
  right-hand operand evaluates to true for all documents that contain the
  specified term. In this context, the "document" may refer to either the 
  data stored in a single column of a row of an FTS3 table, or to the contents
  of all columns in a single row, depending on the identifier used as the
................................................................................
  the left-hand operand of the MATCH operator is an FTS3 table column name,
  then the document that the search term must be contained in is the value
  stored in the specified column. However, if the identifier is the name
  of the FTS3 <i>table</i> itself, then the MATCH operator evaluates to true
  for each row of the FTS3 table for which any column contains the search 
  term. The following example demonstrates this:

<codeblock>
  <i>-- Example schema</i>
  CREATE VIRTUAL TABLE mail USING fts3(subject, body);

  <i>-- Example table population</i>
  INSERT INTO mail(docid, subject, body) VALUES(1, 'software feedback', 'found it too slow');
  INSERT INTO mail(docid, subject, body) VALUES(2, 'software feedback', 'no feedback');
  INSERT INTO mail(docid, subject, body) VALUES(3, 'slow lunch order',  'was a software problem');

  <i>-- Example queries</i>
  SELECT * FROM mail WHERE subject MATCH 'software';    <i>-- Selects rows 1 and 2</i>
  SELECT * FROM mail WHERE body    MATCH 'feedback';    <i>-- Selects row 2</i>
  SELECT * FROM mail WHERE mail    MATCH 'software';    <i>-- Selects rows 1, 2 and 3</i>
  SELECT * FROM mail WHERE mail    MATCH 'slow';        <i>-- Selects rows 1 and 3</i>
</codeblock>
  
<p>
  At first glance, the final two full-text queries in the example above seem
  to be syntacticly incorrect, as there is a table name ("mail") used as
  an SQL expression. The reason this is acceptable is that each FTS3 table
  actually has a [sqlite3_declare_vtab|HIDDEN] column with the same name
  as the table itself (in this case, "mail"). The value stored in this
  column is not meaningful to the application, but can be used as the 
  left-hand operand to a MATCH operator. This special column may also be
  passed as an argument to the [snippet()|FTS3 auxillary functions].

<p>
  The following example illustrates the above. The expressions "docs", 
  "docs.docs" and "main.docs.docs" all refer to column "docs". However, the 
  expression "main.docs" does not refer to any column. It could be used to 
  refer to a table, but a table name is not allowed in the context in which
  it is used below.

<codeblock>
  <i>-- Example schema</i>
  CREATE VIRTUAL TABLE docs USING fts3(content);

  <i>-- Example queries</i>
  SELECT * FROM docs WHERE docs MATCH 'sqlite';              <i>-- OK.</i>
  SELECT * FROM docs WHERE docs.docs MATCH 'sqlite';         <i>-- OK.</i>
  SELECT * FROM docs WHERE main.docs.docs MATCH 'sqlite';    <i>-- OK.</i>
  SELECT * FROM docs WHERE main.docs MATCH 'sqlite';         <i>-- Error.</i>
</codeblock>
 
<h2>Summary</h2>

<p>
  From the users point of view, FTS3 tables are similar to ordinary SQLite
  tables in many ways. Data may be added to, modified within and removed 
  from FTS3 tables using the INSERT, UPDATE and DELETE commands just as 
  it may be with ordinary tables. Similarly, the SELECT command may be used 
  to query data. The following list summarizes the differences between FTS3
................................................................................
    triggers attached to FTS3 tables. Nor is it possible to use the ALTER TABLE
    command to add extra columns to FTS3 tables (although it is possible to use
    ALTER TABLE to rename an FTS3 table).

  <li><p> 
    Data-types specified as part of the "CREATE VIRTUAL TABLE" statement
    used to create an FTS3 table are ignored completely. Instead of the
    normal rules for applying type [affinity] to inserted values, all
    values inserted into FTS3 table columns (except the special rowid
    column) are converted to type TEXT before being stored.

  <li><p> 
    FTS3 tables permit the special alias "docid" to be used to refer to the
    rowid column supported by all [virtual tables].

  <li><p> 
    The [FTS3 MATCH] operator is supported for queries based on the built-in
    full-text index. 

  <li><p> 
    The FTS3 auxillary functions, [snippet|snippet() and offsets()], are 
    available to support full-text queries.

  <li><p> 
    Each FTS3 table has a [sqlite3_declare_vtab()|HIDDEN column] with the 
    same name as the table itself. The value contained in each row for the
    special column is only useful when used on the left-hand side of a 
    [FTS3 MATCH|MATCH] operator, or when specified as an argument to one 
    of the [snippet|FTS3 auxillary functions].
    

</ol>


<h1 tags="compile fts3">Compiling and Enabling FTS3</h1>

<p>
  Although FTS3 is distributed as part of the SQLite source code, it is not
  enabled by default. To build SQLite with FTS3 functionality enabled, define
  the preprocessor macro [SQLITE_ENABLE_FTS3] when compiling. New applications
  should also define the [SQLITE_ENABLE_FTS3_PARENTHESIS] macro to enable the
  [enhanced query syntax] (see below). Usually, this is done by adding the 
  following two switches to the compiler command line:

<codeblock>
  -DSQLITE_ENABLE_FTS3
  -DSQLITE_ENABLE_FTS3_PARENTHESIS

</codeblock>

<p>
  If using the amalgamation autoconf based build system, setting the CPPFLAGS
  environment variable while running the 'configure' script is an easy
  way to set these macros. For example, the following command:

<codeblock>
  CPPFLAGS="-DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS3_PARENTHESIS" ./configure &lt;configure options&gt;

</codeblock>

<p>
  where <i>&lt;configure options&gt;</i> are those options normally passed to
  the configure script, if any.

<p>
  Because FTS3 is a virtual table, it is incompatible with the
  [SQLITE_OMIT_VIRTUALTABLE] option.

<p>
  If an SQLite build does not include FTS3, then any attempt to prepare an
  SQL statement to create an FTS3 table or to drop or access an existing 
  FTS3 table in any way will fail. The error message returned will be similar 
  to "no such module: fts3".

<p>
  If the C version of the <a href=http://site.icu-project.org/>ICU library</a>
  is available, then FTS3 may also be compiled with the SQLITE_ENABLE_ICU
  pre-processor macro defined. Compiling with this macro enables an FTS3
  [tokenizer] that uses the ICU library to split a document into terms
  (words) using the conventions for a specified language and locale.

<codeblock>
  -DSQLITE_ENABLE_ICU

</codeblock>
  

<h1 tags="FTS3 MATCH">Full-text Index Queries</h1>

<p>
  The most useful thing about FTS3 tables is the queries that may be 
  performed using the built-in full-text index. Full-text queries are 
  performed by specifying a clause of the form 
  "&lt;column&gt; MATCH &lt;full-text query expression&gt;" to the WHERE 
  clause of a SELECT statement that reads data from an FTS3 table. 
  [simple fts3 queries|Simple FTS3 queries] that return all documents that 
  contain a given term are described above. In that discussion the right-hand
  operand of the MATCH operator was assumed to be a string consisting of a
  single term. This section describes the more complex query types supported 
  by FTS3 tables, and how they may be utilized by specifying a more
  complex query expression as the right-hand operand of a MATCH operator.

<p>
  FTS3 tables support three basic query types:

<ul>
  <li><p><b>Token or token prefix queries</b>. 
    An FTS3 table may be queried for all documents that contain a specified
    term (the [simple fts3 queries|simple case] described above), or for
    all documents that contain a term with a specified prefix. As we have
    seen, the query expression for a specific term is simply the term itself.
    The query expression used to search for a term prefix is the prefix
    itself with a '*' character appended to it. For example:
</ul>

<codeblock>
  <i>-- Virtual table declaration</i>
  CREATE VIRTUAL TABLE docs USING fts3(title, body);

  <i>-- Query for all documents containing the term "linux":</i>
  SELECT * FROM docs WHERE docs MATCH 'linux';

  <i>-- Query for all documents containing a term with the prefix "lin". This will match</i>
  <i>-- all documents that contain "linux", but also those that contain terms "linear",</i>
  <i>--"linker", "linguistic" and so on.</i>
  SELECT * FROM docs WHERE docs MATCH 'lin*';
</codeblock>

<ul>
  <li style="list-style:none"><p>
    Normally, a token or token prefix query is matched against the FTS3 table 
    column specified as the right-hand side of the MATCH operator. Or, if the
    special column with the same name as the FTS3 table itself is specified,
    against all columns. This may be overridden by specifying a column-name
    followed by a ":" character before a basic term query. There may be space
    between the ":" and the term to query for, but not between the column-name
    and the ":" character. For example:
</ul>
   
<codeblock>
  <i>-- Query the database for documents for which the term "linux" appears in</i>
  <i>-- the document title, and the term "problems" appears in either the title</i>
  <i>-- or body of the document.</i>
  SELECT * FROM docs WHERE docs MATCH 'title:linux problems';

  <i>-- Query the database for documents for which the term "linux" appears in</i>
  <i>-- the document title, and the term "driver" appears in the body of the document</i>
  <i>-- ("driver" may also appear in the title, but this alone will not satisfy the</i>.
  <i>-- query criteria).</i>
  SELECT * FROM docs WHERE body MATCH 'title:linux driver';
</codeblock>

<ul>
  <li><p><b>Phrase queries</b>.
    A phrase query is a query that retrieves all documents that contain a
    nominated set of terms or term prefixes in a specified order with no
    intervening tokens. Phrase queries are specified by enclosing a space
    separated sequence of terms or term prefixes in double quotes (").
    For example:
</ul>

<codeblock>
  <i>-- Query for all documents that contain the phrase "linux applications".</i>
  SELECT * FROM docs WHERE docs MATCH '"linux applications"';

  <i>-- Query for all documents that contain a phrase that matches "lin* app*". As well as</i>
  <i>-- "linux applications", this will match common phrases such as "linoleum appliances"</i>
  <i>-- or "link apprentice".</i>
  SELECT * FROM docs WHERE docs MATCH '"lin* app*"';
</codeblock>

<ul>
  <li><p><b>NEAR queries</b>. 
    A NEAR query is a query that returns documents that contain a two or
    more nominated terms or phrases within a specified proximity of each 
    other (by default with 10 or less intervening terms). A NEAR query is 
    specified by putting the keyword "NEAR" between two phrase, term or 
    term prefix queries. To specify a proximity other than the default,
    an operator of the form "NEAR/<i>&lt;N&gt;</i>" may be used, where
    <i>&lt;N&gt;</i> is the maximum number of intervening terms allowed.
    For example:
</ul>

<codeblock>
  <i>-- Virtual table declaration.</i>
  CREATE VIRTUAL TABLE docs USING fts3();

  <i>-- Virtual table data.</i>
  INSERT INTO docs VALUES('SQLite is an ACID compliant embedded relational database management system');

  <i>-- Search for a document that contains the terms "sqlite" and "database" with</i>
................................................................................
  <i>-- document stored in table docs.</i>
  SELECT * FROM docs WHERE docs MATCH 'database NEAR/2 "ACID compliant"';

  <i>-- Search for a document that contains the phrase "ACID compliant" and the term</i>
  <i>-- "sqlite" with not more than 2 terms separating the two. This also matches</i>
  <i>-- the only document stored in table docs.</i>
  SELECT * FROM docs WHERE docs MATCH '"ACID compliant" NEAR/2 sqlite';
</codeblock>

<ul>
  <li style="list-style: none"><p>
    More than one NEAR operator may appear in a single query. In this case each
    pair of terms or phrases separated by a NEAR operator must appear within the
    specified proximity of each other in the document. Using the same table and
    data as in the block of examples above:
</ul>

<codeblock> 
 <i>-- The following query selects documents that contains an instance of the term </i>
  <i>-- "sqlite" separated by two or fewer terms from an instance of the term "acid",</i>
  <i>-- which is in turn separated by two or fewer terms from an instance of the term</i>
  <i>-- "relational". As it happens, the only document in table docs satisfies this criteria.</i>
  SELECT * FROM docs WHERE docs MATCH 'sqlite NEAR/2 acid NEAR/2 relational';

  <i>-- This query matches no documents. There is an instance of the term "sqlite" with</i>
  <i>-- sufficient proximity to an instance of "acid" but it is not sufficiently close</i>
  <i>-- to an instance of the term "relational".</i>
  SELECT * FROM docs WHERE docs MATCH 'acid NEAR/2 sqlite NEAR/2 relational';
</codeblock>

<p>
  Phrase and NEAR queries may not span multiple columns within a row.

<p>
  The three basic query types described above may be used to query the full-text
  index for the set of documents that match the specified criteria. Using the
................................................................................
<p>
  The FTS3 module may be compiled to use one of two slightly different versions
  of the full-text query syntax, the "standard" query syntax and the "enhanced" 
  query syntax. The basic term, term-prefix, phrase and NEAR queries described 
  above are the same in both versions of the syntax. The way in which set 
  operations are specified is slightly different. The following two sub-sections 
  describe the part of the two query syntaxes that pertains to set operations. 
  Refer to the description of how to [compile fts3] for compilation notes.

<h2 tags="enhanced query syntax">
  Set Operations Using The Enhanced Query Syntax</h2>

<p>
  The enhanced query syntax supports the AND, OR and NOT binary set operators.
  Each of the two operands to an operator may be a basic FTS3 query, or the
  result of another AND, OR or NOT set operation. Operators must be entered
  using capital letters. Otherwise, they are interpreted as basic term queries
  instead of set operators.
................................................................................
<p>
  The AND operator may be implicitly specified. If two basic queries appear 
  with no operator separating them in an FTS3 query string, the results are
  the same as if the two basic queries were separated by an AND operator.
  For example, the query expression "implicit operator" is a more succinct
  version of "implicit AND operator".

<codeblock>
  <i>-- Virtual table declaration</i>
  CREATE VIRTUAL TABLE docs USING fts3();

  <i>-- Virtual table data</i>
  INSERT INTO docs(docid, content) VALUES(1, 'a database is a software system');
  INSERT INTO docs(docid, content) VALUES(2, 'sqlite is a software system');
  INSERT INTO docs(docid, content) VALUES(3, 'sqlite is a database');
................................................................................

  <i>-- The following query matches no documents. Because "and" is in lowercase letters,</i>
  <i>-- it is interpreted as a basic term query instead of an operator. Operators must</i>
  <i>-- be specified using capital letters. In practice, this query will match any documents</i>
  <i>-- that contain each of the three terms "database", "and" and "sqlite" at least once.</i>
  <i>-- No documents in the example data above match this criteria.</i>
  SELECT * FROM docs WHERE docs MATCH 'database and sqlite';
</codeblock>

<p>
  The examples above all use basic full-text term queries as both operands of 
  the set operations demonstrated. Phrase and NEAR queries may also be used,
  as may the results of other set operations. When more than one set operation
  is present in an FTS3 query, the precedence of operators is as follows:

<table striped=1>
  <tr><th>Operator<th>Enhanced Query Syntax Precedence
  <tr><td>NOT <td> Highest precedence (tightest grouping).
  <tr><td>AND <td>
  <tr><td>OR  <td> Lowest precedence (loosest grouping).
</table>

<p>
  When using the enhanced query syntax, parenthesis may be used to override
  the default precedence of the various operators. For example:

<codeblock>

  <i>-- Return the docid values associated with all documents that contain the</i>
  <i>-- two terms "sqlite" and "database", and/or contain the term "library".</i>
  SELECT docid FROM docs WHERE docs MATCH 'sqlite AND database OR library';

  <i>-- This query is equivalent to the above.</i>
  SELECT docid FROM docs WHERE docs MATCH 'sqlite AND database'
................................................................................
  SELECT docid FROM docs WHERE docs MATCH 'linux'
    INTERSECT
  SELECT docid FROM (
    SELECT docid FROM docs WHERE docs MATCH '"sqlite library"'
      UNION
    SELECT docid FROM docs WHERE docs MATCH '"sqlite database"'
  );

</codeblock>


<h2>Set Operations Using The Standard Query Syntax</h2>

<p>
  FTS3 query set operations using the standard query syntax are similar, but
  not identical, to set operations with the enhanced query syntax. There
  are four differences, as follows:

<ol>
................................................................................
    may be applied to basic term and term-prefix queries (but not to phrase
    or NEAR queries). A term or term-prefix that has a unary "-" operator
    attached to it may not appear as an operand to an OR operator. An FTS3
    query may not consist entirely of terms or term-prefix queries with unary
    "-" operators attached to them.
</ol>

<codeblock>
  <i>-- Search for the set of documents that contain the term "sqlite" but do</i>
  <i>-- not contain the term "database".</i>
  SELECT * FROM docs WHERE docs MATCH 'sqlite -database';
</codeblock>

<ol>
  <li value=4><p> The relative precedence of the set operations is different. 
   In particular, using the standard query syntax the "OR" operator has a
   higher precedence than "AND". The precedence of operators when using the
   standard query syntax is: 
</ol>

<table striped=1>
  <tr><th>Operator<th>Standard Query Syntax Precedence
  <tr><td>Unary "-" <td> Highest precedence (tightest grouping).
  <tr><td>OR  <td>
  <tr><td>AND <td> Lowest precedence (loosest grouping).
</table>

<ol><li style="list-style:none">
  The following example illustrates precedence of operators using the standard 
  query syntax:
</ol>

<codeblock>
  <i>-- Search for documents that contains at least one of the terms "database"</i>
  <i>-- and "sqlite", and also contains the term "library". Because of the differences</i>
  <i>-- in operator precedences, this query would have a different interpretation using</i>
  <i>-- the enhanced query syntax.</i>
  SELECT * FROM docs WHERE docs MATCH 'sqlite OR database library';
</codeblock>

<h1 tags="snippet, offsets">Auxillary functions - Snippet, Offsets and Matchinfo</h1>

<p>
  The FTS3 module provides three special SQL scalar functions that may be useful
  to the developers of full-text query systems: "snippet", "offsets" and
  "matchinfo". The purpose of the "snippet" and "offsets" functions is to allow
  the user to identify the location of queried terms in the returned documents.
  The "matchinfo" function provides the user with metrics that may be useful
................................................................................

<p>
  The first argument to all three special SQL scalar functions
  must be the the special hidden column of an FTS3 table that has the same
  name as the table (see above). For example, given an FTS3 table named 
  "mail":

<codeblock>
  SELECT offsets(mail) FROM mail WHERE mail MATCH &lt;full-text query expression&gt;;
  SELECT snippet(mail) FROM mail WHERE mail MATCH &lt;full-text query expression&gt;;
  SELECT matchinfo(mail) FROM mail WHERE mail MATCH &lt;full-text query expression&gt;;
</codeblock>

<p>
  The three auxillary functions are only useful within a SELECT statement that
  uses the FTS3 table's full-text index. If used within a SELECT that uses
  the "query by rowid" or "linear scan" strategies, then the snippet and
  offsets both return an an empty string, and the matchinfo function returns
  a blob value zero bytes in size.
................................................................................

  <li> If the matchable phrase in the FTS3 query is restricted to matching
       data in a specified FTS3 table column, then only phrase matches that 
       occur within that column are considered.
</ol>
 

<h2>The Offsets Function</h2>

<p>
  For a SELECT query that uses the full-text index, the offsets() function 
  returns a text value containing a series of space-separated integers. For
  each term in each <a href=#matchable>phrase match</a> of the current row, 
  there are four integers in the returned list. Each set of four integers is 
  interpreted as follows:

<table striped=1>
  <tr><th>Integer <th>Interpretation
  <tr><td>0 
      <td>The column number that the term instance occurs in (0 for the
          leftmost column of the FTS3 table, 1 for the next leftmost, etc.).
  <tr><td>1
      <td>The term number of the matching term within the full-text query
          expression. Terms within a query expression are numbered starting
          from 0 in the order that they occur.
  <tr><td>2
      <td>The byte offset of the matching term within the column.
  <tr><td>3
      <td>The size of the matching term in bytes.
</table>

<p>
  The following block contains examples that use the offsets function.

<codeblock>
  CREATE VIRTUAL TABLE mail USING fts3(subject, body);
  INSERT INTO mail VALUES('hello world', 'This message is a hello world message.');
  INSERT INTO mail VALUES('urgent: serious', 'This mail is seen as a more serious mail');

  <i>-- The following query returns a single row (as it matches only the first</i>
  <i>-- entry in table "mail". The text returned by the offsets function is</i>
  <i>-- "0 0 6 5 1 0 24 5".</i>
................................................................................
  SELECT offsets(mail) FROM mail WHERE mail MATCH 'message';

  <i>-- The following query matches the second row in table "mail". It returns the</i>
  <i>-- text "1 0 28 7 1 1 36 4". Only those occurences of terms "serious" and "mail"</i>
  <i>-- that are part of an instance of the phrase "serious mail" are identified; the</i>
  <i>-- other occurences of "serious" and "mail" are ignored.</i>
  SELECT offsets(mail) FROM mail WHERE mail MATCH '"serious mail"';

</codeblock>

<h2>The Snippet Function</h2>

<p>
  The snippet function is used to create formatted fragments of document text
  for display as part of a full-text query results report. The snippet function 
  may be passed between one and four arguments, as follows:

<table striped=1>
  <tr><th>Argument <th>Default Value <th>Description
  <tr><td>0 <td>N/A
      <td> The first argument to the snippet function must always be the special
           hidden column of the FTS3 table that takes the same name as the table
           itself.
  <tr><td>1 <td>"&lt;b&gt;"
      <td> The "start match" text.
  <tr><td>2 <td>"&lt;b&gt;"
      <td> The "end match" text.
  <tr><td>3 <td>"&lt;b&gt;...&lt;/b&gt;"
      <td> The "ellipses" text.
  <tr><td>4 <td>-1
      <td> The FTS3 table column number to extract the returned fragments of
           text from. Columns are numbered from left to right starting with
           zero. A negative value indicates that the text may be extracted
           from any column.
  <tr><td>5 <td>-15
      <td> The absolute value of this integer argument is used as the 
           (approximate) number of tokens to include in the returned text 
           value. The maximum allowable absolute value is 64. The value of
           this argument is refered to as <i>N</i> in the discussion below.
</table>

<p>
................................................................................
<p>
  After the <i>M</i> fragments have been located, where <i>M</i> is between
  two and four as described in the paragraphs above, they are joined together
  in sorted order with the "ellipses" text separating them. The three 
  modifications enumerated earlier are performed on the text before it is 
  returned.

<codeblock>
  <b>Note: In this block of examples, newlines and whitespace characters have
  been inserted into the document inserted into the FTS3 table, and the expected
  results described in SQL comments. This is done to enhance readability only,
  they would not be present in actual SQLite commands or output.</b>

  <i>-- Create and populate an FTS3 table.</i>
  CREATE VIRTUAL TABLE text USING fts3();
................................................................................

  <i>-- The following query returns the text value:</i>
  <i>--</i>
  <i>--   "...the upper portion, &#91;minimum&#93; &#91;temperature&#93; 14-16oC and cool elsewhere,</i>
  <i>--    &#91;minimum&#93; &#91;temperature&#93; 17-20oC. Cold..."</i>
  <i>--</i>
  SELECT snippet(text, '&#91; '&#93;', '...') FROM text WHERE text MATCH '"min* tem*"'
</codeblock>

<h2 id=matchinfo tags=matchinfo>The Matchinfo Function</h2>

<p>
  The matchinfo function returns a blob value. If used within a query that
  uses the full-text index (not a "query by rowid" or "linear scan"), then
  the blob consists of (2 + <i>C</i> * <i>P</i> * 3) 32-bit unsigned 
  integers in machine byte-order, where <i>C</i> is the number of columns 
  in the FTS3 table being queried, and <i>P</i> is the number of 
  <a href=#matchable>matchable phrases</a> in the query. 

  <p>
    Phrases and columns are both numbered from left to right starting from 
    zero.

<table striped=1>
  <tr><th>Array Element <th>Interpretation
  <tr><td>0 <td> 
    Number of matchable phrases in the query expression (value <i>P</i> in 
    the formula below).
  <tr><td>1 <td>
    Number of columns in the FTS3 table being queried (value <i>C</i> in 
    the formula below).

  <tr><td style="white-space:nowrap">2 + 3 * (<i>c</i> + <i>C</i>*<i>p</i>) + 0<td>
    Number of phrase matches for matchable phrase <i>p</i> in column 
    <i>c</i> of the current FTS3 table row.
  <tr><td style="white-space:nowrap">2 + 3 * (<i>c</i> + <i>C</i>*<i>p</i>) + 1<td>
    Sum of the number of phrase matches for matchable phrase <i>p</i> in 
    column <i>c</i> for all rows of the FTS3 table.
  <tr><td style="white-space:nowrap">2 + 3 * (<i>c</i> + <i>C</i>*<i>p</i>) + 2<td>
    Number of rows of the FTS3 table for which column <i>c</i> contains at 
    least one phrase match for matchable phrase <i>p</i>.
</table>

<p>
  For example:

<codeblock>
  <i>-- Create and populate an FTS3 table with two columns:</i>
  CREATE VIRTUAL TABLE t1 USING fts3(a, b);
  INSERT INTO t1 VALUES('transaction default models default', 'Non transaction reads');
  INSERT INTO t1 VALUES('the default transaction', 'these semantics present');
  INSERT INTO t1 VALUES('single request', 'default data');

  <i>-- The following query returns a single row consisting of a single blob</i>
................................................................................
  <i>-- 0 of any table row. The 3 hits are spread across 2 different rows.</i>
  <i>--</i>
  <i>-- The next set of three integers (0 1 1) pertain to the hits for "default"</i>
  <i>-- in column 1 of the table (0 in this row, 1 in all rows, spread across </i>
  <i>-- 1 rows).</i>
  <i>--</i>
  SELECT matchinfo(t1) FROM t1 WHERE t1 MATCH 'default transaction "these semantics"';
</codeblock>

<p>
  The matchinfo function is much faster than either the snippet or offsets
  functions. This is because the implementation of both snippet and offsets
  is required to retrieve the documents being analyzed from disk, whereas
  all data required by matchinfo is available as part of the same portions
  of the full-text index that are required to implement the full-text query
  itself. This means that of the following two queries, the first may be
  an order of magnitude faster than the second:

<codeblock>
  SELECT docid, matchinfo(tbl) FROM tbl WHERE tbl MATCH &lt;query expression&gt;;
  SELECT docid, offsets(tbl) FROM tbl WHERE tbl MATCH &lt;query expression&gt;;

</codeblock>

<p>
  The matchinfo function provides much of the information required to calculate
  probabalistic "bag-of-words" relevancy scores such as 
  <a href=http://en.wikipedia.org/wiki/Okapi_BM25>Okapi BM25/BM25F</a> that may
  be used to order results in a full-text search application. Also often
  used in such functions is the length or relative length of each document
  or document field. Unfortunately, this information is not made available
  by the matchinfo function as it would require loading extra data from the
  database, potentially slowing matchinfo() down by an order of magnitude.
  One solution is for the application to store the lengths of each document
  or document field in a separate table for use in calculating relevancy
  scores. Appendix A of this document, "[search application tips]", contains
  an example of using the matchinfo() function efficiently.

<h1 id=tokenizer tags="tokenizer">Tokenizers</h1>

<p>
  An FTS3 tokenizer is a set of rules for extracting terms from a document 
  or basic FTS3 full-text query. 

<p>
  Unless a specific tokenizer is specified as part of the CREATE 
................................................................................
  against similar English language terms. For more information on the 
  Porter Stemmer algorithm, please refer to the page linked above.

<p>
  Example illustrating the difference between the "simple" and "porter"
  tokenizers:

<codeblock>
  <i>-- Create a table using the simple tokenizer. Insert a document into it.</i>
  CREATE VIRTUAL TABLE simple USING fts3(tokenize=simple);
  INSERT INTO simple VALUES('Right now they''re very frustrated');

  <i>-- The first of the following two queries matches the document stored in</i>
  <i>-- table "simple". The second does not.</i>
  SELECT * FROM simple WHERE simple MATCH 'Frustrated');
................................................................................
  <i>-- Create a table using the porter tokenizer. Insert the same document into it</i>
  CREATE VIRTUAL TABLE porter USING fts3(tokenize=porter);
  INSERT INTO porter VALUES('Right now they''re very frustrated');

  <i>-- Both of the following queries match the document stored in table "porter".</i>
  SELECT * FROM porter WHERE porter MATCH 'Frustrated');
  SELECT * FROM porter WHERE porter MATCH 'Frustration');
</codeblock>

<p>
  If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
  symbol defined, then there exists a built-in tokenizer named "icu"
  implemented using the ICU library. The first argument passed to the
  xCreate() method (see fts3_tokenizer.h) of this tokenizer may be
  an ICU locale identifier. For example "tr_TR" for Turkish as used
  in Turkey, or "en_AU" for English as used in Australia. For example:

<codeblock>
    CREATE VIRTUAL TABLE thai_text USING fts3(text, tokenize=icu th_TH)

</codeblock>

<p>
  The ICU tokenizer implementation is very simple. It splits the input
  text according to the ICU rules for finding word boundaries and discards
  any tokens that consist entirely of white-space. This may be suitable
  for some applications in some locales, but not all. If more complex
  processing is required, for example to implement stemming or
  discard punctuation, this can be done by creating a tokenizer
  implementation that uses the ICU tokenizer as part of its implementation.

<h2>Custom (User Implemented) Tokenizers</h2>

<p>
  As well as the built-in "simple", "porter" and (possibly) "icu" tokenizers,
  FTS3 exports an interface that allows users to implement custom tokenizers
  using C. The interface used to create a new tokenizer is defined and 
  described in the fts3_tokenizer.h source file.

................................................................................
  FTS3 does not expose a C-function that users call to register new
  tokenizer types with a database handle. Instead, the pointer must
  be encoded as an SQL blob value and passed to FTS3 through the SQL
  engine by evaluating a special scalar function, "fts3_tokenizer()".
  The fts3_tokenizer() function may be called with one or two arguments,
  as follows:

<codeblock>
    SELECT fts3_tokenizer(&lt;tokenizer-name&gt;);
    SELECT fts3_tokenizer(&lt;tokenizer-name&gt;, &lt;sqlite3_tokenizer_module ptr&gt;);

</codeblock>

<p>
  Where <tokenizer-name> is a string identifying the tokenizer and
  <sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
  structure encoded as an SQL blob. If the second argument is present,
  it is registered as tokenizer <tokenizer-name> and a copy of it
  returned. If only one argument is passed, a pointer to the tokenizer
................................................................................
  encoded as a blob. Or, if no such tokenizer exists, an SQL exception
  (error) is raised.

<p>
  <b>SECURITY WARNING</b>: If the fts3 extension is used in an environment
  where potentially malicious users may execute arbitrary SQL, they should 
  be prevented from invoking the fts3_tokenizer() function, possibly using 
  the [sqlite3_set_authorizer()|authorisation callback].

<p>
  The following block contains an example of calling the fts3_tokenizer()
  function from C code:

<codeblock>
  <i>/*
  ** Register a tokenizer implementation with FTS3.
  */</i>
  int registerTokenizer(
    sqlite3 *db,
    char *zName,
    const sqlite3_tokenizer_module *p
................................................................................
      if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
        memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
      }
    }

    return sqlite3_finalize(pStmt);
  }

</codeblock>

  



<h1 tags="segment btree">Data Structures</h1>

<p>
  This section describes at a high-level the way the FTS3 module stores its
  index and content in the database. It is <b>not necessary to read or 
  understand the material in this section in order to use FTS3</b> in an 
  application. However, it may be useful to application developers attempting 
  to analyze and understand FTS3 performance characteristics, or to developers 
................................................................................
  named "docid". Following this is one column for each column of the FTS3
  virtual table as declared by the user, named by prepending the column name
  supplied by the user with "c<i>N</i>", where <i>N</i> is the index of the 
  column within the table, numbered from left to right starting with 1. Data
  types supplied as part of the virtual table declaration are not used as
  part of the %_content table declaration. For example:

<codeblock>
  <i>-- Virtual table declaration</i>
  CREATE VIRTUAL TABLE abc USING FTS3(a NUMBER, b TEXT, c);

  <i>-- Corresponding %_content table declaration</i>
  CREATE TABLE abc_content(docid INTEGER PRIMARY KEY, c0a, c1b, c2c);
</codeblock>

<p>
  The %_content table contains the unadulterated data inserted by the user 
  into the FTS3 virtual table by the user. If the user does not explicitly
  supply a "docid" value when inserting records, one is selected automatically
  by the system.

................................................................................
  %_content table that contain one or more occurrences of the term. To
  retrieve all documents that contain a specified term, the FTS3 module
  queries this index to determine the set of docid values for records that
  contain the term, then retrieves the required documents from the %_content
  table. Regardless of the schema of the FTS3 virtual table, the %_segments
  and %_segdir tables are always created as follows:

<codeblock>
  CREATE TABLE %_segments(
    blockid INTEGER PRIMARY KEY,       <i>-- B-tree node id</i>
    block blob                         <i>-- B-tree node data</i>
  );

  CREATE TABLE %_segdir(
    level INTEGER,
................................................................................
    idx INTEGER,
    start_block INTEGER,               <i>-- Blockid of first node in %_segments</i>
    leaves_end_block INTEGER,          <i>-- Blockid of last leaf node in %_segments</i>
    end_block INTEGER,                 <i>-- Blockid of last node in %_segments</i>
    root BLOB,                         <i>-- B-tree root node</i>
    PRIMARY KEY(level, idx)
  );
</codeblock>

<p>
  The schema depicted above is not designed to store the full-text index 
  directly. Instead, it is used to one or more b-tree structures. There
  is one b-tree for each row in the %_segdir table. The %_segdir table
  row contains the root node and various meta-data associated with the
  b-tree structure, and the %_segments table contains all other (non-root)
................................................................................
  (or b-trees) later on. Merging of b-tree structures can be performed as
  a background task, or once a certain number of separate b-tree structures
  have been accumulated. Of course, this scheme makes queries more expensive
  (as the FTS3 code may have to look up individual terms in more than one
  b-tree and merge the results), but it has been found that in practice this
  overhead is often negligible.
  
<h2>Variable Length Integer (varint) Format</h2>

<p>
  Integer values stored as part of segment b-tree nodes are encoded using the
  FTS3 varint format. This encoding is similar, but <b>not identical</b>, to the
  the <a href="fileformat.html#varint_format">SQLite varint format</a>.

<p>
................................................................................
  is stored in the remaining seven least signficant bits of each byte.
  The first byte of the encoded representation contains the least significant
  seven bits of the encoded integer value. The second byte of the encoded
  representation, if it is present, contains the seven next least significant
  bits of the integer value, and so on. The following table contains examples
  of encoded integer values:

<table striped=1>
  <tr><th>Decimal<th>Hexadecimal<th width=100%>Encoded Representation
  <tr><td>43<td>0x000000000000002B<td>0x2B 
  <tr><td>200815<td>0x000000000003106F<td>0x9C 0xA0 0x0C
  <tr><td>-1<td>0xFFFFFFFFFFFFFFFF<td>0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x01
</table>
  

<h2>Segment B-Tree Format</h2>

<p>
  Segment b-trees are prefix-compressed b+-trees. There is one segment b-tree
  for each row in the %_segdir table (see above). The root node of the segment
  b-tree is stored as a blob in the "root" field of the corresponding row
  of the %_segdir table. All other nodes (if any exist) are stored in the 
  "blob" column of the %_segments table. Nodes within the %_segments table are
  identified by the integer value in the blockid field of the corresponding
  row. The following table describes the fields of the %_segdir table:

<table striped=1>
  <tr><th>Column           <th width=100%>Interpretion
  <tr><td>level            <td> 
    Between them, the contents of the "level" and "idx" fields define the
    relative age of the segment b-tree. The smaller the value stored in the
    "level" field, the more recently the segment b-tree was created. If two
    segment b-trees are of the same "level", the segment with the larger
    value stored in the "idx" column is more recent. The PRIMARY KEY constraint
    on the %_segdir table prevents any two segments from having the same value
    for both the "level" and "idx" fields.
  <tr><td>idx              <td> See above.
  <tr><td>start_block      <td>
    The blockid that corresponds to the node with the smallest blockid that 
    belongs to this segment b-tree. Or zero if the entire segment b-tree
    fits on the root node. If it exists, this node is always a leaf node.
  <tr><td>leaves_end_block <td>
    The blockid that corresponds to the leaf node with the largest blockid 
    that belongs to this segment b-tree. Or zero if the entire segment b-tree
    fits on the root node.
  <tr><td>end_block <td>
    The blockid that corresponds to the interior node with the largest 
    blockid that belongs to this segment b-tree.  Or zero if the entire segment
    b-tree fits on the root node. If it exists, this node is always an
    interior node.
  <tr><td>root             <td>
    Blob containing the root node of the segment b-tree.
</table>

<p>
  Apart from the root node, the nodes that make up a single segment b-tree are
  always stored using a contiguous sequence of blockids. Furthermore, the
  nodes that make up a single level of the b-tree are themselves stored as
................................................................................
  used to store the b-tree leaves are allocated starting with the blockid
  value stored in the "start_block" column of the corresponding %_segdir row,
  and finishing at the blockid value stored in the "leaves_end_block"
  field of the same row. It is therefore possible to iterate through all the
  leaves of a segment b-tree, in key order, by traversing the %_segments 
  table in blockid order from "start_block" to "leaves_end_block".  

<h3>Segment B-Tree Leaf Nodes</h3>

<p>
  The following diagram depicts the format of a segment b-tree leaf node.

<center>
  <img src=images/fts3_leaf_node.png>
  <p> Segment B-Tree Leaf Node Format
</center>

<p>
  The first term stored on each node ("Term 1" in the figure above) is
  stored verbatim. Each subsequent term is prefix-compressed with respect
  to its predecessor. Terms are stored within a page in sorted (memcmp)
  order.

<h3>Segment B-Tree Interior Nodes</h3>

<p>
  The following diagram depicts the format of a segment b-tree interior 
  (non-leaf) node.

<center>
  <img src=images/fts3_interior_node.png>
  <p> Segment B-Tree Interior Node Format
</center>


<h2>Doclist Format</h2>

<p>
  A doclist consists of an array of 64-bit signed integers, serialized using
  the FTS3 varint format. Each doclist entry is made up of a series of two 
  or more integers, as follows:

<ol>
................................................................................
            of storing the term-offset value literally, each integer stored 
            is the difference between the current term-offset and the previous 
            one (or zero if the current term-offset is the first), plus 2.
     </ol>
  <li> Constant value 0.
</ol>

<center>
  <img src=images/fts3_doclist2.png>
  <p> FTS3 Doclist Format
</center>

<center>
  <img src=images/fts3_doclist.png>
  <p> FTS3 Doclist Entry Format
</center>

<p>
  For doclists for which the term appears in more than one column of the FTS3
  virtual table, term-offset lists within the doclist are stored in column 
  number order. This ensures that the term-offset list associated with 
  column 0 (if any) is always first, allowing the first two fields of the
  term-offset list to be omitted in this case.

<h1 id=appendix_a nonumber tags="search application tips">
  Appendix A: Search Application Tips
</h1>

<p>
  FTS3 is primarily designed to support Boolean full-text queries - queries
  to find the set of documents that match a specified criteria. However, many 
  (most?) search applications require that results are somehow ranked in order
  of "relevance", where "relevance" is defined as the likelihood that the user
  who performed the search is interested in a specific element of the returned
................................................................................

<p>
  One very simple scheme might be to count the number of instances of the 
  users search terms in each result document. Those documents that contain
  many instances of the terms are considered more relevant than those with
  a small number of instances of each term. In an FTS3 application, the 
  number of term instances in each result could be determined by counting
  the number of integers in the return value of the [offsets] function.
  The following example shows a query that could be used to obtain the
  ten most relevant results for a query entered by the user:

<codeblock>
  <i>-- This example (and all others in this section) assumes the following schema</i>
  CREATE VIRTUAL TABLE documents USING fts3(title, content);

  <i>-- Assuming the application has supplied an SQLite user function named "countintegers"</i>
  <i>-- that returns the number of space-separated integers contained in its only argument,</i>
  <i>-- the following query could be used to return the titles of the 10 documents that contain</i>
  <i>-- the greatest number of instances of the users query terms. Hopefully, these 10</i>
  <i>-- documents will be those that the users considers more or less the most "relevant".</i>
  SELECT title FROM documents 
    WHERE documents MATCH &lt;query&gt;
    ORDER BY countintegers(offsets(document)) DESC
    OFFSET 0 LIMIT 10

</codeblock>

<p>
  The query above could be made to run faster by using the FTS3 [matchinfo]
  function to determine the number of query term instances that appear in each
  result. The matchinfo function is much more efficient than the offsets 
  function. Furthermore, the matchinfo function provides extra information
  regarding the overall number of occurences of each query term in the entire
  document set (not just the current row) and the number of documents in which 
  each query term appears. This may be used (for example) to attach a higher
  weight to less common terms which may increase the overall computed relevancy 
  of those results the user considers more interesting.

<codeblock>
  <i>-- If the application supplies an SQLite user function called "rank" that</i>
  <i>-- interprets the blob of data returned by matchinfo and returns a numeric</i>
  <i>-- relevancy based on it, then the following SQL may be used to return the</i>
  <i>-- titles of the 10 most relevant documents in the dataset for a users query.</i>
  SELECT title FROM documents 
    WHERE documents MATCH &lt;query&gt;
    ORDER BY rank(matchinfo(document)) DESC
    OFFSET 0 LIMIT 10
</codeblock>

<p>
  The SQL query in the example above uses less CPU than the first example
  in this section, but still has a non-obvious performance problem. SQLite
  satisfies this query by retreiving the value of the "title" column and
  matchinfo data from the FTS3 module for every row matched by the users
  query before it sorts and limits the results. Because of the way SQLite's
................................................................................
  This means that SQLite will load only the docid and matchinfo data for each
  row matching the users query into memory, determine the docid values
  corresponding to the ten most relevant documents, then load only the title
  and content information for those 10 documents only. Because both the matchinfo
  and docid values are gleaned entirely from the full-text index, this results
  in dramatically less data being loaded from the database into memory.

<codeblock>
  SELECT title FROM documents JOIN ( 
      SELECT docid, rank(matchinfo(document)) AS rank 
      FROM documents
      WHERE documents MATCH &lt;query&gt;
      ORDER BY rank DESC 
      OFFSET 0 LIMIT 10
  ) AS ranktable USING(docid)
  ORDER BY ranktable.rank DESC
</codeblock>

<p>
  The next block of SQL enhances the query with solutions to two other problems
  that may arise in developing search applications using FTS3:

<ol>
  <li> <p>
       The [snippet] function cannot be used with the above query. Because
       the outer query does not include a "WHERE ... MATCH" clause, the snippet 
       function may not be used with it. One solution is to duplicate the WHERE
       clause used by the sub-query in the outer query. The overhead associated
       with this is usually negligible.
  <li> <p>
       The relevancy of a document may depend on something other than just
       the data available in the return value of matchinfo. For example
................................................................................
</ol>

<p>
  This version of the query is very similar to that used by the 
  <a href="http://www.sqlite.org/search?q=fts3">sqlite.org documentation search</a> 
  application.

<codeblock>
  <i>-- This table stores the static weight assigned to each document in FTS3 table</i>
  <i>-- "documents". For each row in the documents table there is a corresponding row</i>
  <i>-- with the same docid value in this table.</i>
  CREATE TABLE documents_data(docid INTEGER PRIMARY KEY, weight);

  <i>-- This query is similar to the one in the block above, except that:</i>
  <i>--</i>
................................................................................
      FROM documents JOIN documents_data USING(docid)
      WHERE documents MATCH &lt;query&gt;
      ORDER BY rank DESC 
      OFFSET 0 LIMIT 10
  ) AS ranktable USING(docid)
  WHERE documents MATCH &lt;query&gt;
  ORDER BY ranktable.rank DESC
</codeblock>

<p>
  All the example queries above return the ten most relevant query results.
  By modifying the values used with the OFFSET and LIMIT clauses, a query 
  to return (say) the next ten most relevant results is easy to construct. 
  This may be used to obtain the data required for a search applications second
  and subsequent pages of results.

<p>
  The next block contains an example rank function that uses matchinfo data 
  implemented in C. Instead of a single weight, it allows a weight to be 
  externally assigned to each column of each document. It may be registered
  with SQLite like any other user function using [sqlite3_create_function].

<codeblock>
<i>/*</i>
<i>** SQLite user defined function to use with matchinfo() to calculate the</i>
<i>** relevancy of an FTS3 match. The value returned is the relevancy score</i>
<i>** (a real value greater than or equal to zero). A larger value indicates </i>
<i>** a more relevant document.</i>
<i>**</i>
<i>** The overall relevancy returned is the sum of the relevancies of each </i>
................................................................................
<i>  /* Check that the number of arguments passed to this function is correct.</i>
<i>  ** If not, jump to wrong_number_args. Set aMatchinfo to point to the array</i>
<i>  ** of unsigned integer values returned by FTS3 function matchinfo. Set</i>
<i>  ** nPhrase to contain the number of reportable phrases in the users full-text</i>
<i>  ** query, and nCol to the number of columns in the table.</i>
<i>  */</i>
  if( nVal&lt;1 ) goto wrong_number_args;
  aMatchinfo = (unsigned int *)sqlite3_value_blob(apVal&#x5B;0&#x5D;);
  nPhrase = aMatchinfo&#x5B;0&#x5D;;
  nCol = aMatchinfo&#x5B;1&#x5D;;
  if( nVal!=(1+nCol) ) goto wrong_number_args;

<i>  /* Iterate through each phrase in the users query. */</i>
  for(iPhrase=0; iPhrase&lt;nPhrase; iPhrase++){
    int iCol;                     <i>/* Current column */</i>

<i>    /* Now iterate through each column in the users query. For each column,</i>
<i>    ** increment the relevancy score by:</i>
<i>    **</i>
<i>    **   (&lt;hit count&gt; / &lt;global hit count&gt) * &lt;column weight&gt;</i>
<i>    **</i>
<i>    ** aPhraseinfo&#x5B;&#x5D; points to the start of the data for phrase iPhrase. So</i>
<i>    ** the hit count and global hit counts for each column are found in </i>
<i>    ** aPhraseinfo&#x5B;iCol*3&#x5D; and aPhraseinfo&#x5B;iCol*3+1&#x5D;, respectively.</i>
<i>    */</i>
    int *aPhraseinfo = &aMatchinfo&#x5B;2 + iPhrase*nCol*3&#x5D;;
    for(iCol=0; iCol&lt;nCol; iCol++){
      int nHitCount = aPhraseinfo&#x5B;3*iCol&#x5D;;
      int nGlobalHitCount = aPhraseinfo&#x5B;3*iCol+1&#x5D;;
      double weight = sqlite3_value_double(apVal&#x5B;iCol+1&#x5D;);
      if( nHitCount>0 ){
        score += ((double)nHitCount / (double)nGlobalHitCount) * weight;
      }
    }
  }

  sqlite3_result_double(pCtx, score);
  return;

<i>  /* Jump here if the wrong number of arguments are passed to this function */</i>
wrong_number_args:
  sqlite3_result_error(pCtx, "wrong number of arguments to function rank()", -1);
}




</codeblock>

Changes to wrap.tcl.

35
36
37
38
39
40
41


42
43
44
45
46
47
48
...
403
404
405
406
407
408
409

410
411
412
413
414
415
416
...
612
613
614
615
616
617
618

619
620
621
622
623
624
625
...
643
644
645
646
647
648
649

650
651
652
653
654
655
656
# the final output.
#
set DOC [lindex $argv 0]
set SRC [lindex $argv 1]
set DEST [lindex $argv 2]
set HOMEDIR [pwd]            ;# Also remember our home directory.



# Open the SQLite database.
#
sqlite3 db docinfo.db
db eval {
  BEGIN;
  DELETE FROM link;
  DELETE FROM keyword;
................................................................................
    /* rounded corners */
    .se  { background: url(${path}images/se.png) 100% 100% no-repeat #80a796}
    .sw  { background: url(${path}images/sw.png) 0% 100% no-repeat }
    .ne  { background: url(${path}images/ne.png) 100% 0% no-repeat }
    .nw  { background: url(${path}images/nw.png) 0% 0% no-repeat }

    /* Things for "fancyformat" documents start here. */

    .fancy .codeblock i { color: darkblue; }
    .fancy h1,.fancy h2,.fancy h3,.fancy h4 {font-weight:normal;color:#80a796}
    .fancy h2 { margin-left: 10px }
    .fancy h3 { margin-left: 20px }
    .fancy h4 { margin-left: 30px }
    .fancy th {white-space:nowrap;text-align:left;border-bottom:solid 1px #444}
    .fancy th, .fancy td {padding: 0.2em 1ex; vertical-align:top}
................................................................................
#
foreach infile [lrange $argv 3 end] {
  cd $HOMEDIR
  puts "Processing $infile"
  set fd [open $infile r]
  set in [read $fd]
  close $fd

  set title {No Title}
  regexp {<title>([^\n]*)</title>} $in all title
  regsub {<title>[^\n]*</title>} $in {} in
  set outfile [file root [file tail $infile]].html
  hd_open_main $outfile
  db eval {
    INSERT INTO page(filename,pagetitle)
................................................................................
proc hd_requirement {args} {}
foreach infile [lrange $argv 3 end] {
  cd $HOMEDIR
  puts "Processing $infile"
  set fd [open $infile r]
  set in [read $fd]
  close $fd

  set title {No Title}
  regexp {<title>([^\n]*)</title>} $in all title
  regsub {<title>[^\n]*</title>} $in {} in
  set outfile [file root [file tail $infile]].html
  hd_open_main $outfile
  hd_header $title $infile
  regsub -all {<tcl>} $in "\175; eval \173" in







>
>







 







>







 







>







 







>







35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
...
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
...
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
...
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
# the final output.
#
set DOC [lindex $argv 0]
set SRC [lindex $argv 1]
set DEST [lindex $argv 2]
set HOMEDIR [pwd]            ;# Also remember our home directory.

source [file dirname [info script]]/pages/fancyformat.tcl

# Open the SQLite database.
#
sqlite3 db docinfo.db
db eval {
  BEGIN;
  DELETE FROM link;
  DELETE FROM keyword;
................................................................................
    /* rounded corners */
    .se  { background: url(${path}images/se.png) 100% 100% no-repeat #80a796}
    .sw  { background: url(${path}images/sw.png) 0% 100% no-repeat }
    .ne  { background: url(${path}images/ne.png) 100% 0% no-repeat }
    .nw  { background: url(${path}images/nw.png) 0% 0% no-repeat }

    /* Things for "fancyformat" documents start here. */
    .fancy img+p {font-style:italic}
    .fancy .codeblock i { color: darkblue; }
    .fancy h1,.fancy h2,.fancy h3,.fancy h4 {font-weight:normal;color:#80a796}
    .fancy h2 { margin-left: 10px }
    .fancy h3 { margin-left: 20px }
    .fancy h4 { margin-left: 30px }
    .fancy th {white-space:nowrap;text-align:left;border-bottom:solid 1px #444}
    .fancy th, .fancy td {padding: 0.2em 1ex; vertical-align:top}
................................................................................
#
foreach infile [lrange $argv 3 end] {
  cd $HOMEDIR
  puts "Processing $infile"
  set fd [open $infile r]
  set in [read $fd]
  close $fd
  if {[string first <table_of_contents> $in]>=0} { set in [addtoc $in] }
  set title {No Title}
  regexp {<title>([^\n]*)</title>} $in all title
  regsub {<title>[^\n]*</title>} $in {} in
  set outfile [file root [file tail $infile]].html
  hd_open_main $outfile
  db eval {
    INSERT INTO page(filename,pagetitle)
................................................................................
proc hd_requirement {args} {}
foreach infile [lrange $argv 3 end] {
  cd $HOMEDIR
  puts "Processing $infile"
  set fd [open $infile r]
  set in [read $fd]
  close $fd
  if {[string first <table_of_contents> $in]>=0} { set in [addtoc $in] }
  set title {No Title}
  regexp {<title>([^\n]*)</title>} $in all title
  regsub {<title>[^\n]*</title>} $in {} in
  set outfile [file root [file tail $infile]].html
  hd_open_main $outfile
  hd_header $title $infile
  regsub -all {<tcl>} $in "\175; eval \173" in