SQLite

Check-in [8b872e4260]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Delete all fts3 index data the table becomes empty. Previously, deleting all rows from an fts3 table would leave a bunch of index data describing the terms of the original data, plus deletions of those terms, perhaps with some amount of it merged together so the deletions knocked out the originals. Even when all rows were deleted that original data would hang out, though eventually it would mostly be overwritten if new data contained the same set of terms. (CVS 5413)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 8b872e426091d9ef108e52dbec0d968ed7452907
User & Date: shess 2008-07-14 20:43:15.000
Context
2008-07-15
00:27
Increased test coverage. (CVS 5414) (check-in: 7cf91e08c0 user: drh tags: trunk)
2008-07-14
20:43
Delete all fts3 index data the table becomes empty. Previously, deleting all rows from an fts3 table would leave a bunch of index data describing the terms of the original data, plus deletions of those terms, perhaps with some amount of it merged together so the deletions knocked out the originals. Even when all rows were deleted that original data would hang out, though eventually it would mostly be overwritten if new data contained the same set of terms. (CVS 5413) (check-in: 8b872e4260 user: shess tags: trunk)
19:39
Add a new test script to exercise the deadlock avoidance code in the btree mutex logic. (CVS 5412) (check-in: 7d5e1c4375 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
1901
1902
1903
1904
1905
1906
1907

1908
1909
1910
1911

1912
1913
1914
1915
1916
1917
1918
1919

1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933

1934
1935
1936
1937
1938

1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
} QueryType;

typedef enum fulltext_statement {
  CONTENT_INSERT_STMT,
  CONTENT_SELECT_STMT,
  CONTENT_UPDATE_STMT,
  CONTENT_DELETE_STMT,


  BLOCK_INSERT_STMT,
  BLOCK_SELECT_STMT,
  BLOCK_DELETE_STMT,


  SEGDIR_MAX_INDEX_STMT,
  SEGDIR_SET_STMT,
  SEGDIR_SELECT_LEVEL_STMT,
  SEGDIR_SPAN_STMT,
  SEGDIR_DELETE_STMT,
  SEGDIR_SELECT_SEGMENT_STMT,
  SEGDIR_SELECT_ALL_STMT,


  MAX_STMT                     /* Always at end! */
} fulltext_statement;

/* These must exactly match the enum above. */
/* TODO(shess): Is there some risk that a statement will be used in two
** cursors at once, e.g.  if a query joins a virtual table to itself?
** If so perhaps we should move some of these to the cursor object.
*/
static const char *const fulltext_zStatement[MAX_STMT] = {
  /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
  /* CONTENT_SELECT */ NULL,  /* generated in contentSelectStatement() */
  /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
  /* CONTENT_DELETE */ "delete from %_content where docid = ?",


  /* BLOCK_INSERT */
  "insert into %_segments (blockid, block) values (null, ?)",
  /* BLOCK_SELECT */ "select block from %_segments where blockid = ?",
  /* BLOCK_DELETE */ "delete from %_segments where blockid between ? and ?",


  /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?",
  /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)",
  /* SEGDIR_SELECT_LEVEL */
  "select start_block, leaves_end_block, root from %_segdir "
  " where level = ? order by idx",
  /* SEGDIR_SPAN */
  "select min(start_block), max(end_block) from %_segdir "
  " where level = ? and start_block <> 0",
  /* SEGDIR_DELETE */ "delete from %_segdir where level = ?",

  /* NOTE(shess): The first three results of the following two
  ** statements must match.
  */
  /* SEGDIR_SELECT_SEGMENT */
  "select start_block, leaves_end_block, root from %_segdir "
  " where level = ? and idx = ?",
  /* SEGDIR_SELECT_ALL */
  "select start_block, leaves_end_block, root from %_segdir "
  " order by level desc, idx asc",

};

/*
** A connection to a fulltext index is an instance of the following
** structure.  The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their







>




>








>














>





>




















|







1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
} QueryType;

typedef enum fulltext_statement {
  CONTENT_INSERT_STMT,
  CONTENT_SELECT_STMT,
  CONTENT_UPDATE_STMT,
  CONTENT_DELETE_STMT,
  CONTENT_EXISTS_STMT,

  BLOCK_INSERT_STMT,
  BLOCK_SELECT_STMT,
  BLOCK_DELETE_STMT,
  BLOCK_DELETE_ALL_STMT,

  SEGDIR_MAX_INDEX_STMT,
  SEGDIR_SET_STMT,
  SEGDIR_SELECT_LEVEL_STMT,
  SEGDIR_SPAN_STMT,
  SEGDIR_DELETE_STMT,
  SEGDIR_SELECT_SEGMENT_STMT,
  SEGDIR_SELECT_ALL_STMT,
  SEGDIR_DELETE_ALL_STMT,

  MAX_STMT                     /* Always at end! */
} fulltext_statement;

/* These must exactly match the enum above. */
/* TODO(shess): Is there some risk that a statement will be used in two
** cursors at once, e.g.  if a query joins a virtual table to itself?
** If so perhaps we should move some of these to the cursor object.
*/
static const char *const fulltext_zStatement[MAX_STMT] = {
  /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
  /* CONTENT_SELECT */ NULL,  /* generated in contentSelectStatement() */
  /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
  /* CONTENT_DELETE */ "delete from %_content where docid = ?",
  /* CONTENT_EXISTS */ "select docid from %_content limit 1",

  /* BLOCK_INSERT */
  "insert into %_segments (blockid, block) values (null, ?)",
  /* BLOCK_SELECT */ "select block from %_segments where blockid = ?",
  /* BLOCK_DELETE */ "delete from %_segments where blockid between ? and ?",
  /* BLOCK_DELETE_ALL */ "delete from %_segments",

  /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?",
  /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)",
  /* SEGDIR_SELECT_LEVEL */
  "select start_block, leaves_end_block, root from %_segdir "
  " where level = ? order by idx",
  /* SEGDIR_SPAN */
  "select min(start_block), max(end_block) from %_segdir "
  " where level = ? and start_block <> 0",
  /* SEGDIR_DELETE */ "delete from %_segdir where level = ?",

  /* NOTE(shess): The first three results of the following two
  ** statements must match.
  */
  /* SEGDIR_SELECT_SEGMENT */
  "select start_block, leaves_end_block, root from %_segdir "
  " where level = ? and idx = ?",
  /* SEGDIR_SELECT_ALL */
  "select start_block, leaves_end_block, root from %_segdir "
  " order by level desc, idx asc",
  /* SEGDIR_DELETE_ALL */ "delete from %_segdir",
};

/*
** A connection to a fulltext index is an instance of the following
** structure.  The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
2245
2246
2247
2248
2249
2250
2251



















2252
2253
2254
2255
2256
2257
2258
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_int64(s, 1, iDocid);
  if( rc!=SQLITE_OK ) return rc;

  return sql_single_step(s);
}




















/* insert into %_segments values ([pData])
**   returns assigned blockid in *piBlockid
*/
static int block_insert(fulltext_vtab *v, const char *pData, int nData,
                        sqlite_int64 *piBlockid){
  sqlite3_stmt *s;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_int64(s, 1, iDocid);
  if( rc!=SQLITE_OK ) return rc;

  return sql_single_step(s);
}

/* Returns SQLITE_ROW if any rows exist in %_content, SQLITE_DONE if
** no rows exist, and any error in case of failure.
*/
static int content_exists(fulltext_vtab *v){
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, CONTENT_EXISTS_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_step(s);
  if( rc!=SQLITE_ROW ) return rc;

  /* We expect only one row.  We must execute another sqlite3_step()
   * to complete the iteration; otherwise the table will remain locked. */
  rc = sqlite3_step(s);
  if( rc==SQLITE_DONE ) return SQLITE_ROW;
  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  return rc;
}

/* insert into %_segments values ([pData])
**   returns assigned blockid in *piBlockid
*/
static int block_insert(fulltext_vtab *v, const char *pData, int nData,
                        sqlite_int64 *piBlockid){
  sqlite3_stmt *s;
2417
2418
2419
2420
2421
2422
2423

















2424
2425
2426
2427
2428
2429
2430
  /* Delete the segment directory itself. */
  rc = sql_get_statement(v, SEGDIR_DELETE_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_int64(s, 1, iLevel);
  if( rc!=SQLITE_OK ) return rc;


















  return sql_single_step(s);
}

/* TODO(shess) clearPendingTerms() is far down the file because
** writeZeroSegment() is far down the file because LeafWriter is far
** down the file.  Consider refactoring the code to move the non-vtab
** code above the vtab code so that we don't need this forward







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
  /* Delete the segment directory itself. */
  rc = sql_get_statement(v, SEGDIR_DELETE_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_int64(s, 1, iLevel);
  if( rc!=SQLITE_OK ) return rc;

  return sql_single_step(s);
}

/* Delete entire fts index, SQLITE_OK on success, relevant error on
** failure.
*/
static int segdir_delete_all(fulltext_vtab *v){
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, SEGDIR_DELETE_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sql_single_step(s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  return sql_single_step(s);
}

/* TODO(shess) clearPendingTerms() is far down the file because
** writeZeroSegment() is far down the file because LeafWriter is far
** down the file.  Consider refactoring the code to move the non-vtab
** code above the vtab code so that we don't need this forward
6108
6109
6110
6111
6112
6113
6114

















6115
6116
6117
6118
6119
6120
6121
  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  int rc;

  FTSTRACE(("FTS3 Update %p\n", pVtab));

  if( nArg<2 ){
    rc = index_delete(v, sqlite3_value_int64(ppArg[0]));

















  } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
    /* An update:
     * ppArg[0] = old rowid
     * ppArg[1] = new rowid
     * ppArg[2..2+v->nColumn-1] = values
     * ppArg[2+v->nColumn] = value for magic column (we ignore this)
     * ppArg[2+v->nColumn+1] = value for docid







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







6149
6150
6151
6152
6153
6154
6155
6156
6157
6158
6159
6160
6161
6162
6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175
6176
6177
6178
6179
  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  int rc;

  FTSTRACE(("FTS3 Update %p\n", pVtab));

  if( nArg<2 ){
    rc = index_delete(v, sqlite3_value_int64(ppArg[0]));
    if( rc==SQLITE_OK ){
      /* If we just deleted the last row in the table, clear out the
      ** index data.
      */
      rc = content_exists(v);
      if( rc==SQLITE_ROW ){
        rc = SQLITE_OK;
      }else if( rc==SQLITE_DONE ){
        /* Clear the pending terms so we don't flush a useless level-0
        ** segment when the transaction closes.
        */
        rc = clearPendingTerms(v);
        if( rc==SQLITE_OK ){
          rc = segdir_delete_all(v);
        }
      }
    }
  } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
    /* An update:
     * ppArg[0] = old rowid
     * ppArg[1] = new rowid
     * ppArg[2..2+v->nColumn-1] = values
     * ppArg[2+v->nColumn] = value for magic column (we ignore this)
     * ppArg[2+v->nColumn+1] = value for docid
Added test/fts3d.test.
































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# 2008 June 26
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The focus
# of this script is testing the FTS3 module's optimize() function.
#
# $Id: fts3d.test,v 1.1 2008/07/14 20:43:15 shess Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
  finish_test
  return
}

#*************************************************************************
# Probe to see if support for the FTS3 dump_* functions is compiled in.
# TODO(shess): Change main.mk to do the right thing and remove this test.
db eval {
  DROP TABLE IF EXISTS t1;
  CREATE VIRTUAL TABLE t1 USING fts3(c);
  INSERT INTO t1 (docid, c) VALUES (1, 'x');
}

set s {SELECT dump_terms(t1, 1) FROM t1 LIMIT 1}
set r {1 {unable to use function dump_terms in the requested context}}
if {[catchsql $s]==$r} {
  finish_test
  return
}

#*************************************************************************
# Utility function to check for the expected terms in the segment
# level/index.  _all version does same but for entire index.
proc check_terms {test level index terms} {
  # TODO(shess): Figure out why uplevel in do_test can't catch
  # $level and $index directly.
  set ::level $level
  set ::index $index
  do_test $test.terms {
    execsql {
      SELECT dump_terms(t1, $::level, $::index) FROM t1 LIMIT 1;
    }
  } [list $terms]
}
proc check_terms_all {test terms} {
  do_test $test.terms {
    execsql {
      SELECT dump_terms(t1) FROM t1 LIMIT 1;
    }
  } [list $terms]
}

# Utility function to check for the expected doclist for the term in
# segment level/index.  _all version does same for entire index.
proc check_doclist {test level index term doclist} {
  # TODO(shess): Again, why can't the non-:: versions work?
  set ::term $term
  set ::level $level
  set ::index $index
  do_test $test {
    execsql {
      SELECT dump_doclist(t1, $::term, $::level, $::index) FROM t1 LIMIT 1;
    }
  } [list $doclist]
}
proc check_doclist_all {test term doclist} {
  set ::term $term
  do_test $test {
    execsql {
      SELECT dump_doclist(t1, $::term) FROM t1 LIMIT 1;
    }
  } [list $doclist]
}

#*************************************************************************
# Test results when all rows are deleted and one is added back.
# Previously older segments would continue to exist, but now the index
# should be dropped when the table is empty.  The results should look
# exactly like we never added the earlier rows in the first place.
db eval {
  DROP TABLE IF EXISTS t1;
  CREATE VIRTUAL TABLE t1 USING fts3(c);
  INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
  INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
  INSERT INTO t1 (docid, c) VALUES (3, 'This is a test');
  DELETE FROM t1 WHERE 1=1; -- Delete each row rather than dropping table.
  INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
}

# Should be a single initial segment.
do_test fts3d-1.segments {
  execsql {
    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
  }
} {0 0}
do_test fts3d-1.matches {
  execsql {
    SELECT OFFSETS(t1) FROM t1
     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
  }
} {{0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}}

check_terms_all fts3d-1.1 {a is test this}
check_doclist_all fts3d-1.1.1 a {[1 0[2]]}
check_doclist_all fts3d-1.1.2 is {[1 0[1]]}
check_doclist_all fts3d-1.1.3 test {[1 0[3]]}
check_doclist_all fts3d-1.1.4 this {[1 0[0]]}

check_terms   fts3d-1.2   0 0 {a is test this}
check_doclist fts3d-1.2.1 0 0 a {[1 0[2]]}
check_doclist fts3d-1.2.2 0 0 is {[1 0[1]]}
check_doclist fts3d-1.2.3 0 0 test {[1 0[3]]}
check_doclist fts3d-1.2.4 0 0 this {[1 0[0]]}

# TODO(shess): optimize() tests here.

finish_test