Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Delete all fts3 index data the table becomes empty. Previously, deleting all rows from an fts3 table would leave a bunch of index data describing the terms of the original data, plus deletions of those terms, perhaps with some amount of it merged together so the deletions knocked out the originals. Even when all rows were deleted that original data would hang out, though eventually it would mostly be overwritten if new data contained the same set of terms. (CVS 5413) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
8b872e426091d9ef108e52dbec0d968e |
User & Date: | shess 2008-07-14 20:43:15.000 |
Context
2008-07-15
| ||
00:27 | Increased test coverage. (CVS 5414) (check-in: 7cf91e08c0 user: drh tags: trunk) | |
2008-07-14
| ||
20:43 | Delete all fts3 index data the table becomes empty. Previously, deleting all rows from an fts3 table would leave a bunch of index data describing the terms of the original data, plus deletions of those terms, perhaps with some amount of it merged together so the deletions knocked out the originals. Even when all rows were deleted that original data would hang out, though eventually it would mostly be overwritten if new data contained the same set of terms. (CVS 5413) (check-in: 8b872e4260 user: shess tags: trunk) | |
19:39 | Add a new test script to exercise the deadlock avoidance code in the btree mutex logic. (CVS 5412) (check-in: 7d5e1c4375 user: drh tags: trunk) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 | } QueryType; typedef enum fulltext_statement { CONTENT_INSERT_STMT, CONTENT_SELECT_STMT, CONTENT_UPDATE_STMT, CONTENT_DELETE_STMT, BLOCK_INSERT_STMT, BLOCK_SELECT_STMT, BLOCK_DELETE_STMT, SEGDIR_MAX_INDEX_STMT, SEGDIR_SET_STMT, SEGDIR_SELECT_LEVEL_STMT, SEGDIR_SPAN_STMT, SEGDIR_DELETE_STMT, SEGDIR_SELECT_SEGMENT_STMT, SEGDIR_SELECT_ALL_STMT, MAX_STMT /* Always at end! */ } fulltext_statement; /* These must exactly match the enum above. */ /* TODO(shess): Is there some risk that a statement will be used in two ** cursors at once, e.g. if a query joins a virtual table to itself? ** If so perhaps we should move some of these to the cursor object. */ static const char *const fulltext_zStatement[MAX_STMT] = { /* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */ /* CONTENT_SELECT */ NULL, /* generated in contentSelectStatement() */ /* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */ /* CONTENT_DELETE */ "delete from %_content where docid = ?", /* BLOCK_INSERT */ "insert into %_segments (blockid, block) values (null, ?)", /* BLOCK_SELECT */ "select block from %_segments where blockid = ?", /* BLOCK_DELETE */ "delete from %_segments where blockid between ? and ?", /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", /* SEGDIR_SELECT_LEVEL */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? order by idx", /* SEGDIR_SPAN */ "select min(start_block), max(end_block) from %_segdir " " where level = ? and start_block <> 0", /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", /* NOTE(shess): The first three results of the following two ** statements must match. */ /* SEGDIR_SELECT_SEGMENT */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? and idx = ?", /* SEGDIR_SELECT_ALL */ "select start_block, leaves_end_block, root from %_segdir " " order by level desc, idx asc", | > > > > > | | 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 | } QueryType; typedef enum fulltext_statement { CONTENT_INSERT_STMT, CONTENT_SELECT_STMT, CONTENT_UPDATE_STMT, CONTENT_DELETE_STMT, CONTENT_EXISTS_STMT, BLOCK_INSERT_STMT, BLOCK_SELECT_STMT, BLOCK_DELETE_STMT, BLOCK_DELETE_ALL_STMT, SEGDIR_MAX_INDEX_STMT, SEGDIR_SET_STMT, SEGDIR_SELECT_LEVEL_STMT, SEGDIR_SPAN_STMT, SEGDIR_DELETE_STMT, SEGDIR_SELECT_SEGMENT_STMT, SEGDIR_SELECT_ALL_STMT, SEGDIR_DELETE_ALL_STMT, MAX_STMT /* Always at end! */ } fulltext_statement; /* These must exactly match the enum above. */ /* TODO(shess): Is there some risk that a statement will be used in two ** cursors at once, e.g. if a query joins a virtual table to itself? ** If so perhaps we should move some of these to the cursor object. */ static const char *const fulltext_zStatement[MAX_STMT] = { /* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */ /* CONTENT_SELECT */ NULL, /* generated in contentSelectStatement() */ /* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */ /* CONTENT_DELETE */ "delete from %_content where docid = ?", /* CONTENT_EXISTS */ "select docid from %_content limit 1", /* BLOCK_INSERT */ "insert into %_segments (blockid, block) values (null, ?)", /* BLOCK_SELECT */ "select block from %_segments where blockid = ?", /* BLOCK_DELETE */ "delete from %_segments where blockid between ? and ?", /* BLOCK_DELETE_ALL */ "delete from %_segments", /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", /* SEGDIR_SELECT_LEVEL */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? order by idx", /* SEGDIR_SPAN */ "select min(start_block), max(end_block) from %_segdir " " where level = ? and start_block <> 0", /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", /* NOTE(shess): The first three results of the following two ** statements must match. */ /* SEGDIR_SELECT_SEGMENT */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? and idx = ?", /* SEGDIR_SELECT_ALL */ "select start_block, leaves_end_block, root from %_segdir " " order by level desc, idx asc", /* SEGDIR_DELETE_ALL */ "delete from %_segdir", }; /* ** A connection to a fulltext index is an instance of the following ** structure. The xCreate and xConnect methods create an instance ** of this structure and xDestroy and xDisconnect free that instance. ** All other methods receive a pointer to the structure as one of their |
︙ | ︙ | |||
2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 | if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 1, iDocid); if( rc!=SQLITE_OK ) return rc; return sql_single_step(s); } /* insert into %_segments values ([pData]) ** returns assigned blockid in *piBlockid */ static int block_insert(fulltext_vtab *v, const char *pData, int nData, sqlite_int64 *piBlockid){ sqlite3_stmt *s; | > > > > > > > > > > > > > > > > > > > | 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 | if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 1, iDocid); if( rc!=SQLITE_OK ) return rc; return sql_single_step(s); } /* Returns SQLITE_ROW if any rows exist in %_content, SQLITE_DONE if ** no rows exist, and any error in case of failure. */ static int content_exists(fulltext_vtab *v){ sqlite3_stmt *s; int rc = sql_get_statement(v, CONTENT_EXISTS_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_step(s); if( rc!=SQLITE_ROW ) return rc; /* We expect only one row. We must execute another sqlite3_step() * to complete the iteration; otherwise the table will remain locked. */ rc = sqlite3_step(s); if( rc==SQLITE_DONE ) return SQLITE_ROW; if( rc==SQLITE_ROW ) return SQLITE_ERROR; return rc; } /* insert into %_segments values ([pData]) ** returns assigned blockid in *piBlockid */ static int block_insert(fulltext_vtab *v, const char *pData, int nData, sqlite_int64 *piBlockid){ sqlite3_stmt *s; |
︙ | ︙ | |||
2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 | /* Delete the segment directory itself. */ rc = sql_get_statement(v, SEGDIR_DELETE_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 1, iLevel); if( rc!=SQLITE_OK ) return rc; return sql_single_step(s); } /* TODO(shess) clearPendingTerms() is far down the file because ** writeZeroSegment() is far down the file because LeafWriter is far ** down the file. Consider refactoring the code to move the non-vtab ** code above the vtab code so that we don't need this forward | > > > > > > > > > > > > > > > > > | 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 | /* Delete the segment directory itself. */ rc = sql_get_statement(v, SEGDIR_DELETE_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 1, iLevel); if( rc!=SQLITE_OK ) return rc; return sql_single_step(s); } /* Delete entire fts index, SQLITE_OK on success, relevant error on ** failure. */ static int segdir_delete_all(fulltext_vtab *v){ sqlite3_stmt *s; int rc = sql_get_statement(v, SEGDIR_DELETE_ALL_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sql_single_step(s); if( rc!=SQLITE_OK ) return rc; rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s); if( rc!=SQLITE_OK ) return rc; return sql_single_step(s); } /* TODO(shess) clearPendingTerms() is far down the file because ** writeZeroSegment() is far down the file because LeafWriter is far ** down the file. Consider refactoring the code to move the non-vtab ** code above the vtab code so that we don't need this forward |
︙ | ︙ | |||
6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 | fulltext_vtab *v = (fulltext_vtab *) pVtab; int rc; FTSTRACE(("FTS3 Update %p\n", pVtab)); if( nArg<2 ){ rc = index_delete(v, sqlite3_value_int64(ppArg[0])); } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ /* An update: * ppArg[0] = old rowid * ppArg[1] = new rowid * ppArg[2..2+v->nColumn-1] = values * ppArg[2+v->nColumn] = value for magic column (we ignore this) * ppArg[2+v->nColumn+1] = value for docid | > > > > > > > > > > > > > > > > > | 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 | fulltext_vtab *v = (fulltext_vtab *) pVtab; int rc; FTSTRACE(("FTS3 Update %p\n", pVtab)); if( nArg<2 ){ rc = index_delete(v, sqlite3_value_int64(ppArg[0])); if( rc==SQLITE_OK ){ /* If we just deleted the last row in the table, clear out the ** index data. */ rc = content_exists(v); if( rc==SQLITE_ROW ){ rc = SQLITE_OK; }else if( rc==SQLITE_DONE ){ /* Clear the pending terms so we don't flush a useless level-0 ** segment when the transaction closes. */ rc = clearPendingTerms(v); if( rc==SQLITE_OK ){ rc = segdir_delete_all(v); } } } } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ /* An update: * ppArg[0] = old rowid * ppArg[1] = new rowid * ppArg[2..2+v->nColumn-1] = values * ppArg[2+v->nColumn] = value for magic column (we ignore this) * ppArg[2+v->nColumn+1] = value for docid |
︙ | ︙ |
Added test/fts3d.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | # 2008 June 26 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The focus # of this script is testing the FTS3 module's optimize() function. # # $Id: fts3d.test,v 1.1 2008/07/14 20:43:15 shess Exp $ # set testdir [file dirname $argv0] source $testdir/tester.tcl # If SQLITE_ENABLE_FTS3 is not defined, omit this file. ifcapable !fts3 { finish_test return } #************************************************************************* # Probe to see if support for the FTS3 dump_* functions is compiled in. # TODO(shess): Change main.mk to do the right thing and remove this test. db eval { DROP TABLE IF EXISTS t1; CREATE VIRTUAL TABLE t1 USING fts3(c); INSERT INTO t1 (docid, c) VALUES (1, 'x'); } set s {SELECT dump_terms(t1, 1) FROM t1 LIMIT 1} set r {1 {unable to use function dump_terms in the requested context}} if {[catchsql $s]==$r} { finish_test return } #************************************************************************* # Utility function to check for the expected terms in the segment # level/index. _all version does same but for entire index. proc check_terms {test level index terms} { # TODO(shess): Figure out why uplevel in do_test can't catch # $level and $index directly. set ::level $level set ::index $index do_test $test.terms { execsql { SELECT dump_terms(t1, $::level, $::index) FROM t1 LIMIT 1; } } [list $terms] } proc check_terms_all {test terms} { do_test $test.terms { execsql { SELECT dump_terms(t1) FROM t1 LIMIT 1; } } [list $terms] } # Utility function to check for the expected doclist for the term in # segment level/index. _all version does same for entire index. proc check_doclist {test level index term doclist} { # TODO(shess): Again, why can't the non-:: versions work? set ::term $term set ::level $level set ::index $index do_test $test { execsql { SELECT dump_doclist(t1, $::term, $::level, $::index) FROM t1 LIMIT 1; } } [list $doclist] } proc check_doclist_all {test term doclist} { set ::term $term do_test $test { execsql { SELECT dump_doclist(t1, $::term) FROM t1 LIMIT 1; } } [list $doclist] } #************************************************************************* # Test results when all rows are deleted and one is added back. # Previously older segments would continue to exist, but now the index # should be dropped when the table is empty. The results should look # exactly like we never added the earlier rows in the first place. db eval { DROP TABLE IF EXISTS t1; CREATE VIRTUAL TABLE t1 USING fts3(c); INSERT INTO t1 (docid, c) VALUES (1, 'This is a test'); INSERT INTO t1 (docid, c) VALUES (2, 'That was a test'); INSERT INTO t1 (docid, c) VALUES (3, 'This is a test'); DELETE FROM t1 WHERE 1=1; -- Delete each row rather than dropping table. INSERT INTO t1 (docid, c) VALUES (1, 'This is a test'); } # Should be a single initial segment. do_test fts3d-1.segments { execsql { SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {0 0} do_test fts3d-1.matches { execsql { SELECT OFFSETS(t1) FROM t1 WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid; } } {{0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}} check_terms_all fts3d-1.1 {a is test this} check_doclist_all fts3d-1.1.1 a {[1 0[2]]} check_doclist_all fts3d-1.1.2 is {[1 0[1]]} check_doclist_all fts3d-1.1.3 test {[1 0[3]]} check_doclist_all fts3d-1.1.4 this {[1 0[0]]} check_terms fts3d-1.2 0 0 {a is test this} check_doclist fts3d-1.2.1 0 0 a {[1 0[2]]} check_doclist fts3d-1.2.2 0 0 is {[1 0[1]]} check_doclist fts3d-1.2.3 0 0 test {[1 0[3]]} check_doclist fts3d-1.2.4 0 0 this {[1 0[0]]} # TODO(shess): optimize() tests here. finish_test |