/ Check-in [cc051fc0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enable fts3 tables to use incremental merge by automatically creating the %_stat table when it is needed.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts4-incr-merge
Files: files | file ages | folders
SHA1: cc051fc0b2d89603b27b94cf2afdbda417ee9d94
User & Date: drh 2012-03-24 17:29:05
Context
2012-03-24
19:44
Add SQLITE_DBSTATUS_CACHE_WRITE. Used to query a database connection for the cumulative number of database pages written. check-in: 3cb6a879 user: dan tags: fts4-incr-merge
17:29
Enable fts3 tables to use incremental merge by automatically creating the %_stat table when it is needed. check-in: cc051fc0 user: drh tags: fts4-incr-merge
17:09
Fix a failing assert() in the FTS3_LOG_MERGES related code. check-in: 4220d52c user: dan tags: fts4-incr-merge
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

   565    565       }
   566    566   
   567    567       sqlite3_free(zSql);
   568    568       sqlite3_free(zCols);
   569    569       *pRc = rc;
   570    570     }
   571    571   }
          572  +
          573  +/*
          574  +** Create the %_stat table if it does not already exist.
          575  +*/
          576  +void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){
          577  +  fts3DbExec(pRc, p->db, 
          578  +      "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'"
          579  +          "(id INTEGER PRIMARY KEY, value BLOB);",
          580  +      p->zDb, p->zName
          581  +  );
          582  +  if( (*pRc)==SQLITE_OK ) p->bHasStat = 1;
          583  +}
   572    584   
   573    585   /*
   574    586   ** Create the backing store tables (%_content, %_segments and %_segdir)
   575    587   ** required by the FTS3 table passed as the only argument. This is done
   576    588   ** as part of the vtab xCreate() method.
   577    589   **
   578    590   ** If the p->bHasDocsize boolean is true (indicating that this is an
................................................................................
   627    639     if( p->bHasDocsize ){
   628    640       fts3DbExec(&rc, db, 
   629    641           "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);",
   630    642           p->zDb, p->zName
   631    643       );
   632    644     }
   633    645     if( p->bHasStat ){
   634         -    fts3DbExec(&rc, db, 
   635         -        "CREATE TABLE %Q.'%q_stat'(id INTEGER PRIMARY KEY, value BLOB);",
   636         -        p->zDb, p->zName
   637         -    );
          646  +    sqlite3Fts3CreateStatTable(&rc, p);
   638    647     }
   639    648     return rc;
   640    649   }
   641    650   
   642    651   /*
   643    652   ** Store the current database page-size in bytes in p->nPgsz.
   644    653   **
................................................................................
  1324   1333   
  1325   1334     /* If this is an xCreate call, create the underlying tables in the 
  1326   1335     ** database. TODO: For xConnect(), it could verify that said tables exist.
  1327   1336     */
  1328   1337     if( isCreate ){
  1329   1338       rc = fts3CreateTables(p);
  1330   1339     }
         1340  +
         1341  +  /* Check to see if a legacy fts3 table has been "upgraded" by the
         1342  +  ** addition of a %_stat table so that it can use incremental merge.
         1343  +  */
         1344  +  if( !isFts4 && !isCreate ){
         1345  +    int rc2 = SQLITE_OK;
         1346  +    fts3DbExec(&rc2, db, "SELECT 1 FROM %Q.'%q_stat' WHERE id=2",
         1347  +               p->zDb, p->zName);
         1348  +    if( rc2==SQLITE_OK ) p->bHasStat = 1;
         1349  +  }
  1331   1350   
  1332   1351     /* Figure out the page-size for the database. This is required in order to
  1333   1352     ** estimate the cost of loading large doclists from the database.  */
  1334   1353     fts3DatabasePageSize(&rc, p);
  1335   1354     p->nNodeSize = p->nPgsz-35;
  1336   1355   
  1337   1356     /* Declare the table schema to SQLite. */

Changes to ext/fts3/fts3Int.h.

   489    489   int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
   490    490   int sqlite3Fts3GetVarint32(const char *, int *);
   491    491   int sqlite3Fts3VarintLen(sqlite3_uint64);
   492    492   void sqlite3Fts3Dequote(char *);
   493    493   void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
   494    494   int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
   495    495   int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);
          496  +void sqlite3Fts3CreateStatTable(int*, Fts3Table*);
   496    497   
   497    498   /* fts3_tokenizer.c */
   498    499   const char *sqlite3Fts3NextToken(const char *, int *);
   499    500   int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
   500    501   int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, 
   501    502       sqlite3_tokenizer **, char **
   502    503   );

Changes to ext/fts3/fts3_write.c.

  4664   4664       z++;
  4665   4665       nMin = fts3Getint(&z);
  4666   4666     }
  4667   4667   
  4668   4668     if( z[0]!='\0' || nMin<2 ){
  4669   4669       rc = SQLITE_ERROR;
  4670   4670     }else{
  4671         -    rc = sqlite3Fts3Incrmerge(p, nMerge, nMin);
         4671  +    rc = SQLITE_OK;
         4672  +    if( !p->bHasStat ) sqlite3Fts3CreateStatTable(&rc, p);
         4673  +    if( rc==SQLITE_OK ){
         4674  +      rc = sqlite3Fts3Incrmerge(p, nMerge, nMin);
         4675  +    }
  4672   4676       sqlite3Fts3SegmentsClose(p);
  4673   4677     }
  4674   4678     return rc;
  4675   4679   }
  4676   4680   
  4677   4681   /*
  4678   4682   ** Process statements of the form:
................................................................................
  4682   4686   ** where X is an integer.  X==0 means to turn automerge off.  X!=0 means
  4683   4687   ** turn it on.  The setting is persistent.
  4684   4688   */
  4685   4689   static int fts3DoAutoincrmerge(
  4686   4690     Fts3Table *p,                   /* FTS3 table handle */
  4687   4691     const char *zParam              /* Nul-terminated string containing boolean */
  4688   4692   ){
  4689         -  int rc;
         4693  +  int rc = SQLITE_OK;
  4690   4694     sqlite3_stmt *pStmt = 0;
  4691   4695     p->bAutoincrmerge = fts3Getint(&zParam)!=0;
         4696  +  if( !p->bHasStat ){
         4697  +    sqlite3Fts3CreateStatTable(&rc, p);
         4698  +    if( rc ) return rc;
         4699  +  }
  4692   4700     rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
  4693   4701     if( rc ) return rc;;
  4694   4702     sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
  4695   4703     sqlite3_bind_int(pStmt, 2, p->bAutoincrmerge);
  4696   4704     sqlite3_step(pStmt);
  4697   4705     rc = sqlite3_reset(pStmt);
  4698   4706     return rc;

Added test/fts3merge.test.

            1  +# 2012 March 06
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +# This file implements regression tests for SQLite library.  The
           12  +# focus of this script is testing the incremental merge function.
           13  +#
           14  +
           15  +set testdir [file dirname $argv0]
           16  +source $testdir/tester.tcl
           17  +source $testdir/fts3_common.tcl
           18  +set ::testprefix fts3merge
           19  +
           20  +# If SQLITE_ENABLE_FTS3 is defined, omit this file.
           21  +ifcapable !fts3 {
           22  +  finish_test
           23  +  return
           24  +}
           25  +
           26  +#-------------------------------------------------------------------------
           27  +# Test cases 1.*
           28  +#
           29  +do_test 1.0 { fts3_build_db_1 1004 } {}
           30  +do_test 1.1 { fts3_integrity_check t1 } {ok}
           31  +do_execsql_test 1.1 { 
           32  +  SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level 
           33  +} {
           34  +  0 {0 1 2 3 4 5 6 7 8 9 10 11} 
           35  +  1 {0 1 2 3 4 5 6 7 8 9 10 11 12 13}
           36  +  2 {0 1 2}
           37  +}
           38  +
           39  +for {set i 0} {$i<20} {incr i} {
           40  +  do_execsql_test 1.2.$i.1 { INSERT INTO t1(t1) VALUES('merge=1') }
           41  +  do_test 1.2.$i.2 { fts3_integrity_check t1 } ok
           42  +  do_execsql_test 1.2.$i.3 { 
           43  +    SELECT docid FROM t1 WHERE t1 MATCH 'zero one two three'
           44  +  } {123 132 213 231 312 321}
           45  +}
           46  +
           47  +do_execsql_test 1.3 { 
           48  +  SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level 
           49  +} {
           50  +  0 {0 1 2 3} 
           51  +  1 {0 1 2 3 4 5 6} 
           52  +  2 {0 1 2 3}
           53  +}
           54  +
           55  +for {set i 0} {$i<100} {incr i} {
           56  +  do_execsql_test 1.4.$i { INSERT INTO t1(t1) VALUES('merge=1,4') }
           57  +  do_test 1.4.$i.2 { fts3_integrity_check t1 } ok
           58  +  do_execsql_test 1.4.$i.3 { 
           59  +    SELECT docid FROM t1 WHERE t1 MATCH 'zero one two three'
           60  +  } {123 132 213 231 312 321}
           61  +}
           62  +
           63  +do_execsql_test 1.5 { 
           64  +  SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level 
           65  +} {
           66  +  2 {0 1}
           67  +  3 0
           68  +}
           69  +
           70  +#-------------------------------------------------------------------------
           71  +# Test cases 2.* test that errors in the xxx part of the 'merge=xxx' are
           72  +# handled correctly.
           73  +#
           74  +do_execsql_test 2.0 { CREATE VIRTUAL TABLE t2 USING fts3 }
           75  +
           76  +foreach {tn arg} {
           77  +  1   {merge=abc}
           78  +  2   {merge=%%%}
           79  +  3   {merge=,}
           80  +  4   {merge=5,}
           81  +  5   {merge=6,%}
           82  +  6   {merge=6,six}
           83  +  7   {merge=6,1}
           84  +  8   {merge=6,0}
           85  +} {
           86  +  do_catchsql_test 2.$tn { 
           87  +    INSERT INTO t2(t2) VALUES($arg);
           88  +  } {1 {SQL logic error or missing database}}
           89  +}
           90  +
           91  +#-------------------------------------------------------------------------
           92  +# Test cases 3.*
           93  +#
           94  +do_test 3.0 { 
           95  +  reset_db
           96  +  execsql { PRAGMA page_size = 512 }
           97  +  fts3_build_db_2 30040 
           98  +} {}
           99  +do_test 3.1 { fts3_integrity_check t2 } {ok}
          100  +
          101  +do_execsql_test 3.2 { 
          102  +  SELECT level, group_concat(idx, ' ') FROM t2_segdir GROUP BY level 
          103  +} {
          104  +  0 {0 1 2 3 4 5 6} 
          105  +  1 {0 1 2 3 4} 
          106  +  2 {0 1 2 3 4} 
          107  +  3 {0 1 2 3 4 5 6}
          108  +}
          109  +
          110  +do_execsql_test 3.3 { 
          111  +  INSERT INTO t2(t2) VALUES('merge=1000000,2');
          112  +  SELECT level, group_concat(idx, ' ') FROM t2_segdir GROUP BY level 
          113  +} {
          114  +  0 0 
          115  +  2 0
          116  +  3 0 
          117  +  4 0
          118  +  6 0
          119  +}
          120  +
          121  +#-------------------------------------------------------------------------
          122  +# Test cases 4.*
          123  +#
          124  +reset_db
          125  +do_execsql_test 4.1 {
          126  +  PRAGMA page_size = 512;
          127  +  CREATE VIRTUAL TABLE t4 USING fts3;
          128  +  PRAGMA main.page_size;
          129  +} {512}
          130  +
          131  +do_test 4.2 {
          132  +  foreach x {a c b d e f g h i j k l m n o p} {
          133  +    execsql "INSERT INTO t4 VALUES('[string repeat $x 600]')"
          134  +  }
          135  +  execsql {SELECT level, group_concat(idx, ' ') FROM t4_segdir GROUP BY level}
          136  +} {0 {0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15}}
          137  +
          138  +foreach {tn expect} {
          139  +  1  "0 {0 1 2 3 4 5 6 7 8 9 10 11 12 13} 1 0"
          140  +  2  "0 {0 1 2 3 4 5 6 7 8 9 10 11 12}    1 0"
          141  +  3  "0 {0 1 2 3 4 5 6 7 8 9 10 11}       1 0"
          142  +  4  "0 {0 1 2 3 4 5 6 7 8 9 10}          1 0"
          143  +  5  "0 {0 1 2 3 4 5 6 7 8 9}             1 0"
          144  +  6  "0 {0 1 2 3 4 5 6 7 8}               1 0"
          145  +  7  "0 {0 1 2 3 4 5 6 7}                 1 0"
          146  +  8  "0 {0 1 2 3 4 5 6}                   1 0"
          147  +  9  "0 {0 1 2 3 4 5}                     1 0"
          148  +} {
          149  +  do_execsql_test 4.3.$tn {
          150  +    INSERT INTO t4(t4) VALUES('merge=1,16');
          151  +    SELECT level, group_concat(idx, ' ') FROM t4_segdir GROUP BY level;
          152  +  } $expect
          153  +}
          154  +
          155  +do_execsql_test 4.4.1 {
          156  +  SELECT quote(value) FROM t4_stat WHERE rowid=1
          157  +} {X'0006'}
          158  +
          159  +do_execsql_test 4.4.2 {
          160  +  DELETE FROM t4_stat WHERE rowid=1;
          161  +  INSERT INTO t4(t4) VALUES('merge=1,12');
          162  +  SELECT level, group_concat(idx, ' ') FROM t4_segdir GROUP BY level;
          163  +} "0 {0 1 2 3 4 5}                     1 0"
          164  +
          165  +
          166  +#-------------------------------------------------------------------------
          167  +# Test cases 5.*
          168  +#
          169  +# Test that if a crisis-merge occurs that disrupts an ongoing incremental
          170  +# merge, the next call to "merge=A,B" identifies this and starts a new
          171  +# incremental merge. There are two scenarios:
          172  +#
          173  +#   * There are less segments on the input level that the disrupted
          174  +#     incremental merge operated on, or
          175  +#   
          176  +#   * Sufficient segments exist on the input level but the segments 
          177  +#     contain keys smaller than the largest key in the potential output 
          178  +#     segment.
          179  +# 
          180  +do_test 5.1 {
          181  +  reset_db
          182  +  fts3_build_db_1 1000
          183  +} {}
          184  +
          185  +do_execsql_test 5.2 {
          186  +  SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level;
          187  +} {
          188  +  0 {0 1 2 3 4 5 6 7} 
          189  +  1 {0 1 2 3 4 5 6 7 8 9 10 11 12 13} 
          190  +  2 {0 1 2}
          191  +}
          192  +
          193  +do_execsql_test 5.3 {
          194  +  INSERT INTO t1(t1) VALUES('merge=1,4');
          195  +  SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level;
          196  +} {
          197  +  0 {0 1 2 3 4 5 6 7} 
          198  +  1 {0 1 2 3 4 5 6 7 8 9 10 11 12 13} 
          199  +  2 {0 1 2 3}
          200  +}
          201  +
          202  +do_execsql_test 5.4 {SELECT quote(value) from t1_stat WHERE rowid=1} {X'0104'}
          203  +do_test 5.5 {
          204  +  foreach docid [execsql {SELECT docid FROM t1}] {
          205  +    execsql {INSERT INTO t1 SELECT * FROM t1 WHERE docid=$docid}
          206  +  }
          207  +} {}
          208  +
          209  +do_execsql_test 5.6 {SELECT quote(value) from t1_stat WHERE rowid=1} {X'0104'}
          210  +
          211  +do_execsql_test 5.7 {
          212  +  SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level;
          213  +  SELECT quote(value) from t1_stat WHERE rowid=1;
          214  +} {
          215  +  0 {0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15} 
          216  +  1 {0 1 2 3 4 5 6 7 8 9 10 11} 
          217  +  2 {0 1 2 3 4 5 6 7}
          218  +  X'0104'
          219  +}
          220  +
          221  +do_execsql_test 5.8 {
          222  +  INSERT INTO t1(t1) VALUES('merge=1,4');
          223  +  SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level;
          224  +  SELECT quote(value) from t1_stat WHERE rowid=1;
          225  +} {
          226  +  0 {0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15} 
          227  +  1 {0 1 2 3 4 5 6 7 8 9 10 11} 
          228  +  2 {0 1 2 3 4 5 6 7}
          229  +  3 {0}
          230  +  X'0204'
          231  +}
          232  +
          233  +do_test 5.9 {
          234  +  set L [expr 16*16*8 + 16*4 + 1]
          235  +  foreach docid [execsql {
          236  +      SELECT docid FROM t1 UNION ALL SELECT docid FROM t1 LIMIT $L
          237  +  }] {
          238  +    execsql {INSERT INTO t1 SELECT * FROM t1 WHERE docid=$docid}
          239  +  }
          240  +} {}
          241  +
          242  +do_execsql_test 5.10 {
          243  +  SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level;
          244  +  SELECT quote(value) from t1_stat WHERE rowid=1;
          245  +} {
          246  +  0 0   1 0   2 0   3 {0 1}
          247  +  X'0204'
          248  +}
          249  +
          250  +do_execsql_test 5.11 {
          251  +  INSERT INTO t1(t1) VALUES('merge=10,4');
          252  +  SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level;
          253  +  SELECT quote(value) from t1_stat WHERE rowid=1;
          254  +} {
          255  +  0 0   1 0   2 0   3 {0 1}
          256  +  X'0000'
          257  +}
          258  +
          259  +#-------------------------------------------------------------------------
          260  +# Test cases 6.*
          261  +#
          262  +# At one point the following test caused an assert() to fail (because the
          263  +# second 'merge=1,2' operation below actually "merges" a single input
          264  +# segment, which was unexpected).
          265  +#
          266  +do_test 6.1 {
          267  +  reset_db
          268  +  set a [string repeat a 900]
          269  +  set b [string repeat b 900]
          270  +  set c [string repeat c 900]
          271  +  set d [string repeat d 900]
          272  +  execsql {
          273  +    CREATE VIRTUAL TABLE t1 USING fts3;
          274  +    BEGIN;
          275  +      INSERT INTO t1 VALUES($a);
          276  +      INSERT INTO t1 VALUES($b);
          277  +    COMMIT;
          278  +    BEGIN;
          279  +      INSERT INTO t1 VALUES($c);
          280  +      INSERT INTO t1 VALUES($d);
          281  +    COMMIT;
          282  +  }
          283  +
          284  +  execsql {
          285  +    INSERT INTO t1(t1) VALUES('merge=1,2');
          286  +    INSERT INTO t1(t1) VALUES('merge=1,2');
          287  +  }
          288  +} {}
          289  +
          290  +#-------------------------------------------------------------------------
          291  +# Test cases 7.*
          292  +#
          293  +# Test that the value returned by sqlite3_total_changes() increases by
          294  +# 1 following a no-op "merge=A,B", or by more than 1 if actual work is
          295  +# performed.
          296  +#
          297  +do_test 7.0 {
          298  +  reset_db
          299  +  fts3_build_db_1 1000
          300  +} {}
          301  +
          302  +do_execsql_test 7.1 {
          303  +  SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level
          304  +} {
          305  +  0 {0 1 2 3 4 5 6 7} 
          306  +  1 {0 1 2 3 4 5 6 7 8 9 10 11 12 13} 
          307  +  2 {0 1 2}
          308  +}
          309  +do_test 7.2 {
          310  +  set x [db total_changes]
          311  +  execsql { INSERT INTO t1(t1) VALUES('merge=2,10') }
          312  +  expr { ([db total_changes] - $x)>1 }
          313  +} {1}
          314  +do_test 7.3 {
          315  +  set x [db total_changes]
          316  +  execsql { INSERT INTO t1(t1) VALUES('merge=200,10') }
          317  +  expr { ([db total_changes] - $x)>1 }
          318  +} {1}
          319  +do_test 7.4 {
          320  +  set x [db total_changes]
          321  +  execsql { INSERT INTO t1(t1) VALUES('merge=200,10') }
          322  +  expr { ([db total_changes] - $x)>1 }
          323  +} {0}
          324  +do_test 7.5 {
          325  +  set x [db total_changes]
          326  +  execsql { INSERT INTO t1(t1) VALUES('merge=200,10') }
          327  +  expr { ([db total_changes] - $x)>1 }
          328  +} {0}
          329  +
          330  +finish_test