/ Check-in [44548009]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add the 'hashsize' configuration option to fts5, for configuring the amount of memory allocated to the in-memory hash table while writing.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 445480095e6877cce8220b1c095f334bbb04c1c3
User & Date: dan 2015-11-05 18:09:16
Context
2015-11-05
21:49
Fix typo in help information for an FTS5 script. check-in: 777ae800 user: mistachkin tags: trunk
20:25
The top of an index equality loop normally starts with OP_SeekGE and OP_IdxGT. This check-in adds a flag to OP_SeekGE such that it fails immediately if the key is not equal, then jumps over the OP_IdxGT, saving a call to the key comparison functions. Consider this check-in a proof-of-concept. It needs improvement before going on trunk. Some tests fail, but only because they now use fewer key comparisons than expected (which is a good thing!). check-in: 32e31b9b user: drh tags: seekeq-experiment
18:09
Add the 'hashsize' configuration option to fts5, for configuring the amount of memory allocated to the in-memory hash table while writing. check-in: 44548009 user: dan tags: trunk
11:47
Remove a #pragma used to work around an issue with MSVC 2012 that has been overcome by subsequent changes. check-in: 8303e4cf user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

   156    156     fts5_tokenizer *pTokApi;
   157    157   
   158    158     /* Values loaded from the %_config table */
   159    159     int iCookie;                    /* Incremented when %_config is modified */
   160    160     int pgsz;                       /* Approximate page size used in %_data */
   161    161     int nAutomerge;                 /* 'automerge' setting */
   162    162     int nCrisisMerge;               /* Maximum allowed segments per level */
          163  +  int nHashSize;                  /* Bytes of memory for in-memory hash */
   163    164     char *zRank;                    /* Name of rank function */
   164    165     char *zRankArgs;                /* Arguments to rank function */
   165    166   
   166    167     /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
   167    168     char **pzErrmsg;
   168    169   
   169    170   #ifdef SQLITE_DEBUG

Changes to ext/fts5/fts5_config.c.

    16     16   
    17     17   
    18     18   #include "fts5Int.h"
    19     19   
    20     20   #define FTS5_DEFAULT_PAGE_SIZE   4050
    21     21   #define FTS5_DEFAULT_AUTOMERGE      4
    22     22   #define FTS5_DEFAULT_CRISISMERGE   16
           23  +#define FTS5_DEFAULT_HASHSIZE    (1024*1024)
    23     24   
    24     25   /* Maximum allowed page size */
    25     26   #define FTS5_MAX_PAGE_SIZE (128*1024)
    26     27   
    27     28   static int fts5_iswhitespace(char x){
    28     29     return (x==' ');
    29     30   }
................................................................................
   762    763       }
   763    764       if( pgsz<=0 || pgsz>FTS5_MAX_PAGE_SIZE ){
   764    765         *pbBadkey = 1;
   765    766       }else{
   766    767         pConfig->pgsz = pgsz;
   767    768       }
   768    769     }
          770  +
          771  +  else if( 0==sqlite3_stricmp(zKey, "hashsize") ){
          772  +    int nHashSize = -1;
          773  +    if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
          774  +      nHashSize = sqlite3_value_int(pVal);
          775  +    }
          776  +    if( nHashSize<=0 ){
          777  +      *pbBadkey = 1;
          778  +    }else{
          779  +      pConfig->nHashSize = nHashSize;
          780  +    }
          781  +  }
   769    782   
   770    783     else if( 0==sqlite3_stricmp(zKey, "automerge") ){
   771    784       int nAutomerge = -1;
   772    785       if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
   773    786         nAutomerge = sqlite3_value_int(pVal);
   774    787       }
   775    788       if( nAutomerge<0 || nAutomerge>64 ){
................................................................................
   823    836     int rc = SQLITE_OK;
   824    837     int iVersion = 0;
   825    838   
   826    839     /* Set default values */
   827    840     pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
   828    841     pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
   829    842     pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
          843  +  pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE;
   830    844   
   831    845     zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName);
   832    846     if( zSql ){
   833    847       rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0);
   834    848       sqlite3_free(zSql);
   835    849     }
   836    850   

Changes to ext/fts5/fts5_index.c.

   284    284     int nWorkUnit;                  /* Leaf pages in a "unit" of work */
   285    285   
   286    286     /*
   287    287     ** Variables related to the accumulation of tokens and doclists within the
   288    288     ** in-memory hash tables before they are flushed to disk.
   289    289     */
   290    290     Fts5Hash *pHash;                /* Hash table for in-memory data */
   291         -  int nMaxPendingData;            /* Max pending data before flush to disk */
   292    291     int nPendingData;               /* Current bytes of pending data */
   293    292     i64 iWriteRowid;                /* Rowid for current doc being written */
   294    293     int bDelete;                    /* Current write is a delete */
   295    294   
   296    295     /* Error state. */
   297    296     int rc;                         /* Current error code */
   298    297   
................................................................................
  4444   4443   */
  4445   4444   int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
  4446   4445     assert( p->rc==SQLITE_OK );
  4447   4446   
  4448   4447     /* Allocate the hash table if it has not already been allocated */
  4449   4448     if( p->pHash==0 ){
  4450   4449       p->rc = sqlite3Fts5HashNew(&p->pHash, &p->nPendingData);
         4450  +
         4451  +    /* Force the configuration to be loaded */
         4452  +    fts5StructureRelease(fts5StructureRead(p));
  4451   4453     }
  4452   4454   
  4453   4455     /* Flush the hash table to disk if required */
  4454   4456     if( iRowid<p->iWriteRowid 
  4455   4457      || (iRowid==p->iWriteRowid && p->bDelete==0)
  4456         -   || (p->nPendingData > p->nMaxPendingData) 
         4458  +   || (p->nPendingData > p->pConfig->nHashSize) 
  4457   4459     ){
  4458   4460       fts5IndexFlush(p);
  4459   4461     }
  4460   4462   
  4461   4463     p->iWriteRowid = iRowid;
  4462   4464     p->bDelete = bDelete;
  4463   4465     return fts5IndexReturn(p);
................................................................................
  4515   4517     int rc = SQLITE_OK;
  4516   4518     Fts5Index *p;                   /* New object */
  4517   4519   
  4518   4520     *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
  4519   4521     if( rc==SQLITE_OK ){
  4520   4522       p->pConfig = pConfig;
  4521   4523       p->nWorkUnit = FTS5_WORK_UNIT;
  4522         -    p->nMaxPendingData = 1024*1024;
  4523   4524       p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
  4524   4525       if( p->zDataTbl && bCreate ){
  4525   4526         rc = sqlite3Fts5CreateTable(
  4526   4527             pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
  4527   4528         );
  4528   4529         if( rc==SQLITE_OK ){
  4529   4530           rc = sqlite3Fts5CreateTable(pConfig, "idx", 

Changes to ext/fts5/test/fts5simple.test.

    14     14   set testprefix fts5simple
    15     15   
    16     16   # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
    17     17   ifcapable !fts5 {
    18     18     finish_test
    19     19     return
    20     20   }
    21         -
           21  + 
    22     22   #-------------------------------------------------------------------------
    23     23   #
    24     24   set doc "x x [string repeat {y } 50]z z"
    25     25   do_execsql_test 1.0 {
    26     26     CREATE VIRTUAL TABLE t1 USING fts5(x);
    27     27     INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
    28     28     BEGIN;
................................................................................
   317    317     1 {1 2 3}
   318    318     3 {2 3 4}
   319    319   }
   320    320   
   321    321   do_execsql_test 13.3 {
   322    322     INSERT INTO xy(xy) VALUES('integrity-check');
   323    323   }
          324  +
          325  +#-------------------------------------------------------------------------
          326  +#
          327  +do_execsql_test 14.1 {
          328  +  CREATE VIRTUAL TABLE ttt USING fts5(x);
          329  +  BEGIN;
          330  +    INSERT INTO ttt(rowid, x) VALUES(1, 'a b c');
          331  +    INSERT INTO ttt(rowid, x) VALUES(2, 'a b c');
          332  +    INSERT INTO ttt(rowid, x) VALUES(3, 'a b c');
          333  +  COMMIT;
          334  +}
          335  +do_test 14.2 { 
          336  +  fts5_level_segs ttt 
          337  +} {1}
   324    338   
   325    339   finish_test
   326    340   

Changes to ext/fts5/tool/loadfts5.tcl.

    44     44     puts stderr "  -porter      (use porter tokenizer)"
    45     45     puts stderr "  -delete      (delete the database file before starting)"
    46     46     puts stderr "  -limit N     (load no more than N documents)"
    47     47     puts stderr "  -automerge N (set the automerge parameter to N)"
    48     48     puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
    49     49     puts stderr "  -prefix PREFIX (comma separated prefix= argument)"
    50     50     puts stderr "  -trans N     (commit after N inserts - 0 == never)"
           51  +  puts stderr "  -hashsize N  (set the fts5 hashsize parameteger to N)"
    51     52     exit 1
    52     53   }
    53     54   
    54     55   set O(vtab)       fts5
    55     56   set O(tok)        ""
    56     57   set O(limit)      0
    57     58   set O(delete)     0
    58     59   set O(automerge)  -1
    59     60   set O(crisismerge)  -1
    60     61   set O(prefix)     ""
    61     62   set O(trans)      0
           63  +set O(hashsize)   -1
    62     64   
    63     65   if {[llength $argv]<2} usage
    64     66   set nOpt [expr {[llength $argv]-2}]
    65     67   for {set i 0} {$i < $nOpt} {incr i} {
    66     68     set arg [lindex $argv $i]
    67     69     switch -- [lindex $argv $i] {
    68     70       -fts4 {
................................................................................
   101    103         set O(crisismerge) [lindex $argv $i]
   102    104       }
   103    105   
   104    106       -prefix {
   105    107         if { [incr i]>=$nOpt } usage
   106    108         set O(prefix) [lindex $argv $i]
   107    109       }
          110  +
          111  +    -hashsize {
          112  +      if { [incr i]>=$nOpt } usage
          113  +      set O(hashsize) [lindex $argv $i]
          114  +    }
   108    115   
   109    116       default {
   110    117         usage
   111    118       }
   112    119     }
   113    120   }
   114    121   
................................................................................
   122    129   db eval BEGIN
   123    130     set pref ""
   124    131     if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
   125    132     catch {
   126    133       db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
   127    134       db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
   128    135     }
          136  +
          137  +  if {$O(hashsize)>=0} {
          138  +    catch {
          139  +      db eval "INSERT INTO t1(t1, rank) VALUES('hashsize', $O(hashsize));"
          140  +    }
          141  +  }
          142  +
          143  +
   129    144     if {$O(automerge)>=0} {
   130    145       if {$O(vtab) == "fts5"} {
   131    146         db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
   132    147       } else {
   133    148         db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
   134    149       }
   135    150     }
................................................................................
   137    152       if {$O(vtab) == "fts5"} {
   138    153         db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
   139    154       } else {
   140    155       }
   141    156     }
   142    157     load_hierachy [lindex $argv end]
   143    158   db eval COMMIT
          159  +puts ""
   144    160   
   145    161   
   146    162