/ Check-in [ed69434c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:An attempt at automatic incremental merging for FTS4.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts4-auto-incr-merge
Files: files | file ages | folders
SHA1: ed69434cd89084f4b57bd2cc4f5cc558904af565
User & Date: drh 2012-03-24 02:20:43
Context
2012-03-24
14:45
Modify the way the number of leaves written and the maximum relative level are calculated in the auto-incr-merge code. Closed-Leaf check-in: 0d841c95 user: dan tags: fts4-auto-incr-merge
02:20
An attempt at automatic incremental merging for FTS4. check-in: ed69434c user: drh tags: fts4-auto-incr-merge
2012-03-23
18:26
Fix a spurious SQLITE_CONSTRAINT error that may be returned by an incr-merge operation. check-in: ed7c17ea user: dan tags: fts4-incr-merge
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

  1272   1272     p->nPendingData = 0;
  1273   1273     p->azColumn = (char **)&p[1];
  1274   1274     p->pTokenizer = pTokenizer;
  1275   1275     p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
  1276   1276     p->bHasDocsize = (isFts4 && bNoDocsize==0);
  1277   1277     p->bHasStat = isFts4;
  1278   1278     p->bDescIdx = bDescIdx;
         1279  +  p->bAutoincrmerge = 0xff;   /* 0xff means setting unknown */
  1279   1280     p->zContentTbl = zContent;
  1280   1281     p->zLanguageid = zLanguageid;
  1281   1282     zContent = 0;
  1282   1283     zLanguageid = 0;
  1283   1284     TESTONLY( p->inTransaction = -1 );
  1284   1285     TESTONLY( p->mxSavepoint = -1 );
  1285   1286   
................................................................................
  3098   3099   }
  3099   3100   
  3100   3101   /*
  3101   3102   ** Implementation of xSync() method. Flush the contents of the pending-terms
  3102   3103   ** hash-table to the database.
  3103   3104   */
  3104   3105   static int fts3SyncMethod(sqlite3_vtab *pVtab){
  3105         -  int rc = sqlite3Fts3PendingTermsFlush((Fts3Table *)pVtab);
  3106         -  sqlite3Fts3SegmentsClose((Fts3Table *)pVtab);
         3106  +  Fts3Table *p = (Fts3Table*)pVtab;
         3107  +  int rc = sqlite3Fts3PendingTermsFlush(p);
         3108  +  if( rc==SQLITE_OK && p->bAutoincrmerge==1 && p->nLeafAdd>0 ){
         3109  +    int A = p->nLeafAdd * p->mxLevel;
         3110  +    A += A/2;
         3111  +    rc = sqlite3Fts3Incrmerge(p, A, 8);
         3112  +  }
         3113  +  sqlite3Fts3SegmentsClose(p);
  3107   3114     return rc;
  3108   3115   }
  3109   3116   
  3110   3117   /*
  3111   3118   ** Implementation of xBegin() method. This is a no-op.
  3112   3119   */
  3113   3120   static int fts3BeginMethod(sqlite3_vtab *pVtab){
................................................................................
  3114   3121     TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
  3115   3122     UNUSED_PARAMETER(pVtab);
  3116   3123     assert( p->pSegments==0 );
  3117   3124     assert( p->nPendingData==0 );
  3118   3125     assert( p->inTransaction!=1 );
  3119   3126     TESTONLY( p->inTransaction = 1 );
  3120   3127     TESTONLY( p->mxSavepoint = -1; );
         3128  +  p->nLeafAdd = 0;
  3121   3129     return SQLITE_OK;
  3122   3130   }
  3123   3131   
  3124   3132   /*
  3125   3133   ** Implementation of xCommit() method. This is a no-op. The contents of
  3126   3134   ** the pending-terms hash-table have already been flushed into the database
  3127   3135   ** by fts3SyncMethod().

Changes to ext/fts3/fts3Int.h.

   192    192     const char *zDb;                /* logical database name */
   193    193     const char *zName;              /* virtual table name */
   194    194     int nColumn;                    /* number of named columns in virtual table */
   195    195     char **azColumn;                /* column names.  malloced */
   196    196     sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
   197    197     char *zContentTbl;              /* content=xxx option, or NULL */
   198    198     char *zLanguageid;              /* languageid=xxx option, or NULL */
          199  +  u8 bAutoincrmerge;              /* True if automerge=1 */
          200  +  int mxLevel;                    /* Maximum level seen on this transaction */
          201  +  u32 nLeafAdd;                   /* Number of leaf blocks added this trans */
   199    202   
   200    203     /* Precompiled statements used by the implementation. Each of these 
   201    204     ** statements is run and reset within a single virtual table API call. 
   202    205     */
   203    206     sqlite3_stmt *aStmt[36];
   204    207   
   205    208     char *zReadExprlist;
................................................................................
   474    477   
   475    478     /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */
   476    479     char *zTerm;                    /* Pointer to term buffer */
   477    480     int nTerm;                      /* Size of zTerm in bytes */
   478    481     char *aDoclist;                 /* Pointer to doclist buffer */
   479    482     int nDoclist;                   /* Size of aDoclist[] in bytes */
   480    483   };
          484  +
          485  +int sqlite3Fts3Incrmerge(Fts3Table*,int,int);
   481    486   
   482    487   /* fts3.c */
   483    488   int sqlite3Fts3PutVarint(char *, sqlite3_int64);
   484    489   int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
   485    490   int sqlite3Fts3GetVarint32(const char *, int *);
   486    491   int sqlite3Fts3VarintLen(sqlite3_uint64);
   487    492   void sqlite3Fts3Dequote(char *);

Changes to ext/fts3/fts3_write.c.

    68     68   
    69     69   /*
    70     70   ** The two values that may be meaningfully bound to the :1 parameter in
    71     71   ** statements SQL_REPLACE_STAT and SQL_SELECT_STAT.
    72     72   */
    73     73   #define FTS_STAT_DOCTOTAL      0
    74     74   #define FTS_STAT_INCRMERGEHINT 1
           75  +#define FTS_STAT_AUTOINCRMERGE 2
    75     76   
    76     77   /*
    77     78   ** If FTS_LOG_MERGES is defined, call sqlite3_log() to report each automatic
    78     79   ** and incremental merge operation that takes place. This is used for 
    79     80   ** debugging FTS only, it should not usually be turned on in production
    80     81   ** systems.
    81     82   */
................................................................................
   326    327   ** if no level in the FTS index contains more than ? segments, the statement
   327    328   ** returns zero rows.  */
   328    329   /* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>=?"
   329    330            "  ORDER BY (level %% 1024) DESC LIMIT 1",
   330    331   
   331    332   /* Estimate the upper limit on the number of leaf nodes in a new segment
   332    333   ** created by merging the oldest :2 segments from absolute level :1. See 
   333         -** function fts3Incrmerge() for details.  */
          334  +** function sqlite3Fts3Incrmerge() for details.  */
   334    335   /* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) "
   335    336            "  FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?",
   336    337   
   337    338   /* SQL_DELETE_SEGDIR_ENTRY
   338    339   **   Delete the %_segdir entry on absolute level :1 with index :2.  */
   339    340   /* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?",
   340    341   
................................................................................
  1863   1864       sqlite3_bind_int64(pStmt, 1, iBlock);
  1864   1865       sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC);
  1865   1866       sqlite3_step(pStmt);
  1866   1867       rc = sqlite3_reset(pStmt);
  1867   1868     }
  1868   1869     return rc;
  1869   1870   }
         1871  +
         1872  +/*
         1873  +** Update the Fts3Table.mxLevel field, if appropriate
         1874  +*/
         1875  +static void fts3UpdateMaxLevel(Fts3Table *p, sqlite3_int64 iLevel){
         1876  +  iLevel %= FTS3_SEGDIR_MAXLEVEL;
         1877  +  if( iLevel>p->mxLevel ) p->mxLevel = iLevel;
         1878  +}
  1870   1879   
  1871   1880   /* 
  1872   1881   ** Insert a record into the %_segdir table.
  1873   1882   */
  1874   1883   static int fts3WriteSegdir(
  1875   1884     Fts3Table *p,                   /* Virtual table handle */
  1876   1885     sqlite3_int64 iLevel,           /* Value for "level" field (absolute level) */
................................................................................
  1881   1890     char *zRoot,                    /* Blob value for "root" field */
  1882   1891     int nRoot                       /* Number of bytes in buffer zRoot */
  1883   1892   ){
  1884   1893     sqlite3_stmt *pStmt;
  1885   1894     int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0);
  1886   1895     if( rc==SQLITE_OK ){
  1887   1896       sqlite3_bind_int64(pStmt, 1, iLevel);
         1897  +    fts3UpdateMaxLevel(p, iLevel);
  1888   1898       sqlite3_bind_int(pStmt, 2, iIdx);
  1889   1899       sqlite3_bind_int64(pStmt, 3, iStartBlock);
  1890   1900       sqlite3_bind_int64(pStmt, 4, iLeafEndBlock);
  1891   1901       sqlite3_bind_int64(pStmt, 5, iEndBlock);
  1892   1902       sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC);
  1893   1903       sqlite3_step(pStmt);
  1894   1904       rc = sqlite3_reset(pStmt);
................................................................................
  2365   2375     if( rc!=SQLITE_OK ) return rc;
  2366   2376     sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
  2367   2377     sqlite3_bind_int64(pStmt, 2, 
  2368   2378         getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
  2369   2379     );
  2370   2380     if( SQLITE_ROW==sqlite3_step(pStmt) ){
  2371   2381       *pnMax = sqlite3_column_int64(pStmt, 0);
         2382  +    fts3UpdateMaxLevel(p, *pnMax);
  2372   2383     }
  2373   2384     return sqlite3_reset(pStmt);
  2374   2385   }
  2375   2386   
  2376   2387   /*
  2377   2388   ** Delete all entries in the %_segments table associated with the segment
  2378   2389   ** opened with seg-reader pSeg. This function does not affect the contents
................................................................................
  2995   3006   
  2996   3007   /* 
  2997   3008   ** Flush the contents of pendingTerms to level 0 segments.
  2998   3009   */
  2999   3010   int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
  3000   3011     int rc = SQLITE_OK;
  3001   3012     int i;
         3013  +        
         3014  +  p->nLeafAdd += (p->nPendingData + p->nNodeSize - 1)/p->nNodeSize;
  3002   3015     for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
  3003   3016       rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING);
  3004   3017       if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  3005   3018     }
  3006   3019     sqlite3Fts3PendingTermsClear(p);
         3020  +
         3021  +  /* Determine the auto-incr-merge setting if unknown.  If enabled,
         3022  +  ** estimate the number of leaf blocks of content to be written
         3023  +  */
         3024  +  if( rc==SQLITE_OK && p->bHasStat
         3025  +   && p->bAutoincrmerge==0xff && p->nLeafAdd>0
         3026  +  ){
         3027  +    sqlite3_stmt *pStmt = 0;
         3028  +    rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
         3029  +    if( rc==SQLITE_OK ){
         3030  +      sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
         3031  +      rc = sqlite3_step(pStmt);
         3032  +      p->bAutoincrmerge = (rc==SQLITE_ROW && sqlite3_column_int(pStmt, 0));
         3033  +      rc = sqlite3_reset(pStmt);
         3034  +    }
         3035  +  }
  3007   3036     return rc;
  3008   3037   }
  3009   3038   
  3010   3039   /*
  3011   3040   ** Encode N integers as varints into a blob.
  3012   3041   */
  3013   3042   static void fts3EncodeIntArray(
................................................................................
  4468   4497   **
  4469   4498   ** Incremental merges happen nMin segments at a time. The two
  4470   4499   ** segments to be merged are the nMin oldest segments (the ones with
  4471   4500   ** the smallest indexes) in the highest level that contains at least
  4472   4501   ** nMin segments. Multiple merges might occur in an attempt to write the 
  4473   4502   ** quota of nMerge leaf blocks.
  4474   4503   */
  4475         -static int fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
         4504  +int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
  4476   4505     int rc;                         /* Return code */
  4477   4506     int nRem = nMerge;              /* Number of leaf pages yet to  be written */
  4478   4507     int bUseHint = 1;               /* True if hint has not yet been attempted */
  4479   4508     sqlite3_int64 iHintAbsLevel = 0;/* Hint level */
  4480   4509     int nHintSeg = 0;               /* Hint number of segments */
  4481   4510     int nSeg = 0;                   /* Number of input segments */
  4482   4511     sqlite3_int64 iAbsLevel = 0;    /* Absolute level number to work on */
................................................................................
  4573   4602     /* Write the hint values into the %_stat table for the next incr-merger */
  4574   4603     if( rc==SQLITE_OK && (iAbsLevel!=iHintAbsLevel || nHintSeg!=nSeg) ){
  4575   4604       rc = fts3IncrmergeHintStore(p, iAbsLevel, nSeg);
  4576   4605     }
  4577   4606   
  4578   4607     return rc;
  4579   4608   }
         4609  +
         4610  +/*
         4611  +** Convert the text beginning at *pz into an integer and return
         4612  +** its value.  Advance *pz to point to the first character past
         4613  +** the integer.
         4614  +*/
         4615  +static int fts3Getint(const char **pz){
         4616  +  const char *z = *pz;
         4617  +  int i = 0;
         4618  +  while( (*z)>='0' && (*z)<='9' ) i = 10*i + *(z++) - '0';
         4619  +  *pz = z;
         4620  +  return i;
         4621  +}
  4580   4622   
  4581   4623   /*
  4582   4624   ** Process statements of the form:
  4583   4625   **
  4584   4626   **    INSERT INTO table(table) VALUES('merge=A,B');
  4585   4627   **
  4586   4628   ** A and B are integers that decode to be the number of leaf pages
................................................................................
  4593   4635   ){
  4594   4636     int rc;
  4595   4637     int nMin = (FTS3_MERGE_COUNT / 2);
  4596   4638     int nMerge = 0;
  4597   4639     const char *z = zParam;
  4598   4640   
  4599   4641     /* Read the first integer value */
  4600         -  for(z=zParam; z[0]>='0' && z[0]<='9'; z++){
  4601         -    nMerge = nMerge * 10 + (z[0] - '0');
  4602         -  }
         4642  +  nMerge = fts3Getint(&z);
  4603   4643   
  4604   4644     /* If the first integer value is followed by a ',',  read the second
  4605   4645     ** integer value. */
  4606   4646     if( z[0]==',' && z[1]!='\0' ){
  4607   4647       z++;
  4608         -    nMin = 0;
  4609         -    while( z[0]>='0' && z[0]<='9' ){
  4610         -      nMin = nMin * 10 + (z[0] - '0');
  4611         -      z++;
  4612         -    }
         4648  +    nMin = fts3Getint(&z);
  4613   4649     }
  4614   4650   
  4615   4651     if( z[0]!='\0' || nMin<2 ){
  4616   4652       rc = SQLITE_ERROR;
  4617   4653     }else{
  4618         -    rc = fts3Incrmerge(p, nMerge, nMin);
         4654  +    rc = sqlite3Fts3Incrmerge(p, nMerge, nMin);
  4619   4655       sqlite3Fts3SegmentsClose(p);
  4620   4656     }
  4621   4657     return rc;
  4622   4658   }
         4659  +
         4660  +/*
         4661  +** Process statements of the form:
         4662  +**
         4663  +**    INSERT INTO table(table) VALUES('automerge=X');
         4664  +**
         4665  +** where X is an integer.  X==0 means to turn automerge off.  X!=0 means
         4666  +** turn it on.  The setting is persistent.
         4667  +*/
         4668  +static int fts3DoAutoincrmerge(
         4669  +  Fts3Table *p,                   /* FTS3 table handle */
         4670  +  const char *zParam              /* Nul-terminated string containing boolean */
         4671  +){
         4672  +  int rc;
         4673  +  sqlite3_stmt *pStmt = 0;
         4674  +  p->bAutoincrmerge = fts3Getint(&zParam)!=0;
         4675  +  rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
         4676  +  if( rc ) return rc;;
         4677  +  sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
         4678  +  sqlite3_bind_int(pStmt, 2, p->bAutoincrmerge);
         4679  +  sqlite3_step(pStmt);
         4680  +  rc = sqlite3_reset(pStmt);
         4681  +  return rc;
         4682  +}
         4683  +
  4623   4684   
  4624   4685   /*
  4625   4686   ** Handle a 'special' INSERT of the form:
  4626   4687   **
  4627   4688   **   "INSERT INTO tbl(tbl) VALUES(<expr>)"
  4628   4689   **
  4629   4690   ** Argument pVal contains the result of <expr>. Currently the only 
................................................................................
  4638   4699       return SQLITE_NOMEM;
  4639   4700     }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){
  4640   4701       rc = fts3DoOptimize(p, 0);
  4641   4702     }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){
  4642   4703       rc = fts3DoRebuild(p);
  4643   4704     }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){
  4644   4705       rc = fts3DoIncrmerge(p, &zVal[6]);
         4706  +  }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){
         4707  +    rc = fts3DoAutoincrmerge(p, &zVal[10]);
  4645   4708   #ifdef SQLITE_TEST
  4646   4709     }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){
  4647   4710       p->nNodeSize = atoi(&zVal[9]);
  4648   4711       rc = SQLITE_OK;
  4649   4712     }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){
  4650   4713       p->nMaxPendingData = atoi(&zVal[11]);
  4651   4714       rc = SQLITE_OK;

Changes to test/bc_common.tcl.

    13     13     }
    14     14     foreach file [glob -nocomplain $pattern] {
    15     15       if {[file executable $file] && [file isfile $file]} {lappend binaries $file}
    16     16     }
    17     17   
    18     18     if {[llength $binaries]==0} {
    19     19       puts "WARNING: No historical binaries to test against."
    20         -    puts "WARNING: Omitting backwards-compatibility tests $zFile"
           20  +    puts "WARNING: Omitting backwards-compatibility tests"
    21     21     }
    22     22   
    23     23     foreach bin $binaries {
    24     24       puts -nonewline "Testing against $bin - "
    25     25       flush stdout
    26     26       puts "version [get_version $bin]"
    27     27     }
................................................................................
    66     66   }
    67     67   
    68     68   proc do_all_bc_test {script} {
    69     69     foreach bin $::BC(binaries) {
    70     70       uplevel [list do_bc_test $bin $script]
    71     71     }
    72     72   }
    73         -
    74         -
    75         -
    76         -