/ Check-in [65fa6937]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix the FTS 'optimize' command on multi-lingual databases.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts4-languageid
Files: files | file ages | folders
SHA1: 65fa693729a336e4d905ce72e6b9ccf4faa772bd
User & Date: dan 2012-03-02 12:26:19
Context
2012-03-02
16:18
Add test for FTS 'rebuild' command. check-in: 181bc357 user: dan tags: fts4-languageid
12:26
Fix the FTS 'optimize' command on multi-lingual databases. check-in: 65fa6937 user: dan tags: fts4-languageid
11:48
Fix a bug in merging FTS language tables for languages other than language 0. check-in: d281cb89 user: dan tags: fts4-languageid
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3Int.h.

193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
  sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
  char *zContentTbl;              /* content=xxx option, or NULL */
  char *zLanguageid;              /* languageid=xxx option, or NULL */

  /* Precompiled statements used by the implementation. Each of these 
  ** statements is run and reset within a single virtual table API call. 
  */
  sqlite3_stmt *aStmt[27];

  char *zReadExprlist;
  char *zWriteExprlist;

  int nNodeSize;                  /* Soft limit for node size */
  u8 bHasStat;                    /* True if %_stat table exists */
  u8 bHasDocsize;                 /* True if %_docsize table exists */







|







193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
  sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
  char *zContentTbl;              /* content=xxx option, or NULL */
  char *zLanguageid;              /* languageid=xxx option, or NULL */

  /* Precompiled statements used by the implementation. Each of these 
  ** statements is run and reset within a single virtual table API call. 
  */
  sqlite3_stmt *aStmt[28];

  char *zReadExprlist;
  char *zWriteExprlist;

  int nNodeSize;                  /* Soft limit for node size */
  u8 bHasStat;                    /* True if %_stat table exists */
  u8 bHasDocsize;                 /* True if %_docsize table exists */

Changes to ext/fts3/fts3_write.c.

228
229
230
231
232
233
234


235
236
237
238
239
240
241
...
281
282
283
284
285
286
287

288
289
290
291
292
293
294
....
2225
2226
2227
2228
2229
2230
2231
2232





2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246


2247
2248
2249
2250
2251
2252
2253
....
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
....
3015
3016
3017
3018
3019
3020
3021
3022
3023




3024





3025
3026
3027
3028
3029
3030
3031





3032
3033
3034
3035
3036
3037
3038
#define SQL_REPLACE_DOCTOTAL          23

#define SQL_SELECT_ALL_PREFIX_LEVEL   24
#define SQL_DELETE_ALL_TERMS_SEGDIR   25

#define SQL_DELETE_SEGDIR_RANGE       26



/*
** This function is used to obtain an SQLite prepared statement handle
** for the statement identified by the second argument. If successful,
** *pp is set to the requested statement handle and SQLITE_OK returned.
** Otherwise, an SQLite error code is returned and *pp is set to 0.
**
** If argument apVal is not NULL, then it must point to an array with
................................................................................
/* 21 */  "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
/* 22 */  "SELECT value FROM %Q.'%q_stat' WHERE id=0",
/* 23 */  "REPLACE INTO %Q.'%q_stat' VALUES(0,?)",
/* 24 */  "",
/* 25 */  "",

/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",


  };
  int rc = SQLITE_OK;
  sqlite3_stmt *pStmt;

  assert( SizeofArray(azSql)==SizeofArray(p->aStmt) );
  assert( eStmt<SizeofArray(azSql) && eStmt>=0 );
................................................................................
** Set *pnMax to the largest segment level in the database for the index
** iIndex.
**
** Segment levels are stored in the 'level' column of the %_segdir table.
**
** Return SQLITE_OK if successful, or an SQLite error code if not.
*/
static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){





  sqlite3_stmt *pStmt;
  int rc;
  assert( iIndex>=0 && iIndex<p->nIndex );

  /* Set pStmt to the compiled version of:
  **
  **   SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
  **
  ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
  */
  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
  if( rc!=SQLITE_OK ) return rc;
  sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
  sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL - 1);


  if( SQLITE_ROW==sqlite3_step(pStmt) ){
    *pnMax = sqlite3_column_int(pStmt, 0);
  }
  return sqlite3_reset(pStmt);
}

/*
................................................................................
    ** segment. The level of the new segment is equal to the the numerically 
    ** greatest segment level currently present in the database for this
    ** index. The idx of the new segment is always 0.  */
    if( csr.nSegment==1 ){
      rc = SQLITE_DONE;
      goto finished;
    }
    rc = fts3SegmentMaxLevel(p, iIndex, &iNewLevel);
    bIgnoreEmpty = 1;

  }else if( iLevel==FTS3_SEGCURSOR_PENDING ){
    iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
  }else{
    /* This call is to merge all segments at level iLevel. find the next
................................................................................
}

/*
** Merge the entire database so that there is one segment for each 
** iIndex/iLangid combination.
*/
static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
  int i;
  int bSeenDone = 0;




  int rc = SQLITE_OK;





  for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
    rc = fts3SegmentMerge(p, 0, i, FTS3_SEGCURSOR_ALL);
    if( rc==SQLITE_DONE ){
      bSeenDone = 1;
      rc = SQLITE_OK;
    }
  }





  sqlite3Fts3SegmentsClose(p);
  sqlite3Fts3PendingTermsClear(p);

  return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc;
}

/*







>
>







 







>







 







|
>
>
>
>
>












|
|
>
>







 







|







 







<

>
>
>
>
|
>
>
>
>
>
|
|
|
|
|
|
|
>
>
>
>
>







228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
...
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
....
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
....
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
....
3025
3026
3027
3028
3029
3030
3031

3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
#define SQL_REPLACE_DOCTOTAL          23

#define SQL_SELECT_ALL_PREFIX_LEVEL   24
#define SQL_DELETE_ALL_TERMS_SEGDIR   25

#define SQL_DELETE_SEGDIR_RANGE       26

#define SQL_SELECT_ALL_LANGID         27

/*
** This function is used to obtain an SQLite prepared statement handle
** for the statement identified by the second argument. If successful,
** *pp is set to the requested statement handle and SQLITE_OK returned.
** Otherwise, an SQLite error code is returned and *pp is set to 0.
**
** If argument apVal is not NULL, then it must point to an array with
................................................................................
/* 21 */  "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
/* 22 */  "SELECT value FROM %Q.'%q_stat' WHERE id=0",
/* 23 */  "REPLACE INTO %Q.'%q_stat' VALUES(0,?)",
/* 24 */  "",
/* 25 */  "",

/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
/* 27 */ "SELECT DISTINCT level / (1024 * ?) FROM %Q.'%q_segdir'",

  };
  int rc = SQLITE_OK;
  sqlite3_stmt *pStmt;

  assert( SizeofArray(azSql)==SizeofArray(p->aStmt) );
  assert( eStmt<SizeofArray(azSql) && eStmt>=0 );
................................................................................
** Set *pnMax to the largest segment level in the database for the index
** iIndex.
**
** Segment levels are stored in the 'level' column of the %_segdir table.
**
** Return SQLITE_OK if successful, or an SQLite error code if not.
*/
static int fts3SegmentMaxLevel(
  Fts3Table *p, 
  int iLangid,
  int iIndex, 
  int *pnMax
){
  sqlite3_stmt *pStmt;
  int rc;
  assert( iIndex>=0 && iIndex<p->nIndex );

  /* Set pStmt to the compiled version of:
  **
  **   SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
  **
  ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
  */
  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
  if( rc!=SQLITE_OK ) return rc;
  sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
  sqlite3_bind_int(pStmt, 2, 
      getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
  );
  if( SQLITE_ROW==sqlite3_step(pStmt) ){
    *pnMax = sqlite3_column_int(pStmt, 0);
  }
  return sqlite3_reset(pStmt);
}

/*
................................................................................
    ** segment. The level of the new segment is equal to the the numerically 
    ** greatest segment level currently present in the database for this
    ** index. The idx of the new segment is always 0.  */
    if( csr.nSegment==1 ){
      rc = SQLITE_DONE;
      goto finished;
    }
    rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel);
    bIgnoreEmpty = 1;

  }else if( iLevel==FTS3_SEGCURSOR_PENDING ){
    iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
  }else{
    /* This call is to merge all segments at level iLevel. find the next
................................................................................
}

/*
** Merge the entire database so that there is one segment for each 
** iIndex/iLangid combination.
*/
static int fts3DoOptimize(Fts3Table *p, int bReturnDone){

  int bSeenDone = 0;
  int rc;
  sqlite3_stmt *pAllLangid = 0;

  rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
  if( rc==SQLITE_OK ){
    int rc2;
    sqlite3_bind_int(pAllLangid, 1, p->nIndex);
    while( sqlite3_step(pAllLangid)==SQLITE_ROW ){
      int i;
      int iLangid = sqlite3_column_int(pAllLangid, 0);
      for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
        rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL);
        if( rc==SQLITE_DONE ){
          bSeenDone = 1;
          rc = SQLITE_OK;
        }
      }
    }
    rc2 = sqlite3_reset(pAllLangid);
    if( rc==SQLITE_OK ) rc = rc2;
  }

  sqlite3Fts3SegmentsClose(p);
  sqlite3Fts3PendingTermsClear(p);

  return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc;
}

/*

Changes to test/fts4langid.test.

230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
















249
250
251
252
253
254
255
256
257
258
259
}

do_test 2.0 { 
  reset_db
  build_multilingual_db_1 db
} {}

proc do_test_2.1 {tn query res_script} {
  for {set langid 0} {$langid < 10} {incr langid} {
    rowid_list_set_langid $langid
    set res [eval $res_script]

    set actual [
      execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
    ]
    do_test 2.1.$tn.$langid [list set {} $actual] $res
  }
}

















do_test_2.1 1  {delta}          { rowid_list delta }
do_test_2.1 2  {"zero one two"} { rowid_list "zero one two" }
do_test_2.1 3  {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_2.1 4  {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

finish_test








|







|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|


|





230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
}

do_test 2.0 { 
  reset_db
  build_multilingual_db_1 db
} {}

proc do_test_2 {tn query res_script} {
  for {set langid 0} {$langid < 10} {incr langid} {
    rowid_list_set_langid $langid
    set res [eval $res_script]

    set actual [
      execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
    ]
    do_test 2.$tn.$langid [list set {} $actual] $res
  }
}

# Run some queries. 
do_test_2 1.1  {delta}          { rowid_list delta }
do_test_2 1.2  {"zero one two"} { rowid_list "zero one two" }
do_test_2 1.3  {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_2 1.4  {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

# Now try the same tests as above, but after running the 'optimize'
# command on the FTS table.
do_execsql_test 2.2 {
  INSERT INTO t2(t2) VALUES('optimize');
  SELECT count(*) FROM t2_segdir;
} {9}
do_test_2 2.1  {delta}          { rowid_list delta }
do_test_2 2.2  {"zero one two"} { rowid_list "zero one two" }
do_test_2 2.3  {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_2 2.4  {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

finish_test