SQLite

Check-in [65fa693729]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix the FTS 'optimize' command on multi-lingual databases.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts4-languageid
Files: files | file ages | folders
SHA1: 65fa693729a336e4d905ce72e6b9ccf4faa772bd
User & Date: dan 2012-03-02 12:26:19.396
Context
2012-03-02
16:18
Add test for FTS 'rebuild' command. (check-in: 181bc35731 user: dan tags: fts4-languageid)
12:26
Fix the FTS 'optimize' command on multi-lingual databases. (check-in: 65fa693729 user: dan tags: fts4-languageid)
11:48
Fix a bug in merging FTS language tables for languages other than language 0. (check-in: d281cb8984 user: dan tags: fts4-languageid)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3Int.h.
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
  sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
  char *zContentTbl;              /* content=xxx option, or NULL */
  char *zLanguageid;              /* languageid=xxx option, or NULL */

  /* Precompiled statements used by the implementation. Each of these 
  ** statements is run and reset within a single virtual table API call. 
  */
  sqlite3_stmt *aStmt[27];

  char *zReadExprlist;
  char *zWriteExprlist;

  int nNodeSize;                  /* Soft limit for node size */
  u8 bHasStat;                    /* True if %_stat table exists */
  u8 bHasDocsize;                 /* True if %_docsize table exists */







|







193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
  sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
  char *zContentTbl;              /* content=xxx option, or NULL */
  char *zLanguageid;              /* languageid=xxx option, or NULL */

  /* Precompiled statements used by the implementation. Each of these 
  ** statements is run and reset within a single virtual table API call. 
  */
  sqlite3_stmt *aStmt[28];

  char *zReadExprlist;
  char *zWriteExprlist;

  int nNodeSize;                  /* Soft limit for node size */
  u8 bHasStat;                    /* True if %_stat table exists */
  u8 bHasDocsize;                 /* True if %_docsize table exists */
Changes to ext/fts3/fts3_write.c.
228
229
230
231
232
233
234


235
236
237
238
239
240
241
#define SQL_REPLACE_DOCTOTAL          23

#define SQL_SELECT_ALL_PREFIX_LEVEL   24
#define SQL_DELETE_ALL_TERMS_SEGDIR   25

#define SQL_DELETE_SEGDIR_RANGE       26



/*
** This function is used to obtain an SQLite prepared statement handle
** for the statement identified by the second argument. If successful,
** *pp is set to the requested statement handle and SQLITE_OK returned.
** Otherwise, an SQLite error code is returned and *pp is set to 0.
**
** If argument apVal is not NULL, then it must point to an array with







>
>







228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#define SQL_REPLACE_DOCTOTAL          23

#define SQL_SELECT_ALL_PREFIX_LEVEL   24
#define SQL_DELETE_ALL_TERMS_SEGDIR   25

#define SQL_DELETE_SEGDIR_RANGE       26

#define SQL_SELECT_ALL_LANGID         27

/*
** This function is used to obtain an SQLite prepared statement handle
** for the statement identified by the second argument. If successful,
** *pp is set to the requested statement handle and SQLITE_OK returned.
** Otherwise, an SQLite error code is returned and *pp is set to 0.
**
** If argument apVal is not NULL, then it must point to an array with
281
282
283
284
285
286
287

288
289
290
291
292
293
294
/* 21 */  "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
/* 22 */  "SELECT value FROM %Q.'%q_stat' WHERE id=0",
/* 23 */  "REPLACE INTO %Q.'%q_stat' VALUES(0,?)",
/* 24 */  "",
/* 25 */  "",

/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",


  };
  int rc = SQLITE_OK;
  sqlite3_stmt *pStmt;

  assert( SizeofArray(azSql)==SizeofArray(p->aStmt) );
  assert( eStmt<SizeofArray(azSql) && eStmt>=0 );







>







283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
/* 21 */  "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
/* 22 */  "SELECT value FROM %Q.'%q_stat' WHERE id=0",
/* 23 */  "REPLACE INTO %Q.'%q_stat' VALUES(0,?)",
/* 24 */  "",
/* 25 */  "",

/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
/* 27 */ "SELECT DISTINCT level / (1024 * ?) FROM %Q.'%q_segdir'",

  };
  int rc = SQLITE_OK;
  sqlite3_stmt *pStmt;

  assert( SizeofArray(azSql)==SizeofArray(p->aStmt) );
  assert( eStmt<SizeofArray(azSql) && eStmt>=0 );
2225
2226
2227
2228
2229
2230
2231
2232





2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246


2247
2248
2249
2250
2251
2252
2253
** Set *pnMax to the largest segment level in the database for the index
** iIndex.
**
** Segment levels are stored in the 'level' column of the %_segdir table.
**
** Return SQLITE_OK if successful, or an SQLite error code if not.
*/
static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){





  sqlite3_stmt *pStmt;
  int rc;
  assert( iIndex>=0 && iIndex<p->nIndex );

  /* Set pStmt to the compiled version of:
  **
  **   SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
  **
  ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
  */
  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
  if( rc!=SQLITE_OK ) return rc;
  sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
  sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL - 1);


  if( SQLITE_ROW==sqlite3_step(pStmt) ){
    *pnMax = sqlite3_column_int(pStmt, 0);
  }
  return sqlite3_reset(pStmt);
}

/*







|
>
>
>
>
>












|
|
>
>







2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
** Set *pnMax to the largest segment level in the database for the index
** iIndex.
**
** Segment levels are stored in the 'level' column of the %_segdir table.
**
** Return SQLITE_OK if successful, or an SQLite error code if not.
*/
static int fts3SegmentMaxLevel(
  Fts3Table *p, 
  int iLangid,
  int iIndex, 
  int *pnMax
){
  sqlite3_stmt *pStmt;
  int rc;
  assert( iIndex>=0 && iIndex<p->nIndex );

  /* Set pStmt to the compiled version of:
  **
  **   SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
  **
  ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
  */
  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
  if( rc!=SQLITE_OK ) return rc;
  sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
  sqlite3_bind_int(pStmt, 2, 
      getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
  );
  if( SQLITE_ROW==sqlite3_step(pStmt) ){
    *pnMax = sqlite3_column_int(pStmt, 0);
  }
  return sqlite3_reset(pStmt);
}

/*
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
    ** segment. The level of the new segment is equal to the the numerically 
    ** greatest segment level currently present in the database for this
    ** index. The idx of the new segment is always 0.  */
    if( csr.nSegment==1 ){
      rc = SQLITE_DONE;
      goto finished;
    }
    rc = fts3SegmentMaxLevel(p, iIndex, &iNewLevel);
    bIgnoreEmpty = 1;

  }else if( iLevel==FTS3_SEGCURSOR_PENDING ){
    iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
  }else{
    /* This call is to merge all segments at level iLevel. find the next







|







2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
    ** segment. The level of the new segment is equal to the the numerically 
    ** greatest segment level currently present in the database for this
    ** index. The idx of the new segment is always 0.  */
    if( csr.nSegment==1 ){
      rc = SQLITE_DONE;
      goto finished;
    }
    rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel);
    bIgnoreEmpty = 1;

  }else if( iLevel==FTS3_SEGCURSOR_PENDING ){
    iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
  }else{
    /* This call is to merge all segments at level iLevel. find the next
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024









3025
3026
3027
3028
3029
3030
3031





3032
3033
3034
3035
3036
3037
3038
}

/*
** Merge the entire database so that there is one segment for each 
** iIndex/iLangid combination.
*/
static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
  int i;
  int bSeenDone = 0;
  int rc = SQLITE_OK;









  for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
    rc = fts3SegmentMerge(p, 0, i, FTS3_SEGCURSOR_ALL);
    if( rc==SQLITE_DONE ){
      bSeenDone = 1;
      rc = SQLITE_OK;
    }
  }





  sqlite3Fts3SegmentsClose(p);
  sqlite3Fts3PendingTermsClear(p);

  return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc;
}

/*







<

|
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
>
>
>
>
>







3025
3026
3027
3028
3029
3030
3031

3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
}

/*
** Merge the entire database so that there is one segment for each 
** iIndex/iLangid combination.
*/
static int fts3DoOptimize(Fts3Table *p, int bReturnDone){

  int bSeenDone = 0;
  int rc;
  sqlite3_stmt *pAllLangid = 0;

  rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
  if( rc==SQLITE_OK ){
    int rc2;
    sqlite3_bind_int(pAllLangid, 1, p->nIndex);
    while( sqlite3_step(pAllLangid)==SQLITE_ROW ){
      int i;
      int iLangid = sqlite3_column_int(pAllLangid, 0);
      for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
        rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL);
        if( rc==SQLITE_DONE ){
          bSeenDone = 1;
          rc = SQLITE_OK;
        }
      }
    }
    rc2 = sqlite3_reset(pAllLangid);
    if( rc==SQLITE_OK ) rc = rc2;
  }

  sqlite3Fts3SegmentsClose(p);
  sqlite3Fts3PendingTermsClear(p);

  return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc;
}

/*
Changes to test/fts4langid.test.
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
















249
250
251
252
253
254
255
256
257
258
259
}

do_test 2.0 { 
  reset_db
  build_multilingual_db_1 db
} {}

proc do_test_2.1 {tn query res_script} {
  for {set langid 0} {$langid < 10} {incr langid} {
    rowid_list_set_langid $langid
    set res [eval $res_script]

    set actual [
      execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
    ]
    do_test 2.1.$tn.$langid [list set {} $actual] $res
  }
}

















do_test_2.1 1  {delta}          { rowid_list delta }
do_test_2.1 2  {"zero one two"} { rowid_list "zero one two" }
do_test_2.1 3  {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_2.1 4  {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

finish_test








|







|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|


|





230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
}

do_test 2.0 { 
  reset_db
  build_multilingual_db_1 db
} {}

proc do_test_2 {tn query res_script} {
  for {set langid 0} {$langid < 10} {incr langid} {
    rowid_list_set_langid $langid
    set res [eval $res_script]

    set actual [
      execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
    ]
    do_test 2.$tn.$langid [list set {} $actual] $res
  }
}

# Run some queries. 
do_test_2 1.1  {delta}          { rowid_list delta }
do_test_2 1.2  {"zero one two"} { rowid_list "zero one two" }
do_test_2 1.3  {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_2 1.4  {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

# Now try the same tests as above, but after running the 'optimize'
# command on the FTS table.
do_execsql_test 2.2 {
  INSERT INTO t2(t2) VALUES('optimize');
  SELECT count(*) FROM t2_segdir;
} {9}
do_test_2 2.1  {delta}          { rowid_list delta }
do_test_2 2.2  {"zero one two"} { rowid_list "zero one two" }
do_test_2 2.3  {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_2 2.4  {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

finish_test