/ Check-in [bc3a2ed5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add a test to check that the new multi-token phrase optimization is actually helping.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts4-docid-range-constraints
Files: files | file ages | folders
SHA1: bc3a2ed5fb2402805928b0002457eebf06f87c47
User & Date: dan 2013-10-02 08:04:27
Context
2013-10-03
19:27
Allow FTS4 multi-token phrases to use a combination of in-memory and incrementally loaded doclists. This allows phrases to (partially) benefit from incremental doclists without disabling the deferred token optimization. check-in: f6819c5f user: dan tags: fts4-docid-range-constraints
2013-10-02
08:04
Add a test to check that the new multi-token phrase optimization is actually helping. check-in: bc3a2ed5 user: dan tags: fts4-docid-range-constraints
2013-10-01
20:10
Merge trunk changes with this branch. check-in: 65d9c6fa user: dan tags: fts4-docid-range-constraints
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

4042
4043
4044
4045
4046
4047
4048




4049
4050
4051
4052
4053
4054
4055
....
4261
4262
4263
4264
4265
4266
4267

4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
  ** possible if the bOptOk argument is true, the FTS doclists will be
  ** scanned in forward order, and the phrase consists of 
  ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are "^first"
  ** tokens or prefix tokens that cannot use a prefix-index.  */
  int bIncrOk = (bOptOk 
   && pCsr->bDesc==pTab->bDescIdx 
   && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0




  );
  for(i=0; bIncrOk==1 && i<p->nToken; i++){
    Fts3PhraseToken *pToken = &p->aToken[i];
    if( pToken->bFirst || !pToken->pSegcsr || !pToken->pSegcsr->bLookup ){
      bIncrOk = 0;
    }
  }
................................................................................
            }
          }
        }
      }

      /* Check if the current entries really are a phrase match */
      if( bEof==0 ){

        int nByte = a[p->nToken-1].nList;
        char *aDoclist = sqlite3_malloc(nByte+1);
        if( !aDoclist ) return SQLITE_NOMEM;
        memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);

        int nList;
        for(i=0; i<(p->nToken-1); i++){
          char *pLeft = a[i].pList;
          char *pRight = aDoclist;
          char *pOut = aDoclist;
          int nDist = p->nToken-1-i;
          int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pLeft, &pRight);
          if( res==0 ) break;







>
>
>
>







 







>





<







4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
....
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277

4278
4279
4280
4281
4282
4283
4284
  ** possible if the bOptOk argument is true, the FTS doclists will be
  ** scanned in forward order, and the phrase consists of 
  ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are "^first"
  ** tokens or prefix tokens that cannot use a prefix-index.  */
  int bIncrOk = (bOptOk 
   && pCsr->bDesc==pTab->bDescIdx 
   && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
   && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
#ifdef SQLITE_TEST
   && pTab->bNoIncrDoclist==0
#endif
  );
  for(i=0; bIncrOk==1 && i<p->nToken; i++){
    Fts3PhraseToken *pToken = &p->aToken[i];
    if( pToken->bFirst || !pToken->pSegcsr || !pToken->pSegcsr->bLookup ){
      bIncrOk = 0;
    }
  }
................................................................................
            }
          }
        }
      }

      /* Check if the current entries really are a phrase match */
      if( bEof==0 ){
        int nList = 0;
        int nByte = a[p->nToken-1].nList;
        char *aDoclist = sqlite3_malloc(nByte+1);
        if( !aDoclist ) return SQLITE_NOMEM;
        memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);


        for(i=0; i<(p->nToken-1); i++){
          char *pLeft = a[i].pList;
          char *pRight = aDoclist;
          char *pOut = aDoclist;
          int nDist = p->nToken-1-i;
          int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pLeft, &pRight);
          if( res==0 ) break;

Changes to ext/fts3/fts3Int.h.

263
264
265
266
267
268
269






270
271
272
273
274
275
276
  ** methods of the virtual table are called at appropriate times.  These
  ** values do not contribute to FTS functionality; they are used for
  ** verifying the operation of the SQLite core.
  */
  int inTransaction;     /* True after xBegin but before xCommit/xRollback */
  int mxSavepoint;       /* Largest valid xSavepoint integer */
#endif






};

/*
** When the core wants to read from the virtual table, it creates a
** virtual table cursor (an instance of the following structure) using
** the xOpen method. Cursors are destroyed using the xClose method.
*/







>
>
>
>
>
>







263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
  ** methods of the virtual table are called at appropriate times.  These
  ** values do not contribute to FTS functionality; they are used for
  ** verifying the operation of the SQLite core.
  */
  int inTransaction;     /* True after xBegin but before xCommit/xRollback */
  int mxSavepoint;       /* Largest valid xSavepoint integer */
#endif

#ifdef SQLITE_TEST
  /* True to disable the incremental doclist optimization. This is controlled
  ** by special insert command 'test-no-incr-doclist'.  */
  int bNoIncrDoclist;
#endif
};

/*
** When the core wants to read from the virtual table, it creates a
** virtual table cursor (an instance of the following structure) using
** the xOpen method. Cursors are destroyed using the xClose method.
*/

Changes to ext/fts3/fts3_write.c.

5045
5046
5047
5048
5049
5050
5051



5052
5053
5054
5055
5056
5057
5058
    rc = fts3DoAutoincrmerge(p, &zVal[10]);
#ifdef SQLITE_TEST
  }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){
    p->nNodeSize = atoi(&zVal[9]);
    rc = SQLITE_OK;
  }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){
    p->nMaxPendingData = atoi(&zVal[11]);



    rc = SQLITE_OK;
#endif
  }else{
    rc = SQLITE_ERROR;
  }

  return rc;







>
>
>







5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
    rc = fts3DoAutoincrmerge(p, &zVal[10]);
#ifdef SQLITE_TEST
  }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){
    p->nNodeSize = atoi(&zVal[9]);
    rc = SQLITE_OK;
  }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){
    p->nMaxPendingData = atoi(&zVal[11]);
    rc = SQLITE_OK;
  }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){
    p->bNoIncrDoclist = atoi(&zVal[21]);
    rc = SQLITE_OK;
#endif
  }else{
    rc = SQLITE_ERROR;
  }

  return rc;

Added test/fts4incr.test.











































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# 2012 March 26
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
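#
# This file runs FTS phrase queries with the incremental doclist
# optimization both enabled and disabled (via the test-only
# 'test-no-incr-doclist' special insert command), checks that the results
# are the same either way, and prints the time taken in each mode.
#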

set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/fts3_common.tcl

# Test names in this file are reported as "fts4incr-<N>". (The prefix was
# previously "fts4docid", left over from the file this one was copied from.)
set ::testprefix fts4incr

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
  finish_test
  return
}

# Create the fts_kjv_genesis procedure which fills an FTS3/4 table
# with the complete text of the Book of Genesis.
#
source $testdir/genesis.tcl

do_test 1.0 {
  execsql { CREATE VIRTUAL TABLE t1 USING fts3(words) }
  fts_kjv_genesis
} {}

do_execsql_test 1.1 {
  SELECT min(docid), max(docid) FROM t1;
} {1001001 1050026}

# Each entry is: test-number, query, expected result.
#
# Every query is run twice. With s==0 the incremental doclist optimization
# is left enabled; with s==1 it is disabled using the test-only special
# insert command 'test-no-incr-doclist=1'. The query must return the same
# result in both modes. Each query is then timed over 100 iterations; the
# first element of the [time] result is the number of microseconds per
# iteration. The timings are printed for manual inspection only - they are
# not part of any test result.
#
foreach {tn q res} {
  1 { SELECT count(*) FROM t1 WHERE t1 MATCH 'and' AND docid < 1010000} 224
  2 { SELECT count(*) FROM t1 WHERE t1 MATCH '"in the"' AND docid < 1010000} 47
  3 { SELECT count(*) FROM t1 WHERE t1 MATCH '"And God"' AND docid < 1010000} 33
  4 { SELECT count(*) FROM t1 WHERE t1 
      MATCH '"land of canaan"' AND docid < 1030000 } 7
} {
  foreach s {0 1} {
    execsql "INSERT INTO t1(t1) VALUES('test-no-incr-doclist=$s')"
    do_execsql_test 2.$tn.$s $q $res
    set t($s) [lindex [time [list execsql $q] 100] 0]
  }
  puts "with optimization: $t(0)    without: $t(1)"
}

finish_test