SQLite

Check-in [fa0f2f0e3e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a performance problem in the FTS4 auxiliary functions triggered by an OR clause in the full-text query.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: fa0f2f0e3e79ae653118b901e1cca7725dfaf249
User & Date: dan 2013-09-30 18:14:45.059
Context
2013-09-30
19:05
Add some timing tests to the amatch test script. (check-in: ad71c72be5 user: drh tags: trunk)
18:16
Merge trunk changes with this branch. (check-in: e294a9c7c5 user: dan tags: fts4-docid-range-constraints)
18:14
Fix a performance problem in the FTS4 auxiliary functions triggered by an OR clause in the full-text query. (check-in: fa0f2f0e3e user: dan tags: trunk)
17:37
Fix memory leaks in the amatch extension. Add a few simple test cases. (check-in: 604134732e user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
5264
5265
5266
5267
5268
5269
5270

5271
5272

5273

5274
5275
5276
5277



5278
5279


5280
5281
5282
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298


5299


5300


5301

5302

5303
5304
5305
5306

5307
5308
















5309
5310
5311
5312
5313
5314
5315




5316
5317
5318
5319
5320

5321
5322
5323
5324
5325
5326
5327
    return SQLITE_OK;
  }

  iDocid = pExpr->iDocid;
  pIter = pPhrase->doclist.pList;
  if( iDocid!=pCsr->iPrevId || pExpr->bEof ){
    int bDescDoclist = pTab->bDescIdx;      /* For DOCID_CMP macro */

    int bOr = 0;
    u8 bEof = 0;

    Fts3Expr *p;


    /* Check if this phrase descends from an OR expression node. If not, 
    ** return NULL. Otherwise, the entry that corresponds to docid 
    ** pCsr->iPrevId may lie earlier in the doclist buffer. */



    for(p=pExpr->pParent; p; p=p->pParent){
      if( p->eType==FTSQUERY_OR ) bOr = 1;


    }
    if( bOr==0 ) return SQLITE_OK;

    /* This is the descendent of an OR node. In this case we cannot use
    ** an incremental phrase. Load the entire doclist for the phrase
    ** into memory in this case.  */
    if( pPhrase->bIncr ){
      int rc = SQLITE_OK;
      int bEofSave = pExpr->bEof;
      fts3EvalRestart(pCsr, pExpr, &rc);
      while( rc==SQLITE_OK && !pExpr->bEof ){
        fts3EvalNextRow(pCsr, pExpr, &rc);
        if( bEofSave==0 && pExpr->iDocid==iDocid ) break;
      }
      pIter = pPhrase->doclist.pList;
      assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
      if( rc!=SQLITE_OK ) return rc;
    }



    if( pExpr->bEof ){


      pIter = 0;


      iDocid = 0;

    }

    bEof = (pPhrase->doclist.nAll==0);
    assert( bDescDoclist==0 || bDescDoclist==1 );
    assert( pCsr->bDesc==0 || pCsr->bDesc==1 );


    if( pCsr->bDesc==bDescDoclist ){
      int dummy;
















      while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
        sqlite3Fts3DoclistPrev(
            bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, 
            &pIter, &iDocid, &dummy, &bEof
        );
      }
    }else{




      while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
        sqlite3Fts3DoclistNext(
            bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, 
            &pIter, &iDocid, &bEof
        );

      }
    }

    if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0;
  }
  if( pIter==0 ) return SQLITE_OK;








>


>
|
>



|
>
>
>


>
>


















|
>
>
|
>
>
|
>
>
|
>

>




>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
>
>
>
>
|
|
|
|
|
>







5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
5274
5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327
5328
5329
5330
5331
5332
5333
5334
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
    return SQLITE_OK;
  }

  iDocid = pExpr->iDocid;
  pIter = pPhrase->doclist.pList;
  if( iDocid!=pCsr->iPrevId || pExpr->bEof ){
    int bDescDoclist = pTab->bDescIdx;      /* For DOCID_CMP macro */
    int iMul;                     /* +1 if csr dir matches index dir, else -1 */
    int bOr = 0;
    u8 bEof = 0;
    u8 bTreeEof = 0;
    Fts3Expr *p;                  /* Used to iterate from pExpr to root */
    Fts3Expr *pNear;              /* Most senior NEAR ancestor (or pExpr) */

    /* Check if this phrase descends from an OR expression node. If not, 
    ** return NULL. Otherwise, the entry that corresponds to docid 
    ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the
    ** tree that the node is part of has been marked as EOF, but the node
    ** itself is not EOF, then it may point to an earlier entry. */
    pNear = pExpr;
    for(p=pExpr->pParent; p; p=p->pParent){
      if( p->eType==FTSQUERY_OR ) bOr = 1;
      if( p->eType==FTSQUERY_NEAR ) pNear = p;
      if( p->bEof ) bTreeEof = 1;
    }
    if( bOr==0 ) return SQLITE_OK;

    /* This is the descendent of an OR node. In this case we cannot use
    ** an incremental phrase. Load the entire doclist for the phrase
    ** into memory in this case.  */
    if( pPhrase->bIncr ){
      int rc = SQLITE_OK;
      int bEofSave = pExpr->bEof;
      fts3EvalRestart(pCsr, pExpr, &rc);
      while( rc==SQLITE_OK && !pExpr->bEof ){
        fts3EvalNextRow(pCsr, pExpr, &rc);
        if( bEofSave==0 && pExpr->iDocid==iDocid ) break;
      }
      pIter = pPhrase->doclist.pList;
      assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
      if( rc!=SQLITE_OK ) return rc;
    }
    
    iMul = ((pCsr->bDesc==bDescDoclist) ? 1 : -1);
    while( bTreeEof==1 
        && pNear->bEof==0
        && (DOCID_CMP(pNear->iDocid, pCsr->iPrevId) * iMul)<0
    ){
      int rc = SQLITE_OK;
      fts3EvalNextRow(pCsr, pExpr, &rc);
      if( rc!=SQLITE_OK ) return rc;
      iDocid = pExpr->iDocid;
      pIter = pPhrase->doclist.pList;
    }

    bEof = (pPhrase->doclist.nAll==0);
    assert( bDescDoclist==0 || bDescDoclist==1 );
    assert( pCsr->bDesc==0 || pCsr->bDesc==1 );

    if( bEof==0 ){
      if( pCsr->bDesc==bDescDoclist ){
        int dummy;
        if( pNear->bEof ){
          /* This expression is already at EOF. So position it to point to the
          ** last entry in the doclist at pPhrase->doclist.aAll[]. Variable
          ** iDocid is already set for this entry, so all that is required is
          ** to set pIter to point to the first byte of the last position-list
          ** in the doclist. 
          **
          ** It would also be correct to set pIter and iDocid to zero. In
          ** this case, the first call to sqltie3Fts4DoclistPrev() below
          ** would also move the iterator to point to the last entry in the 
          ** doclist. However, this is expensive, as to do so it has to 
          ** iterate through the entire doclist from start to finish (since
          ** it does not know the docid for the last entry).  */
          pIter = &pPhrase->doclist.aAll[pPhrase->doclist.nAll-1];
          fts3ReversePoslist(pPhrase->doclist.aAll, &pIter);
        }
        while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
          sqlite3Fts3DoclistPrev(
              bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, 
              &pIter, &iDocid, &dummy, &bEof
          );
        }
      }else{
        if( pNear->bEof ){
          pIter = 0;
          iDocid = 0;
        }
        while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
          sqlite3Fts3DoclistNext(
              bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, 
              &pIter, &iDocid, &bEof
          );
        }
      }
    }

    if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0;
  }
  if( pIter==0 ) return SQLITE_OK;

Changes to test/fts3snippet.test.
12
13
14
15
16
17
18

19
20
21
22
23
24
25
# The tests in this file test the FTS3 auxillary functions offsets(), 
# snippet() and matchinfo() work. At time of writing, running this file 
# provides full coverage of fts3_snippet.c.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl


# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 { finish_test ; return }
source $testdir/fts3_common.tcl

set sqlite_fts3_enable_parentheses 1
set DO_MALLOC_TEST 0







>







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# The tests in this file test the FTS3 auxillary functions offsets(), 
# snippet() and matchinfo() work. At time of writing, running this file 
# provides full coverage of fts3_snippet.c.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix fts3snippet

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 { finish_test ; return }
source $testdir/fts3_common.tcl

set sqlite_fts3_enable_parentheses 1
set DO_MALLOC_TEST 0
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
  forcedelete test.db
  sqlite3 db test.db
  sqlite3_db_config_lookaside db 0 0 0
  db eval "PRAGMA encoding = \"$enc\""

  # Set variable $T to the test name prefix for this iteration of the loop.
  #
  set T "fts3snippet-$enc"

  ##########################################################################
  # Test the offset function.
  #
  do_test $T.1.1 {
    execsql {
      CREATE VIRTUAL TABLE ft USING fts3;







|







135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
  forcedelete test.db
  sqlite3 db test.db
  sqlite3_db_config_lookaside db 0 0 0
  db eval "PRAGMA encoding = \"$enc\""

  # Set variable $T to the test name prefix for this iteration of the loop.
  #
  set T "fts3snippet-1.$enc"

  ##########################################################################
  # Test the offset function.
  #
  do_test $T.1.1 {
    execsql {
      CREATE VIRTUAL TABLE ft USING fts3;
454
455
456
457
458
459
460
461




























































462
463
  do_select_test $T.10.5 {
    SELECT length(matchinfo(ft)), typeof(matchinfo(ft)) FROM ft;
  } {0 blob 0 blob 0 blob}
  do_select_test $T.10.6 {
    SELECT length(matchinfo(ft)), typeof(matchinfo(ft)) FROM ft WHERE rowid = $r
  } {0 blob}
}





























































set sqlite_fts3_enable_parentheses 0
finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
  do_select_test $T.10.5 {
    SELECT length(matchinfo(ft)), typeof(matchinfo(ft)) FROM ft;
  } {0 blob 0 blob 0 blob}
  do_select_test $T.10.6 {
    SELECT length(matchinfo(ft)), typeof(matchinfo(ft)) FROM ft WHERE rowid = $r
  } {0 blob}
}

#-------------------------------------------------------------------------
# Test an interaction between the snippet() function and OR clauses.
#
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE t2 USING fts4;
  INSERT INTO t2 VALUES('one two three four five');
  INSERT INTO t2 VALUES('two three four five one');
  INSERT INTO t2 VALUES('three four five one two');
  INSERT INTO t2 VALUES('four five one two three');
  INSERT INTO t2 VALUES('five one two three four');
}

do_execsql_test 2.2 {
  SELECT snippet(t2, '[', ']') FROM t2 WHERE t2 MATCH 'one OR (four AND six)'
} {
  {[one] two three [four] five}
  {two three [four] five [one]}
  {three [four] five [one] two}
  {[four] five [one] two three}
  {five [one] two three [four]}
}

do_execsql_test 2.3 {
  SELECT snippet(t2, '[', ']') FROM t2 
  WHERE t2 MATCH 'one OR (four AND six)' 
  ORDER BY docid DESC
} {
  {five [one] two three [four]}
  {[four] five [one] two three}
  {three [four] five [one] two}
  {two three [four] five [one]}
  {[one] two three [four] five}
}

do_execsql_test 2.4 {
  INSERT INTO t2 VALUES('six');
}

do_execsql_test 2.5 {
  SELECT snippet(t2, '[', ']') FROM t2 WHERE t2 MATCH 'one OR (four AND six)'
} {
  {[one] two three [four] five}
  {two three [four] five [one]}
  {three [four] five [one] two}
  {[four] five [one] two three}
  {five [one] two three [four]}
}

do_execsql_test 2.6 {
  SELECT snippet(t2, '[', ']') FROM t2 
  WHERE t2 MATCH 'one OR (four AND six)' 
  ORDER BY docid DESC
} {
  {five [one] two three [four]}
  {[four] five [one] two three}
  {three [four] five [one] two}
  {two three [four] five [one]}
  {[one] two three [four] five}
}

set sqlite_fts3_enable_parentheses 0
finish_test