SQLite

Check-in [3b5ccd2682]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fixes to problems in FTS3 snippet() function found by th3 tests.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 3b5ccd2682176929f4da8a3f39a7e8f58b179f18
User & Date: dan 2010-01-07 10:54:29.000
Context
2010-01-07
11:27
Changes to test code so that testfixture compiles when OMIT_SHARED_CACHE and OMIT_UTF16 are defined. (check-in: d6ee5ff6c8 user: dan tags: trunk)
10:54
Fixes to problems in FTS3 snippet() function found by th3 tests. (check-in: 3b5ccd2682 user: dan tags: trunk)
03:53
Another attempt at fixing the table generator in lemon. Again, this does not affect the SQLite grammar. (check-in: e22c090f35 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
  sqlite3_value **apVal           /* Array of arguments */
){
  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
  const char *zStart = "<b>";
  const char *zEnd = "</b>";
  const char *zEllipsis = "<b>...</b>";
  int iCol = -1;
  int nToken = 15;

  /* There must be at least one argument passed to this function (otherwise
  ** the non-overloaded version would have been called instead of this one).
  */
  assert( nVal>=1 );

  if( nVal>6 ){







|







2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
  sqlite3_value **apVal           /* Array of arguments */
){
  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
  const char *zStart = "<b>";
  const char *zEnd = "</b>";
  const char *zEllipsis = "<b>...</b>";
  int iCol = -1;
  int nToken = 15;                /* Default number of tokens in snippet */

  /* There must be at least one argument passed to this function (otherwise
  ** the non-overloaded version would have been called instead of this one).
  */
  assert( nVal>=1 );

  if( nVal>6 ){
Changes to ext/fts3/fts3_snippet.c.
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
/*
** Invoke callback x(pNode, pCtx) for FTSQUERY_PHRASE nodes of the
** expression tree rooted at pExpr, visiting the left sub-tree of each
** operator node before its right sub-tree.
**
** A node of type FTSQUERY_NOT is skipped entirely: neither of its
** children is visited and SQLITE_OK is returned for it.
**
** If iterating a left sub-tree returns anything other than SQLITE_OK,
** the corresponding right sub-tree is not visited and that value is
** returned. Otherwise the value returned is that of the last callback
** or recursive call made.
*/
static int fts3ExprIterate(
  Fts3Expr *pExpr,                /* Expression to iterate phrases of */
  int (*x)(Fts3Expr *, void *),   /* Callback function to invoke for phrases */
  void *pCtx                      /* Second argument to pass to callback */
){
  int rc;
  int eType = pExpr->eType;
  if( eType==FTSQUERY_NOT ){
    /* Nothing beneath a NOT node is passed to the callback. */
    rc = SQLITE_OK;
  }else if( eType!=FTSQUERY_PHRASE ){
    /* Every other non-phrase node is handled as a binary operator. */
    assert( pExpr->pLeft && pExpr->pRight );
    rc = fts3ExprIterate(pExpr->pLeft, x, pCtx);
    if( rc==SQLITE_OK ){
      rc = fts3ExprIterate(pExpr->pRight, x, pCtx);
    }
  }else{
    /* Leaf node: hand the phrase to the callback. */
    rc = x(pExpr, pCtx);
  }
  return rc;
}







<
<
|


|







41
42
43
44
45
46
47


48
49
50
51
52
53
54
55
56
57
58
/*
** Walk the expression tree rooted at pExpr and invoke x(pNode, pCtx) for
** FTSQUERY_PHRASE nodes, left sub-tree first.
**
** For a node of type FTSQUERY_NOT only the left sub-tree is walked; the
** right sub-tree is never visited. For any other operator node the right
** sub-tree is walked only if the left one returned SQLITE_OK.
**
** The value returned is that of the last callback or recursive call made.
*/
static int fts3ExprIterate(
  Fts3Expr *pExpr,                /* Expression to iterate phrases of */
  int (*x)(Fts3Expr *, void *),   /* Callback function to invoke for phrases */
  void *pCtx                      /* Second argument to pass to callback */
){
  int eNode = pExpr->eType;       /* Node type, sampled before any callback */
  int rcLeft;                     /* Result of walking the left sub-tree */

  /* Leaf node: hand the phrase straight to the callback. */
  if( eNode==FTSQUERY_PHRASE ){
    return x(pExpr, pCtx);
  }

  /* Operator node: both children must be present. */
  assert( pExpr->pLeft && pExpr->pRight );
  rcLeft = fts3ExprIterate(pExpr->pLeft, x, pCtx);

  /* Stop after the left side on error, or when this is a NOT node. */
  if( rcLeft!=SQLITE_OK || eNode==FTSQUERY_NOT ){
    return rcLeft;
  }
  return fts3ExprIterate(pExpr->pRight, x, pCtx);
}
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
  p->nPhrase++;
  p->nToken += pExpr->pPhrase->nToken;

  if( pExpr->isLoaded==0 ){
    rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr);
    pExpr->isLoaded = 1;
    if( rc==SQLITE_OK ){
      fts3ExprNearTrim(pExpr);
    }
  }

  return rc;
}

static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, void *ctx){







|







102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
  p->nPhrase++;
  p->nToken += pExpr->pPhrase->nToken;

  if( pExpr->isLoaded==0 ){
    rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr);
    pExpr->isLoaded = 1;
    if( rc==SQLITE_OK ){
      rc = fts3ExprNearTrim(pExpr);
    }
  }

  return rc;
}

static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, void *ctx){
455
456
457
458
459
460
461
462
463
464
465

466
467
468
469
470
471
472
473
474
475
476
477


478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521

522

523
524
525
526
527
528
529


530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548



549
550






551

552
553
554
555
556
557
558
559
560
561
562
563

564










565
566
567
568
569
570
571
      }
      pC->pTokenizer = pTab->pTokenizer;
      while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
        const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
        rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
      }
      pMod->xClose(pC);
      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){
        return rc;
      }
      nShift = iCurrent-nSnippet;

      if( nShift>0 ){
        *piPos += nShift;
        *pHlmask = hlmask >> nShift;
      }
    }
  }
  return SQLITE_OK;
}

/*
** Append the text of snippet fragment pFragment to buffer pOut.
**
** The document is read from column (pFragment->iCol+1) of pCsr->pStmt and
** scanned with the table's tokenizer. Tokens whose bit is set in the
** fragment's highlight mask are wrapped in zOpen/zClose. After the snippet
** text has been written one further token is requested from the tokenizer:
** if there is none, the remainder of the document is appended; if there is
** one and zEllipsis is not NULL, zEllipsis is appended.
**
** If the column value is NULL nothing is appended and SQLITE_OK is
** returned. SQLITE_NOMEM is returned if sqlite3_column_text() returns NULL
** for a non-NULL column, or if appending a highlighted token fails.
** Otherwise the return value is the result code of the last tokenizer,
** fts3SnippetShift() or fts3StringAppend() call made.
*/
static int fts3SnippetText(
  Fts3Cursor *pCsr,               /* FTS3 Cursor */
  SnippetFragment *pFragment,     /* Snippet to extract */


  int nSnippet,                   /* Number of tokens in extracted snippet */
  const char *zOpen,              /* String inserted before highlighted term */
  const char *zClose,             /* String inserted after highlighted term */
  const char *zEllipsis,          /* Appended if more tokens follow (or NULL) */
  StrBuffer *pOut                 /* Buffer the snippet text is appended to */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc;                         /* Return code */
  const char *zDoc;               /* Document text to extract snippet from */
  int nDoc;                       /* Size of zDoc in bytes */
  int iCurrent = 0;               /* Current token number of document */
  int iStart = 0;                 /* Offset in zDoc of first byte not yet output */
  int iEnd = 0;                   /* Byte offset of end of current token */
  int isShiftDone = 0;            /* NOTE(review): never set in this function */
  int iPos = pFragment->iPos;     /* First token of snippet (may be shifted) */
  u64 hlmask = pFragment->hlmask; /* Bit i set => highlight token (iPos+i) */

  sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
  sqlite3_tokenizer_cursor *pC;   /* Tokenizer cursor open on zDoc/nDoc */
  const char *ZDUMMY;             /* Dummy arguments used with tokenizer */
  int DUMMY1, DUMMY2, DUMMY3;     /* Dummy arguments used with tokenizer */
  
  zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1);
  if( zDoc==0 ){
    /* A NULL pointer for a non-NULL column value is treated as OOM. A
    ** genuinely NULL column contributes nothing to the output. */
    if( sqlite3_column_type(pCsr->pStmt, pFragment->iCol+1)!=SQLITE_NULL ){
      return SQLITE_NOMEM;
    }
    return SQLITE_OK;
  }
  nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1);

  /* Open a token cursor on the document. Read all tokens up to and
  ** including token iPos (the first token of the snippet). Set variable
  ** iStart to the byte offset in zDoc of the start of token iPos.
  */
  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
  rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pC->pTokenizer = pTab->pTokenizer;

  /* The loop ends as soon as rc is anything other than SQLITE_OK. This
  ** includes SQLITE_DONE, which is set either by the tokenizer or below
  ** once the end of the snippet window has been reached. */
  while( rc==SQLITE_OK ){
    int iBegin;                   /* Offset in zDoc of start of token */

    int iFin;                     /* Offset in zDoc of end of token */

    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);

    if( rc==SQLITE_OK ){
      /* Skip tokens that precede the snippet. */
      if( iCurrent<iPos ) continue;

      /* NOTE(review): isShiftDone is never set to a non-zero value in this
      ** function, so this branch is taken for every token at or past iPos.
      ** fts3SnippetShift() may increase iPos and shift hlmask. */
      if( !isShiftDone ){
        int n = nDoc - iBegin;


        rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
        if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
      }
      if( iCurrent==iPos ){
        /* First token of the snippet: output starts here. */
        iStart = iEnd = iBegin;
      }

      if( iCurrent>=(iPos+nSnippet) ){
        /* Past the last token of the snippet. Terminate the loop. */
        rc = SQLITE_DONE;
      }else{
        iEnd = iFin;
        if( hlmask & ((u64)1 << (iCurrent-iPos)) ){
          /* Highlighted token: flush the pending text that precedes it,
          ** then the token itself wrapped in zOpen/zClose. */
          if( fts3StringAppend(pOut, &zDoc[iStart], iBegin-iStart)
           || fts3StringAppend(pOut, zOpen, -1)
           || fts3StringAppend(pOut, &zDoc[iBegin], iEnd-iBegin)
           || fts3StringAppend(pOut, zClose, -1)
          ){
            rc = SQLITE_NOMEM;
          }



          iStart = iEnd;
        }






      }

    }
  }
  assert( rc!=SQLITE_OK );
  if( rc==SQLITE_DONE ){
    /* Flush any text not yet copied, up to the end of the last token
    ** that was accepted into the snippet. */
    rc = fts3StringAppend(pOut, &zDoc[iStart], iEnd-iStart);
    if( rc==SQLITE_OK ){
      /* Ask for one more token to decide how the snippet should end. */
      rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
      if( rc==SQLITE_DONE ){
        /* No further token: append the rest of the document. */
        rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
      }else if( rc==SQLITE_OK && zEllipsis ){
        /* More tokens follow: mark the truncation. */
        rc = fts3StringAppend(pOut, zEllipsis, -1);
      }

    }










  }

  pMod->xClose(pC);
  return rc;
}









|
<
|
|
>












>
>











<








|










|
<
<
<








|
>
|
>

|
|
<
|
<
|
>
>
|
<

|
<
|
<
|
<
<
<
<
<
<
<
<
<
<
|
>
>
>
|
|
>
>
>
>
>
>

>

|
<
|
<
|
<
<
<
<


>

>
>
>
>
>
>
>
>
>
>







453
454
455
456
457
458
459
460

461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488

489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508



509
510
511
512
513
514
515
516
517
518
519
520
521
522
523

524

525
526
527
528

529
530

531

532










533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548

549

550




551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
      }
      pC->pTokenizer = pTab->pTokenizer;
      while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
        const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
        rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
      }
      pMod->xClose(pC);
      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }


      nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
      assert( nShift<=nDesired );
      if( nShift>0 ){
        *piPos += nShift;
        *pHlmask = hlmask >> nShift;
      }
    }
  }
  return SQLITE_OK;
}

/*
** Append the text of snippet fragment pFragment to buffer pOut.
**
** The document is read from column (pFragment->iCol+1) of pCsr->pStmt and
** scanned with the table's tokenizer. Up to nSnippet tokens, starting at
** the fragment's token position as adjusted by fts3SnippetShift(), are
** copied to pOut together with the text between them. Tokens whose bit is
** set in the highlight mask are wrapped in zOpen/zClose.
**
** zEllipsis is appended before the fragment text if this is not the first
** fragment (iFragment>0) or if the fragment does not start at token 0. It
** is appended after the fragment text if isLast is true and the tokenizer
** returned a token beyond the end of the snippet window.
**
** If the column value is NULL nothing is appended and SQLITE_OK is
** returned. SQLITE_NOMEM is returned if sqlite3_column_text() returns NULL
** for a non-NULL column. Otherwise the return value is the result code of
** the last tokenizer, fts3SnippetShift() or fts3StringAppend() call made.
*/
static int fts3SnippetText(
  Fts3Cursor *pCsr,               /* FTS3 Cursor */
  SnippetFragment *pFragment,     /* Snippet to extract */
  int iFragment,                  /* Fragment number */
  int isLast,                     /* True for final fragment in snippet */
  int nSnippet,                   /* Number of tokens in extracted snippet */
  const char *zOpen,              /* String inserted before highlighted term */
  const char *zClose,             /* String inserted after highlighted term */
  const char *zEllipsis,          /* String marking omitted text */
  StrBuffer *pOut                 /* Buffer the snippet text is appended to */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc;                         /* Return code */
  const char *zDoc;               /* Document text to extract snippet from */
  int nDoc;                       /* Size of zDoc in bytes */
  int iCurrent = 0;               /* Current token number of document */

  int iEnd = 0;                   /* Byte offset of end of current token */
  int isShiftDone = 0;            /* True once fts3SnippetShift() was called */
  int iPos = pFragment->iPos;     /* First token of snippet (may be shifted) */
  u64 hlmask = pFragment->hlmask; /* Bit i set => highlight token (iPos+i) */

  sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
  sqlite3_tokenizer_cursor *pC;   /* Tokenizer cursor open on zDoc/nDoc */
  const char *ZDUMMY;             /* Dummy arguments used with tokenizer */
  int DUMMY1;                     /* Dummy arguments used with tokenizer */
  
  zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1);
  if( zDoc==0 ){
    /* A NULL pointer for a non-NULL column value is treated as OOM. A
    ** genuinely NULL column contributes nothing to the output. */
    if( sqlite3_column_type(pCsr->pStmt, pFragment->iCol+1)!=SQLITE_NULL ){
      return SQLITE_NOMEM;
    }
    return SQLITE_OK;
  }
  nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1);

  /* Open a token cursor on the document. */



  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
  rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pC->pTokenizer = pTab->pTokenizer;

  /* Each iteration handles one token. The loop ends via "break", or when
  ** rc is no longer SQLITE_OK at the top of the loop. */
  while( rc==SQLITE_OK ){
    int iBegin;                   /* Offset in zDoc of start of token */
    int iFin;                     /* Offset in zDoc of end of token */
    int isHighlight;              /* True if this token is highlighted */

    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_DONE ){

        /* Special case - the tokenizer has run out of tokens, so the last
        ** token of the snippet is also the last token of the column.
        ** Append any punctuation that occurred between the end of the
        ** previous token and the end of the document to the output.
        ** Then break out of the loop. */
        rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);

      }
      break;

    }

    /* Skip tokens that precede the snippet. */
    if( iCurrent<iPos ){ continue; }











    /* On the first token at or past iPos, give fts3SnippetShift() the
    ** chance to move the start of the snippet (iPos and hlmask are
    ** updated in place). This is done once only. */
    if( !isShiftDone ){
      int n = nDoc - iBegin;
      rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
      isShiftDone = 1;

      /* Now that the shift has been done, check if the initial "..." are
      ** required. They are required if (a) this is not the first fragment,
      ** or (b) this fragment does not begin at position 0 of its column.
      */
      if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
        rc = fts3StringAppend(pOut, zEllipsis, -1);
      }
      /* On error the "continue" re-tests the loop condition, which ends
      ** the loop. Otherwise keep skipping tokens if iPos was moved forward
      ** past the current token. */
      if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
    }


    /* A token beyond the snippet window ends the fragment. The trailing
    ** ellipsis is written only after the final fragment. */
    if( iCurrent>=(iPos+nSnippet) ){

      if( isLast ){




        rc = fts3StringAppend(pOut, zEllipsis, -1);
      }
      break;
    }

    /* Set isHighlight to true if this term should be highlighted. */
    isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;

    /* Copy the text between the previous token and this one (skipped for
    ** the first token of the snippet), then the token itself, wrapped in
    ** zOpen/zClose if it is highlighted. Each step runs only if the
    ** previous one returned SQLITE_OK. */
    if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
    if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);

    iEnd = iFin;
  }

  pMod->xClose(pC);
  return rc;
}


799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
    assert( (mCovered&mSeen)==mCovered );
    if( mSeen==mCovered ) break;
  }while( nSnippet<SizeofArray(aSnippet) );

  assert( nFToken>0 );

  for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
    SnippetFragment *p = &aSnippet[i];
    const char *zTail = ((i==nSnippet-1) ? zEllipsis : 0);

    if( i>0 || p->iPos>0 ){
      fts3StringAppend(&res, zEllipsis, -1);
    }
    rc = fts3SnippetText(pCsr, p, nFToken, zStart, zEnd, zTail, &res);
  }

 snippet_out:
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
    sqlite3_free(res.z);
  }else{







|
|
|
<
<
<
<







799
800
801
802
803
804
805
806
807
808




809
810
811
812
813
814
815
    assert( (mCovered&mSeen)==mCovered );
    if( mSeen==mCovered ) break;
  }while( nSnippet<SizeofArray(aSnippet) );

  assert( nFToken>0 );

  for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
    rc = fts3SnippetText(pCsr, &aSnippet[i], 
        i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
    );




  }

 snippet_out:
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
    sqlite3_free(res.z);
  }else{
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
          rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
        }
        if( rc==SQLITE_OK ){
          char aBuffer[64];
          sqlite3_snprintf(sizeof(aBuffer), aBuffer, 
              "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
          );
          fts3StringAppend(&res, aBuffer, -1);
        }
      }
    }
    if( rc==SQLITE_DONE ){
      rc = SQLITE_ERROR;
    }








|







943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
          rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
        }
        if( rc==SQLITE_OK ){
          char aBuffer[64];
          sqlite3_snprintf(sizeof(aBuffer), aBuffer, 
              "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
          );
          rc = fts3StringAppend(&res, aBuffer, -1);
        }
      }
    }
    if( rc==SQLITE_DONE ){
      rc = SQLITE_ERROR;
    }

Changes to test/fts3snippet.test.











1
2
3
4
5
6









7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27












set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 { finish_test ; return }










# Create FTS3 table "ft" and insert a single row whose text is the word
# 'xxx' repeated four times. The statements are expected to return no rows.
do_test fts3snippet-1.1 {
  execsql {
    CREATE VIRTUAL TABLE ft USING fts3;
    INSERT INTO ft VALUES('xxx xxx xxx xxx');
  }
} {}

# Return a list holding the same elements as $L, rebuilt one element at a
# time so that two lists with equal elements yield the same string.
proc normalize {L} {
  set out {}
  foreach item $L {
    lappend out $item
  }
  return $out
}

# Query for the single term 'xxx'. The expected offsets() value contains one
# group of four integers per occurrence: here "0 0 <byte offset> 3" for the
# words that start at byte offsets 0, 4, 8 and 12 of the inserted text.
do_test fts3snippet-1.2 {
  execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx' }
} {{0 0 0 3 0 0 4 3 0 0 8 3 0 0 12 3}}

do_test fts3snippet-1.3 {
  execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH '"xxx xxx"' }
} [list [normalize {
>
>
>
>
>
>
>
>
>
>
>




|

>
>
>
>
>
>
>
>
>








<
<
<
<
<
<







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34






35
36
37
38
39
40
41
# 2010 January 07
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 { finish_test ; return }

# Return a list holding the same elements as $L, rebuilt one element at a
# time. The result is the "normal" form of the list, so two lists with the
# same elements can be compared with [string compare].
#
proc normalize {L} {
  set out {}
  foreach item $L {
    lappend out $item
  }
  return $out
}

# Create FTS3 table "ft" and insert a single row whose text is the word
# 'xxx' repeated four times. The statements are expected to return no rows.
do_test fts3snippet-1.1 {
  execsql {
    CREATE VIRTUAL TABLE ft USING fts3;
    INSERT INTO ft VALUES('xxx xxx xxx xxx');
  }
} {}







# Query for the single term 'xxx'. The expected offsets() value contains one
# group of four integers per occurrence: here "0 0 <byte offset> 3" for the
# words that start at byte offsets 0, 4, 8 and 12 of the inserted text.
do_test fts3snippet-1.2 {
  execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx' }
} {{0 0 0 3 0 0 4 3 0 0 8 3 0 0 12 3}}

do_test fts3snippet-1.3 {
  execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH '"xxx xxx"' }
} [list [normalize {
59
60
61
62
63
64
65
66


























67
68
    0 2  4 3 
    0 0  8 3 
    0 1  8 3 
    0 2  8 3 
    0 0 12 3
    0 2 12 3
}]]



























finish_test









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
    0 2  4 3 
    0 0  8 3 
    0 1  8 3 
    0 2  8 3 
    0 0 12 3
    0 2 12 3
}]]

# Replace table "ft" with one that holds a single ten-word document
# ('one' through 'ten').
do_test fts3snippet-2.1 {
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3;
    INSERT INTO ft VALUES('one two three four five six seven eight nine ten');
  }
} {}

# Each row of the table below is: test number, MATCH expression, expected
# snippet() result. snippet() is called with '[' and ']' as the highlight
# markers and '...' as the ellipsis text; the trailing arguments 0 and 5 are
# presumably the column index and the number of tokens - every expected
# result holds five words. The expected strings show the ellipsis omitted on
# whichever side the snippet reaches the start or end of the document.
foreach {tn expr res} {
   1 one       "[one] two three four five..."
   2 two       "one [two] three four five..."
   3 three     "one two [three] four five..."
   4 four      "...two three [four] five six..."
   5 five      "...three four [five] six seven..."
   6 six       "...four five [six] seven eight..."
   7 seven     "...five six [seven] eight nine..."
   8 eight     "...six seven [eight] nine ten"
   9 nine      "...six seven eight [nine] ten"
  10 ten       "...six seven eight nine [ten]"
} {
  do_test fts3snippet-2.2.$tn {
    execsql {
      SELECT snippet(ft, '[', ']', '...', 0, 5) FROM ft WHERE ft MATCH $expr
    }
  } [list $res]
}

finish_test