SQLite

Check-in [d3f4ae8275]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Bug fix in the FTS1 snippet generator. Improvements in the way the snippet generator handles whitespace. (CVS 3448)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: d3f4ae827582bd0aac54ae3211d272a1429b6523
User & Date: drh 2006-09-28 18:37:16.000
Context
2006-09-28
18:58
More snippet generator improvements and test cases. (CVS 3449) (check-in: 0934d220b3 user: drh tags: trunk)
18:37
Bug fix in the FTS1 snippet generator. Improvements in the way the snippet generator handles whitespace. (CVS 3448) (check-in: d3f4ae8275 user: drh tags: trunk)
11:41
Avoid segfaults when inserting NULL values into FTS1. (CVS 3447) (check-in: 165645d301 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts1/fts1.c.
2293
2294
2295
2296
2297
2298
2299





















2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
    }
    if( isspace(zDoc[iBreak+i]) ){
      return iBreak + i + 1;
    }
  }
  return iBreak;
}






















/*
** Allowed values for Snippet.aMatch[].snStatus
*/
#define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
#define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */  

/*
** Generate the text of a snippet.
*/
static void snippetText(
  fulltext_cursor *pCursor,   /* The cursor we need the snippet for */
  const char *zStartMark,     /* Markup to appear before each match */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|







2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
    }
    if( isspace(zDoc[iBreak+i]) ){
      return iBreak + i + 1;
    }
  }
  return iBreak;
}

/*
** If the StringBuffer is non-empty and does not already end in white
** space, add a single space character to the end.  An empty buffer is
** left untouched.
**
** The last byte is converted to unsigned char before it is handed to
** isspace().  The <ctype.h> classification functions have undefined
** behavior when given a negative value other than EOF, which is what a
** byte with the high bit set becomes if it is stored as a signed char.
*/
static void appendWhiteSpace(StringBuffer *p){
  if( p->len==0 ) return;
  if( isspace((unsigned char)p->s[p->len-1]) ) return;
  append(p, " ");
}

/*
** Remove white space from the end of the StringBuffer.
**
** Only the recorded length (p->len) is reduced; the bytes that are
** trimmed off are not overwritten here.
**
** Each byte is converted to unsigned char before it is handed to
** isspace().  The <ctype.h> classification functions have undefined
** behavior when given a negative value other than EOF, which is what a
** byte with the high bit set becomes if it is stored as a signed char.
*/
static void trimWhiteSpace(StringBuffer *p){
  while( p->len>0 && isspace((unsigned char)p->s[p->len-1]) ){
    p->len--;
  }
}



/*
** Allowed values for Snippet.aMatch[].snStatus
*/
#define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
#define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */

/*
** Generate the text of a snippet.
*/
static void snippetText(
  fulltext_cursor *pCursor,   /* The cursor we need the snippet for */
  const char *zStartMark,     /* Markup to appear before each match */
2364
2365
2366
2367
2368
2369
2370



2371
2372
2373

2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385



2386

2387
2388
2389
2390
2391
2392
2393
    }else{
      wantEllipsis = 1;
    }
    if( iCol==tailCol && iStart<=tailOffset+20 ){
      iStart = tailOffset;
      wantEllipsis = 0;
      tailEllipsis = 0;



    }
    if( wantEllipsis || tailEllipsis ){
      append(&sb, zEllipsis);

    }
    iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
    iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
    if( iEnd>=nDoc-10 ){
      iEnd = nDoc;
      tailEllipsis = 0;
    }else{
      tailEllipsis = 1;
    }
    while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
    while( iStart<iEnd ){
      while( iMatch<nMatch && aMatch[iMatch].iStart<iStart ){ iMatch++; }



      if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd ){

        nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
        iStart = aMatch[iMatch].iStart;
        append(&sb, zStartMark);
        nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
        append(&sb, zEndMark);
        iStart += aMatch[iMatch].nByte;
        for(j=iMatch+1; j<nMatch; j++){







>
>
>



>











|
>
>
>
|
>







2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
    }else{
      wantEllipsis = 1;
    }
    if( iCol==tailCol && iStart<=tailOffset+20 ){
      iStart = tailOffset;
      wantEllipsis = 0;
      tailEllipsis = 0;
    }
    if( iCol!=tailCol || iStart!=tailOffset ){
      appendWhiteSpace(&sb);
    }
    if( wantEllipsis || tailEllipsis ){
      append(&sb, zEllipsis);
      appendWhiteSpace(&sb);
    }
    iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
    iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
    if( iEnd>=nDoc-10 ){
      iEnd = nDoc;
      tailEllipsis = 0;
    }else{
      tailEllipsis = 1;
    }
    while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
    while( iStart<iEnd ){
      while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
             && aMatch[iMatch].iCol<=iCol ){
        iMatch++;
      }
      if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
             && aMatch[iMatch].iCol==iCol ){
        nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
        iStart = aMatch[iMatch].iStart;
        append(&sb, zStartMark);
        nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
        append(&sb, zEndMark);
        iStart += aMatch[iMatch].nByte;
        for(j=iMatch+1; j<nMatch; j++){
2401
2402
2403
2404
2405
2406
2407

2408

2409
2410
2411
2412
2413
2414
2415
        nappend(&sb, &zDoc[iStart], iEnd - iStart);
        iStart = iEnd;
      }
    }
    tailCol = iCol;
    tailOffset = iEnd;
  }

  if( tailEllipsis ){

    append(&sb, zEllipsis);
  }
  pCursor->snippet.zSnippet = sb.s;
  pCursor->snippet.nSnippet = sb.len;  
}









>

>







2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
        nappend(&sb, &zDoc[iStart], iEnd - iStart);
        iStart = iEnd;
      }
    }
    tailCol = iCol;
    tailOffset = iEnd;
  }
  trimWhiteSpace(&sb);
  if( tailEllipsis ){
    appendWhiteSpace(&sb);
    append(&sb, zEllipsis);
  }
  pCursor->snippet.zSnippet = sb.s;
  pCursor->snippet.nSnippet = sb.len;  
}


Changes to test/fts1c.test.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 2006 September 14
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS1 module.
#
# $Id: fts1c.test,v 1.7 2006/09/21 02:03:11 drh Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS1 is not defined, omit this file.
ifcapable !fts1 {













|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 2006 September 14
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS1 module.
#
# $Id: fts1c.test,v 1.8 2006/09/28 18:37:16 drh Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS1 is not defined, omit this file.
ifcapable !fts1 {
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157




1158
1159

1160
1161
  }
} {{Alert Posted 10:00 AM November 20,2000: E-<b>GAS</b> Request <b>Reminder</b>}}
do_test fts1c-4.2 {
  execsql {
    SELECT snippet(email) FROM email
     WHERE email MATCH 'christmas candlelight'
  }
} {{<b>...</b>place.? What do you think about going here <b>Christmas</b> 
eve?? They have an 11:00 a.m. service and a <b>candlelight</b> service at 5:00 p.m., 
among others.

<b>...</b>}}

do_test fts1c-4.3 {
  execsql {
    SELECT snippet(email) FROM email
     WHERE email MATCH 'deal sheet potential reuse'
  }
} {{EOL-Accenture <b>Deal</b> <b>Sheet</b><b>...</b>intent
     Review Enron asset base for <b>potential</b> <b>reuse</b>/ licensing
     Contract negotiations

<b>...</b>}}
do_test fts1c-4.4 {
  execsql {
    SELECT snippet(email,'<<<','>>>',' ') FROM email
     WHERE email MATCH 'deal sheet potential reuse'
  }
} {{EOL-Accenture <<<Deal>>> <<<Sheet>>> intent
     Review Enron asset base for <<<potential>>> <<<reuse>>>/ licensing
     Contract negotiations





 }}


finish_test







|

|
<
<






|

|
<
<





|

|
>
>
>
>
|
<
>


1127
1128
1129
1130
1131
1132
1133
1134
1135
1136


1137
1138
1139
1140
1141
1142
1143
1144
1145


1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158

1159
1160
1161
  }
} {{Alert Posted 10:00 AM November 20,2000: E-<b>GAS</b> Request <b>Reminder</b>}}
do_test fts1c-4.2 {
  execsql {
    SELECT snippet(email) FROM email
     WHERE email MATCH 'christmas candlelight'
  }
} {{<b>...</b> place.? What do you think about going here <b>Christmas</b> 
eve?? They have an 11:00 a.m. service and a <b>candlelight</b> service at 5:00 p.m., 
among others. <b>...</b>}}



do_test fts1c-4.3 {
  execsql {
    SELECT snippet(email) FROM email
     WHERE email MATCH 'deal sheet potential reuse'
  }
} {{EOL-Accenture <b>Deal</b> <b>Sheet</b> <b>...</b> intent
     Review Enron asset base for <b>potential</b> <b>reuse</b>/ licensing
     Contract negotiations <b>...</b>}}


do_test fts1c-4.4 {
  execsql {
    SELECT snippet(email,'<<<','>>>',' ') FROM email
     WHERE email MATCH 'deal sheet potential reuse'
  }
} {{EOL-Accenture <<<Deal>>> <<<Sheet>>>  intent
     Review Enron asset base for <<<potential>>> <<<reuse>>>/ licensing
     Contract negotiations  }}
do_test fts1c-4.5 {
  execsql {
    SELECT snippet(email,'<<<','>>>',' ') FROM email
     WHERE email MATCH 'first things'
  }

} {{Re: <<<First>>> Polish Deal! Congrats!  <<<Things>>> seem to be building rapidly now on the  }}

finish_test