SQLite

Check-in [c564bf8701]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix problems in fts5 found by ASAN.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: c564bf870106faef297594a51995619c80311d06bd5f8a0c7644f666f22ba576
User & Date: dan 2018-12-28 07:37:22.836
Context
2018-12-28
13:57
Fix a buffer overwrite in fts5 triggered by a corrupt database. (check-in: a385298df2 user: dan tags: trunk)
07:37
Fix problems in fts5 found by ASAN. (check-in: c564bf8701 user: dan tags: trunk)
2018-12-27
20:12
Fix another problem with corrupt database handling in fts5. (check-in: fb0d7fba07 user: dan tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3_unicode2.c.
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT, 
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',       
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',       
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',       
    'w',       'x',       'y',       'z',       'h',       't',       
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT, 
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',  
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){







|







196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT, 
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',       
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',       
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',       
    'w',       'x',       'y',       'z',       'h',       't',       
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT, 
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',       
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
** is a diacritical modifier character.
*/
int sqlite3FtsUnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  return (c < 768+32) ?
      (mask0 & (1 << (c-768))) :
      (mask1 & (1 << (c-768-32)));
}


/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.







|
|







227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
** is a diacritical modifier character.
*/
int sqlite3FtsUnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}


/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
Changes to ext/fts3/unicode/mkunicode.tcl.
59
60
61
62
63
64
65

66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
    incr i

    puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]]
    puts -nonewline ", "
  }
  puts ""
  puts "  \};"

  puts "  char aChar\[\] = \{"
  puts -nonewline "    '\\0',      "
  set i 1
  foreach c $aChar f $aFlag {
    if { $f } {
      set str "'$c'|0x80,  "
    } else {
      set str "'$c'|0x00,  "
    }
    if {$c == ""} { set str "'\\0',      " }

    if {($i % 6)==0} {puts "" ; puts -nonewline "    " }
    incr i
    puts -nonewline "$str"
  }







>





|

|







59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
    incr i

    puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]]
    puts -nonewline ", "
  }
  puts ""
  puts "  \};"
  puts "#define HIBIT ((char)0x80)"
  puts "  char aChar\[\] = \{"
  puts -nonewline "    '\\0',      "
  set i 1
  foreach c $aChar f $aFlag {
    if { $f } {
      set str "'$c'|HIBIT, "
    } else {
      set str "'$c',       "
    }
    if {$c == ""} { set str "'\\0',      " }

    if {($i % 6)==0} {puts "" ; puts -nonewline "    " }
    incr i
    puts -nonewline "$str"
  }
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
  puts "*/"
  puts "int ${zFunc}\(int c)\{"
  puts "  unsigned int mask0 = [format "0x%08X" $i1];"
  puts "  unsigned int mask1 = [format "0x%08X" $i2];"

  puts "  if( c<$iFirst || c>$iLast ) return 0;"
  puts "  return (c < $iFirst+32) ?"
  puts "      (mask0 & (1 << (c-$iFirst))) :"
  puts "      (mask1 & (1 << (c-$iFirst-32)));"
  puts "\}"
}


#-------------------------------------------------------------------------

proc an_load_separator_ranges {} {







|
|







131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
  puts "*/"
  puts "int ${zFunc}\(int c)\{"
  puts "  unsigned int mask0 = [format "0x%08X" $i1];"
  puts "  unsigned int mask1 = [format "0x%08X" $i2];"

  puts "  if( c<$iFirst || c>$iLast ) return 0;"
  puts "  return (c < $iFirst+32) ?"
  puts "      (mask0 & ((unsigned int)1 << (c-$iFirst))) :"
  puts "      (mask1 & ((unsigned int)1 << (c-$iFirst-32)));"
  puts "\}"
}


#-------------------------------------------------------------------------

proc an_load_separator_ranges {} {
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
  set aMapArray [intarray $aMap]
  set aDataArray [intarray $aData]
  puts [code {
    static u16 aFts5UnicodeBlock[] = {$aBlockArray};
    static u16 aFts5UnicodeMap[] = {$aMapArray};
    static u16 aFts5UnicodeData[] = {$aDataArray};

    int sqlite3Fts5UnicodeCategory(int iCode) { 
      int iRes = -1;
      int iHi;
      int iLo;
      int ret;
      u16 iKey;

      if( iCode>=(1<<20) ){







|







696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
  set aMapArray [intarray $aMap]
  set aDataArray [intarray $aData]
  puts [code {
    static u16 aFts5UnicodeBlock[] = {$aBlockArray};
    static u16 aFts5UnicodeMap[] = {$aMapArray};
    static u16 aFts5UnicodeData[] = {$aDataArray};

    int sqlite3Fts5UnicodeCategory(u32 iCode) { 
      int iRes = -1;
      int iHi;
      int iLo;
      int ret;
      u16 iKey;

      if( iCode>=(1<<20) ){
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
        if( aCP[iCP].iCode==i ){
          sqlite3Fts5UnicodeCatParse(aCP[iCP].zCat, aArray);
          iCP++;
        }else{
          aArray[0] = 1;
        }

        c = sqlite3Fts5UnicodeCategory(i);
        if( aArray[c]==0 ){
          *piCode = i;
          return 1;
        }
      }

      return 0;







|







779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
        if( aCP[iCP].iCode==i ){
          sqlite3Fts5UnicodeCatParse(aCP[iCP].zCat, aArray);
          iCP++;
        }else{
          aArray[0] = 1;
        }

        c = sqlite3Fts5UnicodeCategory((u32)i);
        if( aArray[c]==0 ){
          *piCode = i;
          return 1;
        }
      }

      return 0;
Changes to ext/fts5/fts5Int.h.
784
785
786
787
788
789
790
791
792
793
794
795
796
797
/**************************************************************************
** Interface to automatically generated code in fts5_unicode2.c. 
*/
int sqlite3Fts5UnicodeIsdiacritic(int c);
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);

int sqlite3Fts5UnicodeCatParse(const char*, u8*);
int sqlite3Fts5UnicodeCategory(int iCode);
void sqlite3Fts5UnicodeAscii(u8*, u8*);
/*
** End of interface to code in fts5_unicode2.c.
**************************************************************************/

#endif







|






784
785
786
787
788
789
790
791
792
793
794
795
796
797
/**************************************************************************
** Interface to automatically generated code in fts5_unicode2.c. 
*/
int sqlite3Fts5UnicodeIsdiacritic(int c);
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);

int sqlite3Fts5UnicodeCatParse(const char*, u8*);
int sqlite3Fts5UnicodeCategory(u32 iCode);
void sqlite3Fts5UnicodeAscii(u8*, u8*);
/*
** End of interface to code in fts5_unicode2.c.
**************************************************************************/

#endif
Changes to ext/fts5/fts5_expr.c.
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
    return;
  }
  memset(aArr, 0, sizeof(aArr));
  sqlite3Fts5UnicodeCatParse("L*", aArr);
  sqlite3Fts5UnicodeCatParse("N*", aArr);
  sqlite3Fts5UnicodeCatParse("Co", aArr);
  iCode = sqlite3_value_int(apVal[0]);
  sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory(iCode)]);
}

static void fts5ExprFold(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){







|







2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
    return;
  }
  memset(aArr, 0, sizeof(aArr));
  sqlite3Fts5UnicodeCatParse("L*", aArr);
  sqlite3Fts5UnicodeCatParse("N*", aArr);
  sqlite3Fts5UnicodeCatParse("Co", aArr);
  iCode = sqlite3_value_int(apVal[0]);
  sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]);
}

static void fts5ExprFold(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
Changes to ext/fts5/fts5_index.c.
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
      int i;
      u32 mask;
      memset(aUsed, 0, sizeof(aUsed));
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
          if( iId<=FTS5_MAX_SEGMENT ){
            aUsed[(iId-1) / 32] |= 1 << ((iId-1) % 32);
          }
        }
      }

      for(i=0; aUsed[i]==0xFFFFFFFF; i++);
      mask = aUsed[i];
      for(iSegid=0; mask & (1 << iSegid); iSegid++);
      iSegid += 1 + i*32;

#ifdef SQLITE_DEBUG
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
        }







|






|







3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
      int i;
      u32 mask;
      memset(aUsed, 0, sizeof(aUsed));
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
          if( iId<=FTS5_MAX_SEGMENT ){
            aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
          }
        }
      }

      for(i=0; aUsed[i]==0xFFFFFFFF; i++);
      mask = aUsed[i];
      for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
      iSegid += 1 + i*32;

#ifdef SQLITE_DEBUG
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
        }
6278
6279
6280
6281
6282
6283
6284
6285
6286
6287
6288
6289
6290
6291
6292
6293
  ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
  ** buffer overreads even if the record is corrupt.  */
  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);
  nSpace = n + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  memcpy(a, aBlob, n);


  fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( bDlidx ){
    Fts5Data dlidx;
    Fts5DlidxLvl lvl;







|
<







6278
6279
6280
6281
6282
6283
6284
6285

6286
6287
6288
6289
6290
6291
6292
  ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
  ** buffer overreads even if the record is corrupt.  */
  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);
  nSpace = n + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  if( n>0 ) memcpy(a, aBlob, n);


  fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( bDlidx ){
    Fts5Data dlidx;
    Fts5DlidxLvl lvl;
Changes to ext/fts5/fts5_test_tok.c.
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
  if( idxNum==1 ){
    const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
    int nByte = sqlite3_value_bytes(apVal[0]);
    pCsr->zInput = sqlite3_malloc(nByte+1);
    if( pCsr->zInput==0 ){
      rc = SQLITE_NOMEM;
    }else{
      memcpy(pCsr->zInput, zByte, nByte);
      pCsr->zInput[nByte] = 0;
      rc = pTab->tok.xTokenize(
          pTab->pTok, (void*)pCsr, 0, zByte, nByte, fts5tokCb
      );
    }
  }








|







374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
  if( idxNum==1 ){
    const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
    int nByte = sqlite3_value_bytes(apVal[0]);
    pCsr->zInput = sqlite3_malloc(nByte+1);
    if( pCsr->zInput==0 ){
      rc = SQLITE_NOMEM;
    }else{
      if( nByte>0 ) memcpy(pCsr->zInput, zByte, nByte);
      pCsr->zInput[nByte] = 0;
      rc = pTab->tok.xTokenize(
          pTab->pTok, (void*)pCsr, 0, zByte, nByte, fts5tokCb
      );
    }
  }

Changes to ext/fts5/fts5_tokenize.c.
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
      while( zCsr<zTerm ){
        int iCode;
        int bToken;
        READ_UTF8(zCsr, zTerm, iCode);
        if( iCode<128 ){
          p->aTokenChar[iCode] = (unsigned char)bTokenChars;
        }else{
          bToken = p->aCategory[sqlite3Fts5UnicodeCategory(iCode)];
          assert( (bToken==0 || bToken==1) ); 
          assert( (bTokenChars==0 || bTokenChars==1) );
          if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
            int i;
            for(i=0; i<nNew; i++){
              if( aNew[i]>iCode ) break;
            }







|







264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
      while( zCsr<zTerm ){
        int iCode;
        int bToken;
        READ_UTF8(zCsr, zTerm, iCode);
        if( iCode<128 ){
          p->aTokenChar[iCode] = (unsigned char)bTokenChars;
        }else{
          bToken = p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)];
          assert( (bToken==0 || bToken==1) ); 
          assert( (bTokenChars==0 || bTokenChars==1) );
          if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
            int i;
            for(i=0; i<nNew; i++){
              if( aNew[i]>iCode ) break;
            }
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
/*
** Return true if, for the purposes of tokenizing with the tokenizer
** passed as the first argument, codepoint iCode is considered a token 
** character (not a separator).
*/
static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
  return (
    p->aCategory[sqlite3Fts5UnicodeCategory(iCode)]
    ^ fts5UnicodeIsException(p, iCode)
  );
}

static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,







|







425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
/*
** Return true if, for the purposes of tokenizing with the tokenizer
** passed as the first argument, codepoint iCode is considered a token 
** character (not a separator).
*/
static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
  return (
    p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)]
    ^ fts5UnicodeIsException(p, iCode)
  );
}

static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
Changes to ext/fts5/fts5_unicode2.c.
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT, 
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',       
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',       
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',       
    'w',       'x',       'y',       'z',       'h',       't',       
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT, 
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',  
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){







|







65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT, 
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',       
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',       
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',       
    'w',       'x',       'y',       'z',       'h',       't',       
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT, 
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',       
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
** is a diacritical modifier character.
*/
int sqlite3Fts5UnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  return (c < 768+32) ?
      (mask0 & (1 << (c-768))) :
      (mask1 & (1 << (c-768-32)));
}


/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.







|
|







96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
** is a diacritical modifier character.
*/
int sqlite3Fts5UnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}


/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
244
245
246
247
248
249
250

251
252
253
254
255
256
257
  
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}


int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ 
  aArray[0] = 1;
  switch( zCat[0] ){
    case 'C':
          switch( zCat[1] ){
            case 'c': aArray[1] = 1; break;







>







244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
  
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}


int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ 
  aArray[0] = 1;
  switch( zCat[0] ){
    case 'C':
          switch( zCat[1] ){
            case 'c': aArray[1] = 1; break;
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
    89,    1434,  3226,  506,   474,   506,   506,   367,   1018,  1946,  
    1402,  954,   1402,  314,   90,    1082,  218,   2266,  666,   1210,  
    186,   570,   2042,  58,    5850,  154,   2010,  154,   794,   2266,  
    378,   2266,  3738,  39,    39,    39,    39,    39,    39,    17351, 
    34,    3074,  7692,  63,    63,    
  };

int sqlite3Fts5UnicodeCategory(int iCode) { 
  int iRes = -1;
  int iHi;
  int iLo;
  int ret;
  u16 iKey;

  if( iCode>=(1<<20) ){







|







727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
    89,    1434,  3226,  506,   474,   506,   506,   367,   1018,  1946,  
    1402,  954,   1402,  314,   90,    1082,  218,   2266,  666,   1210,  
    186,   570,   2042,  58,    5850,  154,   2010,  154,   794,   2266,  
    378,   2266,  3738,  39,    39,    39,    39,    39,    39,    17351, 
    34,    3074,  7692,  63,    63,    
  };

int sqlite3Fts5UnicodeCategory(u32 iCode) { 
  int iRes = -1;
  int iHi;
  int iLo;
  int ret;
  u16 iKey;

  if( iCode>=(1<<20) ){
769
770
771
772
773
774
775

    int n = (aFts5UnicodeData[iTbl] >> 5) + i;
    for(; i<128 && i<n; i++){
      aAscii[i] = bToken;
    }
    iTbl++;
  }
}








>
770
771
772
773
774
775
776
777
    int n = (aFts5UnicodeData[iTbl] >> 5) + i;
    for(; i<128 && i<n; i++){
      aAscii[i] = bToken;
    }
    iTbl++;
  }
}

Changes to ext/fts5/test/fts5unicode3.test.
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc fts3_unicode_path {file} {
  file join [file dirname [info script]] .. .. fts3 unicode $file
}

source [fts3_unicode_path parseunicode.tcl]
set testprefix fts5unicode3

set CF [fts3_unicode_path CaseFolding.txt]
set UD [fts3_unicode_path UnicodeData.txt]







|







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc fts3_unicode_path {file} {
  file join .. [file dirname [info script]] .. .. fts3 unicode $file
}

source [fts3_unicode_path parseunicode.tcl]
set testprefix fts5unicode3

set CF [fts3_unicode_path CaseFolding.txt]
set UD [fts3_unicode_path UnicodeData.txt]