/ Check-in [c564bf87]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix problems in fts5 found by ASAN.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: c564bf870106faef297594a51995619c80311d06bd5f8a0c7644f666f22ba576
User & Date: dan 2018-12-28 07:37:22
Context
2018-12-28
13:57
Fix a buffer overwrite in fts5 triggered by a corrupt database. check-in: a385298d user: dan tags: trunk
07:37
Fix problems in fts5 found by ASAN. check-in: c564bf87 user: dan tags: trunk
2018-12-27
20:12
Fix another problem with corrupt database handling in fts5. check-in: fb0d7fba user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3_unicode2.c.

196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
...
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT, 
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',       
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',       
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',       
    'w',       'x',       'y',       'z',       'h',       't',       
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT, 
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',  
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
................................................................................
** is a diacritical modifier character.
*/
int sqlite3FtsUnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  return (c < 768+32) ?
      (mask0 & (1 << (c-768))) :
      (mask1 & (1 << (c-768-32)));
}


/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.







|







 







|
|







196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
...
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT, 
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',       
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',       
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',       
    'w',       'x',       'y',       'z',       'h',       't',       
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT, 
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',       
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
................................................................................
** is a diacritical modifier character.
*/
int sqlite3FtsUnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}


/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.

Changes to ext/fts3/unicode/mkunicode.tcl.

59
60
61
62
63
64
65

66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
...
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
...
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
...
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
    incr i

    puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]]
    puts -nonewline ", "
  }
  puts ""
  puts "  \};"

  puts "  char aChar\[\] = \{"
  puts -nonewline "    '\\0',      "
  set i 1
  foreach c $aChar f $aFlag {
    if { $f } {
      set str "'$c'|0x80,  "
    } else {
      set str "'$c'|0x00,  "
    }
    if {$c == ""} { set str "'\\0',      " }

    if {($i % 6)==0} {puts "" ; puts -nonewline "    " }
    incr i
    puts -nonewline "$str"
  }
................................................................................
  puts "*/"
  puts "int ${zFunc}\(int c)\{"
  puts "  unsigned int mask0 = [format "0x%08X" $i1];"
  puts "  unsigned int mask1 = [format "0x%08X" $i2];"

  puts "  if( c<$iFirst || c>$iLast ) return 0;"
  puts "  return (c < $iFirst+32) ?"
  puts "      (mask0 & (1 << (c-$iFirst))) :"
  puts "      (mask1 & (1 << (c-$iFirst-32)));"
  puts "\}"
}


#-------------------------------------------------------------------------

proc an_load_separator_ranges {} {
................................................................................
  set aMapArray [intarray $aMap]
  set aDataArray [intarray $aData]
  puts [code {
    static u16 aFts5UnicodeBlock[] = {$aBlockArray};
    static u16 aFts5UnicodeMap[] = {$aMapArray};
    static u16 aFts5UnicodeData[] = {$aDataArray};

    int sqlite3Fts5UnicodeCategory(int iCode) { 
      int iRes = -1;
      int iHi;
      int iLo;
      int ret;
      u16 iKey;

      if( iCode>=(1<<20) ){
................................................................................
        if( aCP[iCP].iCode==i ){
          sqlite3Fts5UnicodeCatParse(aCP[iCP].zCat, aArray);
          iCP++;
        }else{
          aArray[0] = 1;
        }

        c = sqlite3Fts5UnicodeCategory(i);
        if( aArray[c]==0 ){
          *piCode = i;
          return 1;
        }
      }

      return 0;







>





|

|







 







|
|







 







|







 







|







59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
...
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
...
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
...
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
    incr i

    puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]]
    puts -nonewline ", "
  }
  puts ""
  puts "  \};"
  puts "#define HIBIT ((char)0x80)"
  puts "  char aChar\[\] = \{"
  puts -nonewline "    '\\0',      "
  set i 1
  foreach c $aChar f $aFlag {
    if { $f } {
      set str "'$c'|HIBIT, "
    } else {
      set str "'$c',       "
    }
    if {$c == ""} { set str "'\\0',      " }

    if {($i % 6)==0} {puts "" ; puts -nonewline "    " }
    incr i
    puts -nonewline "$str"
  }
................................................................................
  puts "*/"
  puts "int ${zFunc}\(int c)\{"
  puts "  unsigned int mask0 = [format "0x%08X" $i1];"
  puts "  unsigned int mask1 = [format "0x%08X" $i2];"

  puts "  if( c<$iFirst || c>$iLast ) return 0;"
  puts "  return (c < $iFirst+32) ?"
  puts "      (mask0 & ((unsigned int)1 << (c-$iFirst))) :"
  puts "      (mask1 & ((unsigned int)1 << (c-$iFirst-32)));"
  puts "\}"
}


#-------------------------------------------------------------------------

proc an_load_separator_ranges {} {
................................................................................
  set aMapArray [intarray $aMap]
  set aDataArray [intarray $aData]
  puts [code {
    static u16 aFts5UnicodeBlock[] = {$aBlockArray};
    static u16 aFts5UnicodeMap[] = {$aMapArray};
    static u16 aFts5UnicodeData[] = {$aDataArray};

    int sqlite3Fts5UnicodeCategory(u32 iCode) { 
      int iRes = -1;
      int iHi;
      int iLo;
      int ret;
      u16 iKey;

      if( iCode>=(1<<20) ){
................................................................................
        if( aCP[iCP].iCode==i ){
          sqlite3Fts5UnicodeCatParse(aCP[iCP].zCat, aArray);
          iCP++;
        }else{
          aArray[0] = 1;
        }

        c = sqlite3Fts5UnicodeCategory((u32)i);
        if( aArray[c]==0 ){
          *piCode = i;
          return 1;
        }
      }

      return 0;

Changes to ext/fts5/fts5Int.h.

784
785
786
787
788
789
790
791
792
793
794
795
796
797
/**************************************************************************
** Interface to automatically generated code in fts5_unicode2.c. 
*/
int sqlite3Fts5UnicodeIsdiacritic(int c);
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);

int sqlite3Fts5UnicodeCatParse(const char*, u8*);
int sqlite3Fts5UnicodeCategory(int iCode);
void sqlite3Fts5UnicodeAscii(u8*, u8*);
/*
** End of interface to code in fts5_unicode2.c.
**************************************************************************/

#endif







|






784
785
786
787
788
789
790
791
792
793
794
795
796
797
/**************************************************************************
** Interface to automatically generated code in fts5_unicode2.c. 
*/
int sqlite3Fts5UnicodeIsdiacritic(int c);
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);

int sqlite3Fts5UnicodeCatParse(const char*, u8*);
int sqlite3Fts5UnicodeCategory(u32 iCode);
void sqlite3Fts5UnicodeAscii(u8*, u8*);
/*
** End of interface to code in fts5_unicode2.c.
**************************************************************************/

#endif

Changes to ext/fts5/fts5_expr.c.

2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
    return;
  }
  memset(aArr, 0, sizeof(aArr));
  sqlite3Fts5UnicodeCatParse("L*", aArr);
  sqlite3Fts5UnicodeCatParse("N*", aArr);
  sqlite3Fts5UnicodeCatParse("Co", aArr);
  iCode = sqlite3_value_int(apVal[0]);
  sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory(iCode)]);
}

static void fts5ExprFold(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){







|







2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
    return;
  }
  memset(aArr, 0, sizeof(aArr));
  sqlite3Fts5UnicodeCatParse("L*", aArr);
  sqlite3Fts5UnicodeCatParse("N*", aArr);
  sqlite3Fts5UnicodeCatParse("Co", aArr);
  iCode = sqlite3_value_int(apVal[0]);
  sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]);
}

static void fts5ExprFold(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){

Changes to ext/fts5/fts5_index.c.

3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
....
6278
6279
6280
6281
6282
6283
6284
6285
6286
6287
6288
6289
6290
6291
6292
6293
      int i;
      u32 mask;
      memset(aUsed, 0, sizeof(aUsed));
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
          if( iId<=FTS5_MAX_SEGMENT ){
            aUsed[(iId-1) / 32] |= 1 << ((iId-1) % 32);
          }
        }
      }

      for(i=0; aUsed[i]==0xFFFFFFFF; i++);
      mask = aUsed[i];
      for(iSegid=0; mask & (1 << iSegid); iSegid++);
      iSegid += 1 + i*32;

#ifdef SQLITE_DEBUG
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
        }
................................................................................
  ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
  ** buffer overreads even if the record is corrupt.  */
  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);
  nSpace = n + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  memcpy(a, aBlob, n);


  fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( bDlidx ){
    Fts5Data dlidx;
    Fts5DlidxLvl lvl;







|






|







 







|
<







3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
....
6278
6279
6280
6281
6282
6283
6284
6285

6286
6287
6288
6289
6290
6291
6292
      int i;
      u32 mask;
      memset(aUsed, 0, sizeof(aUsed));
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
          if( iId<=FTS5_MAX_SEGMENT ){
            aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
          }
        }
      }

      for(i=0; aUsed[i]==0xFFFFFFFF; i++);
      mask = aUsed[i];
      for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
      iSegid += 1 + i*32;

#ifdef SQLITE_DEBUG
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
        }
................................................................................
  ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
  ** buffer overreads even if the record is corrupt.  */
  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);
  nSpace = n + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  if( n>0 ) memcpy(a, aBlob, n);


  fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( bDlidx ){
    Fts5Data dlidx;
    Fts5DlidxLvl lvl;

Changes to ext/fts5/fts5_test_tok.c.

374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
  if( idxNum==1 ){
    const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
    int nByte = sqlite3_value_bytes(apVal[0]);
    pCsr->zInput = sqlite3_malloc(nByte+1);
    if( pCsr->zInput==0 ){
      rc = SQLITE_NOMEM;
    }else{
      memcpy(pCsr->zInput, zByte, nByte);
      pCsr->zInput[nByte] = 0;
      rc = pTab->tok.xTokenize(
          pTab->pTok, (void*)pCsr, 0, zByte, nByte, fts5tokCb
      );
    }
  }








|







374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
  if( idxNum==1 ){
    const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
    int nByte = sqlite3_value_bytes(apVal[0]);
    pCsr->zInput = sqlite3_malloc(nByte+1);
    if( pCsr->zInput==0 ){
      rc = SQLITE_NOMEM;
    }else{
      if( nByte>0 ) memcpy(pCsr->zInput, zByte, nByte);
      pCsr->zInput[nByte] = 0;
      rc = pTab->tok.xTokenize(
          pTab->pTok, (void*)pCsr, 0, zByte, nByte, fts5tokCb
      );
    }
  }

Changes to ext/fts5/fts5_tokenize.c.

264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
...
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
      while( zCsr<zTerm ){
        int iCode;
        int bToken;
        READ_UTF8(zCsr, zTerm, iCode);
        if( iCode<128 ){
          p->aTokenChar[iCode] = (unsigned char)bTokenChars;
        }else{
          bToken = p->aCategory[sqlite3Fts5UnicodeCategory(iCode)];
          assert( (bToken==0 || bToken==1) ); 
          assert( (bTokenChars==0 || bTokenChars==1) );
          if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
            int i;
            for(i=0; i<nNew; i++){
              if( aNew[i]>iCode ) break;
            }
................................................................................
/*
** Return true if, for the purposes of tokenizing with the tokenizer
** passed as the first argument, codepoint iCode is considered a token 
** character (not a separator).
*/
static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
  return (
    p->aCategory[sqlite3Fts5UnicodeCategory(iCode)]
    ^ fts5UnicodeIsException(p, iCode)
  );
}

static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,







|







 







|







264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
...
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
      while( zCsr<zTerm ){
        int iCode;
        int bToken;
        READ_UTF8(zCsr, zTerm, iCode);
        if( iCode<128 ){
          p->aTokenChar[iCode] = (unsigned char)bTokenChars;
        }else{
          bToken = p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)];
          assert( (bToken==0 || bToken==1) ); 
          assert( (bTokenChars==0 || bTokenChars==1) );
          if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
            int i;
            for(i=0; i<nNew; i++){
              if( aNew[i]>iCode ) break;
            }
................................................................................
/*
** Return true if, for the purposes of tokenizing with the tokenizer
** passed as the first argument, codepoint iCode is considered a token 
** character (not a separator).
*/
static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
  return (
    p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)]
    ^ fts5UnicodeIsException(p, iCode)
  );
}

static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,

Changes to ext/fts5/fts5_unicode2.c.

65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
..
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
...
244
245
246
247
248
249
250

251
252
253
254
255
256
257
...
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
...
769
770
771
772
773
774
775

    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT, 
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',       
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',       
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',       
    'w',       'x',       'y',       'z',       'h',       't',       
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT, 
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',  
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
................................................................................
** is a diacritical modifier character.
*/
int sqlite3Fts5UnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  return (c < 768+32) ?
      (mask0 & (1 << (c-768))) :
      (mask1 & (1 << (c-768-32)));
}


/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
................................................................................
  
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}


int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ 
  aArray[0] = 1;
  switch( zCat[0] ){
    case 'C':
          switch( zCat[1] ){
            case 'c': aArray[1] = 1; break;
................................................................................
    89,    1434,  3226,  506,   474,   506,   506,   367,   1018,  1946,  
    1402,  954,   1402,  314,   90,    1082,  218,   2266,  666,   1210,  
    186,   570,   2042,  58,    5850,  154,   2010,  154,   794,   2266,  
    378,   2266,  3738,  39,    39,    39,    39,    39,    39,    17351, 
    34,    3074,  7692,  63,    63,    
  };

int sqlite3Fts5UnicodeCategory(int iCode) { 
  int iRes = -1;
  int iHi;
  int iLo;
  int ret;
  u16 iKey;

  if( iCode>=(1<<20) ){
................................................................................
    int n = (aFts5UnicodeData[iTbl] >> 5) + i;
    for(; i<128 && i<n; i++){
      aAscii[i] = bToken;
    }
    iTbl++;
  }
}








|







 







|
|







 







>







 







|







 







>
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
..
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
...
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
...
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
...
770
771
772
773
774
775
776
777
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT, 
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',       
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',       
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',       
    'w',       'x',       'y',       'z',       'h',       't',       
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT, 
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',       
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
................................................................................
** is a diacritical modifier character.
*/
int sqlite3Fts5UnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}


/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
................................................................................
  
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}


int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ 
  aArray[0] = 1;
  switch( zCat[0] ){
    case 'C':
          switch( zCat[1] ){
            case 'c': aArray[1] = 1; break;
................................................................................
    89,    1434,  3226,  506,   474,   506,   506,   367,   1018,  1946,  
    1402,  954,   1402,  314,   90,    1082,  218,   2266,  666,   1210,  
    186,   570,   2042,  58,    5850,  154,   2010,  154,   794,   2266,  
    378,   2266,  3738,  39,    39,    39,    39,    39,    39,    17351, 
    34,    3074,  7692,  63,    63,    
  };

int sqlite3Fts5UnicodeCategory(u32 iCode) { 
  int iRes = -1;
  int iHi;
  int iLo;
  int ret;
  u16 iKey;

  if( iCode>=(1<<20) ){
................................................................................
    int n = (aFts5UnicodeData[iTbl] >> 5) + i;
    for(; i<128 && i<n; i++){
      aAscii[i] = bToken;
    }
    iTbl++;
  }
}

Changes to ext/fts5/test/fts5unicode3.test.

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc fts3_unicode_path {file} {
  file join [file dirname [info script]] .. .. fts3 unicode $file
}

source [fts3_unicode_path parseunicode.tcl]
set testprefix fts5unicode3

set CF [fts3_unicode_path CaseFolding.txt]
set UD [fts3_unicode_path UnicodeData.txt]







|







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc fts3_unicode_path {file} {
  file join .. [file dirname [info script]] .. .. fts3 unicode $file
}

source [fts3_unicode_path parseunicode.tcl]
set testprefix fts5unicode3

set CF [fts3_unicode_path CaseFolding.txt]
set UD [fts3_unicode_path UnicodeData.txt]