Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix problems in fts5 found by ASAN. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
c564bf870106faef297594a51995619c |
User & Date: | dan 2018-12-28 07:37:22.836 |
Context
2018-12-28
| ||
13:57 | Fix a buffer overwrite in fts5 triggered by a corrupt database. (check-in: a385298df2 user: dan tags: trunk) | |
07:37 | Fix problems in fts5 found by ASAN. (check-in: c564bf8701 user: dan tags: trunk) | |
2018-12-27
| ||
20:12 | Fix another problem with corrupt database handling in fts5. (check-in: fb0d7fba07 user: dan tags: trunk) | |
Changes
Changes to ext/fts3/fts3_unicode2.c.
︙ | ︙ | |||
196 197 198 199 200 201 202 | 'f', 'g', 'h', 'h', 'i', 'i'|HIBIT, 'k', 'l', 'l'|HIBIT, 'l', 'm', 'n', 'o'|HIBIT, 'p', 'r', 'r'|HIBIT, 'r', 's', 's'|HIBIT, 't', 'u', 'u'|HIBIT, 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', 'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 'e', 'e'|HIBIT, 'e'|HIBIT, 'i', 'o', 'o'|HIBIT, | | | 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 | 'f', 'g', 'h', 'h', 'i', 'i'|HIBIT, 'k', 'l', 'l'|HIBIT, 'l', 'm', 'n', 'o'|HIBIT, 'p', 'r', 'r'|HIBIT, 'r', 's', 's'|HIBIT, 't', 'u', 'u'|HIBIT, 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', 'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 'e', 'e'|HIBIT, 'e'|HIBIT, 'i', 'o', 'o'|HIBIT, 'o'|HIBIT, 'o'|HIBIT, 'u', 'u'|HIBIT, 'u'|HIBIT, 'y', }; unsigned int key = (((unsigned int)c)<<3) | 0x00000007; int iRes = 0; int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; int iLo = 0; while( iHi>=iLo ){ |
︙ | ︙ | |||
227 228 229 230 231 232 233 | ** is a diacritical modifier character. */ int sqlite3FtsUnicodeIsdiacritic(int c){ unsigned int mask0 = 0x08029FDF; unsigned int mask1 = 0x000361F8; if( c<768 || c>817 ) return 0; return (c < 768+32) ? | | | | 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 | ** is a diacritical modifier character. */ int sqlite3FtsUnicodeIsdiacritic(int c){ unsigned int mask0 = 0x08029FDF; unsigned int mask1 = 0x000361F8; if( c<768 || c>817 ) return 0; return (c < 768+32) ? (mask0 & ((unsigned int)1 << (c-768))) : (mask1 & ((unsigned int)1 << (c-768-32))); } /* ** Interpret the argument as a unicode codepoint. If the codepoint ** is an upper case character that has a lower case equivalent, ** return the codepoint corresponding to the lower case version. |
︙ | ︙ |
Changes to ext/fts3/unicode/mkunicode.tcl.
︙ | ︙ | |||
59 60 61 62 63 64 65 66 67 68 69 70 | incr i puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]] puts -nonewline ", " } puts "" puts " \};" puts " char aChar\[\] = \{" puts -nonewline " '\\0', " set i 1 foreach c $aChar f $aFlag { if { $f } { | > | | | 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | incr i puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]] puts -nonewline ", " } puts "" puts " \};" puts "#define HIBIT ((char)0x80)" puts " char aChar\[\] = \{" puts -nonewline " '\\0', " set i 1 foreach c $aChar f $aFlag { if { $f } { set str "'$c'|HIBIT, " } else { set str "'$c', " } if {$c == ""} { set str "'\\0', " } if {($i % 6)==0} {puts "" ; puts -nonewline " " } incr i puts -nonewline "$str" } |
︙ | ︙ | |||
130 131 132 133 134 135 136 | puts "*/" puts "int ${zFunc}\(int c)\{" puts " unsigned int mask0 = [format "0x%08X" $i1];" puts " unsigned int mask1 = [format "0x%08X" $i2];" puts " if( c<$iFirst || c>$iLast ) return 0;" puts " return (c < $iFirst+32) ?" | | | | 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | puts "*/" puts "int ${zFunc}\(int c)\{" puts " unsigned int mask0 = [format "0x%08X" $i1];" puts " unsigned int mask1 = [format "0x%08X" $i2];" puts " if( c<$iFirst || c>$iLast ) return 0;" puts " return (c < $iFirst+32) ?" puts " (mask0 & ((unsigned int)1 << (c-$iFirst))) :" puts " (mask1 & ((unsigned int)1 << (c-$iFirst-32)));" puts "\}" } #------------------------------------------------------------------------- proc an_load_separator_ranges {} { |
︙ | ︙ | |||
695 696 697 698 699 700 701 | set aMapArray [intarray $aMap] set aDataArray [intarray $aData] puts [code { static u16 aFts5UnicodeBlock[] = {$aBlockArray}; static u16 aFts5UnicodeMap[] = {$aMapArray}; static u16 aFts5UnicodeData[] = {$aDataArray}; | | | 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 | set aMapArray [intarray $aMap] set aDataArray [intarray $aData] puts [code { static u16 aFts5UnicodeBlock[] = {$aBlockArray}; static u16 aFts5UnicodeMap[] = {$aMapArray}; static u16 aFts5UnicodeData[] = {$aDataArray}; int sqlite3Fts5UnicodeCategory(u32 iCode) { int iRes = -1; int iHi; int iLo; int ret; u16 iKey; if( iCode>=(1<<20) ){ |
︙ | ︙ | |||
778 779 780 781 782 783 784 | if( aCP[iCP].iCode==i ){ sqlite3Fts5UnicodeCatParse(aCP[iCP].zCat, aArray); iCP++; }else{ aArray[0] = 1; } | | | 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 | if( aCP[iCP].iCode==i ){ sqlite3Fts5UnicodeCatParse(aCP[iCP].zCat, aArray); iCP++; }else{ aArray[0] = 1; } c = sqlite3Fts5UnicodeCategory((u32)i); if( aArray[c]==0 ){ *piCode = i; return 1; } } return 0; |
︙ | ︙ |
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
784 785 786 787 788 789 790 | /************************************************************************** ** Interface to automatically generated code in fts5_unicode2.c. */ int sqlite3Fts5UnicodeIsdiacritic(int c); int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic); int sqlite3Fts5UnicodeCatParse(const char*, u8*); | | | 784 785 786 787 788 789 790 791 792 793 794 795 796 797 | /************************************************************************** ** Interface to automatically generated code in fts5_unicode2.c. */ int sqlite3Fts5UnicodeIsdiacritic(int c); int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic); int sqlite3Fts5UnicodeCatParse(const char*, u8*); int sqlite3Fts5UnicodeCategory(u32 iCode); void sqlite3Fts5UnicodeAscii(u8*, u8*); /* ** End of interface to code in fts5_unicode2.c. **************************************************************************/ #endif |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
2549 2550 2551 2552 2553 2554 2555 | return; } memset(aArr, 0, sizeof(aArr)); sqlite3Fts5UnicodeCatParse("L*", aArr); sqlite3Fts5UnicodeCatParse("N*", aArr); sqlite3Fts5UnicodeCatParse("Co", aArr); iCode = sqlite3_value_int(apVal[0]); | | | 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 | return; } memset(aArr, 0, sizeof(aArr)); sqlite3Fts5UnicodeCatParse("L*", aArr); sqlite3Fts5UnicodeCatParse("N*", aArr); sqlite3Fts5UnicodeCatParse("Co", aArr); iCode = sqlite3_value_int(apVal[0]); sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]); } static void fts5ExprFold( sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args */ sqlite3_value **apVal /* Function arguments */ ){ |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
3573 3574 3575 3576 3577 3578 3579 | int i; u32 mask; memset(aUsed, 0, sizeof(aUsed)); for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid; if( iId<=FTS5_MAX_SEGMENT ){ | | | | 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 | int i; u32 mask; memset(aUsed, 0, sizeof(aUsed)); for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid; if( iId<=FTS5_MAX_SEGMENT ){ aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32); } } } for(i=0; aUsed[i]==0xFFFFFFFF; i++); mask = aUsed[i]; for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++); iSegid += 1 + i*32; #ifdef SQLITE_DEBUG for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ); } |
︙ | ︙ | |||
6278 6279 6280 6281 6282 6283 6284 | ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents ** buffer overreads even if the record is corrupt. */ n = sqlite3_value_bytes(apVal[1]); aBlob = sqlite3_value_blob(apVal[1]); nSpace = n + FTS5_DATA_ZERO_PADDING; a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); if( a==0 ) goto decode_out; | | < | 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 | ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents ** buffer overreads even if the record is corrupt. */ n = sqlite3_value_bytes(apVal[1]); aBlob = sqlite3_value_blob(apVal[1]); nSpace = n + FTS5_DATA_ZERO_PADDING; a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); if( a==0 ) goto decode_out; if( n>0 ) memcpy(a, aBlob, n); fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); fts5DebugRowid(&rc, &s, iRowid); if( bDlidx ){ Fts5Data dlidx; Fts5DlidxLvl lvl; |
︙ | ︙ |
Changes to ext/fts5/fts5_test_tok.c.
︙ | ︙ | |||
374 375 376 377 378 379 380 | if( idxNum==1 ){ const char *zByte = (const char *)sqlite3_value_text(apVal[0]); int nByte = sqlite3_value_bytes(apVal[0]); pCsr->zInput = sqlite3_malloc(nByte+1); if( pCsr->zInput==0 ){ rc = SQLITE_NOMEM; }else{ | | | 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 | if( idxNum==1 ){ const char *zByte = (const char *)sqlite3_value_text(apVal[0]); int nByte = sqlite3_value_bytes(apVal[0]); pCsr->zInput = sqlite3_malloc(nByte+1); if( pCsr->zInput==0 ){ rc = SQLITE_NOMEM; }else{ if( nByte>0 ) memcpy(pCsr->zInput, zByte, nByte); pCsr->zInput[nByte] = 0; rc = pTab->tok.xTokenize( pTab->pTok, (void*)pCsr, 0, zByte, nByte, fts5tokCb ); } } |
︙ | ︙ |
Changes to ext/fts5/fts5_tokenize.c.
︙ | ︙ | |||
264 265 266 267 268 269 270 | while( zCsr<zTerm ){ int iCode; int bToken; READ_UTF8(zCsr, zTerm, iCode); if( iCode<128 ){ p->aTokenChar[iCode] = (unsigned char)bTokenChars; }else{ | | | 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | while( zCsr<zTerm ){ int iCode; int bToken; READ_UTF8(zCsr, zTerm, iCode); if( iCode<128 ){ p->aTokenChar[iCode] = (unsigned char)bTokenChars; }else{ bToken = p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)]; assert( (bToken==0 || bToken==1) ); assert( (bTokenChars==0 || bTokenChars==1) ); if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){ int i; for(i=0; i<nNew; i++){ if( aNew[i]>iCode ) break; } |
︙ | ︙ | |||
425 426 427 428 429 430 431 | /* ** Return true if, for the purposes of tokenizing with the tokenizer ** passed as the first argument, codepoint iCode is considered a token ** character (not a separator). */ static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ return ( | | | 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 | /* ** Return true if, for the purposes of tokenizing with the tokenizer ** passed as the first argument, codepoint iCode is considered a token ** character (not a separator). */ static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ return ( p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)] ^ fts5UnicodeIsException(p, iCode) ); } static int fts5UnicodeTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, |
︙ | ︙ |
Changes to ext/fts5/fts5_unicode2.c.
︙ | ︙ | |||
65 66 67 68 69 70 71 | 'f', 'g', 'h', 'h', 'i', 'i'|HIBIT, 'k', 'l', 'l'|HIBIT, 'l', 'm', 'n', 'o'|HIBIT, 'p', 'r', 'r'|HIBIT, 'r', 's', 's'|HIBIT, 't', 'u', 'u'|HIBIT, 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', 'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 'e', 'e'|HIBIT, 'e'|HIBIT, 'i', 'o', 'o'|HIBIT, | | | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | 'f', 'g', 'h', 'h', 'i', 'i'|HIBIT, 'k', 'l', 'l'|HIBIT, 'l', 'm', 'n', 'o'|HIBIT, 'p', 'r', 'r'|HIBIT, 'r', 's', 's'|HIBIT, 't', 'u', 'u'|HIBIT, 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', 'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 'e', 'e'|HIBIT, 'e'|HIBIT, 'i', 'o', 'o'|HIBIT, 'o'|HIBIT, 'o'|HIBIT, 'u', 'u'|HIBIT, 'u'|HIBIT, 'y', }; unsigned int key = (((unsigned int)c)<<3) | 0x00000007; int iRes = 0; int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; int iLo = 0; while( iHi>=iLo ){ |
︙ | ︙ | |||
96 97 98 99 100 101 102 | ** is a diacritical modifier character. */ int sqlite3Fts5UnicodeIsdiacritic(int c){ unsigned int mask0 = 0x08029FDF; unsigned int mask1 = 0x000361F8; if( c<768 || c>817 ) return 0; return (c < 768+32) ? | | | | 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | ** is a diacritical modifier character. */ int sqlite3Fts5UnicodeIsdiacritic(int c){ unsigned int mask0 = 0x08029FDF; unsigned int mask1 = 0x000361F8; if( c<768 || c>817 ) return 0; return (c < 768+32) ? (mask0 & ((unsigned int)1 << (c-768))) : (mask1 & ((unsigned int)1 << (c-768-32))); } /* ** Interpret the argument as a unicode codepoint. If the codepoint ** is an upper case character that has a lower case equivalent, ** return the codepoint corresponding to the lower case version. |
︙ | ︙ | |||
244 245 246 247 248 249 250 251 252 253 254 255 256 257 | else if( c>=66560 && c<66600 ){ ret = c + 40; } return ret; } int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ aArray[0] = 1; switch( zCat[0] ){ case 'C': switch( zCat[1] ){ case 'c': aArray[1] = 1; break; | > | 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 | else if( c>=66560 && c<66600 ){ ret = c + 40; } return ret; } int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ aArray[0] = 1; switch( zCat[0] ){ case 'C': switch( zCat[1] ){ case 'c': aArray[1] = 1; break; |
︙ | ︙ | |||
726 727 728 729 730 731 732 | 89, 1434, 3226, 506, 474, 506, 506, 367, 1018, 1946, 1402, 954, 1402, 314, 90, 1082, 218, 2266, 666, 1210, 186, 570, 2042, 58, 5850, 154, 2010, 154, 794, 2266, 378, 2266, 3738, 39, 39, 39, 39, 39, 39, 17351, 34, 3074, 7692, 63, 63, }; | | | 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 | 89, 1434, 3226, 506, 474, 506, 506, 367, 1018, 1946, 1402, 954, 1402, 314, 90, 1082, 218, 2266, 666, 1210, 186, 570, 2042, 58, 5850, 154, 2010, 154, 794, 2266, 378, 2266, 3738, 39, 39, 39, 39, 39, 39, 17351, 34, 3074, 7692, 63, 63, }; int sqlite3Fts5UnicodeCategory(u32 iCode) { int iRes = -1; int iHi; int iLo; int ret; u16 iKey; if( iCode>=(1<<20) ){ |
︙ | ︙ | |||
769 770 771 772 773 774 775 | int n = (aFts5UnicodeData[iTbl] >> 5) + i; for(; i<128 && i<n; i++){ aAscii[i] = bToken; } iTbl++; } } | > | 770 771 772 773 774 775 776 777 | int n = (aFts5UnicodeData[iTbl] >> 5) + i; for(; i<128 && i<n; i++){ aAscii[i] = bToken; } iTbl++; } } |
Changes to ext/fts5/test/fts5unicode3.test.
︙ | ︙ | |||
17 18 19 20 21 22 23 | # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } proc fts3_unicode_path {file} { | | | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } proc fts3_unicode_path {file} { file join .. [file dirname [info script]] .. .. fts3 unicode $file } source [fts3_unicode_path parseunicode.tcl] set testprefix fts5unicode3 set CF [fts3_unicode_path CaseFolding.txt] set UD [fts3_unicode_path UnicodeData.txt] |
︙ | ︙ |