SQLite

Check-in [c3a3a11194]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Avoid an undefined left-shift operation in fts5 caused by malformed utf-8 text.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: c3a3a11194586bef80a9d7ca54caae8af30d4e7b464b8bb3d257ba2d2ec4791f
User & Date: dan 2018-12-28 14:33:55.680
Context
2018-12-28
17:45
Fix a problem causing the sqlite_master entry corresponding to a virtual table to be removed by a DROP TABLE even if the call to the vtabs xDestroy() method failed. (check-in: 0140f6dbfb user: dan tags: trunk)
14:33
Avoid an undefined left-shift operation in fts5 caused by malformed utf-8 text. (check-in: c3a3a11194 user: dan tags: trunk)
13:57
Fix a buffer overwrite in fts5 triggered by a corrupt database. (check-in: a385298df2 user: dan tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_tokenize.c.
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
  if( n>0 ){
    aNew = (int*)sqlite3_realloc(p->aiException, (n+p->nException)*sizeof(int));
    if( aNew ){
      int nNew = p->nException;
      const unsigned char *zCsr = (const unsigned char*)z;
      const unsigned char *zTerm = (const unsigned char*)&z[n];
      while( zCsr<zTerm ){
        int iCode;
        int bToken;
        READ_UTF8(zCsr, zTerm, iCode);
        if( iCode<128 ){
          p->aTokenChar[iCode] = (unsigned char)bTokenChars;
        }else{
          bToken = p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)];
          assert( (bToken==0 || bToken==1) ); 
          assert( (bTokenChars==0 || bTokenChars==1) );
          if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
            int i;
            for(i=0; i<nNew; i++){
              if( aNew[i]>iCode ) break;
            }







|





|







258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
  if( n>0 ){
    aNew = (int*)sqlite3_realloc(p->aiException, (n+p->nException)*sizeof(int));
    if( aNew ){
      int nNew = p->nException;
      const unsigned char *zCsr = (const unsigned char*)z;
      const unsigned char *zTerm = (const unsigned char*)&z[n];
      while( zCsr<zTerm ){
        u32 iCode;
        int bToken;
        READ_UTF8(zCsr, zTerm, iCode);
        if( iCode<128 ){
          p->aTokenChar[iCode] = (unsigned char)bTokenChars;
        }else{
          bToken = p->aCategory[sqlite3Fts5UnicodeCategory(iCode)];
          assert( (bToken==0 || bToken==1) ); 
          assert( (bTokenChars==0 || bTokenChars==1) );
          if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
            int i;
            for(i=0; i<nNew; i++){
              if( aNew[i]>iCode ) break;
            }
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
  const char *pEnd = &aFold[nFold-6];

  UNUSED_PARAM(iUnused);

  /* Each iteration of this loop gobbles up a contiguous run of separators,
  ** then the next token.  */
  while( rc==SQLITE_OK ){
    int iCode;                    /* non-ASCII codepoint read from input */
    char *zOut = aFold;
    int is;
    int ie;

    /* Skip any separator characters. */
    while( 1 ){
      if( zCsr>=zTerm ) goto tokenize_done;







|







454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
  const char *pEnd = &aFold[nFold-6];

  UNUSED_PARAM(iUnused);

  /* Each iteration of this loop gobbles up a contiguous run of separators,
  ** then the next token.  */
  while( rc==SQLITE_OK ){
    u32 iCode;                    /* non-ASCII codepoint read from input */
    char *zOut = aFold;
    int is;
    int ie;

    /* Skip any separator characters. */
    while( 1 ){
      if( zCsr>=zTerm ) goto tokenize_done;