/ Check-in [16a8e84f]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix some problems in fts3 found by address-sanitizer.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 16a8e84fa7f67a467f824bdd7f72cbd6a6e95dab8cc7aa1e0e751720b98f3e31
User & Date: dan 2017-03-20 18:53:32
Context
2017-03-20
19:26
Avoid a technically undefined right-shift of a signed value in rtree.c. check-in: a144875f user: dan tags: trunk
18:53
Fix some problems in fts3 found by address-sanitizer. check-in: 16a8e84f user: dan tags: trunk
16:34
Avoid the possibility of signed integer overflow with oversized precisions in %d conversions in the printf() implementation. check-in: ef3a7c87 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

   345    345     if( (v & mask2)==0 ){ var = v; return ret; }
   346    346   
   347    347   /* 
   348    348   ** Read a 64-bit variable-length integer from memory starting at p[0].
   349    349   ** Return the number of bytes read, or 0 on error.
   350    350   ** The value is stored in *v.
   351    351   */
   352         -int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){
   353         -  const char *pStart = p;
          352  +int sqlite3Fts3GetVarint(const char *pBuf, sqlite_int64 *v){
          353  +  const unsigned char *p = (const unsigned char*)pBuf;
          354  +  const unsigned char *pStart = p;
   354    355     u32 a;
   355    356     u64 b;
   356    357     int shift;
   357    358   
   358    359     GETVARINT_INIT(a, p, 0,  0x00,     0x80, *v, 1);
   359    360     GETVARINT_STEP(a, p, 7,  0x7F,     0x4000, *v, 2);
   360    361     GETVARINT_STEP(a, p, 14, 0x3FFF,   0x200000, *v, 3);

Changes to ext/fts3/fts3_unicode.c.

   132    132     unicode_tokenizer *p,           /* Tokenizer to add exceptions to */
   133    133     int bAlnum,                     /* Replace Isalnum() return value with this */
   134    134     const char *zIn,                /* Array of characters to make exceptions */
   135    135     int nIn                         /* Length of z in bytes */
   136    136   ){
   137    137     const unsigned char *z = (const unsigned char *)zIn;
   138    138     const unsigned char *zTerm = &z[nIn];
   139         -  int iCode;
          139  +  unsigned int iCode;
   140    140     int nEntry = 0;
   141    141   
   142    142     assert( bAlnum==0 || bAlnum==1 );
   143    143   
   144    144     while( z<zTerm ){
   145    145       READ_UTF8(z, zTerm, iCode);
   146         -    assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
   147         -    if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum 
   148         -     && sqlite3FtsUnicodeIsdiacritic(iCode)==0 
          146  +    assert( (sqlite3FtsUnicodeIsalnum((int)iCode) & 0xFFFFFFFE)==0 );
          147  +    if( sqlite3FtsUnicodeIsalnum((int)iCode)!=bAlnum 
          148  +     && sqlite3FtsUnicodeIsdiacritic((int)iCode)==0 
   149    149       ){
   150    150         nEntry++;
   151    151       }
   152    152     }
   153    153   
   154    154     if( nEntry ){
   155    155       int *aNew;                    /* New aiException[] array */
................................................................................
   158    158       aNew = sqlite3_realloc(p->aiException, (p->nException+nEntry)*sizeof(int));
   159    159       if( aNew==0 ) return SQLITE_NOMEM;
   160    160       nNew = p->nException;
   161    161   
   162    162       z = (const unsigned char *)zIn;
   163    163       while( z<zTerm ){
   164    164         READ_UTF8(z, zTerm, iCode);
   165         -      if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum 
   166         -       && sqlite3FtsUnicodeIsdiacritic(iCode)==0
          165  +      if( sqlite3FtsUnicodeIsalnum((int)iCode)!=bAlnum 
          166  +       && sqlite3FtsUnicodeIsdiacritic((int)iCode)==0
   167    167         ){
   168    168           int i, j;
   169         -        for(i=0; i<nNew && aNew[i]<iCode; i++);
          169  +        for(i=0; i<nNew && aNew[i]<(int)iCode; i++);
   170    170           for(j=nNew; j>i; j--) aNew[j] = aNew[j-1];
   171         -        aNew[i] = iCode;
          171  +        aNew[i] = (int)iCode;
   172    172           nNew++;
   173    173         }
   174    174       }
   175    175       p->aiException = aNew;
   176    176       p->nException = nNew;
   177    177     }
   178    178   
................................................................................
   314    314     int *pnToken,                   /* OUT: Number of bytes at *paToken */
   315    315     int *piStart,                   /* OUT: Starting offset of token */
   316    316     int *piEnd,                     /* OUT: Ending offset of token */
   317    317     int *piPos                      /* OUT: Position integer of token */
   318    318   ){
   319    319     unicode_cursor *pCsr = (unicode_cursor *)pC;
   320    320     unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer);
   321         -  int iCode = 0;
          321  +  unsigned int iCode = 0;
   322    322     char *zOut;
   323    323     const unsigned char *z = &pCsr->aInput[pCsr->iOff];
   324    324     const unsigned char *zStart = z;
   325    325     const unsigned char *zEnd;
   326    326     const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput];
   327    327   
   328    328     /* Scan past any delimiter characters before the start of the next token.
   329    329     ** Return SQLITE_DONE early if this takes us all the way to the end of 
   330    330     ** the input.  */
   331    331     while( z<zTerm ){
   332    332       READ_UTF8(z, zTerm, iCode);
   333         -    if( unicodeIsAlnum(p, iCode) ) break;
          333  +    if( unicodeIsAlnum(p, (int)iCode) ) break;
   334    334       zStart = z;
   335    335     }
   336    336     if( zStart>=zTerm ) return SQLITE_DONE;
   337    337   
   338    338     zOut = pCsr->zToken;
   339    339     do {
   340    340       int iOut;
................................................................................
   346    346         zOut = &zNew[zOut - pCsr->zToken];
   347    347         pCsr->zToken = zNew;
   348    348         pCsr->nAlloc += 64;
   349    349       }
   350    350   
   351    351       /* Write the folded case of the last character read to the output */
   352    352       zEnd = z;
   353         -    iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic);
          353  +    iOut = sqlite3FtsUnicodeFold((int)iCode, p->bRemoveDiacritic);
   354    354       if( iOut ){
   355    355         WRITE_UTF8(zOut, iOut);
   356    356       }
   357    357   
   358    358       /* If the cursor is not at EOF, read the next character */
   359    359       if( z>=zTerm ) break;
   360    360       READ_UTF8(z, zTerm, iCode);
   361         -  }while( unicodeIsAlnum(p, iCode) 
   362         -       || sqlite3FtsUnicodeIsdiacritic(iCode)
          361  +  }while( unicodeIsAlnum(p, (int)iCode) 
          362  +       || sqlite3FtsUnicodeIsdiacritic((int)iCode)
   363    363     );
   364    364   
   365    365     /* Set the output variables and return. */
   366    366     pCsr->iOff = (int)(z - pCsr->aInput);
   367    367     *paToken = pCsr->zToken;
   368    368     *pnToken = (int)(zOut - pCsr->zToken);
   369    369     *piStart = (int)(zStart - pCsr->aInput);

Changes to ext/fts3/fts3_unicode2.c.

   123    123       0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
   124    124       0x380400F0,
   125    125     };
   126    126     static const unsigned int aAscii[4] = {
   127    127       0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
   128    128     };
   129    129   
   130         -  if( c<128 ){
   131         -    return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
   132         -  }else if( c<(1<<22) ){
          130  +  if( (unsigned int)c<128 ){
          131  +    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
          132  +  }else if( (unsigned int)c<(1<<22) ){
   133    133       unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
   134    134       int iRes = 0;
   135    135       int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
   136    136       int iLo = 0;
   137    137       while( iHi>=iLo ){
   138    138         int iTest = (iHi + iLo) / 2;
   139    139         if( key >= aEntry[iTest] ){
................................................................................
   318    318      65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 
   319    319      65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 
   320    320      65514, 65521, 65527, 65528, 65529, 
   321    321     };
   322    322   
   323    323     int ret = c;
   324    324   
   325         -  assert( c>=0 );
   326    325     assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
   327    326   
   328    327     if( c<128 ){
   329    328       if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
   330    329     }else if( c<65536 ){
          330  +    const struct TableEntry *p;
   331    331       int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
   332    332       int iLo = 0;
   333    333       int iRes = -1;
   334    334   
          335  +    assert( c>aEntry[0].iCode );
   335    336       while( iHi>=iLo ){
   336    337         int iTest = (iHi + iLo) / 2;
   337    338         int cmp = (c - aEntry[iTest].iCode);
   338    339         if( cmp>=0 ){
   339    340           iRes = iTest;
   340    341           iLo = iTest+1;
   341    342         }else{
   342    343           iHi = iTest-1;
   343    344         }
   344    345       }
   345         -    assert( iRes<0 || c>=aEntry[iRes].iCode );
   346    346   
   347         -    if( iRes>=0 ){
   348         -      const struct TableEntry *p = &aEntry[iRes];
   349         -      if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
   350         -        ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
   351         -        assert( ret>0 );
   352         -      }
          347  +    assert( iRes>=0 && c>=aEntry[iRes].iCode );
          348  +    p = &aEntry[iRes];
          349  +    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
          350  +      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
          351  +      assert( ret>0 );
   353    352       }
   354    353   
   355    354       if( bRemoveDiacritic ) ret = remove_diacritic(ret);
   356    355     }
   357    356     
   358    357     else if( c>=66560 && c<66600 ){
   359    358       ret = c + 40;
   360    359     }
   361    360   
   362    361     return ret;
   363    362   }
   364    363   #endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
   365    364   #endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */

Changes to ext/fts3/fts3_write.c.

  4952   4952     return rc;
  4953   4953   }
  4954   4954   
  4955   4955   /*
  4956   4956   ** Convert the text beginning at *pz into an integer and return
  4957   4957   ** its value.  Advance *pz to point to the first character past
  4958   4958   ** the integer.
         4959  +**
          4960  +** This function is used for parameters to merge= and incrmerge=
         4961  +** commands. 
  4959   4962   */
  4960   4963   static int fts3Getint(const char **pz){
  4961   4964     const char *z = *pz;
  4962   4965     int i = 0;
  4963         -  while( (*z)>='0' && (*z)<='9' ) i = 10*i + *(z++) - '0';
         4966  +  while( (*z)>='0' && (*z)<='9' && i<214748363 ) i = 10*i + *(z++) - '0';
  4964   4967     *pz = z;
  4965   4968     return i;
  4966   4969   }
  4967   4970   
  4968   4971   /*
  4969   4972   ** Process statements of the form:
  4970   4973   **

Changes to ext/fts3/unicode/mkunicode.tcl.

   223    223     puts "** is less than zero."
   224    224     puts "*/"
   225    225     puts "int ${zFunc}\(int c)\{"
   226    226     an_print_range_array $lRange
   227    227     an_print_ascii_bitmap $lRange
   228    228     puts {
   229    229     if( (unsigned int)c<128 ){
   230         -    return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
          230  +    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
   231    231     }else if( (unsigned int)c<(1<<22) ){
   232    232       unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
   233    233       int iRes = 0;
   234    234       int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
   235    235       int iLo = 0;
   236    236       while( iHi>=iLo ){
   237    237         int iTest = (iHi + iLo) / 2;