/ Check-in [a11b393d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a spurious report of corruption that could be made by the fts5 integrity-check in SQLITE_DEBUG builds if the fts5 index contains malformed utf text. Ticket [d62981b76de521e3]
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: a11b393dc2c882cf0b3c47c3405bf43ca1d6459605bd39cccce4d32da653a72d
User & Date: dan 2019-12-24 14:27:03
Original Comment: Fix a spurious report of corruption that could be made by the fts5 integrity-check in SQLITE_DEBUG builds if the fts5 index contains malformed utf text.
References
2019-12-24
15:35
Fix an assert() added as part of commit [a11b393dc] that can fail if fts5 database records are corrupt. (check-in: 4630c1ec user: dan tags: trunk)
Context
2019-12-24
15:01
Extra defense against problems following an OOM. dbsqlfuzz find. Also import the latest dbsqlfuzz test cases. (check-in: 0a70f5dd user: drh tags: trunk)
14:27
Fix a spurious report of corruption that could be made by the fts5 integrity-check in SQLITE_DEBUG builds if the fts5 index contains malformed utf text. Ticket [d62981b76de521e3] (check-in: a11b393d user: dan tags: trunk)
13:41
Convert an ALWAYS() into an assert() with an extra error term. Dbsqlfuzz find, with test case in TH3. (check-in: b473ad35 user: drh tags: trunk)
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_index.c.

5719
5720
5721
5722
5723
5724
5725































5726
5727
5728
5729
5730
5731
5732
....
5759
5760
5761
5762
5763
5764
5765
5766






5767
5768
5769
5770
5771
5772
5773
5774
  }
  sqlite3Fts5IterClose(pIter);

  *pCksum = cksum;
  return rc;
}

































/*
** This function is also purely an internal test. It does not contribute to 
** FTS functionality, or even the integrity-check, in any way.
*/
static void fts5TestTerm(
  Fts5Index *p, 
................................................................................

    /* If this is a prefix query, check that the results returned if the
    ** the index is disabled are the same. In both ASC and DESC order. 
    **
    ** This check may only be performed if the hash table is empty. This
    ** is because the hash table only supports a single scan query at
    ** a time, and the multi-iter loop from which this function is called
    ** is already performing such a scan. */






    if( p->nPendingData==0 ){
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
      if( iIdx>0 && rc==SQLITE_OK ){







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|
>
>
>
>
>
>
|







5719
5720
5721
5722
5723
5724
5725
5726
5727
5728
5729
5730
5731
5732
5733
5734
5735
5736
5737
5738
5739
5740
5741
5742
5743
5744
5745
5746
5747
5748
5749
5750
5751
5752
5753
5754
5755
5756
5757
5758
5759
5760
5761
5762
5763
....
5790
5791
5792
5793
5794
5795
5796
5797
5798
5799
5800
5801
5802
5803
5804
5805
5806
5807
5808
5809
5810
5811
  }
  sqlite3Fts5IterClose(pIter);

  *pCksum = cksum;
  return rc;
}

/*
** Check if buffer z[], size n bytes, contains as series of valid utf-8
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
*/
static int fts5TestUtf8(const char *z, int n){
  assert( n>0 );
  int i = 0;
  while( i<n ){
    if( (z[i] & 0x80)==0x00 ){
      i++;
    }else
    if( (z[i] & 0xE0)==0xC0 ){
      if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1;
      i += 2;
    }else
    if( (z[i] & 0xF0)==0xE0 ){
      if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      i += 3;
    }else
    if( (z[i] & 0xF8)==0xF0 ){
      if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      if( (z[i+2] & 0xC0)!=0x80 ) return 1;
      i += 3;
    }else{
      return 1;
    }
  }

  return 0;
}

/*
** This function is also purely an internal test. It does not contribute to 
** FTS functionality, or even the integrity-check, in any way.
*/
static void fts5TestTerm(
  Fts5Index *p, 
................................................................................

    /* If this is a prefix query, check that the results returned if the
    ** the index is disabled are the same. In both ASC and DESC order. 
    **
    ** This check may only be performed if the hash table is empty. This
    ** is because the hash table only supports a single scan query at
    ** a time, and the multi-iter loop from which this function is called
    ** is already performing such a scan. 
    **
    ** Also only do this if buffer zTerm contains nTerm bytes of valid
    ** utf-8. Otherwise, the last part of the buffer contents might contain
    ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
    ** character stored in the main fts index, which will cause the
    ** test to fail.  */
    if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
      if( iIdx>0 && rc==SQLITE_OK ){

Changes to ext/fts5/test/fts5misc.test.

246
247
248
249
250
251
252















253
254
255

do_execsql_test 9.2 {
  SELECT rowid FROM t1('upgrade');
} {
  -4764623217061966105 8324454597464624651
}

















finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270

do_execsql_test 9.2 {
  SELECT rowid FROM t1('upgrade');
} {
  -4764623217061966105 8324454597464624651
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 10.0 {
  CREATE VIRTUAL TABLE vt1 USING fts5(c1, c2, prefix = 1, tokenize = "ascii");
  INSERT INTO vt1 VALUES (x'e4', '䔬');
}

do_execsql_test 10.1 {
  SELECT quote(CAST(c1 AS blob)), quote(CAST(c2 AS blob)) FROM vt1
} {X'E4' X'E494AC'}

do_execsql_test 10.2 {
  INSERT INTO vt1(vt1) VALUES('integrity-check');
}

finish_test