/ Check-in [252f0e45]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add extra test cases and changes to fts3 to avoid crashing on a corrupt database.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 252f0e457d3e33404df87d3e6c44ede61b78319c
User & Date: dan 2010-10-29 18:45:11
Context
2010-10-30
15:21
Test cases and minor changes to make fts3 more robust in the face of a corrupt database. check-in: b7702905 user: dan tags: trunk
2010-10-29
18:45
Add extra test cases and changes to fts3 to avoid crashing on a corrupt database. check-in: 252f0e45 user: dan tags: trunk
2010-10-28
15:52
Add new "dynamic_triggers" test case to threadtest3.c. check-in: a4691563 user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3_write.c.

20
21
22
23
24
25
26












27
28
29
30
31
32
33
...
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
...
896
897
898
899
900
901
902



903
904





905
906
907
908
909
910
911
...
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
....
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
....
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
....
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
....
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103

2104
2105
2106

2107
2108
2109
2110
2111
2112
2113
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)

#include "fts3Int.h"
#include <string.h>
#include <assert.h>
#include <stdlib.h>













typedef struct PendingList PendingList;
typedef struct SegmentNode SegmentNode;
typedef struct SegmentWriter SegmentWriter;

/*
** Data structure used while accumulating terms in the pending-terms hash
** table. The hash table entry maps from term (a string) to a malloc'd
................................................................................
       p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments
    );
  }

  if( rc==SQLITE_OK ){
    int nByte = sqlite3_blob_bytes(p->pSegments);
    if( paBlob ){
      char *aByte = sqlite3_malloc(nByte);
      if( !aByte ){
        rc = SQLITE_NOMEM;
      }else{
        rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0);
        if( rc!=SQLITE_OK ){
          sqlite3_free(aByte);
          aByte = 0;
................................................................................
    rc = sqlite3Fts3ReadBlock(
        p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode
    );
    if( rc!=SQLITE_OK ) return rc;
    pNext = pReader->aNode;
  }
  



  pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix);
  pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix);






  if( nPrefix+nSuffix>pReader->nTermAlloc ){
    int nNew = (nPrefix+nSuffix)*2;
    char *zNew = sqlite3_realloc(pReader->zTerm, nNew);
    if( !zNew ){
      return SQLITE_NOMEM;
    }
................................................................................
  pReader->nTerm = nPrefix+nSuffix;
  pNext += nSuffix;
  pNext += sqlite3Fts3GetVarint32(pNext, &pReader->nDoclist);
  pReader->aDoclist = pNext;
  pReader->pOffsetList = 0;

  /* Check that the doclist does not appear to extend past the end of the
  ** b-tree node. And that the final byte of the doclist is either an 0x00 
  ** or 0x01. If either of these statements is untrue, then the data structure 
  ** is corrupt.
  */
  if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] 
   || (pReader->aDoclist[pReader->nDoclist-1]&0xFE)!=0
  ){
    return SQLITE_CORRUPT;
  }
  return SQLITE_OK;
}

/*
................................................................................
){
  int rc = SQLITE_OK;             /* Return code */
  Fts3SegReader *pReader;         /* Newly allocated SegReader object */
  int nExtra = 0;                 /* Bytes to allocate segment root node */

  assert( iStartLeaf<=iEndLeaf );
  if( iStartLeaf==0 ){
    nExtra = nRoot;
  }

  pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra);
  if( !pReader ){
    return SQLITE_NOMEM;
  }
  memset(pReader, 0, sizeof(Fts3SegReader));
................................................................................
    /* The entire segment is stored in the root node. */
    pReader->aNode = (char *)&pReader[1];
    pReader->nNode = nRoot;
    memcpy(pReader->aNode, zRoot, nRoot);
  }else{
    pReader->iCurrentBlock = iStartLeaf-1;
  }
  rc = fts3SegReaderNext(p, pReader);

  if( rc==SQLITE_OK ){
    *ppReader = pReader;
  }else{
    sqlite3Fts3SegReaderFree(p, pReader);
  }
  return rc;
................................................................................
    if( !pReader ){
      rc = SQLITE_NOMEM;
    }else{
      memset(pReader, 0, nByte);
      pReader->iIdx = 0x7FFFFFFF;
      pReader->ppNextElem = (Fts3HashElem **)&pReader[1];
      memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *));
      fts3SegReaderNext(p, pReader);
    }
  }

  if( isPrefix ){
    sqlite3_free(aElem);
  }
  *ppReader = pReader;
................................................................................

  /* If the Fts3SegFilter defines a specific term (or term prefix) to search 
  ** for, then advance each segment iterator until it points to a term of
  ** equal or greater value than the specified term. This prevents many
  ** unnecessary merge/sort operations for the case where single segment
  ** b-tree leaf nodes contain more than one term.
  */
  if( pFilter->zTerm ){
    int nTerm = pFilter->nTerm;
    const char *zTerm = pFilter->zTerm;
    for(i=0; i<nSegment; i++){
      Fts3SegReader *pSeg = apSegment[i];
      while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ){

        rc = fts3SegReaderNext(p, pSeg);
        if( rc!=SQLITE_OK ) goto finished; }
    }

  }

  fts3SegReaderSort(apSegment, nSegment, nSegment, fts3SegReaderCmp);
  while( apSegment[0]->aNode ){
    int nTerm = apSegment[0]->nTerm;
    char *zTerm = apSegment[0]->zTerm;
    int nMerge = 1;







>
>
>
>
>
>
>
>
>
>
>
>







 







|







 







>
>
>


>
>
>
>
>







 







|
|
<


|







 







|







 







<







 







<







 







|


<
|
<
>
|
|
<
>







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
...
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
...
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
...
936
937
938
939
940
941
942
943
944

945
946
947
948
949
950
951
952
953
954
....
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
....
1142
1143
1144
1145
1146
1147
1148

1149
1150
1151
1152
1153
1154
1155
....
1237
1238
1239
1240
1241
1242
1243

1244
1245
1246
1247
1248
1249
1250
....
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117

2118

2119
2120
2121

2122
2123
2124
2125
2126
2127
2128
2129
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)

#include "fts3Int.h"
#include <string.h>
#include <assert.h>
#include <stdlib.h>

/*
** When full-text index nodes are loaded from disk, the buffer that they
** are loaded into has the following number of bytes of padding at the end 
** of it. i.e. if a full-text index node is 900 bytes in size, then a buffer
** of 920 bytes is allocated for it.
**
** This means that if we have a pointer into a buffer containing node data,
** it is always safe to read up to two varints from it without risking an
** overread, even if the node data is corrupted.
*/
#define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2)

typedef struct PendingList PendingList;
typedef struct SegmentNode SegmentNode;
typedef struct SegmentWriter SegmentWriter;

/*
** Data structure used while accumulating terms in the pending-terms hash
** table. The hash table entry maps from term (a string) to a malloc'd
................................................................................
       p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments
    );
  }

  if( rc==SQLITE_OK ){
    int nByte = sqlite3_blob_bytes(p->pSegments);
    if( paBlob ){
      char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING);
      if( !aByte ){
        rc = SQLITE_NOMEM;
      }else{
        rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0);
        if( rc!=SQLITE_OK ){
          sqlite3_free(aByte);
          aByte = 0;
................................................................................
    rc = sqlite3Fts3ReadBlock(
        p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode
    );
    if( rc!=SQLITE_OK ) return rc;
    pNext = pReader->aNode;
  }
  
  /* Because of the FTS3_NODE_PADDING bytes of padding, the following is 
  ** safe (no risk of overread) even if the node data is corrupted.  
  */
  pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix);
  pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix);
  if( nPrefix<0 || nSuffix<=0 
   || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] 
  ){
    return SQLITE_CORRUPT;
  }

  if( nPrefix+nSuffix>pReader->nTermAlloc ){
    int nNew = (nPrefix+nSuffix)*2;
    char *zNew = sqlite3_realloc(pReader->zTerm, nNew);
    if( !zNew ){
      return SQLITE_NOMEM;
    }
................................................................................
  pReader->nTerm = nPrefix+nSuffix;
  pNext += nSuffix;
  pNext += sqlite3Fts3GetVarint32(pNext, &pReader->nDoclist);
  pReader->aDoclist = pNext;
  pReader->pOffsetList = 0;

  /* Check that the doclist does not appear to extend past the end of the
  ** b-tree node. And that the final byte of the doclist is 0x00. If either 
  ** of these statements is untrue, then the data structure is corrupt.

  */
  if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] 
   || pReader->aDoclist[pReader->nDoclist-1]
  ){
    return SQLITE_CORRUPT;
  }
  return SQLITE_OK;
}

/*
................................................................................
){
  int rc = SQLITE_OK;             /* Return code */
  Fts3SegReader *pReader;         /* Newly allocated SegReader object */
  int nExtra = 0;                 /* Bytes to allocate segment root node */

  assert( iStartLeaf<=iEndLeaf );
  if( iStartLeaf==0 ){
    nExtra = nRoot + FTS3_NODE_PADDING;
  }

  pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra);
  if( !pReader ){
    return SQLITE_NOMEM;
  }
  memset(pReader, 0, sizeof(Fts3SegReader));
................................................................................
    /* The entire segment is stored in the root node. */
    pReader->aNode = (char *)&pReader[1];
    pReader->nNode = nRoot;
    memcpy(pReader->aNode, zRoot, nRoot);
  }else{
    pReader->iCurrentBlock = iStartLeaf-1;
  }


  if( rc==SQLITE_OK ){
    *ppReader = pReader;
  }else{
    sqlite3Fts3SegReaderFree(p, pReader);
  }
  return rc;
................................................................................
    if( !pReader ){
      rc = SQLITE_NOMEM;
    }else{
      memset(pReader, 0, nByte);
      pReader->iIdx = 0x7FFFFFFF;
      pReader->ppNextElem = (Fts3HashElem **)&pReader[1];
      memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *));

    }
  }

  if( isPrefix ){
    sqlite3_free(aElem);
  }
  *ppReader = pReader;
................................................................................

  /* If the Fts3SegFilter defines a specific term (or term prefix) to search 
  ** for, then advance each segment iterator until it points to a term of
  ** equal or greater value than the specified term. This prevents many
  ** unnecessary merge/sort operations for the case where single segment
  ** b-tree leaf nodes contain more than one term.
  */
  for(i=0; i<nSegment; i++){
    int nTerm = pFilter->nTerm;
    const char *zTerm = pFilter->zTerm;

    Fts3SegReader *pSeg = apSegment[i];

    do {
      rc = fts3SegReaderNext(p, pSeg);
      if( rc!=SQLITE_OK ) goto finished;

    }while( zTerm && fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 );
  }

  fts3SegReaderSort(apSegment, nSegment, nSegment, fts3SegReaderCmp);
  while( apSegment[0]->aNode ){
    int nTerm = apSegment[0]->nTerm;
    char *zTerm = apSegment[0]->zTerm;
    int nMerge = 1;

Changes to test/fts3corrupt.test.

14
15
16
17
18
19
20





21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36





37























38








39









40
41




42
43
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 { finish_test ; return }

set ::testprefix fts3corrupt






do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts3;
  INSERT INTO t1 VALUES('hello');
} {}

do_test fts3corrupt-1.1 {
  set blob [db one {SELECT root from t1_segdir}]
  set blob [binary format a7ca* $blob 24 [string range $blob 8 end]]
  execsql { UPDATE t1_segdir SET root = $blob }
} {}

do_test fts3corrupt-1.2 {
  foreach w {a b c d e f g h i j k l m n o} {
    execsql { INSERT INTO t1 VALUES($w) }
  }
} {}





























do_catchsql_test 1.3 {








  INSERT INTO t1 VALUES('world');









} {1 {database disk image is malformed}}





finish_test








>
>
>
>
>




<





<





>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>


>
>
>
>


14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

30
31
32
33
34

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 { finish_test ; return }

set ::testprefix fts3corrupt


# Test that a doclist with a length field that indicates that the doclist
# extends past the end of the node on which it resides is correctly identified
# as database corruption.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts3;
  INSERT INTO t1 VALUES('hello');
} {}

do_test fts3corrupt-1.1 {
  set blob [db one {SELECT root from t1_segdir}]
  set blob [binary format a7ca* $blob 24 [string range $blob 8 end]]
  execsql { UPDATE t1_segdir SET root = $blob }
} {}

do_test fts3corrupt-1.2 {
  foreach w {a b c d e f g h i j k l m n o} {
    execsql { INSERT INTO t1 VALUES($w) }
  }
} {}
do_catchsql_test 1.3 {
  INSERT INTO t1 VALUES('world');
} {1 {database disk image is malformed}}
do_execsql_test 1.4 { 
  DROP TABLE t1;
} 

# This block of tests checks that corruption is correctly detected if the
# length field of a term on a leaf node indicates that the term extends past
# the end of the node on which it resides. There are two cases:
#
#   1. The first term on the node.
#   2. The second or subsequent term on the node (prefix compressed term).
#
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t1 USING fts3;
  BEGIN;
    INSERT INTO t1 VALUES('hello');
    INSERT INTO t1 VALUES('hello');
    INSERT INTO t1 VALUES('hello');
    INSERT INTO t1 VALUES('hello');
    INSERT INTO t1 VALUES('hello');
  COMMIT;
} {}
do_test fts3corrupt-2.1 {
  set blob [db one {SELECT root from t1_segdir}]
  set blob [binary format a*a* "\x00\x7F" [string range $blob 2 end]]
  execsql { UPDATE t1_segdir SET root = $blob }
} {}
do_catchsql_test 2.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'hello'
} {1 {database disk image is malformed}}

do_execsql_test 3.0 {
  DROP TABLE t1;
  CREATE VIRTUAL TABLE t1 USING fts3;
  BEGIN;
    INSERT INTO t1 VALUES('hello');
    INSERT INTO t1 VALUES('world');
  COMMIT;
} {}
do_test fts3corrupt-3.1 {
  set blob [db one {SELECT quote(root) from t1_segdir}]
  set blob [binary format a11a*a* $blob "\x7F" [string range $blob 12 end]]
  execsql { UPDATE t1_segdir SET root = $blob }
} {}
do_catchsql_test 3.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'world'
} {1 {database disk image is malformed}}





finish_test