SQLite

Check-in [23dc6fb5b2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix some problems with FTS3 and 3-way NEAR queries.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 23dc6fb5b28712d1ba18dc7ddb3f2ef3b469d611
User & Date: dan 2009-12-05 11:37:19.000
Context
2009-12-05
14:29
Fix another bug in 3-way NEAR queries. (check-in: 3bb13a0652 user: dan tags: trunk)
11:37
Fix some problems with FTS3 and 3-way NEAR queries. (check-in: 23dc6fb5b2 user: dan tags: trunk)
2009-12-04
23:10
Add the SQLITE_4_BYTE_ALIGNED_MALLOC compile-time option which tells some assert() statements that the underlying system only requires 4-byte alignment of 8-byte data objects like double or int64 and that system malloc() only guarantees 4-byte alignment of returned pointers. (check-in: 08faee686e user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
1059
1060
1061
1062
1063
1064
1065
















































1066
1067
1068
1069
1070
1071
1072
    char *p = *pp;
    memcpy(p, *ppPoslist, n);
    p += n;
    *pp = p;
  }
  *ppPoslist = pEnd;
}

















































/*
**
*/
static void fts3PoslistMerge(
  char **pp,                      /* Output buffer */
  char **pp1,                     /* Left input list */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
    char *p = *pp;
    memcpy(p, *ppPoslist, n);
    p += n;
    *pp = p;
  }
  *ppPoslist = pEnd;
}

/*
** Value used to signify the end of an offset-list. This is safe because
** it is not possible to have a document with 2^31 terms.
*/
#define OFFSET_LIST_END 0x7fffffff

/*
** This function is used to help parse offset-lists. When this function is
** called, *pp may point to the start of the next varint in the offset-list
** being parsed, or it may point to 1 byte past the end of the offset-list
** (in which case **pp will be 0x00 or 0x01).
**
** If *pp points past the end of the current offset list, set *pi to 
** OFFSET_LIST_END and return. Otherwise, read the next varint from *pp,
** increment the current value of *pi by the value read, and set *pp to
** point to the next value before returning.
*/
static void fts3ReadNextPos(
  char **pp,                      /* IN/OUT: Pointer into offset-list buffer */
  sqlite3_int64 *pi               /* IN/OUT: Value read from offset-list */
){
  if( **pp&0xFE ){
    fts3GetDeltaVarint(pp, pi);
    *pi -= 2;
  }else{
    *pi = OFFSET_LIST_END;
  }
}

/*
** If parameter iCol is not 0, write an 0x01 byte followed by the value of
** iCol encoded as a varint to *pp. 
**
** Set *pp to point to the byte just after the last byte written before 
** returning (do not modify it if iCol==0). Return the total number of bytes
** written (0 if iCol==0).
*/
static int fts3PutColNumber(char **pp, int iCol){
  int n = 0;                      /* Number of bytes written */
  if( iCol ){
    char *p = *pp;                /* Output pointer */
    n = 1 + sqlite3Fts3PutVarint(&p[1], iCol);
    *p = 0x01;
    *pp = &p[n];
  }
  return n;
}

/*
**
*/
static void fts3PoslistMerge(
  char **pp,                      /* Output buffer */
  char **pp1,                     /* Left input list */
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096




1097
1098


1099
1100






1101
1102
1103
1104
1105
1106
1107
1108
1109

1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
    if( *p1==0x01 ) sqlite3Fts3GetVarint32(&p1[1], &iCol1);
    if( *p2==0x01 ) sqlite3Fts3GetVarint32(&p2[1], &iCol2);

    if( iCol1==iCol2 ){
      sqlite3_int64 i1 = 0;
      sqlite3_int64 i2 = 0;
      sqlite3_int64 iPrev = 0;
      if( iCol1!=0 ){
        int n;
        *p++ = 0x01;
        n = sqlite3Fts3PutVarint(p, iCol1);
        p += n;
        p1 += 1 + n;
        p2 += 1 + n;
      }




      while( (*p1&0xFE) || (*p2&0xFE) ){
        if( i1==i2 ){


          fts3GetDeltaVarint(&p1, &i1); i1 -= 2;
          fts3GetDeltaVarint(&p2, &i2); i2 -= 2;






        }else if( i1<i2 ){
          fts3GetDeltaVarint(&p1, &i1); i1 -= 2;
        }else{
          fts3GetDeltaVarint(&p2, &i2); i2 -= 2;
        }
        fts3PutDeltaVarint(&p, &iPrev, (i1<i2 ? i1 : i2) + 2); iPrev -= 2;
        if( 0==(*p1&0xFE) ) i1 = 0x7FFFFFFF;
        if( 0==(*p2&0xFE) ) i2 = 0x7FFFFFFF;
      }

    }else if( iCol1<iCol2 ){
      if( iCol1 ){
        int n = sqlite3Fts3PutVarint(&p[1], iCol1);
        *p = 0x01;
        p += n+1;
        p1 += n+1;
      }
      fts3ColumnlistCopy(&p, &p1);
    }else{
      if( iCol2 ){
        int n = sqlite3Fts3PutVarint(&p[1], iCol2);
        *p = 0x01;
        p += n+1;
        p2 += n+1;
      }
      fts3ColumnlistCopy(&p, &p2);
    }
  }

  *p++ = '\0';
  *pp = p;
  *pp1 = p1 + 1;







<
<
<
|
|
<
|
|
>
>
>
>
|
|
>
>
|
|
>
>
>
>
>
>

|

|

<
<
<
<
>

<
<
<
<
|
<


<
<
<
<
|
<







1130
1131
1132
1133
1134
1135
1136



1137
1138

1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161




1162
1163




1164

1165
1166




1167

1168
1169
1170
1171
1172
1173
1174
    if( *p1==0x01 ) sqlite3Fts3GetVarint32(&p1[1], &iCol1);
    if( *p2==0x01 ) sqlite3Fts3GetVarint32(&p2[1], &iCol2);

    if( iCol1==iCol2 ){
      sqlite3_int64 i1 = 0;
      sqlite3_int64 i2 = 0;
      sqlite3_int64 iPrev = 0;



      int n = fts3PutColNumber(&p, iCol1);
      p1 += n;

      p2 += n;

      /* At this point, both p1 and p2 point to the start of offset-lists.
      ** An offset-list is a list of non-negative delta-encoded varints, each 
      ** incremented by 2 before being stored. Each list is terminated by a 0 
      ** or 1 value (0x00 or 0x01). The following block merges the two lists
      ** and writes the results to buffer p. p is left pointing to the byte
      ** after the list written. No terminator (0x00 or 0x01) is written to
      ** the output.
      */
      fts3GetDeltaVarint(&p1, &i1);
      fts3GetDeltaVarint(&p2, &i2);
      do {
        fts3PutDeltaVarint(&p, &iPrev, (i1<i2) ? i1 : i2); 
        iPrev -= 2;
        if( i1==i2 ){
          fts3ReadNextPos(&p1, &i1);
          fts3ReadNextPos(&p2, &i2);
        }else if( i1<i2 ){
          fts3ReadNextPos(&p1, &i1);
        }else{
          fts3ReadNextPos(&p2, &i2);
        }




      }while( i1!=OFFSET_LIST_END || i2!=OFFSET_LIST_END );
    }else if( iCol1<iCol2 ){




      p1 += fts3PutColNumber(&p, iCol1);

      fts3ColumnlistCopy(&p, &p1);
    }else{




      p2 += fts3PutColNumber(&p, iCol2);

      fts3ColumnlistCopy(&p, &p2);
    }
  }

  *p++ = '\0';
  *pp = p;
  *pp1 = p1 + 1;
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197

1198
1199
1200
1201
1202
1203
1204
            *pp2 = p2;
            return 1;
          }
          iSave = isSaveLeft ? iPos1 : iPos2;
          fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2;
          pSave = 0;
        }
        if( iPos2<=iPos1 ){
          if( (*p2&0xFE)==0 ) break;
          fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
        }else{
          if( (*p1&0xFE)==0 ) break;
          fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
        }

      }
      if( pSave && pp ){
        p = pSave;
      }

      fts3ColumnlistCopy(0, &p1);
      fts3ColumnlistCopy(0, &p2);







|






>







1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
            *pp2 = p2;
            return 1;
          }
          iSave = isSaveLeft ? iPos1 : iPos2;
          fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2;
          pSave = 0;
        }
        if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){
          if( (*p2&0xFE)==0 ) break;
          fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
        }else{
          if( (*p1&0xFE)==0 ) break;
          fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
        }

      }
      if( pSave && pp ){
        p = pSave;
      }

      fts3ColumnlistCopy(0, &p1);
      fts3ColumnlistCopy(0, &p2);
1237
1238
1239
1240
1241
1242
1243



1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
    return 0;
  }
  *p++ = 0x00;
  *pp = p;
  return 1;
}




static int fts3PoslistNearMerge(
  char **pp,                      /* Output buffer */
  char *aTmp,                     /* Temporary buffer space */
  int nRight,                     /* Maximum difference in token positions */
  int nLeft,                      /* Maximum difference in token positions */
  char **pp1,                     /* Left input list */
  char **pp2                      /* Right input list */
){
  char *p1 = *pp1;
  char *p2 = *pp2;

  if( !pp ){
    if( fts3PoslistPhraseMerge(0, nRight, 0, pp1, pp2) ) return 1;
    *pp1 = p1;







>
>
>





|
|







1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
    return 0;
  }
  *p++ = 0x00;
  *pp = p;
  return 1;
}

/*
** Merge two position-lists as required by the NEAR operator.
*/
static int fts3PoslistNearMerge(
  char **pp,                      /* Output buffer */
  char *aTmp,                     /* Temporary buffer space */
  int nRight,                     /* Maximum difference in token positions */
  int nLeft,                      /* Maximum difference in token positions */
  char **pp1,                     /* IN/OUT: Left input list */
  char **pp2                      /* IN/OUT: Right input list */
){
  char *p1 = *pp1;
  char *p2 = *pp2;

  if( !pp ){
    if( fts3PoslistPhraseMerge(0, nRight, 0, pp1, pp2) ) return 1;
    *pp1 = p1;
Changes to test/fts3.test.
39
40
41
42
43
44
45

46
47
48
49
50
51
52
rename finish_test really_finish_test
proc finish_test {} {}
set ISQUICK 1

set EXCLUDE {
  fts3.test
  fts3malloc.test

}

# Files to include in the test.  If this list is empty then everything
# that is not in the EXCLUDE list is run.
#
set INCLUDE {
}







>







39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
rename finish_test really_finish_test
proc finish_test {} {}
set ISQUICK 1

set EXCLUDE {
  fts3.test
  fts3malloc.test
  fts3rnd.test
}

# Files to include in the test.  If this list is empty then everything
# that is not in the EXCLUDE list is run.
#
set INCLUDE {
}