SQLite

Check-in [f2f0184e9e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve performance of prefix queries without a prefix index on fts5 tables.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: f2f0184e9e1c9f121ee2ac864cd28c8cd8efecb5
User & Date: dan 2015-10-05 19:41:16.348
Context
2015-10-06
01:44
Adjustments to sqlite3MemoryBarrier() when compiling with MSVC and/or WinCE. (check-in: 3168326ebf user: mistachkin tags: trunk)
2015-10-05
19:41
Improve performance of prefix queries without a prefix index on fts5 tables. (check-in: f2f0184e9e user: dan tags: trunk)
15:39
Update fts3 so that expressions to the left and right of a NOT operator are balanced. This prevents relatively small expressions (a dozen terms or so) that are children of NOT operators from triggering the "expression tree is too large" error. (check-in: d6b66cd7b8 user: dan tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_buffer.c.
229
230
231
232
233
234
235

236
237
238
239
240
241
242

243
244
245
246
247
248
249
int sqlite3Fts5PoslistWriterAppend(
  Fts5Buffer *pBuf, 
  Fts5PoslistWriter *pWriter,
  i64 iPos
){
  static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
  int rc = SQLITE_OK;

  if( (iPos & colmask) != (pWriter->iPrev & colmask) ){
    fts5BufferAppendVarint(&rc, pBuf, 1);
    fts5BufferAppendVarint(&rc, pBuf, (iPos >> 32));
    pWriter->iPrev = (iPos & colmask);
  }
  fts5BufferAppendVarint(&rc, pBuf, (iPos - pWriter->iPrev) + 2);
  pWriter->iPrev = iPos;

  return rc;
}

void *sqlite3Fts5MallocZero(int *pRc, int nByte){
  void *pRet = 0;
  if( *pRc==SQLITE_OK ){
    pRet = sqlite3_malloc(nByte);







>
|
|
|
|
|
|
|
>







229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
int sqlite3Fts5PoslistWriterAppend(
  Fts5Buffer *pBuf, 
  Fts5PoslistWriter *pWriter,
  i64 iPos
){
  static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
  int rc = SQLITE_OK;
  if( 0==sqlite3Fts5BufferGrow(&rc, pBuf, 5+5+5) ){
    if( (iPos & colmask) != (pWriter->iPrev & colmask) ){
      pBuf->p[pBuf->n++] = 1;
      pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32));
      pWriter->iPrev = (iPos & colmask);
    }
    pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-pWriter->iPrev)+2);
    pWriter->iPrev = iPos;
  }
  return rc;
}

void *sqlite3Fts5MallocZero(int *pRc, int nByte){
  void *pRet = 0;
  if( *pRc==SQLITE_OK ){
    pRet = sqlite3_malloc(nByte);
Changes to ext/fts5/fts5_index.c.
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
  sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
  sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=? */
  sqlite3_stmt *pIdxSelect;
  int nRead;                      /* Total number of blocks read */
};

struct Fts5DoclistIter {
  u8 *a;
  int n;
  int i;

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;
  u8 *aPoslist;
  int nPoslist;
};








|
<
<







303
304
305
306
307
308
309
310


311
312
313
314
315
316
317
  sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
  sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=? */
  sqlite3_stmt *pIdxSelect;
  int nRead;                      /* Total number of blocks read */
};

struct Fts5DoclistIter {
  u8 *aEof;                       /* Pointer to 1 byte past end of doclist */



  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;
  u8 *aPoslist;
  int nPoslist;
};

3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712





3713
3714
3715
3716
3717
3718
3719
      if( (ret + i) > nMax ) break;
      ret += i;
    }
  }
  return ret;
}

#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
  assert( pBuf->nSpace>=(pBuf->n+nBlob) );             \
  memcpy(&pBuf->p[pBuf->n], pBlob, nBlob);             \
  pBuf->n += nBlob;                                    \





}

/*
** Flush the contents of in-memory hash table iHash to a new level-0 
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 







|
|
|
|
>
>
>
>
>







3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
      if( (ret + i) > nMax ) break;
      ret += i;
    }
  }
  return ret;
}

#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {     \
  assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) );             \
  memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob);             \
  (pBuf)->n += nBlob;                                      \
}

#define fts5BufferSafeAppendVarint(pBuf, iVal) {                \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));  \
  assert( (pBuf)->nSpace>=(pBuf)->n );                          \
}

/*
** Flush the contents of in-memory hash table iHash to a new level-0 
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994



3995

3996
3997
3998
3999
4000
4001

4002
4003
4004
4005
4006




4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020

4021
4022



4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034

4035




4036
4037
4038
4039
4040
4041
4042
4043
      fts5BufferAppendVarint(&p->rc, pBuf, pSeg->nPos*2);
    }
    fts5SegiterPoslist(p, pSeg, pBuf);
  }
}

static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  if( pIter->i<pIter->n ){
    int bDummy;
    if( pIter->i ){



      i64 iDelta;

      pIter->i += fts5GetVarint(&pIter->a[pIter->i], (u64*)&iDelta);
      pIter->iRowid += iDelta;
    }else{
      pIter->i += fts5GetVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid);
    }
    pIter->i += fts5GetPoslistSize(

        &pIter->a[pIter->i], &pIter->nPoslist, &bDummy
    );
    pIter->aPoslist = &pIter->a[pIter->i];
    pIter->i += pIter->nPoslist;
  }else{




    pIter->aPoslist = 0;
  }
}

static void fts5DoclistIterInit(
  Fts5Buffer *pBuf, 
  Fts5DoclistIter *pIter
){
  memset(pIter, 0, sizeof(*pIter));
  pIter->a = pBuf->p;
  pIter->n = pBuf->n;
  fts5DoclistIterNext(pIter);
}


/*
** Append a doclist to buffer pBuf.



*/
static void fts5MergeAppendDocid(
  int *pRc,                       /* IN/OUT: Error code */
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  if( pBuf->n==0 ){
    fts5BufferAppendVarint(pRc, pBuf, iRowid);
  }else{
    fts5BufferAppendVarint(pRc, pBuf, iRowid - *piLastRowid);
  }

  *piLastRowid = iRowid;




}

/*
** Buffers p1 and p2 contain doclists. This function merges the content
** of the two doclists together and sets buffer p1 to the result before
** returning.
**
** If an error occurs, an error code is left in p->rc. If an error has







|
|
|
>
>
>
|
>
|
|
<
<
|
|
>
|
|
<
|
|
>
>
>
>
|








|
|



>


>
>
>


<




|
|
<
|
|
>
|
>
>
>
>








3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004


4005
4006
4007
4008
4009

4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037

4038
4039
4040
4041
4042
4043

4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
      fts5BufferAppendVarint(&p->rc, pBuf, pSeg->nPos*2);
    }
    fts5SegiterPoslist(p, pSeg, pBuf);
  }
}

static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  u8 *p = pIter->aPoslist + pIter->nPoslist;

  assert( pIter->aPoslist );
  if( p>=pIter->aEof ){
    pIter->aPoslist = 0;
  }else{
    i64 iDelta;

    p += fts5GetVarint(p, (u64*)&iDelta);
    pIter->iRowid += iDelta;



    /* Read position list size */
    if( p[0] & 0x80 ){
      int nPos;
      p += fts5GetVarint32(p, nPos);

      pIter->nPoslist = (nPos>>1);
    }else{
      pIter->nPoslist = ((int)(p[0])) >> 1;
      p++;
    }

    pIter->aPoslist = p;
  }
}

static void fts5DoclistIterInit(
  Fts5Buffer *pBuf, 
  Fts5DoclistIter *pIter
){
  memset(pIter, 0, sizeof(*pIter));
  pIter->aPoslist = pBuf->p;
  pIter->aEof = &pBuf->p[pBuf->n];
  fts5DoclistIterNext(pIter);
}

#if 0
/*
** Append a doclist to buffer pBuf.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(

  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);

  *piLastRowid = iRowid;
}
#endif

#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) {       \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                   \
  fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \
  (iLastRowid) = (iRowid);                                     \
}

/*
** Buffers p1 and p2 contain doclists. This function merges the content
** of the two doclists together and sets buffer p1 to the result before
** returning.
**
** If an error occurs, an error code is left in p->rc. If an error has
4053
4054
4055
4056
4057
4058
4059

4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080



4081
4082
4083
4084
4085
4086
4087
4088

4089
4090

4091
4092
4093
4094
4095
4096
4097
4098

4099

4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
    Fts5DoclistIter i1;
    Fts5DoclistIter i2;
    Fts5Buffer out;
    Fts5Buffer tmp;
    memset(&out, 0, sizeof(out));
    memset(&tmp, 0, sizeof(tmp));


    fts5DoclistIterInit(p1, &i1);
    fts5DoclistIterInit(p2, &i2);
    while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){
      if( i2.aPoslist==0 || (i1.aPoslist && i1.iRowid<i2.iRowid) ){
        /* Copy entry from i1 */
        fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i1.iRowid);
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2);
        fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist);
        fts5DoclistIterNext(&i1);
      }
      else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){
        /* Copy entry from i2 */
        fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid);
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2);
        fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist);
        fts5DoclistIterNext(&i2);
      }
      else{
        Fts5PoslistReader r1;



        Fts5PoslistReader r2;
        Fts5PoslistWriter writer;

        memset(&writer, 0, sizeof(writer));

        /* Merge the two position lists. */ 
        fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid);
        fts5BufferZero(&tmp);

        sqlite3Fts5PoslistReaderInit(-1, i1.aPoslist, i1.nPoslist, &r1);
        sqlite3Fts5PoslistReaderInit(-1, i2.aPoslist, i2.nPoslist, &r2);

        while( p->rc==SQLITE_OK && (r1.bEof==0 || r2.bEof==0) ){
          i64 iNew;
          if( r2.bEof || (r1.bEof==0 && r1.iPos<r2.iPos) ){
            iNew = r1.iPos;
            sqlite3Fts5PoslistReaderNext(&r1);
          }else{
            iNew = r2.iPos;
            sqlite3Fts5PoslistReaderNext(&r2);

            if( r1.iPos==r2.iPos ) sqlite3Fts5PoslistReaderNext(&r1);

          }
          p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
        }

        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, tmp.n * 2);
        fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p);
        fts5DoclistIterNext(&i1);
        fts5DoclistIterNext(&i2);
      }
    }

    fts5BufferSet(&p->rc, p1, out.n, out.p);
    fts5BufferFree(&tmp);







>





|

|
|




|

|
|



|
>
>
>
|

<



|

>
|
|
>
|

|
|
|

|
|
>
|
>





|
|







4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102

4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
    Fts5DoclistIter i1;
    Fts5DoclistIter i2;
    Fts5Buffer out;
    Fts5Buffer tmp;
    memset(&out, 0, sizeof(out));
    memset(&tmp, 0, sizeof(tmp));

    sqlite3Fts5BufferGrow(&p->rc, &out, p1->n + p2->n);
    fts5DoclistIterInit(p1, &i1);
    fts5DoclistIterInit(p2, &i2);
    while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){
      if( i2.aPoslist==0 || (i1.aPoslist && i1.iRowid<i2.iRowid) ){
        /* Copy entry from i1 */
        fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
        /* WRITEPOSLISTSIZE */
        fts5BufferSafeAppendVarint(&out, i1.nPoslist * 2);
        fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist);
        fts5DoclistIterNext(&i1);
      }
      else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){
        /* Copy entry from i2 */
        fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
        /* WRITEPOSLISTSIZE */
        fts5BufferSafeAppendVarint(&out, i2.nPoslist * 2);
        fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist);
        fts5DoclistIterNext(&i2);
      }
      else{
        i64 iPos1 = 0;
        i64 iPos2 = 0;
        int iOff1 = 0;
        int iOff2 = 0;

        Fts5PoslistWriter writer;

        memset(&writer, 0, sizeof(writer));

        /* Merge the two position lists. */ 
        fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
        fts5BufferZero(&tmp);

        sqlite3Fts5PoslistNext64(i1.aPoslist, i1.nPoslist, &iOff1, &iPos1);
        sqlite3Fts5PoslistNext64(i2.aPoslist, i2.nPoslist, &iOff2, &iPos2);

        while( p->rc==SQLITE_OK && (iPos1>=0 || iPos2>=0) ){
          i64 iNew;
          if( iPos2<0 || (iPos1>=0 && iPos1<iPos2) ){
            iNew = iPos1;
            sqlite3Fts5PoslistNext64(i1.aPoslist, i1.nPoslist, &iOff1, &iPos1);
          }else{
            iNew = iPos2;
            sqlite3Fts5PoslistNext64(i2.aPoslist, i2.nPoslist, &iOff2, &iPos2);
            if( iPos1==iPos2 ){
              sqlite3Fts5PoslistNext64(i1.aPoslist, i1.nPoslist, &iOff1,&iPos1);
            }
          }
          p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
        }

        /* WRITEPOSLISTSIZE */
        fts5BufferSafeAppendVarint(&out, tmp.n * 2);
        fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
        fts5DoclistIterNext(&i1);
        fts5DoclistIterNext(&i2);
      }
    }

    fts5BufferSet(&p->rc, p1, out.n, out.p);
    fts5BufferFree(&tmp);
4161
4162
4163
4164
4165
4166
4167

4168
4169

4170
4171

4172
4173
4174

4175

4176
4177
4178
4179
4180
4181
4182
            fts5BufferSwap(&doclist, &aBuf[i]);
            fts5BufferZero(&doclist);
          }else{
            fts5MergePrefixLists(p, &doclist, &aBuf[i]);
            fts5BufferZero(&aBuf[i]);
          }
        }

      }


      fts5MergeAppendDocid(&p->rc, &doclist, &iLastRowid, iRowid);
      fts5MultiIterPoslist(p, p1, 1, &doclist);

    }

    for(i=0; i<nBuf; i++){

      fts5MergePrefixLists(p, &doclist, &aBuf[i]);

      fts5BufferFree(&aBuf[i]);
    }
    fts5MultiIterFree(p, p1);

    pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
    if( pData ){
      pData->p = (u8*)&pData[1];







>


>
|
|
>



>
|
>







4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
            fts5BufferSwap(&doclist, &aBuf[i]);
            fts5BufferZero(&doclist);
          }else{
            fts5MergePrefixLists(p, &doclist, &aBuf[i]);
            fts5BufferZero(&aBuf[i]);
          }
        }
        iLastRowid = 0;
      }

      if( 0==sqlite3Fts5BufferGrow(&p->rc, &doclist, 9) ){
        fts5MergeAppendDocid(&doclist, iLastRowid, iRowid);
        fts5MultiIterPoslist(p, p1, 1, &doclist);
      }
    }

    for(i=0; i<nBuf; i++){
      if( p->rc==SQLITE_OK ){
        fts5MergePrefixLists(p, &doclist, &aBuf[i]);
      }
      fts5BufferFree(&aBuf[i]);
    }
    fts5MultiIterFree(p, p1);

    pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
    if( pData ){
      pData->p = (u8*)&pData[1];