SQLite

Check-in [e748651c94]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add tests for fts5.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: e748651c940eae2389fe826cf5c25f1166a5e611
User & Date: dan 2015-04-25 18:56:48.351
Context
2015-04-25
20:29
Improve coverage of fts5_index.c slightly. (check-in: e5aaa01306 user: dan tags: fts5)
18:56
Add tests for fts5. (check-in: e748651c94 user: dan tags: fts5)
2015-04-24
20:18
Merge latest trunk changes with this branch. (check-in: 1c78d8920f user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_index.c.
252
253
254
255
256
257
258





259
260
261
262
263
264
265
/*
** The height of segment b-trees is actually limited to one less than 
** (1<<HEIGHT_BITS). This is because the rowid address space for nodes
** with such a height is used by doclist indexes.
*/
#define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_DATA_HEIGHT_B)-1)






/*
** The rowid for the doclist index associated with leaf page pgno of segment
** segid in index idx.
*/
#define FTS5_DOCLIST_IDX_ROWID(idx, segid, pgno) \
        FTS5_SEGMENT_ROWID(idx, segid, FTS5_SEGMENT_MAX_HEIGHT, pgno)








>
>
>
>
>







252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
/*
** The height of segment b-trees is actually limited to one less than 
** (1<<HEIGHT_BITS). This is because the rowid address space for nodes
** with such a height is used by doclist indexes.
*/
#define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_DATA_HEIGHT_B)-1)

/*
** Maximum segments permitted in a single index 
*/
#define FTS5_MAX_SEGMENT 2000

/*
** The rowid for the doclist index associated with leaf page pgno of segment
** segid in index idx.
*/
#define FTS5_DOCLIST_IDX_ROWID(idx, segid, pgno) \
        FTS5_SEGMENT_ROWID(idx, segid, FTS5_SEGMENT_MAX_HEIGHT, pgno)

361
362
363
364
365
366
367

368
369
370
371
372
373
374
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
};
struct Fts5Structure {
  u64 nWriteCounter;              /* Total leaves written to level 0 */

  int nLevel;                     /* Number of levels in this index */
  Fts5StructureLevel aLevel[0];   /* Array of nLevel level objects */
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/







>







366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
};
struct Fts5Structure {
  u64 nWriteCounter;              /* Total leaves written to level 0 */
  int nSegment;                   /* Total segments in this structure */
  int nLevel;                     /* Number of levels in this index */
  Fts5StructureLevel aLevel[0];   /* Array of nLevel level objects */
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
720
721
722
723
724
725
726

727
728
729
730
731
732
733
734
735
736
737
738
739
740
741











742
743
744
745
746
747
748
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = memcmp(pLeft->p, pRight, nCmp);
  return (res==0 ? (pLeft->n - nRight) : res);
}


/*
** Compare the contents of the two buffers using memcmp(). If one buffer
** is a prefix of the other, it is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
*/
static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
  int nCmp = MIN(pLeft->n, pRight->n);
  int res = memcmp(pLeft->p, pRight->p, nCmp);
  return (res==0 ? (pLeft->n - pRight->n) : res);
}













/*
** Close the read-only blob handle, if it is open.
*/
static void fts5CloseReader(Fts5Index *p){
  if( p->pReader ){







>















>
>
>
>
>
>
>
>
>
>
>







726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = memcmp(pLeft->p, pRight, nCmp);
  return (res==0 ? (pLeft->n - nRight) : res);
}


/*
** Compare the contents of the two buffers using memcmp(). If one buffer
** is a prefix of the other, it is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
*/
static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
  int nCmp = MIN(pLeft->n, pRight->n);
  int res = memcmp(pLeft->p, pRight->p, nCmp);
  return (res==0 ? (pLeft->n - pRight->n) : res);
}

#ifdef SQLITE_DEBUG
static int fts5BlobCompare(
  const u8 *pLeft, int nLeft, 
  const u8 *pRight, int nRight
){
  int nCmp = MIN(nLeft, nRight);
  int res = memcmp(pLeft, pRight, nCmp);
  return (res==0 ? (nLeft - nRight) : res);
}
#endif


/*
** Close the read-only blob handle, if it is open.
*/
static void fts5CloseReader(Fts5Index *p){
  if( p->pReader ){
1038
1039
1040
1041
1042
1043
1044

1045
1046
1047
1048
1049
1050
1051
      sizeof(Fts5Structure) +                    /* Main structure */
      sizeof(Fts5StructureLevel) * (nLevel)      /* aLevel[] array */
  );
  pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);

  if( pRet ){
    pRet->nLevel = nLevel;

    i += sqlite3GetVarint(&pData[i], &pRet->nWriteCounter);

    for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
      int nTotal;
      int iSeg;








>







1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
      sizeof(Fts5Structure) +                    /* Main structure */
      sizeof(Fts5StructureLevel) * (nLevel)      /* aLevel[] array */
  );
  pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);

  if( pRet ){
    pRet->nLevel = nLevel;
    pRet->nSegment = nSegment;
    i += sqlite3GetVarint(&pData[i], &pRet->nWriteCounter);

    for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
      int nTotal;
      int iSeg;

1162
1163
1164
1165
1166
1167
1168
1169

1170

1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181

1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
    fts5StructureRelease(pRet);
    pRet = 0;
  }
  return pRet;
}

/*
** Return the total number of segments in index structure pStruct.

*/

static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nSegment = 0;               /* Total number of segments */
  if( pStruct ){
    int iLvl;                     /* Used to iterate through levels */
    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      nSegment += pStruct->aLevel[iLvl].nSeg;
    }
  }

  return nSegment;
}


/*
** Serialize and store the "structure" record for index iIdx.
**
** If an error occurs, leave an error code in the Fts5Index object. If an
** error has already occurred, this function is a no-op.
*/
static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){
  if( p->rc==SQLITE_OK ){
    int nSegment;                 /* Total number of segments */
    Fts5Buffer buf;               /* Buffer to serialize record into */
    int iLvl;                     /* Used to iterate through levels */
    int iCookie;                  /* Cookie value to store */

    nSegment = fts5StructureCountSegments(pStruct);
    memset(&buf, 0, sizeof(Fts5Buffer));

    /* Append the current configuration cookie */
    iCookie = p->pConfig->iCookie;
    if( iCookie<0 ) iCookie = 0;
    fts5BufferAppend32(&p->rc, &buf, iCookie);

    fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel);
    fts5BufferAppendVarint(&p->rc, &buf, nSegment);
    fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter);

    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      int iSeg;                     /* Used to iterate through segments */
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);







|
>

>











>









<




|








|







1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212

1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
    fts5StructureRelease(pRet);
    pRet = 0;
  }
  return pRet;
}

/*
** Return the total number of segments in index structure pStruct. This
** function is only ever used as part of assert() conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nSegment = 0;               /* Total number of segments */
  if( pStruct ){
    int iLvl;                     /* Used to iterate through levels */
    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      nSegment += pStruct->aLevel[iLvl].nSeg;
    }
  }

  return nSegment;
}
#endif

/*
** Serialize and store the "structure" record for index iIdx.
**
** If an error occurs, leave an error code in the Fts5Index object. If an
** error has already occurred, this function is a no-op.
*/
static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){
  if( p->rc==SQLITE_OK ){

    Fts5Buffer buf;               /* Buffer to serialize record into */
    int iLvl;                     /* Used to iterate through levels */
    int iCookie;                  /* Cookie value to store */

    assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
    memset(&buf, 0, sizeof(Fts5Buffer));

    /* Append the current configuration cookie */
    iCookie = p->pConfig->iCookie;
    if( iCookie<0 ) iCookie = 0;
    fts5BufferAppend32(&p->rc, &buf, iCookie);

    fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel);
    fts5BufferAppendVarint(&p->rc, &buf, pStruct->nSegment);
    fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter);

    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      int iSeg;                     /* Used to iterate through segments */
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
1579
1580
1581
1582
1583
1584
1585

1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605




1606

1607
1608
1609
1610
1611
1612
1613
** position list size field. Read the varint and return the number of bytes
** read. Before returning, set *pnSz to the number of bytes in the position
** list, and *pbDel to true if the delete flag is set, or false otherwise.
*/
static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
  int nSz;
  int n = fts5GetVarint32(p, nSz);

  *pnSz = nSz/2;
  *pbDel = nSz & 0x0001;
  return n;
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of a
** position-list size field. Read the value of the field and store it
** in the following variables:
**
**   Fts5SegIter.nPos
**   Fts5SegIter.bDel
**
** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 
** position list content (if any).
*/
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  if( p->rc==SQLITE_OK ){
    const u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
    int iOff = pIter->iLeafOffset;  /* Offset to read at */




    pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);

  }
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the 
** "nSuffix" field of a term. Function parameter nKeep contains the value
** of the "nPrefix" field (if there was one - it is passed 0 if this is







>


















<

>
>
>
>
|
>







1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625

1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
** position list size field. Read the varint and return the number of bytes
** read. Before returning, set *pnSz to the number of bytes in the position
** list, and *pbDel to true if the delete flag is set, or false otherwise.
*/
static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
  int nSz;
  int n = fts5GetVarint32(p, nSz);
  assert_nc( nSz>=0 );
  *pnSz = nSz/2;
  *pbDel = nSz & 0x0001;
  return n;
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of a
** position-list size field. Read the value of the field and store it
** in the following variables:
**
**   Fts5SegIter.nPos
**   Fts5SegIter.bDel
**
** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 
** position list content (if any).
*/
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  if( p->rc==SQLITE_OK ){

    int iOff = pIter->iLeafOffset;  /* Offset to read at */
    if( iOff>=pIter->pLeaf->n ){
      p->rc = FTS5_CORRUPT;
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the 
** "nSuffix" field of a term. Function parameter nKeep contains the value
** of the "nPrefix" field (if there was one - it is passed 0 if this is
2024
2025
2026
2027
2028
2029
2030


2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049

  /* Check if the current doclist ends on this page. If it does, return
  ** early without loading the doclist-index (as it belongs to a different
  ** term. */
  if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
    int iOff = pIter->iLeafOffset + pIter->nPos;
    while( iOff<pLeaf->n ){


      i64 iDelta;

      /* iOff is currently the offset of the start of position list data */
      iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return;

      if( iOff<pLeaf->n ){
        int bDummy;
        int nPos;
        iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
        iOff += nPos;
      }
    }
  }

  pIter->pDlidx = fts5DlidxIterInit(p, bRev, iIdx, iSeg, pIter->iTermLeafPgno);
}

/*







>
>





<
|
<
<
|
|
<







2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063

2064


2065
2066

2067
2068
2069
2070
2071
2072
2073

  /* Check if the current doclist ends on this page. If it does, return
  ** early without loading the doclist-index (as it belongs to a different
  ** term. */
  if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
    int iOff = pIter->iLeafOffset + pIter->nPos;
    while( iOff<pLeaf->n ){
      int bDummy;
      int nPos;
      i64 iDelta;

      /* iOff is currently the offset of the start of position list data */
      iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return;

      assert_nc( iOff<pLeaf->n );


      iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
      iOff += nPos;

    }
  }

  pIter->pDlidx = fts5DlidxIterInit(p, bRev, iIdx, iSeg, pIter->iTermLeafPgno);
}

/*
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
*/
static void fts5SegIterGotoPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int iLeafPgno
){
  assert( iLeafPgno>pIter->iLeafPgno );
  if( p->rc==SQLITE_OK ){
    pIter->iLeafPgno = iLeafPgno-1;
    fts5SegIterNextPage(p, pIter);
    assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
  }

  if( p->rc==SQLITE_OK ){
    int iOff;
    u8 *a = pIter->pLeaf->p;
    int n = pIter->pLeaf->n;

    iOff = fts5GetU16(&a[0]);







<
|
|
|
<







2366
2367
2368
2369
2370
2371
2372

2373
2374
2375

2376
2377
2378
2379
2380
2381
2382
*/
static void fts5SegIterGotoPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int iLeafPgno
){
  assert( iLeafPgno>pIter->iLeafPgno );

  pIter->iLeafPgno = iLeafPgno-1;
  fts5SegIterNextPage(p, pIter);
  assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );


  if( p->rc==SQLITE_OK ){
    int iOff;
    u8 *a = pIter->pLeaf->p;
    int n = pIter->pLeaf->n;

    iOff = fts5GetU16(&a[0]);
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }

  while( 1 ){
    if( bMove ) fts5SegIterNext(p, pIter, 0);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid>=iMatch ) break;
    if( bRev!=0 && pIter->iRowid<=iMatch ) break;
    bMove = 1;
  }
}







|







2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }

  while( p->rc==SQLITE_OK ){
    if( bMove ) fts5SegIterNext(p, pIter, 0);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid>=iMatch ) break;
    if( bRev!=0 && pIter->iRowid<=iMatch ) break;
    bMove = 1;
  }
}
2498
2499
2500
2501
2502
2503
2504

2505
2506
2507
2508
2509
2510
2511
){
  if( p->rc==SQLITE_OK ){
    int bUseFrom = bFrom;
    do {
      int iFirst = pIter->aFirst[1].iFirst;
      int bNewTerm = 0;
      Fts5SegIter *pSeg = &pIter->aSeg[iFirst];

      if( bUseFrom && pSeg->pDlidx ){
        fts5SegIterNextFrom(p, pSeg, iFrom);
      }else{
        fts5SegIterNext(p, pSeg, &bNewTerm);
      }

      if( pSeg->pLeaf==0 || bNewTerm 







>







2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
){
  if( p->rc==SQLITE_OK ){
    int bUseFrom = bFrom;
    do {
      int iFirst = pIter->aFirst[1].iFirst;
      int bNewTerm = 0;
      Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
      assert( p->rc==SQLITE_OK );
      if( bUseFrom && pSeg->pDlidx ){
        fts5SegIterNextFrom(p, pSeg, iFrom);
      }else{
        fts5SegIterNext(p, pSeg, &bNewTerm);
      }

      if( pSeg->pLeaf==0 || bNewTerm 
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554

2555
2556

2557
2558
2559
2560
2561


2562
2563
2564
2565
2566
2567
2568
  int bSkipEmpty,                 /* True to ignore delete-keys */
  int flags,                      /* FTS5INDEX_QUERY_XXX flags */
  const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
  int iLevel,                     /* Level to iterate (-1 for all) */
  int nSegment,                   /* Number of segments to merge (iLevel>=0) */
  Fts5MultiSegIter **ppOut        /* New object */
){
  int nSeg;                       /* Number of segments merged */
  int nSlot;                      /* Power of two >= nSeg */
  int iIter = 0;                  /* */
  int iSeg;                       /* Used to iterate through segments */
  Fts5StructureLevel *pLvl;
  Fts5MultiSegIter *pNew;

  assert( (pTerm==0 && nTerm==0) || iLevel<0 );

  /* Allocate space for the new multi-seg-iterator. */

  if( iLevel<0 ){
    nSeg = fts5StructureCountSegments(pStruct);

    nSeg += (p->apHash ? 1 : 0);
  }else{
    nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
  }
  for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);


  *ppOut = pNew = fts5IdxMalloc(p, 
      sizeof(Fts5MultiSegIter) +          /* pNew */
      sizeof(Fts5SegIter) * nSlot +       /* pNew->aSeg[] */
      sizeof(Fts5CResult) * nSlot         /* pNew->aFirst[] */
  );
  if( pNew==0 ) return;
  pNew->nSeg = nSlot;







|
|








>
|
|
>
|
|
|
|
|
>
>







2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
  int bSkipEmpty,                 /* True to ignore delete-keys */
  int flags,                      /* FTS5INDEX_QUERY_XXX flags */
  const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
  int iLevel,                     /* Level to iterate (-1 for all) */
  int nSegment,                   /* Number of segments to merge (iLevel>=0) */
  Fts5MultiSegIter **ppOut        /* New object */
){
  int nSeg;                       /* Number of segment-iters in use */
  int nSlot = 0;                  /* Power of two >= nSeg */
  int iIter = 0;                  /* */
  int iSeg;                       /* Used to iterate through segments */
  Fts5StructureLevel *pLvl;
  Fts5MultiSegIter *pNew;

  assert( (pTerm==0 && nTerm==0) || iLevel<0 );

  /* Allocate space for the new multi-seg-iterator. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
      nSeg = pStruct->nSegment;
      nSeg += (p->apHash ? 1 : 0);
    }else{
      nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
    }
    for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
  }

  *ppOut = pNew = fts5IdxMalloc(p, 
      sizeof(Fts5MultiSegIter) +          /* pNew */
      sizeof(Fts5SegIter) * nSlot +       /* pNew->aSeg[] */
      sizeof(Fts5CResult) * nSlot         /* pNew->aFirst[] */
  );
  if( pNew==0 ) return;
  pNew->nSeg = nSlot;
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802

2803
2804
2805
2806
2807
2808
2809
2810
2811



2812

2813
2814
2815
2816
2817
2818
2819
2820

2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855

2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
}

static void fts5ChunkIterRelease(Fts5ChunkIter *pIter){
  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
}

/*
** Read and return the next 32-bit varint from the position-list iterator 
** passed as the second argument.
**
** If an error occurs, zero is returned an an error code left in 
** Fts5Index.rc. If an error has already occurred when this function is
** called, it is a no-op.
*/
static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){
  int iVal = 0;
  if( p->rc==SQLITE_OK ){
    if( pIter->iOff>=pIter->chunk.n ){
      fts5ChunkIterNext(p, &pIter->chunk);
      if( fts5ChunkIterEof(p, &pIter->chunk) ) return 0;
      pIter->iOff = 0;
    }
    pIter->iOff += fts5GetVarint32(&pIter->chunk.p[pIter->iOff], iVal);
  }
  return iVal;
}

/*
** Advance the position list iterator to the next entry.
*/
static void fts5PosIterNext(Fts5Index *p, Fts5PosIter *pIter){
  int iVal;
  assert( fts5ChunkIterEof(p, &pIter->chunk)==0 );
  iVal = fts5PosIterReadVarint(p, pIter);
  if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){
    if( iVal==1 ){
      pIter->iCol = fts5PosIterReadVarint(p, pIter);
      pIter->iPos = fts5PosIterReadVarint(p, pIter) - 2;
    }else{
      pIter->iPos += (iVal - 2);
    }
  }
}

/*
** Initialize the Fts5PosIter object passed as the final argument to iterate
** through the position-list associated with the index entry that iterator 
** pMulti currently points to.
*/
static void fts5PosIterInit(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5MultiSegIter *pMulti,       /* Multi-seg iterator to read pos-list from */
  Fts5PosIter *pIter              /* Initialize this object */
){
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    memset(pIter, 0, sizeof(*pIter));
    fts5ChunkIterInit(p, pSeg, &pIter->chunk);
    if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){
      fts5PosIterNext(p, pIter);
    }
  }
}

/*
** Return true if the position iterator passed as the second argument is
** at EOF. Or if an error has already occurred. Otherwise, return false.
*/
static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){
  return (p->rc || pIter->chunk.pLeaf==0);
}

/*
** Allocate a new segment-id for the structure pStruct.

**
** If an error has already occurred, this function is a no-op. 0 is 
** returned in this case.
*/
static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
  int i;
  if( p->rc!=SQLITE_OK ) return 0;

  for(i=0; i<100; i++){



    int iSegid;

    sqlite3_randomness(sizeof(int), (void*)&iSegid);
    iSegid = iSegid & ((1 << FTS5_DATA_ID_B)-1);
    if( iSegid ){
      int iLvl, iSeg;
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){
            iSegid = 0;

          }
        }
      }
    }
    if( iSegid ) return iSegid;
  }

  p->rc = SQLITE_ERROR;
  return 0;
}

/*
** Discard all data currently cached in the hash-tables.
*/
static void fts5IndexDiscardData(Fts5Index *p){
  assert( p->apHash || p->nPendingData==0 );
  if( p->apHash ){
    Fts5Config *pConfig = p->pConfig;
    int i;
    for(i=0; i<=pConfig->nPrefix; i++){
      if( p->apHash[i] ) sqlite3Fts5HashClear(p->apHash[i]);
    }
    p->nPendingData = 0;
  }
}

/*
** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares
** with buffer (nOld/pOld).
*/
static int fts5PrefixCompress(
  int nOld, const u8 *pOld,
  int nNew, const u8 *pNew
){
  int i;

  for(i=0; i<nNew && i<nOld; i++){
    if( pOld[i]!=pNew[i] ) break;
  }
  return i;
}

/*
** If an "nEmpty" record must be written to the b-tree before the next
** term, write it now.
*/
static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
  if( pWriter->nEmpty ){
    int bFlag = 0;
    Fts5PageWriter *pPg;
    pPg = &pWriter->aWriter[1];
    if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE && pWriter->cdlidx.n ){
      i64 iKey = FTS5_DOCLIST_IDX_ROWID(
          pWriter->iIdx, pWriter->iSegid, 
          pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
      );
      assert( pWriter->cdlidx.n>0 );
      fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n);
      bFlag = 1;







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
|
<
<
>






|

|
>
>
>
|
>
|
|
|
<
|
|
|
|
>




<


<
|











|














>
|







|






|







2755
2756
2757
2758
2759
2760
2761















2762





2763















2764



















2765







2766


2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784

2785
2786
2787
2788
2789
2790
2791
2792
2793

2794
2795

2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
}

static void fts5ChunkIterRelease(Fts5ChunkIter *pIter){
  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
}






















/*















** Allocate a new segment-id for the structure pStruct. The new segment



















** id must be between 1 and 65335 inclusive, and must not be used by 







** any currently existing segment. If a free segment id cannot be found,


** SQLITE_FULL is returned.
**
** If an error has already occurred, this function is a no-op. 0 is 
** returned in this case.
*/
static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
  int i;
  u32 iSegid = 0;

  if( p->rc==SQLITE_OK ){
    if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
      p->rc = SQLITE_FULL;
    }else{
      while( iSegid==0 ){
        int iLvl, iSeg;
        sqlite3_randomness(sizeof(u32), (void*)&iSegid);
        iSegid = (iSegid % ((1 << FTS5_DATA_ID_B) - 2)) + 1;
        assert( iSegid>0 && iSegid<=65535 );

        for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
          for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
            if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){
              iSegid = 0;
            }
          }
        }
      }
    }

  }


  return (int)iSegid;
}

/*
** Discard all data currently cached in the hash-tables.
*/
static void fts5IndexDiscardData(Fts5Index *p){
  assert( p->apHash || p->nPendingData==0 );
  if( p->apHash ){
    Fts5Config *pConfig = p->pConfig;
    int i;
    for(i=0; i<=pConfig->nPrefix; i++){
      sqlite3Fts5HashClear(p->apHash[i]);
    }
    p->nPendingData = 0;
  }
}

/*
** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares
** with buffer (nOld/pOld).
*/
static int fts5PrefixCompress(
  int nOld, const u8 *pOld,
  int nNew, const u8 *pNew
){
  int i;
  assert( fts5BlobCompare(pOld, nOld, pNew, nNew)<0 );
  for(i=0; i<nOld; i++){
    if( pOld[i]!=pNew[i] ) break;
  }
  return i;
}

/*
** If an "nEmpty" record must be written to the b-tree before the next
** term, write it now. 
*/
static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
  if( pWriter->nEmpty ){
    int bFlag = 0;
    Fts5PageWriter *pPg;
    pPg = &pWriter->aWriter[1];
    if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
      i64 iKey = FTS5_DOCLIST_IDX_ROWID(
          pWriter->iIdx, pWriter->iSegid, 
          pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
      );
      assert( pWriter->cdlidx.n>0 );
      fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n);
      bFlag = 1;
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm 
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->aWriter[0];

  assert( pPage==0 || pPage->buf.n==0 || pPage->buf.n>4 );
  if( pPage && pPage->buf.n==0 ){
    /* Zero the first term and first docid fields */
    static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
    fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
    assert( pWriter->bFirstTermInPage );
  }
  if( p->rc ) return;
  







|
|







3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm 
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->aWriter[0];

  assert( pPage->buf.n==0 || pPage->buf.n>4 );
  if( pPage->buf.n==0 ){
    /* Zero the first term and first docid fields */
    static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
    fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
    assert( pWriter->bFirstTermInPage );
  }
  if( p->rc ) return;
  
3133
3134
3135
3136
3137
3138
3139

3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152

3153
3154
3155
3156
3157
3158
3159

    if( pPage->buf.n>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }
  }
}


static void fts5WriteAppendPoslistInt(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int iVal
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->aWriter[0];
    fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal);
    if( pPage->buf.n>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }
  }
}


static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  const u8 *aData, 
  int nData
){







>













>







3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129

    if( pPage->buf.n>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }
  }
}

#if 0
static void fts5WriteAppendPoslistInt(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int iVal
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->aWriter[0];
    fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal);
    if( pPage->buf.n>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }
  }
}
#endif

static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  const u8 *aData, 
  int nData
){
3385
3386
3387
3388
3389
3390
3391

3392
3393
3394
3395
3396
3397
3398

    /* Add the new segment to the output level */
    if( iLvl+1==pStruct->nLevel ) pStruct->nLevel++;
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
    pLvlOut->nSeg++;
    pSeg->pgnoFirst = 1;
    pSeg->iSegid = iSegid;


    /* Read input from all segments in the input level */
    nInput = pLvl->nSeg;
  }
  bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);

#if 0







>







3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369

    /* Add the new segment to the output level */
    if( iLvl+1==pStruct->nLevel ) pStruct->nLevel++;
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
    pLvlOut->nSeg++;
    pSeg->pgnoFirst = 1;
    pSeg->iSegid = iSegid;
    pStruct->nSegment++;

    /* Read input from all segments in the input level */
    nInput = pLvl->nSeg;
  }
  bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);

#if 0
3457
3458
3459
3460
3461
3462
3463

3464
3465
3466
3467

3468
3469
3470
3471
3472
3473
3474
    }

    /* Remove the redundant segments from the input level */
    if( pLvl->nSeg!=nInput ){
      int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
      memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
    }

    pLvl->nSeg -= nInput;
    pLvl->nMerge = 0;
    if( pSeg->pgnoLast==0 ){
      pLvlOut->nSeg--;

    }
  }else{
    assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 );
    fts5TrimSegments(p, pIter);
    pLvl->nMerge = nInput;
  }








>




>







3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
    }

    /* Remove the redundant segments from the input level */
    if( pLvl->nSeg!=nInput ){
      int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
      memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
    }
    pStruct->nSegment -= nInput;
    pLvl->nSeg -= nInput;
    pLvl->nMerge = 0;
    if( pSeg->pgnoLast==0 ){
      pLvlOut->nSeg--;
      pStruct->nSegment--;
    }
  }else{
    assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 );
    fts5TrimSegments(p, pIter);
    pLvl->nMerge = nInput;
  }

3764
3765
3766
3767
3768
3769
3770

3771
3772
3773
3774
3775
3776
3777
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->nHeight = nHeight;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;

    }
    fts5StructurePromote(p, 0, pStruct);
  }


  if( p->pConfig->nAutomerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast);
  fts5IndexCrisisMerge(p, iHash, &pStruct);







>







3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->nHeight = nHeight;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }


  if( p->pConfig->nAutomerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast);
  fts5IndexCrisisMerge(p, iHash, &pStruct);
3805
3806
3807
3808
3809
3810
3811
3812

3813
3814
3815
3816
3817
3818
3819

  fts5IndexFlush(p);
  for(i=0; i<=pConfig->nPrefix; i++){
    Fts5Structure *pStruct = fts5StructureRead(p, i);
    Fts5Structure *pNew = 0;
    int nSeg = 0;
    if( pStruct ){
      nSeg = fts5StructureCountSegments(pStruct);

      if( nSeg>1 ){
        int nByte = sizeof(Fts5Structure);
        nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
        pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
      }
    }
    if( pNew ){







|
>







3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794

  fts5IndexFlush(p);
  for(i=0; i<=pConfig->nPrefix; i++){
    Fts5Structure *pStruct = fts5StructureRead(p, i);
    Fts5Structure *pNew = 0;
    int nSeg = 0;
    if( pStruct ){
      assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
      nSeg = pStruct->nSegment;
      if( nSeg>1 ){
        int nByte = sizeof(Fts5Structure);
        nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
        pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
      }
    }
    if( pNew ){
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
        int iSegOut = 0;
        for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
          for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
            pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
            iSegOut++;
          }
        }
        pLvl->nSeg = nSeg;
      }else{
        sqlite3_free(pNew);
        pNew = 0;
      }
    }

    if( pNew ){







|







3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
        int iSegOut = 0;
        for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
          for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
            pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
            iSegOut++;
          }
        }
        pNew->nSegment = pLvl->nSeg = nSeg;
      }else{
        sqlite3_free(pNew);
        pNew = 0;
      }
    }

    if( pNew ){
4142
4143
4144
4145
4146
4147
4148



4149

4150
4151
4152
4153
4154
4155
4156
  int bSz,
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){
    Fts5ChunkIter iter;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );



    fts5ChunkIterInit(p, pSeg, &iter);

    if( fts5ChunkIterEof(p, &iter)==0 ){
      if( bSz ){
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2);
      }
      while( fts5ChunkIterEof(p, &iter)==0 ){
        fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p);







>
>
>

>







4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
  int bSz,
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){
    Fts5ChunkIter iter;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );
    static int nCall = 0;
    nCall++;

    fts5ChunkIterInit(p, pSeg, &iter);

    if( fts5ChunkIterEof(p, &iter)==0 ){
      if( bSz ){
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2);
      }
      while( fts5ChunkIterEof(p, &iter)==0 ){
        fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p);
4430
4431
4432
4433
4434
4435
4436

4437
4438
4439
4440
4441
4442
4443
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
  Fts5Config *pConfig = p->pConfig;
  int iIdx;                       /* Used to iterate through indexes */
  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
  u64 cksum3 = 0;                 /* Checksum based on contents of indexes */
  Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */


  /* Check that the internal nodes of each segment match the leaves */
  for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    if( pStruct ){
      int iLvl, iSeg;
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){







>







4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
  Fts5Config *pConfig = p->pConfig;
  int iIdx;                       /* Used to iterate through indexes */
  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
  u64 cksum3 = 0;                 /* Checksum based on contents of indexes */
  Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */
  Fts5Buffer poslist = {0,0,0};   /* Buffer used to hold a poslist */

  /* Check that the internal nodes of each segment match the leaves */
  for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    if( pStruct ){
      int iLvl, iSeg;
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
4466
4467
4468
4469
4470
4471
4472
4473
4474


4475
4476
4477
4478
4479
4480
4481
4482
4483

4484
4485
4486
4487
4488
4489
4490
4491
  for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
    Fts5MultiSegIter *pIter;
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, -1, 0, &pIter);
        fts5MultiIterEof(p, pIter)==0;
        fts5MultiIterNext(p, pIter, 0, 0)
    ){
      Fts5PosIter sPos;           /* Used to iterate through position list */
      int n;                      /* Size of term in bytes */


      i64 iRowid = fts5MultiIterRowid(pIter);
      char *z = (char*)fts5MultiIterTerm(pIter, &n);

      /* Update cksum2 with the entries associated with the current term
      ** and rowid.  */
      for(fts5PosIterInit(p, pIter, &sPos);
          fts5PosIterEof(p, &sPos)==0;
          fts5PosIterNext(p, &sPos)
      ){

        cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n);
      }

      /* If this is a new term, query for it. Update cksum3 with the results. */
      if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){
        int rc;
        int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
        u64 ck1 = 0;







<

>
>



|
<
|
|
|
<
>
|







4446
4447
4448
4449
4450
4451
4452

4453
4454
4455
4456
4457
4458
4459

4460
4461
4462

4463
4464
4465
4466
4467
4468
4469
4470
4471
  for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
    Fts5MultiSegIter *pIter;
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, -1, 0, &pIter);
        fts5MultiIterEof(p, pIter)==0;
        fts5MultiIterNext(p, pIter, 0, 0)
    ){

      int n;                      /* Size of term in bytes */
      i64 iPos = 0;               /* Position read from poslist */
      int iOff = 0;               /* Offset within poslist */
      i64 iRowid = fts5MultiIterRowid(pIter);
      char *z = (char*)fts5MultiIterTerm(pIter, &n);

      poslist.n = 0;

      fts5MultiIterPoslist(p, pIter, 0, &poslist);
      while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
        int iCol = FTS5_POS2COLUMN(iPos);

        int iTokOff = FTS5_POS2OFFSET(iPos);
        cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, z, n);
      }

      /* If this is a new term, query for it. Update cksum3 with the results. */
      if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){
        int rc;
        int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
        u64 ck1 = 0;
4522
4523
4524
4525
4526
4527
4528

4529
4530
4531
4532
4533
4534
4535
    fts5MultiIterFree(p, pIter);
    fts5StructureRelease(pStruct);
  }
  if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
  if( p->rc==SQLITE_OK && cksum!=cksum3 ) p->rc = FTS5_CORRUPT;

  fts5BufferFree(&term);

  return fts5IndexReturn(p);
}


/*
** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
** to the document with rowid iRowid.







>







4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
    fts5MultiIterFree(p, pIter);
    fts5StructureRelease(pStruct);
  }
  if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
  if( p->rc==SQLITE_OK && cksum!=cksum3 ) p->rc = FTS5_CORRUPT;

  fts5BufferFree(&term);
  fts5BufferFree(&poslist);
  return fts5IndexReturn(p);
}


/*
** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
** to the document with rowid iRowid.
Changes to ext/fts5/test/fts5aa.test.
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
reset_db
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_execsql_test 2.2 {
  SELECT fts5_decode(id, block) FROM t1_data WHERE id==10
} {
  {{structure idx=0} {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}}
}
do_execsql_test 2.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
#
reset_db







|
|
<
|
<







43
44
45
46
47
48
49
50
51

52

53
54
55
56
57
58
59
reset_db
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_test 2.2 {
  execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }

} {/{{structure idx=0} {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}

do_execsql_test 2.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
#
reset_db
Changes to ext/fts5/test/fts5ab.test.
258
259
260
261
262
263
264






















265
266
267
do_execsql_test 6.2 {
  COMMIT;
}

do_execsql_test 6.3 {
  SELECT rowid FROM s3 WHERE s3 MATCH 'a'
} {1 2}























finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
do_execsql_test 6.2 {
  COMMIT;
}

do_execsql_test 6.3 {
  SELECT rowid FROM s3 WHERE s3 MATCH 'a'
} {1 2}

do_test 6.4 {
  db close
  sqlite3 db test.db
  execsql {
    BEGIN;
      INSERT INTO s3(s3) VALUES('optimize');
    ROLLBACK;
  }
} {}

#-------------------------------------------------------------------------
#
set doc [string repeat "a b c " 500]
breakpoint
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(x);
  INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
  INSERT INTO x1 VALUES($doc);
}



finish_test

Changes to ext/fts5/test/fts5corrupt2.test.
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
db func rnddoc fts5_rnddoc
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
  INSERT INTO t1 SELECT rnddoc(10) FROM ii;
}

set mask [expr 31 << 31]

# Test 1:
#
#   For each page in the t1_data table, open a transaction and DELETE
#   the t1_data entry. Then run:
#







<







24
25
26
27
28
29
30

31
32
33
34
35
36
37
db func rnddoc fts5_rnddoc
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
  INSERT INTO t1 SELECT rnddoc(10) FROM ii;
}

set mask [expr 31 << 31]

# Test 1:
#
#   For each page in the t1_data table, open a transaction and DELETE
#   the t1_data entry. Then run:
#
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#   Same thing, except instead of deleting a row from t1_data, replace its
#   blob content with integer value 14.
#
foreach {tno stmt} {
  1 { DELETE FROM t1_data WHERE rowid=$rowid }
  2 { UPDATE t1_data SET block=14 WHERE rowid=$rowid }
} {
  break
  set tn 0
  foreach rowid [db eval {SELECT rowid FROM t1_data WHERE rowid>10}] {
    incr tn
    #if {$tn!=224} continue
  
    do_test 1.$tno.$tn.1.$rowid {
      execsql { BEGIN }







<







46
47
48
49
50
51
52

53
54
55
56
57
58
59
#   Same thing, except instead of deleting a row from t1_data, replace its
#   blob content with integer value 14.
#
foreach {tno stmt} {
  1 { DELETE FROM t1_data WHERE rowid=$rowid }
  2 { UPDATE t1_data SET block=14 WHERE rowid=$rowid }
} {

  set tn 0
  foreach rowid [db eval {SELECT rowid FROM t1_data WHERE rowid>10}] {
    incr tn
    #if {$tn!=224} continue
  
    do_test 1.$tno.$tn.1.$rowid {
      execsql { BEGIN }
73
74
75
76
77
78
79


80
81
82
83
84
85
86
    do_execsql_test 1.$tno.$tn.3.$rowid {
      ROLLBACK;
      INSERT INTO t1(t1) VALUES('integrity-check');
    } {}
  }
}



# Run N-1 tests, where N is the number of bytes in the rightmost leaf page
# of the fts index. For test $i, truncate the rightmost leafpage to $i
# bytes. Then test both the integrity-check detects the corruption.
#
# Also tested is that "MATCH 'x*'" does not crash and sometimes reports
# corruption. It may not report the db as corrupt because truncating the
# final leaf to some sizes may create a valid leaf page.







>
>







71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
    do_execsql_test 1.$tno.$tn.3.$rowid {
      ROLLBACK;
      INSERT INTO t1(t1) VALUES('integrity-check');
    } {}
  }
}

# Using the same database as the 1.* tests.
#
# Run N-1 tests, where N is the number of bytes in the rightmost leaf page
# of the fts index. For test $i, truncate the rightmost leafpage to $i
# bytes. Then test both the integrity-check detects the corruption.
#
# Also tested is that "MATCH 'x*'" does not crash and sometimes reports
# corruption. It may not report the db as corrupt because truncating the
# final leaf to some sizes may create a valid leaf page.
107
108
109
110
111
112
113
114


















































































115
116
  } 1

  do_execsql_test 2.$i.4 {
    ROLLBACK;
    INSERT INTO t1(t1) VALUES('integrity-check');
  } {}
}



















































































finish_test









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
  } 1

  do_execsql_test 2.$i.4 {
    ROLLBACK;
    INSERT INTO t1(t1) VALUES('integrity-check');
  } {}
}

#-------------------------------------------------------------------------
# Test that corruption in leaf page headers is detected by queries that use
# doclist-indexes.
#
set doc "A B C D E F G H I J "
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE x3 USING fts5(tt);
  INSERT INTO x3(x3, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<1000) 
  INSERT INTO x3 
  SELECT ($doc || CASE WHEN (i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii;
}

foreach {tn hdr} {
  1 "\00\00\00\00"
  2 "\FF\FF\FF\FF"
} {
  set tn2 0
  set nCorrupt 0
  foreach rowid [db eval {SELECT rowid FROM x3_data WHERE rowid>10}] {
    if {$rowid & $mask} continue
    incr tn2
    do_test 3.$tn.$tn2 {
      execsql BEGIN

      set fd [db incrblob main x3_data block $rowid]
      fconfigure $fd -encoding binary -translation binary
      puts -nonewline $fd $hdr
      close $fd

      set res [catchsql {SELECT rowid FROM x3 WHERE x3 MATCH 'x AND a'}]
      if {$res == "1 {database disk image is malformed}"} {incr nCorrupt}
      set {} 1
    } {1}

    execsql ROLLBACK
  }

  do_test 3.$tn.x { expr $nCorrupt>0 } 1
}

#--------------------------------------------------------------------
#
set doc "A B C D E F G H I J "
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE x4 USING fts5(tt);
  INSERT INTO x4(x4, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) 
  INSERT INTO x4 
  SELECT ($doc || CASE WHEN (i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii;
}

foreach {tn nCut} {
  1 1
  2 10
} {
  set tn2 0
  set nCorrupt 0
  foreach rowid [db eval {SELECT rowid FROM x4_data WHERE rowid>10}] {
    if {$rowid & $mask} continue
    incr tn2
    do_test 4.$tn.$tn2 {
      execsql {
        BEGIN;
          UPDATE x4_data SET block = substr(block, 1, length(block)-$nCut) 
          WHERE id = $rowid;
      }

      set res [catchsql {
        SELECT rowid FROM x4 WHERE x4 MATCH 'a' ORDER BY 1 DESC
      }]
      if {$res == "1 {database disk image is malformed}"} {incr nCorrupt}
      set {} 1
    } {1}

    execsql ROLLBACK
  }

  do_test 4.$tn.x { expr $nCorrupt>0 } 1
}


finish_test

Changes to ext/fts5/test/fts5fault2.test.
46
47
48
49
50
51
52























53
54
55

do_faultsim_test 1.2 -faults oom-* -prep {
} -body {
  execsql { SELECT rowid FROM x1 WHERE x1 MATCH 'z + xyz' ORDER BY 1 DESC}
} -test {
  faultsim_test_result {0 {1000 900 800 700 600 500 400 300 200 100}}
}
























finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78

do_faultsim_test 1.2 -faults oom-* -prep {
} -body {
  execsql { SELECT rowid FROM x1 WHERE x1 MATCH 'z + xyz' ORDER BY 1 DESC}
} -test {
  faultsim_test_result {0 {1000 900 800 700 600 500 400 300 200 100}}
}

#-------------------------------------------------------------------------
# OOM within a query that accesses the in-memory hash table. 
#
reset_db 
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE "a b c" USING fts5(a, b, c);
  INSERT INTO "a b c" VALUES('one two', 'x x x', 'three four');
  INSERT INTO "a b c" VALUES('nine ten', 'y y y', 'two two');
}

do_faultsim_test 2.1 -faults oom-trans* -prep {
  execsql {
    BEGIN;
      INSERT INTO "a b c" VALUES('one one', 'z z z', 'nine ten');
  }
} -body {
  execsql { SELECT rowid FROM "a b c" WHERE "a b c" MATCH 'one' }
} -test {
  faultsim_test_result {0 {1 3}}
  catchsql { ROLLBACK }
}


finish_test

Added ext/fts5/test/fts5full.test.










































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Test that SQLITE_FULL is returned if the FTS5 table cannot find a free 
# segid to use. In practice this can only really happen when automerge and
# crisismerge are both disabled.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5full

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE x8 USING fts5(i);
  INSERT INTO x8(x8, rank) VALUES('automerge', 0);
  INSERT INTO x8(x8, rank) VALUES('crisismerge', 100000);
}

db func rnddoc fts5_rnddoc
do_test 1.1 {
  list [catch {
    for {set i 0} {$i < 2500} {incr i} {
      execsql { INSERT INTO x8 VALUES( rnddoc(5) ); }
    }
  } msg] $msg
} {1 {database or disk is full}}


finish_test