/ Check-in [50fae1f0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Logically store updates as (insert+delete) within the FTS tree. This allows keys to be annihilated more quickly under some circumstances.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1:50fae1f0006c0e946b5214e73eedf2687a0016f9
User & Date: dan 2015-04-15 18:49:20
Context
2015-04-20
18:48
Fix some fts5 problems with very large position lists. check-in: 2ea8f9cb user: dan tags: fts5
2015-04-15
18:49
Logically store updates as (insert+delete) within the FTS tree. This allows keys to be annihilated more quickly under some circumstances. check-in: 50fae1f0 user: dan tags: fts5
16:01
Fix a problem preventing doclist indexes from being loaded. check-in: b29109a0 user: dan tags: fts5
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_index.c.

1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
....
2314
2315
2316
2317
2318
2319
2320
2321



2322
2323
2324
2325
2326
2327
2328
....
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
....
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
....
3404
3405
3406
3407
3408
3409
3410



3411
3412
3413
3414



3415
3416
3417

3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435

3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
....
3878
3879
3880
3881
3882
3883
3884




3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
}

/*
** Return true if the iterator passed as the second argument currently
** points to a delete marker. A delete marker is an entry with a 0 byte
** position-list.
*/
static int fts5SegIterIsDelete(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance */
){
  int bRet = 0;
  Fts5Data *pLeaf = pIter->pLeaf;
  if( p->rc==SQLITE_OK && pLeaf ){
    bRet = pIter->nPos==0;
    /* bRet = pIter->bDel; */
#if 0
    if( pIter->iLeafOffset<pLeaf->n ){
      bRet = ((pLeaf->p[pIter->iLeafOffset] & 0xFE)==0x00);
    }else{
      Fts5Data *pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
            pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno+1
      ));
      if( pNew ){
        bRet = ((pNew->p[4] & 0xFE)==0x00);
        fts5DataRelease(pNew);
      }
    }
#endif
  }
  return bRet;
}

/*
** Advance iterator pIter to the next entry. 
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It 
** is not considered an error if the iterator reaches EOF. If an error has 
................................................................................
    iRes = i1;
  }else{
    int res = fts5BufferCompare(&p1->term, &p2->term);
    if( res==0 ){
      assert( i2>i1 );
      assert( i2!=0 );
      pRes->bTermEq = 1;
      if( p1->iRowid==p2->iRowid ) return i2;



      res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
    }
    assert( res!=0 );
    if( res<0 ){
      iRes = i1;
    }else{
      iRes = i2;
................................................................................
       || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
      ){
        fts5MultiIterAdvanced(p, pIter, iFirst, 1);
      }
      fts5AssertMultiIterSetup(p, pIter);

      bUseFrom = 0;
    }while( pIter->bSkipEmpty 
         && fts5SegIterIsDelete(p, &pIter->aSeg[pIter->aFirst[1].iFirst])
    );
  }
}

/*
** Allocate a new Fts5MultiSegIter object.
**
** The new object will be used to iterate through data in structure pStruct.
................................................................................
      if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
        fts5SegIterNext(p, &pNew->aSeg[iEq], 0);
        fts5MultiIterAdvanced(p, pNew, iEq, iIter);
      }
    }
    fts5AssertMultiIterSetup(p, pNew);

    if( pNew->bSkipEmpty 
     && fts5SegIterIsDelete(p, &pNew->aSeg[pNew->aFirst[1].iFirst]) 
    ){
      fts5MultiIterNext(p, pNew, 0, 0);
    }
  }else{
    fts5MultiIterFree(p, pNew);
    *ppOut = 0;
  }
}
................................................................................
  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    Fts5ChunkIter sPos;           /* Used to iterate through position list */




    /* If the segment being written is the oldest in the entire index and
    ** the position list is empty (i.e. the entry is a delete marker), no
    ** entry need be written to the output.  */



    fts5ChunkIterInit(p, pSeg, &sPos);
    if( bOldest==0 || sPos.nRem>0 ){
      int nTerm;

      const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm);
      if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
        if( pnRem && writer.nLeafWritten>nRem ){
          fts5ChunkIterRelease(&sPos);
          break;
        }

        /* This is a new term. Append a term to the output segment. */
        if( bRequireDoclistTerm ){
          fts5WriteAppendZerobyte(p, &writer);
        }
        fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
        fts5BufferSet(&p->rc, &term, nTerm, pTerm);
        bRequireDoclistTerm = 1;
      }

      /* Append the rowid to the output */
      /* WRITEPOSLISTSIZE */

      fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), sPos.nRem*2);

      for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
        fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n);
      }
    }

    fts5ChunkIterRelease(&sPos);
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
................................................................................
  Fts5StructureSegment *pSeg, 
  Fts5BtreeIter *pIter
){
  int nByte;
  int i;
  nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1);
  memset(pIter, 0, sizeof(*pIter));




  pIter->nLvl = pSeg->nHeight-1;
  pIter->iIdx = iIdx;
  pIter->p = p;
  pIter->pSeg = pSeg;
  if( nByte && p->rc==SQLITE_OK ){
    pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte);
  }
  for(i=0; p->rc==SQLITE_OK && i<pIter->nLvl; i++){
    i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, i+1, 1);
    Fts5Data *pData;
    pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid);
    if( pData ){
      fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s);







|
|
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|
>
>
>







 







|
<
<







 







|
<
<







 







>
>
>

<
<
<
>
>
>

<
<
>
|
|
|
|
|
|

|
|
|
|
|
|
|
|

|
|
>
|

|
|
<







 







>
>
>
>
|
|
|
|
<
<







1767
1768
1769
1770
1771
1772
1773
1774
1775




1776

















1777
1778
1779
1780
1781
1782
1783
....
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
....
2491
2492
2493
2494
2495
2496
2497
2498


2499
2500
2501
2502
2503
2504
2505
....
2587
2588
2589
2590
2591
2592
2593
2594


2595
2596
2597
2598
2599
2600
2601
....
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392



3393
3394
3395
3396


3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420

3421
3422
3423
3424
3425
3426
3427
....
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872


3873
3874
3875
3876
3877
3878
3879
}

/*
** Return true if the iterator passed as the second argument currently
** points to a delete marker. A delete marker is an entry with a 0 byte
** position-list.
*/
static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5MultiSegIter *pIter){
  Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];




  return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);

















}

/*
** Advance iterator pIter to the next entry. 
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It 
** is not considered an error if the iterator reaches EOF. If an error has 
................................................................................
    iRes = i1;
  }else{
    int res = fts5BufferCompare(&p1->term, &p2->term);
    if( res==0 ){
      assert( i2>i1 );
      assert( i2!=0 );
      pRes->bTermEq = 1;
      if( p1->iRowid==p2->iRowid ){
        p1->bDel = p2->bDel;
        return i2;
      }
      res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
    }
    assert( res!=0 );
    if( res<0 ){
      iRes = i1;
    }else{
      iRes = i2;
................................................................................
       || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
      ){
        fts5MultiIterAdvanced(p, pIter, iFirst, 1);
      }
      fts5AssertMultiIterSetup(p, pIter);

      bUseFrom = 0;
    }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) );


  }
}

/*
** Allocate a new Fts5MultiSegIter object.
**
** The new object will be used to iterate through data in structure pStruct.
................................................................................
      if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
        fts5SegIterNext(p, &pNew->aSeg[iEq], 0);
        fts5MultiIterAdvanced(p, pNew, iEq, iIter);
      }
    }
    fts5AssertMultiIterSetup(p, pNew);

    if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){


      fts5MultiIterNext(p, pNew, 0, 0);
    }
  }else{
    fts5MultiIterFree(p, pNew);
    *ppOut = 0;
  }
}
................................................................................
  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    Fts5ChunkIter sPos;           /* Used to iterate through position list */
    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;




    /* Check for key annihilation. */
    if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue;

    fts5ChunkIterInit(p, pSeg, &sPos);



    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
      if( pnRem && writer.nLeafWritten>nRem ){
        fts5ChunkIterRelease(&sPos);
        break;
      }

      /* This is a new term. Append a term to the output segment. */
      if( bRequireDoclistTerm ){
        fts5WriteAppendZerobyte(p, &writer);
      }
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
      bRequireDoclistTerm = 1;
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    nPos = pSeg->nPos*2 + pSeg->bDel;
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);

    for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
      fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n);

    }

    fts5ChunkIterRelease(&sPos);
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
................................................................................
  Fts5StructureSegment *pSeg, 
  Fts5BtreeIter *pIter
){
  int nByte;
  int i;
  nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1);
  memset(pIter, 0, sizeof(*pIter));
  if( nByte ){
    pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte);
  }
  if( p->rc==SQLITE_OK ){
    pIter->nLvl = pSeg->nHeight-1;
    pIter->iIdx = iIdx;
    pIter->p = p;
    pIter->pSeg = pSeg;


  }
  for(i=0; p->rc==SQLITE_OK && i<pIter->nLvl; i++){
    i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, i+1, 1);
    Fts5Data *pData;
    pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid);
    if( pData ){
      fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s);

Changes to ext/fts5/test/fts5corrupt.test.

59
60
61
62
63
64
65
66
67

68
69
70
71
72
73
74
  CREATE VIRTUAL TABLE t2 USING fts5(x);
  INSERT INTO t2(t2, rank) VALUES('pgsz', 64);
}
db func rnddoc fts5_rnddoc
do_test 2.1 {
  for {set i 0} {$i < 500} {incr i} {
    execsql { INSERT INTO t2 VALUES(rnddoc(50)) }
    execsql { INSERT INTO t2(t2) VALUES('integrity-check') }
  }

} {}

#--------------------------------------------------------------------
#

finish_test








<

>







59
60
61
62
63
64
65

66
67
68
69
70
71
72
73
74
  CREATE VIRTUAL TABLE t2 USING fts5(x);
  INSERT INTO t2(t2, rank) VALUES('pgsz', 64);
}
db func rnddoc fts5_rnddoc
do_test 2.1 {
  for {set i 0} {$i < 500} {incr i} {
    execsql { INSERT INTO t2 VALUES(rnddoc(50)) }

  }
  execsql { INSERT INTO t2(t2) VALUES('integrity-check') }
} {}

#--------------------------------------------------------------------
#

finish_test