SQLite

Check-in [50fae1f000]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Logically store updates as (insert+delete) within the FTS tree. This allows keys to be annihilated more quickly under some circumstances.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 50fae1f0006c0e946b5214e73eedf2687a0016f9
User & Date: dan 2015-04-15 18:49:20.008
Context
2015-04-20
18:48
Fix some fts5 problems with very large position lists. (check-in: 2ea8f9cbe6 user: dan tags: fts5)
2015-04-15
18:49
Logically store updates as (insert+delete) within the FTS tree. This allows keys to be annihilated more quickly under some circumstances. (check-in: 50fae1f000 user: dan tags: fts5)
16:01
Fix a problem preventing doclist indexes from being loaded. (check-in: b29109a083 user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_index.c.
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
}

/*
** Return true if the iterator passed as the second argument currently
** points to a delete marker. A delete marker is an entry with a 0 byte
** position-list.
*/
static int fts5SegIterIsDelete(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance */
){
  int bRet = 0;
  Fts5Data *pLeaf = pIter->pLeaf;
  if( p->rc==SQLITE_OK && pLeaf ){
    bRet = pIter->nPos==0;
    /* bRet = pIter->bDel; */
#if 0
    if( pIter->iLeafOffset<pLeaf->n ){
      bRet = ((pLeaf->p[pIter->iLeafOffset] & 0xFE)==0x00);
    }else{
      Fts5Data *pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
            pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno+1
      ));
      if( pNew ){
        bRet = ((pNew->p[4] & 0xFE)==0x00);
        fts5DataRelease(pNew);
      }
    }
#endif
  }
  return bRet;
}

/*
** Advance iterator pIter to the next entry. 
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It 
** is not considered an error if the iterator reaches EOF. If an error has 







|
<
|
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







1767
1768
1769
1770
1771
1772
1773
1774

1775



1776

















1777
1778
1779
1780
1781
1782
1783
}

/*
** Return true if the iterator passed as the second argument currently
** points to a delete marker. A delete marker is an entry with a 0 byte
** position-list.
*/
static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5MultiSegIter *pIter){

  Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];



  return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);

















}

/*
** Advance iterator pIter to the next entry. 
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It 
** is not considered an error if the iterator reaches EOF. If an error has 
2314
2315
2316
2317
2318
2319
2320
2321



2322
2323
2324
2325
2326
2327
2328
    iRes = i1;
  }else{
    int res = fts5BufferCompare(&p1->term, &p2->term);
    if( res==0 ){
      assert( i2>i1 );
      assert( i2!=0 );
      pRes->bTermEq = 1;
      if( p1->iRowid==p2->iRowid ) return i2;



      res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
    }
    assert( res!=0 );
    if( res<0 ){
      iRes = i1;
    }else{
      iRes = i2;







|
>
>
>







2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
    iRes = i1;
  }else{
    int res = fts5BufferCompare(&p1->term, &p2->term);
    if( res==0 ){
      assert( i2>i1 );
      assert( i2!=0 );
      pRes->bTermEq = 1;
      if( p1->iRowid==p2->iRowid ){
        p1->bDel = p2->bDel;
        return i2;
      }
      res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
    }
    assert( res!=0 );
    if( res<0 ){
      iRes = i1;
    }else{
      iRes = i2;
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
       || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
      ){
        fts5MultiIterAdvanced(p, pIter, iFirst, 1);
      }
      fts5AssertMultiIterSetup(p, pIter);

      bUseFrom = 0;
    }while( pIter->bSkipEmpty 
         && fts5SegIterIsDelete(p, &pIter->aSeg[pIter->aFirst[1].iFirst])
    );
  }
}

/*
** Allocate a new Fts5MultiSegIter object.
**
** The new object will be used to iterate through data in structure pStruct.







|
<
<







2491
2492
2493
2494
2495
2496
2497
2498


2499
2500
2501
2502
2503
2504
2505
       || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
      ){
        fts5MultiIterAdvanced(p, pIter, iFirst, 1);
      }
      fts5AssertMultiIterSetup(p, pIter);

      bUseFrom = 0;
    }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) );


  }
}

/*
** Allocate a new Fts5MultiSegIter object.
**
** The new object will be used to iterate through data in structure pStruct.
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
      if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
        fts5SegIterNext(p, &pNew->aSeg[iEq], 0);
        fts5MultiIterAdvanced(p, pNew, iEq, iIter);
      }
    }
    fts5AssertMultiIterSetup(p, pNew);

    if( pNew->bSkipEmpty 
     && fts5SegIterIsDelete(p, &pNew->aSeg[pNew->aFirst[1].iFirst]) 
    ){
      fts5MultiIterNext(p, pNew, 0, 0);
    }
  }else{
    fts5MultiIterFree(p, pNew);
    *ppOut = 0;
  }
}







|
<
<







2587
2588
2589
2590
2591
2592
2593
2594


2595
2596
2597
2598
2599
2600
2601
      if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
        fts5SegIterNext(p, &pNew->aSeg[iEq], 0);
        fts5MultiIterAdvanced(p, pNew, iEq, iIter);
      }
    }
    fts5AssertMultiIterSetup(p, pNew);

    if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){


      fts5MultiIterNext(p, pNew, 0, 0);
    }
  }else{
    fts5MultiIterFree(p, pNew);
    *ppOut = 0;
  }
}
3404
3405
3406
3407
3408
3409
3410



3411


3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435

3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    Fts5ChunkIter sPos;           /* Used to iterate through position list */






    /* If the segment being written is the oldest in the entire index and
    ** the position list is empty (i.e. the entry is a delete marker), no
    ** entry need be written to the output.  */
    fts5ChunkIterInit(p, pSeg, &sPos);
    if( bOldest==0 || sPos.nRem>0 ){
      int nTerm;
      const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm);
      if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
        if( pnRem && writer.nLeafWritten>nRem ){
          fts5ChunkIterRelease(&sPos);
          break;
        }

        /* This is a new term. Append a term to the output segment. */
        if( bRequireDoclistTerm ){
          fts5WriteAppendZerobyte(p, &writer);
        }
        fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
        fts5BufferSet(&p->rc, &term, nTerm, pTerm);
        bRequireDoclistTerm = 1;
      }

      /* Append the rowid to the output */
      /* WRITEPOSLISTSIZE */

      fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), sPos.nRem*2);

      for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
        fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n);
      }
    }

    fts5ChunkIterRelease(&sPos);
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */







>
>
>

>
>
|
<
<

|
<
|
|
|
|
|
|

|
|
|
|
|
|
|
|

|
|
>
|

|
|
<







3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395


3396
3397

3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420

3421
3422
3423
3424
3425
3426
3427
  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    Fts5ChunkIter sPos;           /* Used to iterate through position list */
    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;

    /* Check for key annihilation. */
    if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue;



    fts5ChunkIterInit(p, pSeg, &sPos);


    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
      if( pnRem && writer.nLeafWritten>nRem ){
        fts5ChunkIterRelease(&sPos);
        break;
      }

      /* This is a new term. Append a term to the output segment. */
      if( bRequireDoclistTerm ){
        fts5WriteAppendZerobyte(p, &writer);
      }
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
      bRequireDoclistTerm = 1;
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    nPos = pSeg->nPos*2 + pSeg->bDel;
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);

    for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
      fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n);

    }

    fts5ChunkIterRelease(&sPos);
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
3878
3879
3880
3881
3882
3883
3884




3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
  Fts5StructureSegment *pSeg, 
  Fts5BtreeIter *pIter
){
  int nByte;
  int i;
  nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1);
  memset(pIter, 0, sizeof(*pIter));




  pIter->nLvl = pSeg->nHeight-1;
  pIter->iIdx = iIdx;
  pIter->p = p;
  pIter->pSeg = pSeg;
  if( nByte && p->rc==SQLITE_OK ){
    pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte);
  }
  for(i=0; p->rc==SQLITE_OK && i<pIter->nLvl; i++){
    i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, i+1, 1);
    Fts5Data *pData;
    pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid);
    if( pData ){
      fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s);







>
>
>
>
|
|
|
|
<
<







3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872


3873
3874
3875
3876
3877
3878
3879
  Fts5StructureSegment *pSeg, 
  Fts5BtreeIter *pIter
){
  int nByte;
  int i;
  nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1);
  memset(pIter, 0, sizeof(*pIter));
  if( nByte ){
    pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte);
  }
  if( p->rc==SQLITE_OK ){
    pIter->nLvl = pSeg->nHeight-1;
    pIter->iIdx = iIdx;
    pIter->p = p;
    pIter->pSeg = pSeg;


  }
  for(i=0; p->rc==SQLITE_OK && i<pIter->nLvl; i++){
    i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, i+1, 1);
    Fts5Data *pData;
    pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid);
    if( pData ){
      fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s);
Changes to ext/fts5/test/fts5corrupt.test.
59
60
61
62
63
64
65
66
67

68
69
70
71
72
73
74
  CREATE VIRTUAL TABLE t2 USING fts5(x);
  INSERT INTO t2(t2, rank) VALUES('pgsz', 64);
}
db func rnddoc fts5_rnddoc
do_test 2.1 {
  for {set i 0} {$i < 500} {incr i} {
    execsql { INSERT INTO t2 VALUES(rnddoc(50)) }
    execsql { INSERT INTO t2(t2) VALUES('integrity-check') }
  }

} {}

#--------------------------------------------------------------------
#

finish_test








<

>







59
60
61
62
63
64
65

66
67
68
69
70
71
72
73
74
  CREATE VIRTUAL TABLE t2 USING fts5(x);
  INSERT INTO t2(t2, rank) VALUES('pgsz', 64);
}
db func rnddoc fts5_rnddoc
do_test 2.1 {
  for {set i 0} {$i < 500} {incr i} {
    execsql { INSERT INTO t2 VALUES(rnddoc(50)) }

  }
  execsql { INSERT INTO t2(t2) VALUES('integrity-check') }
} {}

#--------------------------------------------------------------------
#

finish_test