/ Check-in [50fae1f0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Logically store updates as (insert+delete) within the FTS tree. This allows keys to be annihilated more quickly under some circumstances.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1:50fae1f0006c0e946b5214e73eedf2687a0016f9
User & Date: dan 2015-04-15 18:49:20
Context
2015-04-20
18:48
Fix some fts5 problems with very large position lists. check-in: 2ea8f9cb user: dan tags: fts5
2015-04-15
18:49
Logically store updates as (insert+delete) within the FTS tree. This allows keys to be annihilated more quickly under some circumstances. check-in: 50fae1f0 user: dan tags: fts5
16:01
Fix a problem preventing doclist indexes from being loaded. check-in: b29109a0 user: dan tags: fts5
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_index.c.

  1767   1767   }
  1768   1768   
  1769   1769   /*
  1770   1770   ** Return true if the iterator passed as the second argument currently
  1771   1771   ** points to a delete marker. A delete marker is an entry with a 0 byte
  1772   1772   ** position-list.
  1773   1773   */
  1774         -static int fts5SegIterIsDelete(
  1775         -  Fts5Index *p,                   /* FTS5 backend object */
  1776         -  Fts5SegIter *pIter              /* Iterator to advance */
  1777         -){
  1778         -  int bRet = 0;
  1779         -  Fts5Data *pLeaf = pIter->pLeaf;
  1780         -  if( p->rc==SQLITE_OK && pLeaf ){
  1781         -    bRet = pIter->nPos==0;
  1782         -    /* bRet = pIter->bDel; */
  1783         -#if 0
  1784         -    if( pIter->iLeafOffset<pLeaf->n ){
  1785         -      bRet = ((pLeaf->p[pIter->iLeafOffset] & 0xFE)==0x00);
  1786         -    }else{
  1787         -      Fts5Data *pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
  1788         -            pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno+1
  1789         -      ));
  1790         -      if( pNew ){
  1791         -        bRet = ((pNew->p[4] & 0xFE)==0x00);
  1792         -        fts5DataRelease(pNew);
  1793         -      }
  1794         -    }
  1795         -#endif
  1796         -  }
  1797         -  return bRet;
         1774  +static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5MultiSegIter *pIter){
         1775  +  Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
         1776  +  return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
  1798   1777   }
  1799   1778   
  1800   1779   /*
  1801   1780   ** Advance iterator pIter to the next entry. 
  1802   1781   **
  1803   1782   ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It 
  1804   1783   ** is not considered an error if the iterator reaches EOF. If an error has 
................................................................................
  2314   2293       iRes = i1;
  2315   2294     }else{
  2316   2295       int res = fts5BufferCompare(&p1->term, &p2->term);
  2317   2296       if( res==0 ){
  2318   2297         assert( i2>i1 );
  2319   2298         assert( i2!=0 );
  2320   2299         pRes->bTermEq = 1;
  2321         -      if( p1->iRowid==p2->iRowid ) return i2;
         2300  +      if( p1->iRowid==p2->iRowid ){
         2301  +        p1->bDel = p2->bDel;
         2302  +        return i2;
         2303  +      }
  2322   2304         res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
  2323   2305       }
  2324   2306       assert( res!=0 );
  2325   2307       if( res<0 ){
  2326   2308         iRes = i1;
  2327   2309       }else{
  2328   2310         iRes = i2;
................................................................................
  2509   2491          || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
  2510   2492         ){
  2511   2493           fts5MultiIterAdvanced(p, pIter, iFirst, 1);
  2512   2494         }
  2513   2495         fts5AssertMultiIterSetup(p, pIter);
  2514   2496   
  2515   2497         bUseFrom = 0;
  2516         -    }while( pIter->bSkipEmpty 
  2517         -         && fts5SegIterIsDelete(p, &pIter->aSeg[pIter->aFirst[1].iFirst])
  2518         -    );
         2498  +    }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) );
  2519   2499     }
  2520   2500   }
  2521   2501   
  2522   2502   /*
  2523   2503   ** Allocate a new Fts5MultiSegIter object.
  2524   2504   **
  2525   2505   ** The new object will be used to iterate through data in structure pStruct.
................................................................................
  2607   2587         if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
  2608   2588           fts5SegIterNext(p, &pNew->aSeg[iEq], 0);
  2609   2589           fts5MultiIterAdvanced(p, pNew, iEq, iIter);
  2610   2590         }
  2611   2591       }
  2612   2592       fts5AssertMultiIterSetup(p, pNew);
  2613   2593   
  2614         -    if( pNew->bSkipEmpty 
  2615         -     && fts5SegIterIsDelete(p, &pNew->aSeg[pNew->aFirst[1].iFirst]) 
  2616         -    ){
         2594  +    if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
  2617   2595         fts5MultiIterNext(p, pNew, 0, 0);
  2618   2596       }
  2619   2597     }else{
  2620   2598       fts5MultiIterFree(p, pNew);
  2621   2599       *ppOut = 0;
  2622   2600     }
  2623   2601   }
................................................................................
  3404   3382     assert( iLvl>=0 );
  3405   3383     for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, iLvl, nInput, &pIter);
  3406   3384         fts5MultiIterEof(p, pIter)==0;
  3407   3385         fts5MultiIterNext(p, pIter, 0, 0)
  3408   3386     ){
  3409   3387       Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  3410   3388       Fts5ChunkIter sPos;           /* Used to iterate through position list */
         3389  +    int nPos;                     /* position-list size field value */
         3390  +    int nTerm;
         3391  +    const u8 *pTerm;
  3411   3392   
  3412         -    /* If the segment being written is the oldest in the entire index and
  3413         -    ** the position list is empty (i.e. the entry is a delete marker), no
  3414         -    ** entry need be written to the output.  */
         3393  +    /* Check for key annihilation. */
         3394  +    if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue;
         3395  +
  3415   3396       fts5ChunkIterInit(p, pSeg, &sPos);
  3416         -    if( bOldest==0 || sPos.nRem>0 ){
  3417         -      int nTerm;
  3418         -      const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm);
  3419         -      if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
  3420         -        if( pnRem && writer.nLeafWritten>nRem ){
  3421         -          fts5ChunkIterRelease(&sPos);
  3422         -          break;
  3423         -        }
  3424         -
  3425         -        /* This is a new term. Append a term to the output segment. */
  3426         -        if( bRequireDoclistTerm ){
  3427         -          fts5WriteAppendZerobyte(p, &writer);
  3428         -        }
  3429         -        fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
  3430         -        fts5BufferSet(&p->rc, &term, nTerm, pTerm);
  3431         -        bRequireDoclistTerm = 1;
  3432         -      }
  3433         -
  3434         -      /* Append the rowid to the output */
  3435         -      /* WRITEPOSLISTSIZE */
  3436         -      fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), sPos.nRem*2);
  3437         -
  3438         -      for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
  3439         -        fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n);
  3440         -      }
         3397  +
         3398  +    pTerm = fts5MultiIterTerm(pIter, &nTerm);
         3399  +    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
         3400  +      if( pnRem && writer.nLeafWritten>nRem ){
         3401  +        fts5ChunkIterRelease(&sPos);
         3402  +        break;
         3403  +      }
         3404  +
         3405  +      /* This is a new term. Append a term to the output segment. */
         3406  +      if( bRequireDoclistTerm ){
         3407  +        fts5WriteAppendZerobyte(p, &writer);
         3408  +      }
         3409  +      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
         3410  +      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
         3411  +      bRequireDoclistTerm = 1;
         3412  +    }
         3413  +
         3414  +    /* Append the rowid to the output */
         3415  +    /* WRITEPOSLISTSIZE */
         3416  +    nPos = pSeg->nPos*2 + pSeg->bDel;
         3417  +    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);
         3418  +
         3419  +    for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
         3420  +      fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n);
  3441   3421       }
  3442   3422   
  3443   3423       fts5ChunkIterRelease(&sPos);
  3444   3424     }
  3445   3425   
  3446   3426     /* Flush the last leaf page to disk. Set the output segment b-tree height
  3447   3427     ** and last leaf page number at the same time.  */
................................................................................
  3878   3858     Fts5StructureSegment *pSeg, 
  3879   3859     Fts5BtreeIter *pIter
  3880   3860   ){
  3881   3861     int nByte;
  3882   3862     int i;
  3883   3863     nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1);
  3884   3864     memset(pIter, 0, sizeof(*pIter));
  3885         -  pIter->nLvl = pSeg->nHeight-1;
  3886         -  pIter->iIdx = iIdx;
  3887         -  pIter->p = p;
  3888         -  pIter->pSeg = pSeg;
  3889         -  if( nByte && p->rc==SQLITE_OK ){
         3865  +  if( nByte ){
  3890   3866       pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte);
         3867  +  }
         3868  +  if( p->rc==SQLITE_OK ){
         3869  +    pIter->nLvl = pSeg->nHeight-1;
         3870  +    pIter->iIdx = iIdx;
         3871  +    pIter->p = p;
         3872  +    pIter->pSeg = pSeg;
  3891   3873     }
  3892   3874     for(i=0; p->rc==SQLITE_OK && i<pIter->nLvl; i++){
  3893   3875       i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, i+1, 1);
  3894   3876       Fts5Data *pData;
  3895   3877       pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid);
  3896   3878       if( pData ){
  3897   3879         fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s);

Changes to ext/fts5/test/fts5corrupt.test.

    59     59     CREATE VIRTUAL TABLE t2 USING fts5(x);
    60     60     INSERT INTO t2(t2, rank) VALUES('pgsz', 64);
    61     61   }
    62     62   db func rnddoc fts5_rnddoc
    63     63   do_test 2.1 {
    64     64     for {set i 0} {$i < 500} {incr i} {
    65     65       execsql { INSERT INTO t2 VALUES(rnddoc(50)) }
    66         -    execsql { INSERT INTO t2(t2) VALUES('integrity-check') }
    67     66     }
           67  +  execsql { INSERT INTO t2(t2) VALUES('integrity-check') }
    68     68   } {}
    69     69   
    70     70   #--------------------------------------------------------------------
    71     71   #
    72     72   
    73     73   finish_test
    74     74