Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -408,16 +408,23 @@ ** considered to be greater than all other iterators. ** ** aFirst[1] contains the index in aSeg[] of the iterator that points to ** the smallest key overall. aFirst[0] is unused. */ + +typedef struct Fts5CResult Fts5CResult; +struct Fts5CResult { + u16 iFirst; /* aSeg[] index of firstest iterator */ + u8 bTermEq; /* True if the terms are equal */ +}; + struct Fts5MultiSegIter { int nSeg; /* Size of aSeg[] array */ int bRev; /* True to iterate in reverse order */ int bSkipEmpty; /* True to skip deleted entries */ Fts5SegIter *aSeg; /* Array of segment iterators */ - u16 *aFirst; /* Current merge state (see above) */ + Fts5CResult *aFirst; /* Current merge state (see above) */ }; /* ** Object for iterating through a single segment, visiting each term/docid ** pair in the segment. @@ -1742,12 +1749,14 @@ ** is not considered an error if the iterator reaches EOF. If an error has ** already occurred when this function is called, it is a no-op. */ static void fts5SegIterNext( Fts5Index *p, /* FTS5 backend object */ - Fts5SegIter *pIter /* Iterator to advance */ + Fts5SegIter *pIter, /* Iterator to advance */ + int *pbNewTerm /* OUT: Set for new term */ ){ + assert( pbNewTerm==0 || *pbNewTerm==0 ); if( p->rc==SQLITE_OK ){ if( pIter->flags & FTS5_SEGITER_REVERSE ){ if( pIter->iRowidOffset>0 ){ u8 *a = pIter->pLeaf->p; int iOff; @@ -1839,10 +1848,11 @@ if( pIter->flags & FTS5_SEGITER_ONETERM ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; }else{ fts5SegIterLoadTerm(p, pIter, nKeep); + if( pbNewTerm ) *pbNewTerm = 1; } } } } } @@ -2031,11 +2041,11 @@ pIter->iLeafOffset = fts5GetU16(&pIter->pLeaf->p[2]); fts5SegIterLoadTerm(p, pIter, 0); do { res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm); if( res>=0 ) break; - fts5SegIterNext(p, pIter); + fts5SegIterNext(p, pIter, 0); }while( pIter->pLeaf && p->rc==SQLITE_OK ); if( bGe==0 && res ){ /* Set iterator to point to EOF */ fts5DataRelease(pIter->pLeaf); @@ -2121,10 +2131,83 @@ fts5DlidxIterFree(pIter->pDlidx); sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); } +#ifdef SQLITE_DEBUG + +/* +** This function is used as part of the big assert() procedure implemented by +** fts5AssertMultiIterSetup(). It ensures that the result currently stored +** in *pRes is the correct result of comparing the current positions of the +** two iterators. +*/ +static void fts5AssertComparisonResult( + Fts5MultiSegIter *pIter, + Fts5SegIter *p1, + Fts5SegIter *p2, + Fts5CResult *pRes +){ + int i1 = p1 - pIter->aSeg; + int i2 = p2 - pIter->aSeg; + + if( p1->pLeaf || p2->pLeaf ){ + if( p1->pLeaf==0 ){ + assert( pRes->iFirst==i2 ); + }else if( p2->pLeaf==0 ){ + assert( pRes->iFirst==i1 ); + }else{ + int nMin = MIN(p1->term.n, p2->term.n); + int res = memcmp(p1->term.p, p2->term.p, nMin); + if( res==0 ) res = p1->term.n - p2->term.n; + + if( res==0 ){ + assert( pRes->bTermEq==1 ); + assert( p1->iRowid!=p2->iRowid ); + res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; + }else{ + assert( pRes->bTermEq==0 ); + } + + if( res<0 ){ + assert( pRes->iFirst==i1 ); + }else{ + assert( pRes->iFirst==i2 ); + } + } + } +} + +/* +** This function is a no-op unless SQLITE_DEBUG is defined when this module +** is compiled. In that case, this function is essentially an assert() +** statement used to verify that the contents of the pIter->aFirst[] array +** are correct. +*/ +static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5MultiSegIter *pIter){ + if( p->rc==SQLITE_OK ){ + int i; + for(i=0; inSeg; i+=2){ + Fts5SegIter *p1 = &pIter->aSeg[i]; + Fts5SegIter *p2 = &pIter->aSeg[i+1]; + Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; + fts5AssertComparisonResult(pIter, p1, p2, pRes); + } + + for(i=1; i<(pIter->nSeg / 2); i+=2){ + Fts5CResult *pRes = &pIter->aFirst[i]; + Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; + Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; + + fts5AssertComparisonResult(pIter, p1, p2, pRes); + } + } +} +#else +# define fts5AssertMultiIterSetup(x,y) +#endif + /* ** Do the comparison necessary to populate pIter->aFirst[iOut]. ** ** If the returned value is non-zero, then it is the index of an entry ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing @@ -2135,33 +2218,36 @@ int i1; /* Index of left-hand Fts5SegIter */ int i2; /* Index of right-hand Fts5SegIter */ int iRes; Fts5SegIter *p1; /* Left-hand Fts5SegIter */ Fts5SegIter *p2; /* Right-hand Fts5SegIter */ + Fts5CResult *pRes = &pIter->aFirst[iOut]; assert( iOutnSeg && iOut>0 ); assert( pIter->bRev==0 || pIter->bRev==1 ); if( iOut>=(pIter->nSeg/2) ){ i1 = (iOut - pIter->nSeg/2) * 2; i2 = i1 + 1; }else{ - i1 = pIter->aFirst[iOut*2]; - i2 = pIter->aFirst[iOut*2+1]; + i1 = pIter->aFirst[iOut*2].iFirst; + i2 = pIter->aFirst[iOut*2+1].iFirst; } p1 = &pIter->aSeg[i1]; p2 = &pIter->aSeg[i2]; + pRes->bTermEq = 0; if( p1->pLeaf==0 ){ /* If p1 is at EOF */ iRes = i2; }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ iRes = i1; }else{ int res = fts5BufferCompare(&p1->term, &p2->term); if( res==0 ){ assert( i2>i1 ); assert( i2!=0 ); + pRes->bTermEq = 1; if( p1->iRowid==p2->iRowid ) return i2; res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; } assert( res!=0 ); if( res<0 ){ @@ -2169,11 +2255,11 @@ }else{ iRes = i2; } } - pIter->aFirst[iOut] = iRes; + pRes->iFirst = iRes; return 0; } /* ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. @@ -2250,11 +2336,11 @@ bMove = 0; } } while( 1 ){ - if( bMove ) fts5SegIterNext(p, pIter); + if( bMove ) fts5SegIterNext(p, pIter, 0); if( pIter->pLeaf==0 ) break; if( bRev==0 && pIter->iRowid>=iMatch ) break; if( bRev!=0 && pIter->iRowid<=iMatch ) break; bMove = 1; } @@ -2282,15 +2368,46 @@ ){ int i; for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ int iEq; if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ - fts5SegIterNext(p, &pIter->aSeg[iEq]); + fts5SegIterNext(p, &pIter->aSeg[iEq], 0); i = pIter->nSeg + iEq; } } } + +static int fts5MultiIterAdvanceRowid( + Fts5Index *p, /* FTS5 backend to iterate within */ + Fts5MultiSegIter *pIter, /* Iterator to update aFirst[] array for */ + int iChanged /* Index of sub-iterator just advanced */ +){ + int i; + Fts5SegIter *pNew = &pIter->aSeg[iChanged]; + Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; + + for(i=(pIter->nSeg+iChanged)/2; p->rc==SQLITE_OK; i=i/2){ + Fts5CResult *pRes = &pIter->aFirst[i]; + + assert( pNew->pLeaf ); + assert( pRes->bTermEq==0 || pOther->pLeaf ); + + if( pRes->bTermEq ){ + if( pNew->iRowid==pOther->iRowid ){ + return 1; + }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ + pNew = pOther; + } + } + pRes->iFirst = (pNew - pIter->aSeg); + if( i==1 ) break; + + pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; + } + + return 0; +} /* ** Move the iterator to the next entry. ** ** If an error occurs, an error code is left in Fts5Index.rc. It is not @@ -2304,21 +2421,29 @@ i64 iFrom /* Advance at least as far as this */ ){ if( p->rc==SQLITE_OK ){ int bUseFrom = bFrom; do { - int iFirst = pIter->aFirst[1]; + int iFirst = pIter->aFirst[1].iFirst; + int bNewTerm = 0; Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; if( bUseFrom && pSeg->pDlidx ){ fts5SegIterNextFrom(p, pSeg, iFrom); }else{ - fts5SegIterNext(p, pSeg); + fts5SegIterNext(p, pSeg, &bNewTerm); + } + + if( pSeg->pLeaf==0 || bNewTerm + || fts5MultiIterAdvanceRowid(p, pIter, iFirst) + ){ + fts5MultiIterAdvanced(p, pIter, iFirst, 1); } - fts5MultiIterAdvanced(p, pIter, iFirst, 1); + fts5AssertMultiIterSetup(p, pIter); + bUseFrom = 0; }while( pIter->bSkipEmpty - && fts5SegIterIsDelete(p, &pIter->aSeg[pIter->aFirst[1]]) + && fts5SegIterIsDelete(p, &pIter->aSeg[pIter->aFirst[1].iFirst]) ); } } /* @@ -2334,11 +2459,11 @@ */ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ int iIdx, /* Config.aHash[] index of FTS index */ - int bSkipEmpty, + int bSkipEmpty, /* True to ignore delete-keys */ int flags, /* True for >= */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */ Fts5MultiSegIter **ppOut /* New object */ @@ -2361,16 +2486,16 @@ } for(nSlot=2; nSlotaSeg[] */ - sizeof(u16) * nSlot /* pNew->aFirst[] */ + sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ ); if( pNew==0 ) return; pNew->nSeg = nSlot; pNew->aSeg = (Fts5SegIter*)&pNew[1]; - pNew->aFirst = (u16*)&pNew->aSeg[nSlot]; + pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); pNew->bSkipEmpty = bSkipEmpty; /* Initialize each of the component segment iterators. */ if( iLevel<0 ){ @@ -2405,17 +2530,18 @@ ** object and set the output variable to NULL. */ if( p->rc==SQLITE_OK ){ for(iIter=nSlot-1; iIter>0; iIter--){ int iEq; if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ - fts5SegIterNext(p, &pNew->aSeg[iEq]); + fts5SegIterNext(p, &pNew->aSeg[iEq], 0); fts5MultiIterAdvanced(p, pNew, iEq, iIter); } } + fts5AssertMultiIterSetup(p, pNew); if( pNew->bSkipEmpty - && fts5SegIterIsDelete(p, &pNew->aSeg[pNew->aFirst[1]]) + && fts5SegIterIsDelete(p, &pNew->aSeg[pNew->aFirst[1].iFirst]) ){ fts5MultiIterNext(p, pNew, 0, 0); } }else{ fts5MultiIterFree(p, pNew); @@ -2426,21 +2552,21 @@ /* ** Return true if the iterator is at EOF or if an error has occurred. ** False otherwise. */ static int fts5MultiIterEof(Fts5Index *p, Fts5MultiSegIter *pIter){ - return (p->rc || pIter->aSeg[ pIter->aFirst[1] ].pLeaf==0); + return (p->rc || pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0); } /* ** Return the rowid of the entry that the iterator currently points ** to. If the iterator points to EOF when this function is called the ** results are undefined. */ static i64 fts5MultiIterRowid(Fts5MultiSegIter *pIter){ - assert( pIter->aSeg[ pIter->aFirst[1] ].pLeaf ); - return pIter->aSeg[ pIter->aFirst[1] ].iRowid; + assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); + return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; } /* ** Move the iterator to the next entry at or following iMatch. */ @@ -2462,11 +2588,11 @@ /* ** Return a pointer to a buffer containing the term associated with the ** entry that the iterator currently points to. */ static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){ - Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1] ]; + Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; *pn = p->term.n; return p->term.p; } /* @@ -2591,11 +2717,11 @@ Fts5Index *p, /* FTS5 backend object */ Fts5MultiSegIter *pMulti, /* Multi-seg iterator to read pos-list from */ Fts5PosIter *pIter /* Initialize this object */ ){ if( p->rc==SQLITE_OK ){ - Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ]; + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; memset(pIter, 0, sizeof(*pIter)); fts5ChunkIterInit(p, pSeg, &pIter->chunk); if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){ fts5PosIterNext(p, pIter); } @@ -3212,11 +3338,11 @@ assert( iLvl>=0 ); for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ - Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ]; + Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; Fts5ChunkIter sPos; /* Used to iterate through position list */ /* If the segment being written is the oldest in the entire index and ** the position list is empty (i.e. the entry is a delete marker), no ** entry need be written to the output. */ @@ -3938,11 +4064,11 @@ int bSz, Fts5Buffer *pBuf ){ if( p->rc==SQLITE_OK ){ Fts5ChunkIter iter; - Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ]; + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); fts5ChunkIterInit(p, pSeg, &iter); if( fts5ChunkIterEof(p, &iter)==0 ){ if( bSz ){ fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem);