Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -344,11 +344,10 @@ /* ** Values used as part of the flags argument passed to IndexQuery(). */ #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ -#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ /* The following are used internally by the fts5_index.c module. They are ** defined here only to make it easier to avoid clashes with the flags ** above. */ @@ -471,11 +470,11 @@ ** Called during virtual module initialization to register UDF ** fts5_decode() with SQLite */ int sqlite3Fts5IndexInit(sqlite3*); -int sqlite3Fts5IndexSetCookie(Fts5Index*, int); +int sqlite3Fts5IndexIncrCookie(Fts5Index*); /* ** Return the total number of entries read from the %_data table by ** this connection since it was created. */ @@ -482,13 +481,15 @@ int sqlite3Fts5IndexReads(Fts5Index *p); int sqlite3Fts5IndexReinit(Fts5Index *p); int sqlite3Fts5IndexOptimize(Fts5Index *p); int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); -int sqlite3Fts5IndexReset(Fts5Index *p); +int sqlite3Fts5IndexNewTrans(Fts5Index *p); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); + +void sqlite3Fts5IndexCloseReader(Fts5Index*); /* ** End of interface to code in fts5_index.c. **************************************************************************/ Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -10,23 +10,24 @@ ** ****************************************************************************** ** ** Low level access to the FTS index stored in the database file. The ** routines in this file file implement all read and write access to the -** %_data table. Other parts of the system access this functionality via -** the interface defined in fts5Int.h. +** %_data and %_idx tables. Other parts of the system access this +** functionality via the interface defined in fts5Int.h. */ #include "fts5Int.h" /* ** Overview: ** -** The %_data table contains all the FTS indexes for an FTS5 virtual table. -** As well as the main term index, there may be up to 31 prefix indexes. -** The format is similar to FTS3/4, except that: +** The %_data table contains the FTS index for an FTS5 virtual table. +** All entries, for terms and prefixes, are stored in a single data +** structure. The format is similar to FTS3/4, but differs in the +** following ways: ** ** * all segment b-tree leaf data is stored in fixed size page records ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is ** taken to ensure it is possible to iterate in either direction through ** the entries in a doclist, or to seek to a specific entry within a @@ -37,35 +38,23 @@ ** the doclist. This is used to speed up seek operations, and merges of ** large doclists with very small doclists. ** ** * extra fields in the "structure record" record the state of ongoing ** incremental merge operations. -** */ -#define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ -#define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ - -#define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ - -#define FTS5_MAIN_PREFIX '0' - -#if FTS5_MAX_PREFIX_INDEXES > 31 -# error "FTS5_MAX_PREFIX_INDEXES is too large" -#endif - /* -** Details: +** Contents of %_data table: ** ** The %_data table managed by this module, ** ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); ** -** , contains the following 5 types of records. See the comments surrounding +** , contains the following 4 types of records. See the comments surrounding ** the FTS5_*_ROWID macros below for a description of how %_data rowids are -** assigned to each fo them. +** assigned to each of them. ** ** 1. Structure Records: ** ** The set of segments that make up an index - the index structure - are ** recorded in a single record within the %_data table. The record consists @@ -174,11 +163,11 @@ ** very first term of the segment: ** ** varint : size of first term ** blob: first term data ** -** 5. Segment doclist indexes: +** 4. Segment doclist indexes: ** ** Doclist indexes are themselves b-trees, however they usually consist of ** a single leaf record only. The format of each doclist index leaf page ** is: ** @@ -204,10 +193,25 @@ ** ** * A list of delta-encoded varints - the first rowid on each subsequent ** child page. ** */ + + +#define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ +#define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ +#define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ + +/* All entries for regular terms in the FTS index are prefixed with '0'. +** Entries for the first prefix index are prefixed with '1'. And so on, +** up to ('0'+31). */ +#define FTS5_MAIN_PREFIX '0' + +#if FTS5_MAX_PREFIX_INDEXES > 31 +# error "FTS5_MAX_PREFIX_INDEXES is too large" +#endif + /* ** Rowids for the averages and structure records in the %_data table. */ #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ @@ -260,11 +264,10 @@ typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5DlidxLvl Fts5DlidxLvl; typedef struct Fts5DlidxWriter Fts5DlidxWriter; typedef struct Fts5Iter Fts5Iter; -typedef struct Fts5PageWriter Fts5PageWriter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; @@ -303,15 +306,21 @@ sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=? */ sqlite3_stmt *pIdxSelect; int nRead; /* Total number of blocks read */ - sqlite3_stmt *pDataVersion; + /* In-memory cache of the 'structure' record */ + sqlite3_stmt *pDataVersion; /* PRAGMA .data_version */ i64 iStructVersion; /* data_version when pStruct read */ Fts5Structure *pStruct; /* Current db structure (or NULL) */ }; +/* +** An iterator of this sort is used to iterate through a doclist stored +** entirely in memory. See functions fts5DoclistIterInit() and +** fts5DoclistIterNext() for details. +*/ struct Fts5DoclistIter { u8 *aEof; /* Pointer to 1 byte past end of doclist */ /* Output variables. aPoslist==0 at EOF */ i64 iRowid; @@ -344,31 +353,27 @@ }; /* ** An object of type Fts5SegWriter is used to write to segments. */ -struct Fts5PageWriter { - int pgno; /* Page number for this page */ - int iPrevPgidx; /* Previous value written into pgidx */ - Fts5Buffer buf; /* Buffer containing leaf data */ - Fts5Buffer pgidx; /* Buffer containing page-index */ - Fts5Buffer term; /* Buffer containing previous term on page */ -}; struct Fts5DlidxWriter { int pgno; /* Page number for this page */ int bPrevValid; /* True if iPrev is valid */ i64 iPrev; /* Previous rowid value written to page */ Fts5Buffer buf; /* Buffer containing page data */ }; struct Fts5SegWriter { int iSegid; /* Segid to write to */ - Fts5PageWriter writer; /* PageWriter object */ + int pgno; /* Page number for current leaf page */ + int iPrevPgidx; /* Previous value written into pgidx */ + Fts5Buffer buf; /* Buffer of current leaf page data */ + Fts5Buffer pgidx; /* Buffer of current leaf page-index */ + Fts5Buffer term; /* Buffer containing previous term on leaf */ + i64 iPrevRowid; /* Previous rowid written to current leaf */ u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ u8 bFirstRowidInPage; /* True if next rowid is first in page */ - /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */ - u8 bFirstTermInPage; /* True if next term will be first in leaf */ int nLeafWritten; /* Number of leaf pages written */ int nEmpty; /* Number of contiguous term-less nodes */ int nDlidx; /* Allocated size of aDlidx[] array */ Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */ @@ -526,10 +531,17 @@ i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ Fts5CResult *aFirst; /* Current merge state (see above) */ Fts5SegIter aSeg[1]; /* Array of segment iterators */ }; + +/* +** Given pointer "x" to an Fts5Iter structure, return a pointer the the +** current first component segment iterator. +*/ +#define fts5IterSegment(x) (&(x)->aSeg [ (x)->aFirst[1].iFirst ]) + /* ** An instance of the following type is used to iterate through the contents ** of a doclist-index record. ** @@ -618,11 +630,11 @@ } /* ** Close the read-only blob handle, if it is open. */ -static void fts5CloseReader(Fts5Index *p){ +void sqlite3Fts5IndexCloseReader(Fts5Index *p){ if( p->pReader ){ sqlite3_blob *pReader = p->pReader; p->pReader = 0; sqlite3_blob_close(pReader); } @@ -648,11 +660,11 @@ p->pReader = 0; rc = sqlite3_blob_reopen(pBlob, iRowid); assert( p->pReader==0 ); p->pReader = pBlob; if( rc!=SQLITE_OK ){ - fts5CloseReader(p); + sqlite3Fts5IndexCloseReader(p); } if( rc==SQLITE_ABORT ) rc = SQLITE_OK; } /* If the blob handle is not open at this point, open it and seek @@ -985,18 +997,26 @@ } return pRet; } +/* +** Execute "PRAGMA db.data_version" and return the integer value that +** it returns. +** +** This function returns 0 if Fts5Index.rc is set to other than SQLITE_OK +** when it is called. If an error occurs while executing the PRAGMA, +** Fts5Index.rc is set to an SQLite error code before returning. +*/ static i64 fts5IndexDataVersion(Fts5Index *p){ i64 iVersion = 0; if( p->rc==SQLITE_OK ){ if( p->pDataVersion==0 ){ p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb) - ); + ); if( p->rc ) return 0; } if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){ iVersion = sqlite3_column_int64(p->pDataVersion, 0); @@ -1004,10 +1024,23 @@ p->rc = sqlite3_reset(p->pDataVersion); } return iVersion; } + +/* +** If there is currently no cache of the index structure in memory, load +** one from the database. +*/ +static void fts5StructureCache(Fts5Index *p){ + if( p->pStruct==0 ){ + p->iStructVersion = fts5IndexDataVersion(p); + if( p->rc==SQLITE_OK ){ + p->pStruct = fts5StructureReadUncached(p); + } + } +} /* ** Read, deserialize and return the structure record. ** ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array @@ -1017,17 +1050,11 @@ ** If an error occurs, NULL is returned and an error code left in the ** Fts5Index handle. If an error has already occurred when this function ** is called, it is a no-op. */ static Fts5Structure *fts5StructureRead(Fts5Index *p){ - - if( p->pStruct==0 ){ - p->iStructVersion = fts5IndexDataVersion(p); - if( p->rc==SQLITE_OK ){ - p->pStruct = fts5StructureReadUncached(p); - } - } + fts5StructureCache(p); #if 0 else{ Fts5Structure *pTest = fts5StructureReadUncached(p); if( pTest ){ @@ -1830,11 +1857,11 @@ ** Return true if the iterator passed as the second argument currently ** points to a delete marker. A delete marker is an entry with a 0 byte ** position-list. */ static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){ - Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; + Fts5SegIter *pSeg = fts5IterSegment(pIter); return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); } /* ** Advance iterator pIter to the next entry. @@ -2531,11 +2558,11 @@ ** statement used to verify that the contents of the pIter->aFirst[] array ** are correct. */ static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ if( p->rc==SQLITE_OK ){ - Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; + Fts5SegIter *pFirst = fts5IterSegment(pIter); int i; assert( (pFirst->pLeaf==0)==pIter->base.bEof ); /* Check that pIter->iSwitchRowid is set correctly. */ @@ -2804,11 +2831,11 @@ /* ** Set the pIter->bEof variable based on the state of the sub-iterators. */ static void fts5MultiIterSetEof(Fts5Iter *pIter){ - Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; + Fts5SegIter *pSeg = fts5IterSegment(pIter); pIter->base.bEof = pSeg->pLeaf==0; pIter->iSwitchRowid = pSeg->iRowid; } /* @@ -2839,16 +2866,16 @@ if( pSeg->pLeaf==0 || bNewTerm || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) ){ fts5MultiIterAdvanced(p, pIter, iFirst, 1); fts5MultiIterSetEof(pIter); - pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; + pSeg = fts5IterSegment(pIter); if( pSeg->pLeaf==0 ) return; } fts5AssertMultiIterSetup(p, pIter); - assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf ); + assert( pSeg==fts5IterSegment(pIter) && pSeg->pLeaf ); if( pIter->bSkipEmpty==0 || pSeg->nPos ){ pIter->xSetOutputs(pIter, pSeg); return; } bUseFrom = 0; @@ -3021,10 +3048,24 @@ } }while( ipLeaf->p[pSeg->iLeafOffset]; int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); int pgno = pSeg->iLeafPgno; int pgnoSave = 0; - /* This function does notmwork with detail=none databases. */ + /* This function does not work with detail=none databases. */ assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ pgnoSave = pgno+1; } @@ -3413,12 +3454,11 @@ fts5AssertMultiIterSetup(p, pNew); if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){ fts5MultiIterNext(p, pNew, 0, 0); }else if( pNew->base.bEof==0 ){ - Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst]; - pNew->xSetOutputs(pNew, pSeg); + pNew->xSetOutputs(pNew, fts5IterSegment(pNew)); } }else{ fts5MultiIterFree(pNew); *ppOut = 0; @@ -3468,24 +3508,22 @@ /* ** Return true if the iterator is at EOF or if an error has occurred. ** False otherwise. */ static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ - assert( p->rc - || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof - ); + assert( p->rc || (fts5IterSegment(pIter)->pLeaf==0)==pIter->base.bEof ); return (p->rc || pIter->base.bEof); } /* ** Return the rowid of the entry that the iterator currently points ** to. If the iterator points to EOF when this function is called the ** results are undefined. */ static i64 fts5MultiIterRowid(Fts5Iter *pIter){ - assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); - return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; + assert( fts5IterSegment(pIter)->pLeaf ); + return fts5IterSegment(pIter)->iRowid; } /* ** Move the iterator to the next entry at or following iMatch. */ @@ -3711,11 +3749,11 @@ Fts5SegWriter *pWriter, /* Writer object */ int nTerm, const u8 *pTerm /* First term on new page */ ){ fts5WriteFlushBtree(p, pWriter); fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm); - pWriter->iBtPage = pWriter->writer.pgno; + pWriter->iBtPage = pWriter->pgno; } /* ** This function is called when flushing a leaf page that contains no ** terms at all to disk. @@ -3734,23 +3772,29 @@ /* Increment the "number of sequential leaves without a term" counter. */ pWriter->nEmpty++; } +/* +** Buffer pBuf contains a doclist-index page. Return the first rowid +** value on the page. +*/ static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ i64 iRowid; int iOff; + /* Skip past the flags byte and the page number of the left-most child + ** page to the first rowid. */ iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid); fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid); return iRowid; } /* ** Rowid iRowid has just been appended to the current leaf page. It is the -** first on the page. This function appends an appropriate entry to the current -** doclist-index. +** first on the page. This function appends an appropriate entry to the +** current doclist-index. */ static void fts5WriteDlidxAppend( Fts5Index *p, Fts5SegWriter *pWriter, i64 iRowid @@ -3795,11 +3839,11 @@ } if( pDlidx->bPrevValid ){ iVal = iRowid - pDlidx->iPrev; }else{ - i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno); + i64 iPgno = (i==0 ? pWriter->pgno : pDlidx[-1].pgno); assert( pDlidx->buf.n==0 ); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); iVal = iRowid; } @@ -3808,49 +3852,46 @@ pDlidx->bPrevValid = 1; pDlidx->iPrev = iRowid; } } +/* +** Flush the writer's current leaf page to disk. Initialize various +** fields ready for the next leaf page. +*/ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ - static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; - Fts5PageWriter *pPage = &pWriter->writer; i64 iRowid; -static int nCall = 0; -nCall++; - - assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); - /* Set the szLeaf header field. */ - assert( 0==fts5GetU16(&pPage->buf.p[2]) ); - fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n); + assert( 0==fts5GetU16(&pWriter->buf.p[2]) ); + fts5PutU16(&pWriter->buf.p[2], (u16)pWriter->buf.n); - if( pWriter->bFirstTermInPage ){ + if( pWriter->pgidx.n==0 ){ /* No term was written to this page. */ - assert( pPage->pgidx.n==0 ); + assert( pWriter->pgidx.n==0 ); fts5WriteBtreeNoTerm(p, pWriter); }else{ /* Append the pgidx to the page buffer. Set the szLeaf header field. */ - fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p); + fts5BufferSafeAppendBlob(&pWriter->buf, pWriter->pgidx.p, pWriter->pgidx.n); } /* Write the page out to disk */ - iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno); - fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); + iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pWriter->pgno); + fts5DataWrite(p, iRowid, pWriter->buf.p, pWriter->buf.n); /* Initialize the next page. */ - fts5BufferZero(&pPage->buf); - fts5BufferZero(&pPage->pgidx); - fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); - pPage->iPrevPgidx = 0; - pPage->pgno++; + fts5BufferZero(&pWriter->buf); + fts5BufferZero(&pWriter->pgidx); + memset(pWriter->buf.p, 0, 4); + pWriter->buf.n = 4; + pWriter->iPrevPgidx = 0; + pWriter->pgno++; /* Increase the leaves written counter */ pWriter->nLeafWritten++; /* The new leaf holds no terms or rowids */ - pWriter->bFirstTermInPage = 1; pWriter->bFirstRowidInPage = 1; } /* ** Append term pTerm/nTerm to the segment being written by the writer passed @@ -3863,77 +3904,68 @@ Fts5Index *p, Fts5SegWriter *pWriter, int nTerm, const u8 *pTerm ){ int nPrefix; /* Bytes of prefix compression for term */ - Fts5PageWriter *pPage = &pWriter->writer; - Fts5Buffer *pPgidx = &pWriter->writer.pgidx; + int iOff; + Fts5Buffer *pPgidx = &pWriter->pgidx; assert( p->rc==SQLITE_OK ); - assert( pPage->buf.n>=4 ); - assert( pPage->buf.n>4 || pWriter->bFirstTermInPage ); + assert( pWriter->buf.n>=4 ); /* If the current leaf page is full, flush it to disk. */ - if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ - if( pPage->buf.n>4 ){ + if( (pWriter->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ + if( pWriter->buf.n>4 ){ fts5WriteFlushLeaf(p, pWriter); } - fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING); - } - - /* TODO1: Updating pgidx here. */ - pPgidx->n += sqlite3Fts5PutVarint( - &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx - ); - pPage->iPrevPgidx = pPage->buf.n; -#if 0 - fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); - pPgidx->n += 2; -#endif - - if( pWriter->bFirstTermInPage ){ + fts5BufferGrow(&p->rc, &pWriter->buf, nTerm+FTS5_DATA_PADDING); + } + + iOff = pWriter->buf.n; + if( pPgidx->n==0 ){ nPrefix = 0; - if( pPage->pgno!=1 ){ + if( pWriter->pgno!=1 ){ /* This is the first term on a leaf that is not the leftmost leaf in ** the segment b-tree. In this case it is necessary to add a term to ** the b-tree hierarchy that is (a) larger than the largest term ** already written to the segment and (b) smaller than or equal to ** this term. In other words, a prefix of (pTerm/nTerm) that is one ** byte longer than the longest prefix (pTerm/nTerm) shares with the ** previous term. ** - ** Usually, the previous term is available in pPage->term. The exception + ** Usually, the previous term is available in pWriter->term. The exception ** is if this is the first term written in an incremental-merge step. ** In this case the previous term is not available, so just write a ** copy of (pTerm/nTerm) into the parent node. This is slightly ** inefficient, but still correct. */ int n = nTerm; - if( pPage->term.n ){ - n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm); + if( pWriter->term.n ){ + n = 1 + fts5PrefixCompress(pWriter->term.n, pWriter->term.p, pTerm); } fts5WriteBtreeTerm(p, pWriter, n, pTerm); - pPage = &pWriter->writer; } }else{ - nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm); - fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix); + nPrefix = fts5PrefixCompress(pWriter->term.n, pWriter->term.p, pTerm); + fts5BufferAppendVarint(&p->rc, &pWriter->buf, nPrefix); } + + fts5BufferSafeAppendVarint(pPgidx, iOff - pWriter->iPrevPgidx); + pWriter->iPrevPgidx = iOff; /* Append the number of bytes of new data, then the term data itself ** to the page. */ - fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix); - fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]); + fts5BufferAppendVarint(&p->rc, &pWriter->buf, nTerm - nPrefix); + fts5BufferAppendBlob(&p->rc, &pWriter->buf, nTerm - nPrefix, &pTerm[nPrefix]); - /* Update the Fts5PageWriter.term field. */ - fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); - pWriter->bFirstTermInPage = 0; + /* Update the Fts5SegWriter.term field. */ + fts5BufferSet(&p->rc, &pWriter->term, nTerm, pTerm); pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); - pWriter->aDlidx[0].pgno = pPage->pgno; + pWriter->aDlidx[0].pgno = pWriter->pgno; } /* ** Append a rowid and position-list size field to the writers output. */ @@ -3941,64 +3973,66 @@ Fts5Index *p, Fts5SegWriter *pWriter, i64 iRowid ){ if( p->rc==SQLITE_OK ){ - Fts5PageWriter *pPage = &pWriter->writer; - if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ + if( (pWriter->buf.n + pWriter->pgidx.n)>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); } /* If this is to be the first rowid written to the page, set the ** rowid-pointer in the page-header. Also append a value to the dlidx ** buffer, in case a doclist-index is required. */ if( pWriter->bFirstRowidInPage ){ - fts5PutU16(pPage->buf.p, (u16)pPage->buf.n); + fts5PutU16(pWriter->buf.p, (u16)pWriter->buf.n); fts5WriteDlidxAppend(p, pWriter, iRowid); } /* Write the rowid. */ if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ - fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); + fts5BufferSafeAppendVarint(&pWriter->buf, iRowid); }else{ assert( p->rc || iRowid>pWriter->iPrevRowid ); - fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); + assert( pWriter->buf.n+9 <= pWriter->buf.nSpace ); + fts5BufferSafeAppendVarint(&pWriter->buf, iRowid - pWriter->iPrevRowid); } pWriter->iPrevRowid = iRowid; pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; } } +/* +** Append position list data to the writers output. +*/ static void fts5WriteAppendPoslistData( Fts5Index *p, - Fts5SegWriter *pWriter, - const u8 *aData, - int nData + Fts5SegWriter *pWriter, /* Writer object to write to the output of */ + const u8 *aData, /* Buffer containing data to append */ + int nData /* Size of buffer aData[] in bytes */ ){ - Fts5PageWriter *pPage = &pWriter->writer; const u8 *a = aData; int n = nData; assert( p->pConfig->pgsz>0 ); while( p->rc==SQLITE_OK - && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz + && (pWriter->buf.n + pWriter->pgidx.n + n)>=p->pConfig->pgsz ){ - int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n; + int nReq = p->pConfig->pgsz - pWriter->buf.n - pWriter->pgidx.n; int nCopy = 0; while( nCopyrc, &pPage->buf, nCopy, a); + fts5BufferAppendBlob(&p->rc, &pWriter->buf, nCopy, a); a += nCopy; n -= nCopy; fts5WriteFlushLeaf(p, pWriter); } if( n>0 ){ - fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); + fts5BufferAppendBlob(&p->rc, &pWriter->buf, n, a); } } /* ** Flush any data cached by the writer object to the database. Free any @@ -4008,32 +4042,35 @@ Fts5Index *p, Fts5SegWriter *pWriter, /* Writer object */ int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; - Fts5PageWriter *pLeaf = &pWriter->writer; if( p->rc==SQLITE_OK ){ - assert( pLeaf->pgno>=1 ); - if( pLeaf->buf.n>4 ){ + assert( pWriter->pgno>=1 ); + if( pWriter->buf.n>4 ){ fts5WriteFlushLeaf(p, pWriter); } - *pnLeaf = pLeaf->pgno-1; - if( pLeaf->pgno>1 ){ + *pnLeaf = pWriter->pgno-1; + if( pWriter->pgno>1 ){ fts5WriteFlushBtree(p, pWriter); } } - fts5BufferFree(&pLeaf->term); - fts5BufferFree(&pLeaf->buf); - fts5BufferFree(&pLeaf->pgidx); + fts5BufferFree(&pWriter->term); + fts5BufferFree(&pWriter->buf); + fts5BufferFree(&pWriter->pgidx); fts5BufferFree(&pWriter->btterm); for(i=0; inDlidx; i++){ sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); } sqlite3_free(pWriter->aDlidx); } +/* +** Initialize the Fts5SegWriter object indicated by the second argument +** to write to the segment with seg-id iSegid. +*/ static void fts5WriteInit( Fts5Index *p, Fts5SegWriter *pWriter, int iSegid ){ @@ -4041,20 +4078,19 @@ memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = iSegid; fts5WriteDlidxGrow(p, pWriter, 1); - pWriter->writer.pgno = 1; - pWriter->bFirstTermInPage = 1; + pWriter->pgno = 1; pWriter->iBtPage = 1; - assert( pWriter->writer.buf.n==0 ); - assert( pWriter->writer.pgidx.n==0 ); + assert( pWriter->buf.n==0 ); + assert( pWriter->pgidx.n==0 ); /* Grow the two buffers to pgsz + padding bytes in size. */ - sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer); - sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer); + sqlite3Fts5BufferSize(&p->rc, &pWriter->pgidx, nBuffer); + sqlite3Fts5BufferSize(&p->rc, &pWriter->buf, nBuffer); if( p->pIdxWriter==0 ){ Fts5Config *pConfig = p->pConfig; fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf( "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", @@ -4062,12 +4098,12 @@ )); } if( p->rc==SQLITE_OK ){ /* Initialize the 4-byte leaf-page header to 0x00. */ - memset(pWriter->writer.buf.p, 0, 4); - pWriter->writer.buf.n = 4; + memset(pWriter->buf.p, 0, 4); + pWriter->buf.n = 4; /* Bind the current output segment id to the index-writer. This is an ** optimization over binding the same value over and over as rows are ** inserted into %_idx by the current writer. */ sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); @@ -4076,10 +4112,12 @@ /* ** Iterator pIter was used to iterate through the input segments of on an ** incremental merge operation. This function is called if the incremental ** merge step has finished but the input has not been completely exhausted. +** It removes (DELETEs) any input segment leaves that are no longer required +** from the database. */ static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ int i; Fts5Buffer buf; memset(&buf, 0, sizeof(Fts5Buffer)); @@ -4134,10 +4172,15 @@ } } fts5BufferFree(&buf); } +/* +** This function is used as an fts5ChunkIterate() callback when merging +** levels. The chunk passed to this function is appended to the output +** segment. +*/ static void fts5MergeChunkCallback( Fts5Index *p, void *pCtx, const u8 *pChunk, int nChunk ){ @@ -4144,17 +4187,25 @@ Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx; fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk); } /* +** This function reads data from level iLvl of structure (*ppStruct) and +** writes it into a segment on level (iLvl+1). If (*ppStruct) indicates +** that there is already such a merge underway, this function continues +** it. Otherwise, a new merge is started. (*ppStruct) is updated with the +** results of the merge before this function returns. ** +** When this function is called in/out parameter *pnRem contains the maximum +** number of leaf pages to write to the database. *pnRem is decremented by +** the actual number of pages written before this function returns. */ static void fts5IndexMergeLevel( Fts5Index *p, /* FTS5 backend object */ Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ int iLvl, /* Level to read input from */ - int *pnRem /* Write up to this many output leaves */ + int *pnRem /* IN/OUT: Write this many output leaves */ ){ Fts5Structure *pStruct = *ppStruct; Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; Fts5StructureLevel *pLvlOut; Fts5Iter *pIter = 0; /* Iterator to read input data */ @@ -4177,11 +4228,11 @@ assert( pLvlOut->nSeg>0 ); nInput = pLvl->nMerge; pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; fts5WriteInit(p, &writer, pSeg->iSegid); - writer.writer.pgno = pSeg->pgnoLast+1; + writer.pgno = pSeg->pgnoLast+1; writer.iBtPage = 0; }else{ int iSegid = fts5AllocateSegid(p, pStruct); /* Extend the Fts5Structure object as required to ensure the output @@ -4212,11 +4263,11 @@ assert( iLvl>=0 ); for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ - Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; + Fts5SegIter *pSegIter = fts5IterSegment(pIter); int nPos; /* position-list size field value */ int nTerm; const u8 *pTerm; /* Check for key annihilation. */ @@ -4237,19 +4288,19 @@ /* WRITEPOSLISTSIZE */ fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); if( eDetail==FTS5_DETAIL_NONE ){ if( pSegIter->bDel ){ - fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); + fts5BufferAppendVarint(&p->rc, &writer.buf, 0); if( pSegIter->nPos>0 ){ - fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); + fts5BufferAppendVarint(&p->rc, &writer.buf, 0); } } }else{ /* Append the position-list data to the output */ nPos = pSegIter->nPos*2 + pSegIter->bDel; - fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos); + fts5BufferAppendVarint(&p->rc, &writer.buf, nPos); fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); } } /* Flush the last leaf page to disk. Set the output segment b-tree height @@ -4370,10 +4421,15 @@ fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge); } } +/* +** This function is called when a new level 0 segment has just been written +** to the database. If any crisis-merge operations are required as a result, +** they are performed here. +*/ static void fts5IndexCrisismerge( Fts5Index *p, /* FTS5 backend object */ Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ ){ const int nCrisis = p->pConfig->nCrisisMerge; @@ -4388,22 +4444,10 @@ iLvl++; } *ppStruct = pStruct; } -static int fts5IndexReturn(Fts5Index *p){ - int rc = p->rc; - p->rc = SQLITE_OK; - return rc; -} - -typedef struct Fts5FlushCtx Fts5FlushCtx; -struct Fts5FlushCtx { - Fts5Index *pIdx; - Fts5SegWriter writer; -}; - /* ** Buffer aBuf[] contains a list of varints, all small enough to fit ** in a 32-bit integer. Return the size of the largest prefix of this ** list nMax bytes or less in size. */ @@ -4420,196 +4464,193 @@ } return ret; } /* -** Flush the contents of in-memory hash table iHash to a new level-0 -** segment on disk. Also update the corresponding structure record. +** Flush any data stored in the in-memory hash tables to the database. ** ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op. */ -static void fts5FlushOneHash(Fts5Index *p){ - Fts5Hash *pHash = p->pHash; - Fts5Structure *pStruct; - int iSegid; - int pgnoLast = 0; /* Last leaf page number in segment */ - - /* Obtain a reference to the index structure and allocate a new segment-id - ** for the new level-0 segment. */ - pStruct = fts5StructureRead(p); - iSegid = fts5AllocateSegid(p, pStruct); - fts5StructureInvalidate(p); - - if( iSegid ){ - const int pgsz = p->pConfig->pgsz; - int eDetail = p->pConfig->eDetail; - Fts5StructureSegment *pSeg; /* New segment within pStruct */ - Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ - Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ - - Fts5SegWriter writer; - fts5WriteInit(p, &writer, iSegid); - - pBuf = &writer.writer.buf; - pPgidx = &writer.writer.pgidx; - - /* fts5WriteInit() should have initialized the buffers to (most likely) - ** the maximum space required. */ - assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) ); - assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) ); - - /* Begin scanning through hash table entries. This loop runs once for each - ** term/doclist currently stored within the hash table. */ - if( p->rc==SQLITE_OK ){ - p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); - } - while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ - const char *zTerm; /* Buffer containing term */ - const u8 *pDoclist; /* Pointer to doclist for this term */ - int nDoclist; /* Size of doclist in bytes */ - - /* Write the term for this entry to disk. */ - sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); - fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm); - - assert( writer.bFirstRowidInPage==0 ); - if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ - /* The entire doclist will fit on the current leaf. */ - fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); - }else{ - i64 iRowid = 0; - i64 iDelta = 0; - int iOff = 0; - - /* The entire doclist will not fit on this leaf. The following - ** loop iterates through the poslists that make up the current - ** doclist. */ - while( p->rc==SQLITE_OK && iOffp[0], (u16)pBuf->n); /* first rowid on page */ - pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); - writer.bFirstRowidInPage = 0; - fts5WriteDlidxAppend(p, &writer, iRowid); - }else{ - pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); - } - assert( pBuf->n<=pBuf->nSpace ); - - if( eDetail==FTS5_DETAIL_NONE ){ - if( iOffp[pBuf->n++] = 0; - iOff++; - if( iOffp[pBuf->n++] = 0; - iOff++; - } - } - if( (pBuf->n + pPgidx->n)>=pgsz ){ - fts5WriteFlushLeaf(p, &writer); - } - }else{ - int bDummy; - int nPos; - int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy); - nCopy += nPos; - if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ - /* The entire poslist will fit on the current leaf. So copy - ** it in one go. */ - fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); - }else{ - /* The entire poslist will not fit on this leaf. So it needs - ** to be broken into sections. The only qualification being - ** that each varint must be stored contiguously. */ - const u8 *pPoslist = &pDoclist[iOff]; - int iPos = 0; - while( p->rc==SQLITE_OK ){ - int nSpace = pgsz - pBuf->n - pPgidx->n; - int n = 0; - if( (nCopy - iPos)<=nSpace ){ - n = nCopy - iPos; - }else{ - n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); - } - assert( n>0 ); - fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); - iPos += n; - if( (pBuf->n + pPgidx->n)>=pgsz ){ - fts5WriteFlushLeaf(p, &writer); - } - if( iPos>=nCopy ) break; - } - } - iOff += nCopy; - } - } - } - - /* TODO2: Doclist terminator written here. */ - /* pBuf->p[pBuf->n++] = '\0'; */ - assert( pBuf->n<=pBuf->nSpace ); - sqlite3Fts5HashScanNext(pHash); - } - sqlite3Fts5HashClear(pHash); - fts5WriteFinish(p, &writer, &pgnoLast); - - /* Update the Fts5Structure. It is written back to the database by the - ** fts5StructureRelease() call below. */ - if( pStruct->nLevel==0 ){ - fts5StructureAddLevel(&p->rc, &pStruct); - } - fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); - if( p->rc==SQLITE_OK ){ - pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; - pSeg->iSegid = iSegid; - pSeg->pgnoFirst = 1; - pSeg->pgnoLast = pgnoLast; - pStruct->nSegment++; - } - fts5StructurePromote(p, 0, pStruct); - } - - fts5IndexAutomerge(p, &pStruct, pgnoLast); - fts5IndexCrisismerge(p, &pStruct); - fts5StructureWrite(p, pStruct); - fts5StructureRelease(pStruct); -} - -/* -** Flush any data stored in the in-memory hash tables to the database. -*/ -static void fts5IndexFlush(Fts5Index *p){ - /* Unless it is empty, flush the hash table to disk */ - if( p->nPendingData ){ - assert( p->pHash ); - p->nPendingData = 0; - fts5FlushOneHash(p); - } -} - -static Fts5Structure *fts5IndexOptimizeStruct( - Fts5Index *p, - Fts5Structure *pStruct +static void fts5IndexFlush(Fts5Index *p){ + if( p->nPendingData ){ + Fts5Hash *pHash = p->pHash; + Fts5Structure *pStruct; + int iSegid; + int pgnoLast = 0; /* Last leaf page number in segment */ + + p->nPendingData = 0; + + /* Obtain a reference to the index structure and allocate a new segment-id + ** for the new level-0 segment. */ + pStruct = fts5StructureRead(p); + iSegid = fts5AllocateSegid(p, pStruct); + fts5StructureInvalidate(p); + + if( iSegid ){ + const int pgsz = p->pConfig->pgsz; + int eDetail = p->pConfig->eDetail; + Fts5StructureSegment *pSeg; /* New segment within pStruct */ + Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ + Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ + + Fts5SegWriter writer; + fts5WriteInit(p, &writer, iSegid); + + pBuf = &writer.buf; + pPgidx = &writer.pgidx; + + /* fts5WriteInit() should have initialized the buffers to (most likely) + ** the maximum space required. */ + assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) ); + assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) ); + + /* Begin scanning through hash table entries. This loop runs once for each + ** term/doclist currently stored within the hash table. */ + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); + } + while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ + const char *zTerm; /* Buffer containing term */ + const u8 *pDoclist; /* Pointer to doclist for this term */ + int nDoclist; /* Size of doclist in bytes */ + + /* Write the term for this entry to disk. */ + sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); + fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm); + + assert( writer.bFirstRowidInPage==0 ); + if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ + /* The entire doclist will fit on the current leaf. */ + fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); + }else{ + i64 iRowid = 0; + i64 iDelta = 0; + int iOff = 0; + + /* The entire doclist will not fit on this leaf. The following + ** loop iterates through the poslists that make up the current + ** doclist. */ + while( p->rc==SQLITE_OK && iOffp[0], (u16)pBuf->n); /* first rowid on page */ + pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); + writer.bFirstRowidInPage = 0; + fts5WriteDlidxAppend(p, &writer, iRowid); + }else{ + pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); + } + assert( pBuf->n<=pBuf->nSpace ); + + if( eDetail==FTS5_DETAIL_NONE ){ + if( iOffp[pBuf->n++] = 0; + iOff++; + if( iOffp[pBuf->n++] = 0; + iOff++; + } + } + if( (pBuf->n + pPgidx->n)>=pgsz ){ + fts5WriteFlushLeaf(p, &writer); + } + }else{ + int bDummy; + int nPos; + int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy); + nCopy += nPos; + if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ + /* The entire poslist will fit on the current leaf. So copy + ** it in one go. */ + fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); + }else{ + /* The entire poslist will not fit on this leaf. So it needs + ** to be broken into sections. The only qualification being + ** that each varint must be stored contiguously. */ + const u8 *pPoslist = &pDoclist[iOff]; + int iPos = 0; + while( p->rc==SQLITE_OK ){ + int nSpace = pgsz - pBuf->n - pPgidx->n; + int n = 0; + if( (nCopy - iPos)<=nSpace ){ + n = nCopy - iPos; + }else{ + n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); + } + assert( n>0 ); + fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); + iPos += n; + if( (pBuf->n + pPgidx->n)>=pgsz ){ + fts5WriteFlushLeaf(p, &writer); + } + if( iPos>=nCopy ) break; + } + } + iOff += nCopy; + } + } + } + + /* TODO2: Doclist terminator written here. */ + /* pBuf->p[pBuf->n++] = '\0'; */ + assert( pBuf->n<=pBuf->nSpace ); + sqlite3Fts5HashScanNext(pHash); + } + sqlite3Fts5HashClear(pHash); + fts5WriteFinish(p, &writer, &pgnoLast); + + /* Update the Fts5Structure. It is written back to the database by the + ** fts5StructureRelease() call below. */ + if( pStruct->nLevel==0 ){ + fts5StructureAddLevel(&p->rc, &pStruct); + } + fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); + if( p->rc==SQLITE_OK ){ + pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; + pSeg->iSegid = iSegid; + pSeg->pgnoFirst = 1; + pSeg->pgnoLast = pgnoLast; + pStruct->nSegment++; + } + fts5StructurePromote(p, 0, pStruct); + } + + fts5IndexAutomerge(p, &pStruct, pgnoLast); + fts5IndexCrisismerge(p, &pStruct); + fts5StructureWrite(p, pStruct); + fts5StructureRelease(pStruct); + } +} + +/* +** If argument pStruct contains fewer than two segments, NULL is returned. +** +** Otherwise, this function returns a structure reference containing all the +** same segments as argument pStruct, but arranged within levels so that +** running the merge sub-routines merges all content into a single segment. +*/ +static Fts5Structure *fts5IndexOptimizeStruct( + Fts5Index *p, /* Index object */ + Fts5Structure *pStruct /* Structure to optimize */ ){ Fts5Structure *pNew = 0; int nByte = sizeof(Fts5Structure); int nSeg = pStruct->nSegment; int i; - /* Figure out if this structure requires optimization. A structure does - ** not require optimization if either: + /* First figure out if this structure requires optimization. A structure + ** does not require optimization if either: ** ** + it consists of fewer than two segments, or ** + all segments are on the same level, or ** + all segments except one are currently inputs to a merge operation. ** - ** In the first case, return NULL. In the second, increment the ref-count - ** on *pStruct and return a copy of the pointer to it. - */ + ** In the first case, return NULL. In the second and third, increment the + ** ref-count on *pStruct and return a copy of the pointer to it. */ if( nSeg<2 ) return 0; for(i=0; inLevel; i++){ int nThis = pStruct->aLevel[i].nSeg; if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){ fts5StructureRef(pStruct); @@ -4616,13 +4657,14 @@ return pStruct; } assert( pStruct->aLevel[i].nMerge<=nThis ); } + /* Allocate a new structure. Copy all segments from pStruct to level nMax+1 + ** of the new structure, where nMax is the largest level in pStruct. */ nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); - if( pNew ){ Fts5StructureLevel *pLvl; nByte = nSeg * sizeof(Fts5StructureSegment); pNew->nLevel = pStruct->nLevel+1; pNew->nRef = 1; @@ -4649,10 +4691,22 @@ } return pNew; } +/* +** Return from an Fts5Index API function that may have set the error code. +*/ +static int fts5IndexReturn(Fts5Index *p){ + int rc = p->rc; + p->rc = SQLITE_OK; + return rc; +} + +/* +** The implementation of the special "VALUES('optimize')" command. +*/ int sqlite3Fts5IndexOptimize(Fts5Index *p){ Fts5Structure *pStruct; Fts5Structure *pNew = 0; assert( p->rc==SQLITE_OK ); @@ -4705,10 +4759,17 @@ fts5StructureRelease(pStruct); } return fts5IndexReturn(p); } +/* +** Append varint iDelta to buffer pBuf. +** +** This function is a no-op if Fts5Index.rc is set to other than SQLITE_OK +** when it is called. If an error occurs, Fts5Index.rc is set to an SQLite +** error code before returning. +*/ static void fts5AppendRowid( Fts5Index *p, i64 iDelta, Fts5Iter *pUnused, Fts5Buffer *pBuf @@ -4715,10 +4776,18 @@ ){ UNUSED_PARAM(pUnused); fts5BufferAppendVarint(&p->rc, pBuf, iDelta); } +/* +** Append varint iDelta to buffer pBuf. Then append a copy of the poslist +** currently pointed to by iterator pMulti. +** +** This function is a no-op if Fts5Index.rc is set to other than SQLITE_OK +** when it is called. If an error occurs, Fts5Index.rc is set to an SQLite +** error code before returning. +*/ static void fts5AppendPoslist( Fts5Index *p, i64 iDelta, Fts5Iter *pMulti, Fts5Buffer *pBuf @@ -4730,11 +4799,13 @@ fts5BufferSafeAppendVarint(pBuf, nData*2); fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData); } } - +/* +** Advance iterator pIter to the next rowid in its doclist. +*/ static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; assert( pIter->aPoslist ); if( p>=pIter->aEof ){ @@ -4757,10 +4828,16 @@ pIter->aPoslist = p; } } +/* +** Buffer pBuf contains a doclist. Set up the structure pointed to by +** pIter to iterate through it. The iterator points to the first rowid +** in the doclist (or EOF if the doclist is empty) when this function +** returns. +*/ static void fts5DoclistIterInit( Fts5Buffer *pBuf, Fts5DoclistIter *pIter ){ memset(pIter, 0, sizeof(*pIter)); @@ -4767,43 +4844,29 @@ pIter->aPoslist = pBuf->p; pIter->aEof = &pBuf->p[pBuf->n]; fts5DoclistIterNext(pIter); } -#if 0 -/* -** Append a doclist to buffer pBuf. -** -** This function assumes that space within the buffer has already been -** allocated. -*/ -static void fts5MergeAppendDocid( - Fts5Buffer *pBuf, /* Buffer to write to */ - i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ - i64 iRowid /* Rowid to append */ -){ - assert( pBuf->n!=0 || (*piLastRowid)==0 ); - fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid); - *piLastRowid = iRowid; -} -#endif - -#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \ - assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \ - fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \ - (iLastRowid) = (iRowid); \ -} - /* ** Swap the contents of buffer *p1 with that of *p2. */ static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ Fts5Buffer tmp = *p1; *p1 = *p2; *p2 = tmp; } +/* +** Buffer pBuf contains a delta-encoded list of rowids (the sort stored by +** detail=none tables). +** +** When this function is called, *piOff must be set to the byte offset of a +** varint within this list, and *piRowid to the value of the previous +** rowid in the list. If there are no more rowids in the list, *piOff is +** set to -1 before returning. Otherwise, *piRowid is set to the next +** rowid in the list and *piOff to the offset of the rowid following it. +*/ static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){ int i = *piOff; if( i>=pBuf->n ){ *piOff = -1; }else{ @@ -4853,10 +4916,29 @@ } fts5BufferSwap(&out, p1); fts5BufferFree(&out); } + +/* +** This macro is used by fts5MergePrefixLists(). The arguments passed should +** be of the following types: +** +** Fts5Buffer *pBuf, // Buffer to write to +** i64 iLastRowid, // IN/OUT: Previous rowid written (if any) +** i64 iRowid // Rowid to append +** +** This function appends a single rowid to the doclist stored in pBuf. The +** value of the rowid appended is iRowid. IN/OUT parameter iLastRowid +** should be set to the value of the previous rowid stored in the doclist +** when this macro is invoked. It is set to a copy of iRowid by this macro. +*/ +#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \ + assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \ + fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \ + (iLastRowid) = (iRowid); \ +} /* ** Buffers p1 and p2 contain doclists. This function merges the content ** of the two doclists together and sets buffer p1 to the result before ** returning. @@ -4975,17 +5057,22 @@ fts5BufferFree(&tmp); fts5BufferFree(&out); } } +/* +** This function is used to prepare an iterator for a prefix query for +** which there is no prefix index. It assembles a doclist in memory +** and then sets up an Fts5Iter object to iterate through it. +*/ static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5Colset *pColset, /* Restrict matches to these columns */ - Fts5Iter **ppIter /* OUT: New iterator */ + Fts5Iter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; const int nBuf = 32; @@ -4997,14 +5084,15 @@ }else{ xMerge = fts5MergePrefixLists; xAppend = fts5AppendPoslist; } + assert( p->pStruct ); aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); - if( aBuf && pStruct ){ + if( aBuf ){ const int flags = FTS5INDEX_QUERY_SCAN | FTS5INDEX_QUERY_SKIPEMPTY | FTS5INDEX_QUERY_NOOUTPUT; int i; i64 iLastRowid = 0; @@ -5018,11 +5106,11 @@ fts5IterSetOutputCb(&p->rc, p1); for( /* no-op */ ; fts5MultiIterEof(p, p1)==0; fts5MultiIterNext2(p, p1, &bNewTerm) ){ - Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; + Fts5SegIter *pSeg = fts5IterSegment(p1); int nTerm = pSeg->term.n; const u8 *pTerm = pSeg->term.p; p1->xSetOutputs(p1, pSeg); assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); @@ -5102,11 +5190,11 @@ ** Commit data to disk. */ int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){ assert( p->rc==SQLITE_OK ); fts5IndexFlush(p); - if( bCommit ) fts5CloseReader(p); + if( bCommit ) sqlite3Fts5IndexCloseReader(p); return fts5IndexReturn(p); } /* ** Discard any data stored in the in-memory hash tables. Do not write it @@ -5113,21 +5201,22 @@ ** to the database. Additionally, assume that the contents of the %_data ** table may have changed on disk. So any in-memory caches of %_data ** records must be invalidated. */ int sqlite3Fts5IndexRollback(Fts5Index *p){ - fts5CloseReader(p); + sqlite3Fts5IndexCloseReader(p); fts5IndexDiscardData(p); fts5StructureInvalidate(p); + p->pConfig->iCookie = -1; /* assert( p->rc==SQLITE_OK ); */ return SQLITE_OK; } /* ** The %_data table is completely empty when this function is called. This -** function populates it with the initial structure objects for each index, -** and the initial version of the "averages" record (a zero-byte blob). +** function populates it with the initial structure object and the initial +** version of the "averages" record (a zero-byte blob). */ int sqlite3Fts5IndexReinit(Fts5Index *p){ Fts5Structure s; fts5StructureInvalidate(p); memset(&s, 0, sizeof(Fts5Structure)); @@ -5155,18 +5244,19 @@ *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); if( rc==SQLITE_OK ){ p->pConfig = pConfig; p->nWorkUnit = FTS5_WORK_UNIT; p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName); - if( p->zDataTbl && bCreate ){ - rc = sqlite3Fts5CreateTable( - pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr - ); + if( bCreate ){ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5CreateTable( + pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr + ); + } if( rc==SQLITE_OK ){ rc = sqlite3Fts5CreateTable(pConfig, "idx", - "segid, term, pgno, PRIMARY KEY(segid, term)", - 1, pzErr + "segid, term, pgno, PRIMARY KEY(segid, term)", 1, pzErr ); } if( rc==SQLITE_OK ){ rc = sqlite3Fts5IndexReinit(p); } @@ -5265,10 +5355,12 @@ /* Add the entry to the main terms index. */ rc = sqlite3Fts5HashWrite( p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken ); + /* Add an entry for each of the prefix indexes that the token is large + ** enough for (e.g. for which nChar>=nPrefix). */ for(i=0; inPrefix && rc==SQLITE_OK; i++){ const int nChar = pConfig->aPrefix[i]; int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); if( nByte ){ rc = sqlite3Fts5HashWrite(p->pHash, @@ -5294,12 +5386,15 @@ ){ Fts5Config *pConfig = p->pConfig; Fts5Iter *pRet = 0; Fts5Buffer buf = {0, 0, 0}; - /* If the QUERY_SCAN flag is set, all other flags must be clear. */ + /* If the QUERY_SCAN flag is set, all other flags must be clear. This + ** flag is used by the fts5vocab module only. */ assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN ); + assert( p->rc==SQLITE_OK ); + assert( p->pStruct!=0 || (flags & FTS5INDEX_QUERY_PREFIX)==0 ); if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ int iIdx = 0; /* Index to search */ memcpy(&buf.p[1], pToken, nToken); @@ -5306,26 +5401,20 @@ /* Figure out which index to search and set iIdx accordingly. If this ** is a prefix query for which there is no prefix index, set iIdx to ** greater than pConfig->nPrefix to indicate that the query will be ** satisfied by scanning multiple terms in the main index. ** - ** If the QUERY_TEST_NOIDX flag was specified, then this must be a - ** prefix-query. Instead of using a prefix-index (if one exists), - ** evaluate the prefix query using the main FTS index. This is used - ** for internal sanity checking by the integrity-check in debug - ** mode only. */ -#ifdef SQLITE_DEBUG - if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ - assert( flags & FTS5INDEX_QUERY_PREFIX ); - iIdx = 1+pConfig->nPrefix; - }else -#endif + ** If the Fts5Config.bPrefixIndex debugging flag is set, do not use + ** a prefix index even if a suitable one does exist. */ if( flags & FTS5INDEX_QUERY_PREFIX ){ int nChar = fts5IndexCharlen(pToken, nToken); for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ if( pConfig->aPrefix[iIdx-1]==nChar ) break; } +#ifdef SQLITE_DEBUG + if( pConfig->bPrefixIndex==0 ) iIdx = 1+pConfig->nPrefix; +#endif } if( iIdx<=pConfig->nPrefix ){ /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); @@ -5342,30 +5431,27 @@ buf.p[0] = FTS5_MAIN_PREFIX; fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet); assert( p->rc!=SQLITE_OK || pRet->pColset==0 ); fts5IterSetOutputCb(&p->rc, pRet); if( p->rc==SQLITE_OK ){ - Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; + Fts5SegIter *pSeg = fts5IterSegment(pRet); if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); } } if( p->rc ){ sqlite3Fts5IterClose(&pRet->base); pRet = 0; - fts5CloseReader(p); + sqlite3Fts5IndexCloseReader(p); } *ppIter = &pRet->base; sqlite3Fts5BufferFree(&buf); } return fts5IndexReturn(p); } -/* -** Return true if the iterator passed as the only argument is at EOF. -*/ /* ** Move to the next matching rowid. */ int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; @@ -5383,11 +5469,11 @@ assert( pIter->pIndex->rc==SQLITE_OK ); fts5MultiIterNext(p, pIter, 0, 0); if( p->rc==SQLITE_OK ){ - Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; + Fts5SegIter *pSeg = fts5IterSegment(pIter); if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ fts5DataRelease(pSeg->pLeaf); pSeg->pLeaf = 0; pIter->base.bEof = 1; } @@ -5407,10 +5493,13 @@ return fts5IndexReturn(pIter->pIndex); } /* ** Return the current term. +** +** This function is only called as part of the fts5vocab module - not as +** part of normal full-text query processing. */ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ int n; const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n); *pn = n-1; @@ -5423,11 +5512,11 @@ void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ if( pIndexIter ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5Index *pIndex = pIter->pIndex; fts5MultiIterFree(pIter); - fts5CloseReader(pIndex); + sqlite3Fts5IndexCloseReader(pIndex); } } /* ** Read and decode the "averages" record from the database. @@ -5455,59 +5544,66 @@ return fts5IndexReturn(p); } /* ** Replace the current "averages" record with the contents of the buffer -** supplied as the second argument. +** supplied as the second argument. +** +** The averages record consists of N+1 varints, where N is the number of +** columns in the fts5 table. The first varint is the total number of +** rows in the FTS table. The second varint is the total number of tokens +** in the first column of the table, and so on. */ int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ assert( p->rc==SQLITE_OK ); fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); return fts5IndexReturn(p); } /* ** Return the total number of blocks this module has read from the %_data -** table since it was created. +** table (since it was created by sqlite3Fts5IndexOpen). */ int sqlite3Fts5IndexReads(Fts5Index *p){ return p->nRead; } /* -** Set the 32-bit cookie value stored at the start of all structure -** records to the value passed as the second argument. -** -** Return SQLITE_OK if successful, or an SQLite error code if an error -** occurs. +** Increment the value of the configuration cookie stored as the first +** 32-bits of the structure record in the database. This is called after +** modifying the contents of the %_config table. */ -int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ - int rc; /* Return code */ - Fts5Config *pConfig = p->pConfig; /* Configuration object */ - u8 aCookie[4]; /* Binary representation of iNew */ - sqlite3_blob *pBlob = 0; - - assert( p->rc==SQLITE_OK ); - sqlite3Fts5Put32(aCookie, iNew); - - rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, - "block", FTS5_STRUCTURE_ROWID, 1, &pBlob - ); - if( rc==SQLITE_OK ){ - sqlite3_blob_write(pBlob, aCookie, 4, 0); - rc = sqlite3_blob_close(pBlob); - } - - return rc; -} - -int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ +int sqlite3Fts5IndexIncrCookie(Fts5Index *p){ Fts5Structure *pStruct; pStruct = fts5StructureRead(p); + p->pConfig->iCookie++; + fts5StructureWrite(p, pStruct); fts5StructureRelease(pStruct); return fts5IndexReturn(p); } + +/* +** Ensure the contents of the %_config table have been loaded into memory. +*/ +int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ + fts5StructureCache(p); + return fts5IndexReturn(p); +} + +/* +** This is called when a new read or write transaction may be being opened. +** It ensures that the in-memory cache of the structure record is valid. +*/ +int sqlite3Fts5IndexNewTrans(Fts5Index *p){ + assert( p->pStruct==0 || p->iStructVersion!=0 ); + assert( p->rc==SQLITE_OK ); + if( p->pConfig->iCookie<0 || fts5IndexDataVersion(p)!=p->iStructVersion ){ + fts5StructureInvalidate(p); + } + return fts5IndexReturn(p); +} + /************************************************************************* ************************************************************************** ** Below this point is the implementation of the integrity-check @@ -5658,22 +5754,23 @@ ** This check may only be performed if the hash table is empty. This ** is because the hash table only supports a single scan query at ** a time, and the multi-iter loop from which this function is called ** is already performing such a scan. */ if( p->nPendingData==0 ){ + int bSaved = p->pConfig->bPrefixIndex; + p->pConfig->bPrefixIndex = 1; if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; ck2 = 0; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck2); if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; } if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; ck2 = 0; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck2); if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; } + p->pConfig->bPrefixIndex = bSaved; } cksum3 ^= ck1; fts5BufferSet(&rc, pPrev, n, (const u8*)z); @@ -6387,44 +6484,10 @@ sqlite3_result_error_code(pCtx, rc); } fts5BufferFree(&s); } -/* -** The implementation of user-defined scalar function fts5_rowid(). -*/ -static void fts5RowidFunction( - sqlite3_context *pCtx, /* Function call context */ - int nArg, /* Number of args (always 2) */ - sqlite3_value **apVal /* Function arguments */ -){ - const char *zArg; - if( nArg==0 ){ - sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); - }else{ - zArg = (const char*)sqlite3_value_text(apVal[0]); - if( 0==sqlite3_stricmp(zArg, "segment") ){ - i64 iRowid; - int segid, pgno; - if( nArg!=3 ){ - sqlite3_result_error(pCtx, - "should be: fts5_rowid('segment', segid, pgno))", -1 - ); - }else{ - segid = sqlite3_value_int(apVal[1]); - pgno = sqlite3_value_int(apVal[2]); - iRowid = FTS5_SEGMENT_ROWID(segid, pgno); - sqlite3_result_int64(pCtx, iRowid); - } - }else{ - sqlite3_result_error(pCtx, - "first arg to fts5_rowid() must be 'segment'" , -1 - ); - } - } -} - /* ** This is called as part of registering the FTS5 module with database ** connection db. It registers several user-defined scalar functions useful ** with FTS5. ** @@ -6441,21 +6504,8 @@ db, "fts5_decode_none", 2, SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0 ); } - if( rc==SQLITE_OK ){ - rc = sqlite3_create_function( - db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0 - ); - } return rc; } - -int sqlite3Fts5IndexReset(Fts5Index *p){ - assert( p->pStruct==0 || p->iStructVersion!=0 ); - if( fts5IndexDataVersion(p)!=p->iStructVersion ){ - fts5StructureInvalidate(p); - } - return fts5IndexReturn(p); -} Index: ext/fts5/fts5_main.c ================================================================== --- ext/fts5/fts5_main.c +++ ext/fts5/fts5_main.c @@ -600,11 +600,11 @@ static int fts5NewTransaction(Fts5Table *pTab){ Fts5Cursor *pCsr; for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK; } - return sqlite3Fts5StorageReset(pTab->pStorage); + return sqlite3Fts5IndexNewTrans(pTab->pIndex); } /* ** Implementation of xOpen method. */ @@ -614,10 +614,13 @@ Fts5Cursor *pCsr = 0; /* New cursor object */ int nByte; /* Bytes of space to allocate */ int rc; /* Return code */ rc = fts5NewTransaction(pTab); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex); + } if( rc==SQLITE_OK ){ nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int); pCsr = (Fts5Cursor*)sqlite3_malloc(nByte); if( pCsr ){ Fts5Global *pGlobal = pTab->pGlobal; @@ -628,10 +631,13 @@ pCsr->iCsrId = ++pGlobal->iNextId; }else{ rc = SQLITE_NOMEM; } } + if( rc!=SQLITE_OK ){ + sqlite3Fts5IndexCloseReader(pTab->pIndex); + } *ppCsr = (sqlite3_vtab_cursor*)pCsr; return rc; } static int fts5StmtType(Fts5Cursor *pCsr){ @@ -708,10 +714,11 @@ /* Remove the cursor from the Fts5Global.pCsr list */ for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); *pp = pCsr->pNext; sqlite3_free(pCsr); + sqlite3Fts5IndexCloseReader(pTab->pIndex); } return SQLITE_OK; } static int fts5SorterNext(Fts5Cursor *pCsr){ @@ -1587,13 +1594,20 @@ /* ** Implementation of xBegin() method. */ static int fts5BeginMethod(sqlite3_vtab *pVtab){ - fts5CheckTransactionState((Fts5Table*)pVtab, FTS5_BEGIN, 0); - fts5NewTransaction((Fts5Table*)pVtab); - return SQLITE_OK; + Fts5Table *pTab = (Fts5Table*)pVtab; + int rc; + rc = fts5NewTransaction(pTab); + if( rc!=SQLITE_OK ){ + sqlite3Fts5IndexCloseReader(pTab->pIndex); + } +#ifdef SQLITE_DEBUG + if( rc==SQLITE_OK ) fts5CheckTransactionState(pTab, FTS5_BEGIN, 0); +#endif + return rc; } /* ** Implementation of xCommit() method. This is a no-op. The contents of ** the pending-terms hash-table have already been flushed into the database Index: ext/fts5/fts5_storage.c ================================================================== --- ext/fts5/fts5_storage.c +++ ext/fts5/fts5_storage.c @@ -639,14 +639,10 @@ int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){ return sqlite3Fts5IndexMerge(p->pIndex, nMerge); } -int sqlite3Fts5StorageReset(Fts5Storage *p){ - return sqlite3Fts5IndexReset(p->pIndex); -} - /* ** Allocate a new rowid. This is used for "external content" tables when ** a NULL value is inserted into the rowid column. The new rowid is allocated ** by inserting a dummy row into the %_docsize table. The dummy will be ** overwritten later. @@ -1118,13 +1114,9 @@ } sqlite3_step(pReplace); rc = sqlite3_reset(pReplace); } if( rc==SQLITE_OK && pVal ){ - int iNew = p->pConfig->iCookie + 1; - rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew); - if( rc==SQLITE_OK ){ - p->pConfig->iCookie = iNew; - } + rc = sqlite3Fts5IndexIncrCookie(p->pIndex); } return rc; } Index: ext/fts5/test/fts5_common.tcl ================================================================== --- ext/fts5/test/fts5_common.tcl +++ ext/fts5/test/fts5_common.tcl @@ -642,6 +642,13 @@ sqlite3_fts5_create_tokenizer db tclnum tclnum_create } # # End of tokenizer code. #------------------------------------------------------------------------- + +proc fts5_rowid_func {segid pgno} { return [expr ($segid<<37) + $pgno] } +proc register_fts5_rowid {db} { + $db func fts5_rowid -argcount 2 fts5_rowid_func +} + + Index: ext/fts5/test/fts5ai.test ================================================================== --- ext/fts5/test/fts5ai.test +++ ext/fts5/test/fts5ai.test @@ -49,10 +49,68 @@ } do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check'); } -} + +#------------------------------------------------------------------------- +# Test that the in-memory configuration does not become inconsistent with +# respect to the on-disk configuration if a savepoint is rolled back. +# +proc posrowid {cmd} { $cmd xRowid } +proc negrowid {cmd} { expr -1 * [$cmd xRowid] } +sqlite3_fts5_create_function db posrowid posrowid +sqlite3_fts5_create_function db negrowid negrowid + +do_execsql_test 2.1 { + INSERT INTO t1(rowid, a) VALUES(1001, 'x y 1'); + INSERT INTO t1(rowid, a) VALUES(1002, 'x y 2'); + INSERT INTO t1(rowid, a) VALUES(1003, 'x y 3'); + BEGIN; + INSERT INTO t1(t1, rank) VALUES('rank', 'posrowid()'); + SELECT a FROM t1('x') ORDER BY rank; +} {{x y 1} {x y 2} {x y 3}} + +do_execsql_test 2.2 { + SAVEPOINT abc; + INSERT INTO t1(t1, rank) VALUES('rank', 'negrowid()'); + SELECT a FROM t1('x') ORDER BY rank; +} {{x y 3} {x y 2} {x y 1}} + +do_execsql_test 2.3 { + ROLLBACK TO abc; + SELECT a FROM t1('x') ORDER BY rank; + COMMIT; +} {{x y 1} {x y 2} {x y 3}} + +#------------------------------------------------------------------------- +# Test that the in-memory structure does not become inconsistent with +# respect to the on-disk configuration if a savepoint is rolled back. +# +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE t2 USING fts5(x, detail=%DETAIL%); + INSERT INTO t2 VALUES('a 1'); + INSERT INTO t2 VALUES('a 2'); + INSERT INTO t2 VALUES('a 3'); + SELECT count(*) FROM t2_data; +} {5} + +do_execsql_test 3.2 { + BEGIN; + SAVEPOINT one; + INSERT INTO t2(rowid, x) VALUES(8, 'a 8'); + INSERT INTO t2(rowid, x) VALUES(7, 'a 7'); + INSERT INTO t2(rowid, x) VALUES(6, 'a 6'); + SELECT count(*) FROM t2_data; +} {7} +do_execsql_test 3.3 { INSERT INTO t2(t2) VALUES('integrity-check') } {} + +do_execsql_test 3.4 { + ROLLBACK TO one; + SELECT count(*) FROM t2_data; +} {5} +do_execsql_test 3.5 { INSERT INTO t2(t2) VALUES('integrity-check') } {} +} ;# foreach_detail_mode finish_test Index: ext/fts5/test/fts5corrupt.test ================================================================== --- ext/fts5/test/fts5corrupt.test +++ ext/fts5/test/fts5corrupt.test @@ -39,22 +39,24 @@ db_save do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') } set segid [lindex [fts5_level_segids t1] 0] +register_fts5_rowid db do_test 1.3 { execsql { - DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 4); + DELETE FROM t1_data WHERE rowid = fts5_rowid($segid, 4); } catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {1 {database disk image is malformed}} do_test 1.4 { db_restore_and_reopen + register_fts5_rowid db execsql { UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE - rowid = fts5_rowid('segment', $segid, 4); + rowid = fts5_rowid($segid, 4); } catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {1 {database disk image is malformed}} db_restore_and_reopen Index: ext/fts5/test/fts5fault4.test ================================================================== --- ext/fts5/test/fts5fault4.test +++ ext/fts5/test/fts5fault4.test @@ -84,11 +84,11 @@ set ::res [db eval {SELECT rowid, x1 FROM x1 WHERE x1 MATCH '*reads'}] do_faultsim_test 4 -faults oom-* -body { db eval {SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads'} } -test { - faultsim_test_result {0 {0 {} 3}} + faultsim_test_result {0 {0 {} 4}} } #------------------------------------------------------------------------- # An OOM within a query that uses a custom rank function. # ADDED ext/fts5/test/fts5faultC.test Index: ext/fts5/test/fts5faultC.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5faultC.test @@ -0,0 +1,121 @@ +# 2016 March 26 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# This file is focused on OOM errors. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5faultC +return_if_no_fts5 + +if 1 { + +#-------------------------------------------------------------------------- +# Test that if an OOM error occurs while trying to set a configuration +# option, the in-memory and on-disk configurations are not left in an +# inconsistent state. +# +proc posrowid {cmd} { $cmd xRowid } +proc negrowid {cmd} { expr -1 * [$cmd xRowid] } + +sqlite3_fts5_create_function db posrowid posrowid +sqlite3_fts5_create_function db negrowid negrowid + +do_execsql_test 1.0.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1 VALUES('a b c'); + INSERT INTO t1 VALUES('d a d'); + INSERT INTO t1 VALUES('c b a'); +} +do_execsql_test 1.0.1 { + INSERT INTO t1(t1, rank) VALUES('rank', 'posrowid()'); + SELECT rowid FROM t1('a') ORDER BY rank; +} {1 2 3} +do_execsql_test 1.0.2 { + INSERT INTO t1(t1, rank) VALUES('rank', 'negrowid()'); + SELECT rowid FROM t1('a') ORDER BY rank; +} {3 2 1} + +faultsim_save_and_close +do_faultsim_test 1.1 -faults oom-* -prep { + faultsim_restore_and_reopen + sqlite3_fts5_create_function db posrowid posrowid + sqlite3_fts5_create_function db negrowid negrowid + execsql { SELECT * FROM t1('*reads') } +} -body { + execsql { INSERT INTO t1(t1, rank) VALUES('rank', 'posrowid()') } +} -test { + + faultsim_test_result [list 0 {}] + sqlite3 db2 test.db + set ex [db2 one { SELECT v FROM t1_config WHERE k='rank' }] + switch -- $ex { + "posrowid()" { set ex {1 2 3} } + "negrowid()" { set ex {3 2 1} } + default { error 1 } + } + + set res [db eval { SELECT rowid FROM t1('a') ORDER BY rank }] + if {$res != $ex} { + error "2: expected {$ex} got {$res}" + } + db2 close +} + + +#-------------------------------------------------------------------------- + +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(y); +} +do_faultsim_test 2.1 -faults * -prep { + sqlite3 db test.db + execsql { SELECT * FROM t1('x') } +} -body { + execsql { SELECT * FROM t1('x') } + execsql { SELECT * FROM t1('x') } +} -test { + faultsim_test_result [list 0 {}] +} +do_faultsim_test 2.2 -faults * -prep { + sqlite3 db test.db + execsql { INSERT INTO t1 VALUES('yy') } +} -body { + execsql { INSERT INTO t1 VALUES('yy') } + execsql { INSERT INTO t1 VALUES('yy') } +} -test { + faultsim_test_result [list 0 {}] +} + +#-------------------------------------------------------------------------- +} +reset_db +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE t1 USING fts5(y); +} +faultsim_save_and_close +do_faultsim_test 3.1 -faults * -prep { + faultsim_restore_and_reopen + execsql { + BEGIN; + INSERT INTO t1 VALUES('x y z'); + INSERT INTO t1 VALUES('a b c'); + } +} -body { + execsql { INSERT INTO t1(t1) VALUES('optimize') } +} -test { + faultsim_test_result [list 0 {}] +} + +finish_test + Index: ext/fts5/test/fts5rowid.test ================================================================== --- ext/fts5/test/fts5rowid.test +++ ext/fts5/test/fts5rowid.test @@ -13,37 +13,17 @@ # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5rowid -# If SQLITE_ENABLE_FTS5 is defined, omit this file. -ifcapable !fts5 { - finish_test - return -} - -do_catchsql_test 1.1 { - SELECT fts5_rowid() -} {1 {should be: fts5_rowid(subject, ....)}} - -do_catchsql_test 1.2 { - SELECT fts5_rowid('segment') -} {1 {should be: fts5_rowid('segment', segid, pgno))}} - -do_execsql_test 1.3 { - SELECT fts5_rowid('segment', 1, 1) -} {137438953473} - -do_catchsql_test 1.4 { - SELECT fts5_rowid('nosucharg'); -} {1 {first arg to fts5_rowid() must be 'segment'}} - +return_if_no_fts5 #------------------------------------------------------------------------- # Tests of the fts5_decode() function. # reset_db +register_fts5_rowid db do_execsql_test 2.1 { CREATE VIRTUAL TABLE x1 USING fts5(a, b); INSERT INTO x1(x1, rank) VALUES('pgsz', 32); } {} @@ -90,11 +70,11 @@ # UPDATE x1_data SET block = X''; # SELECT count(fts5_decode(rowid, block)) FROM x1_data; #} $res do_execsql_test 2.8 { - SELECT fts5_decode(fts5_rowid('segment', 1000, 1), X'AB') + SELECT fts5_decode(fts5_rowid(1000, 1), X'AB') } {corrupt} #------------------------------------------------------------------------- # Tests with very large tokens. # Index: ext/fts5/test/fts5simple.test ================================================================== --- ext/fts5/test/fts5simple.test +++ ext/fts5/test/fts5simple.test @@ -348,11 +348,11 @@ INSERT INTO x1 SELECT rnddoc(5) FROM ii; } do_execsql_test 14.4 { SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads' -} {0 {} 3} +} {0 {} 4} #------------------------------------------------------------------------- reset_db do_execsql_test 15.0 { CREATE VIRTUAL TABLE x2 USING fts5(x, prefix=1); Index: ext/fts5/test/fts5simple3.test ================================================================== --- ext/fts5/test/fts5simple3.test +++ ext/fts5/test/fts5simple3.test @@ -53,15 +53,15 @@ lappend cols "c$i" lappend vals "'val$i'" } execsql "CREATE VIRTUAL TABLE t2 USING fts5(detail=%DETAIL%,[join $cols ,])" } {} - + do_test 2.2 { execsql "INSERT INTO t2 VALUES([join $vals ,])" } {} - + foreach {tn q res} { 1 { c1:val1 } 1 2 { c300:val300 } 1 3 { c300:val1 } {} 4 { c1:val300 } {} @@ -78,8 +78,53 @@ INSERT INTO x3 VALUES('c b a'); INSERT INTO x3 VALUES('o t t'); SELECT * FROM x3('x OR y OR z'); } + +#------------------------------------------------------------------------- +# Check that if a CREATE VIRTUAL TABLE statement fails within a +# transaction, any changes made to the database are reverted before +# continuing. +# +reset_db +do_catchsql_test 4.0 { + BEGIN; + CREATE VIRTUAL TABLE t1 USING fts5; +} {1 {vtable constructor failed: t1}} + +do_execsql_test 4.1 { SELECT * FROM sqlite_master } {} +do_execsql_test 4.2 { COMMIT } +do_execsql_test 4.3 { SELECT * FROM sqlite_master } {} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a); + INSERT INTO t1 VALUES('vtable constructor failed'); +} {} +do_execsql_test 5.1 { + SELECT quote(block) FROM t1_data WHERE rowid=10; +} { + X'000000000101010001010101' +} +do_test 5.3 { + sqlite3 db2 test.db + register_fts5_rowid db2 + db2 eval { + UPDATE t1_data SET block = X'000000000101010001A7080101' WHERE rowid=10; + UPDATE t1_data SET rowid=fts5_rowid(5000, 1) WHERE rowid=fts5_rowid(1,1); + UPDATE t1_idx SET segid=5000 WHERE segid=1; + } + db2 close +} {} +do_execsql_test 5.4 { + SELECT rowid FROM t1('cons*'); +} {1} +breakpoint +do_execsql_test 5.5 { + INSERT INTO t1 VALUES('hello world'); +} finish_test Index: ext/fts5/test/fts5synonym2.test ================================================================== --- ext/fts5/test/fts5synonym2.test +++ ext/fts5/test/fts5synonym2.test @@ -40,17 +40,17 @@ list [sort_poslist $PL] $CL } sqlite3_fts5_create_function db fts5_test_bothlist fts5_test_bothlist -proc fts5_rowid {cmd} { expr [$cmd xColumnText -1] } -sqlite3_fts5_create_function db fts5_rowid fts5_rowid +proc rowid_func {cmd} { expr [$cmd xColumnText -1] } +sqlite3_fts5_create_function db rowid_func rowid_func do_execsql_test 1.$tok.0.1 " CREATE VIRTUAL TABLE ss USING fts5(a, b, tokenize='tclnum $tok', detail=%DETAIL%); - INSERT INTO ss(ss, rank) VALUES('rank', 'fts5_rowid()'); + INSERT INTO ss(ss, rank) VALUES('rank', 'rowid_func()'); " do_execsql_test 1.$tok.0.2 { INSERT INTO ss VALUES('5 5 five seven 3 seven i', '2 1 5 0 two 1 i'); INSERT INTO ss VALUES('six ix iii 7 i vii iii', 'one seven nine 4 9 1 vi'); Index: ext/fts5/test/fts5version.test ================================================================== --- ext/fts5/test/fts5version.test +++ ext/fts5/test/fts5version.test @@ -57,8 +57,27 @@ db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } } {1 {invalid fts5 file format (found 0, expected 4) - run 'rebuild'}} +#--------------------------------------------------------------------------- + +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE t2 USING fts5(two); + INSERT INTO t2 VALUES('a b c d'); +} + +do_test 2.2 { + sqlite3 db2 test.db + execsql { + INSERT INTO t2(t2, rank) VALUES('pgsz', 512); + UPDATE t2_config SET v=5 WHERE k='version'; + } db2 + db2 close +} {} + +do_catchsql_test 2.3 { + SELECT * FROM t2('b + c'); +} {1 {SQL logic error or missing database}} finish_test Index: src/vdbeaux.c ================================================================== --- src/vdbeaux.c +++ src/vdbeaux.c @@ -507,10 +507,13 @@ ){ hasAbort = 1; break; } if( opcode==OP_CreateTable ) hasCreateTable = 1; +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( opcode==OP_VCreate ) hasAbort = 1; +#endif if( opcode==OP_InitCoroutine ) hasInitCoroutine = 1; #ifndef SQLITE_OMIT_FOREIGN_KEY if( opcode==OP_FkCounter && pOp->p1==0 && pOp->p2==1 ){ hasFkCounter = 1; } Index: src/vtab.c ================================================================== --- src/vtab.c +++ src/vtab.c @@ -314,10 +314,11 @@ int iDb; /* The database the table is being created in */ Table *pTable; /* The new virtual table */ sqlite3 *db; /* Database connection */ sqlite3StartTable(pParse, pName1, pName2, 0, 0, 1, ifNotExists); + sqlite3MayAbort(pParse); pTable = pParse->pNewTable; if( pTable==0 ) return; assert( 0==pTable->pIndex ); db = pParse->db;