Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -45,10 +45,14 @@ # define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) # define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) #endif +/* Truncate very long tokens to this many bytes. Hard limit is +** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset +** field that occurs at the start of each leaf page (see fts5_index.c). */ +#define FTS5_MAX_TOKEN_SIZE 32768 /* ** Maximum number of prefix indexes on single FTS5 table. This must be ** less than 32. If it is set to anything large than that, an #error ** directive in fts5_index.c will cause the build to fail. Index: ext/fts5/fts5_expr.c ================================================================== --- ext/fts5/fts5_expr.c +++ ext/fts5/fts5_expr.c @@ -1491,10 +1491,11 @@ UNUSED_PARAM2(iUnused1, iUnused2); /* If an error has already occurred, this is a no-op */ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; + if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){ Fts5ExprTerm *pSyn; int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); @@ -2493,10 +2494,11 @@ Fts5Expr *pExpr = p->pExpr; int i; UNUSED_PARAM2(iUnused1, iUnused2); + if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; for(i=0; inPhrase; i++){ Fts5ExprTerm *pTerm; if( p->aPopulator[i].bOk==0 ) continue; for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -2318,10 +2318,22 @@ pIter->iPgidxOff = iPgidx; fts5SegIterLoadRowid(p, pIter); fts5SegIterLoadNPos(p, pIter); } + +static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ + if( p->pIdxSelect==0 ){ + Fts5Config *pConfig = p->pConfig; + fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf( + "SELECT pgno FROM '%q'.'%q_idx' WHERE " + "segid=? AND term<=? ORDER BY term DESC LIMIT 1", + pConfig->zDb, pConfig->zName + )); + } + return p->pIdxSelect; +} /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg. If there is no such term in the index, the iterator is set to EOF. ** @@ -2336,35 +2348,29 @@ Fts5SegIter *pIter /* Object to populate */ ){ int iPg = 1; int bGe = (flags & FTS5INDEX_QUERY_SCAN); int bDlidx = 0; /* True if there is a doclist-index */ + sqlite3_stmt *pIdxSelect = 0; assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); assert( pTerm && nTerm ); memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; /* This block sets stack variable iPg to the leaf page number that may ** contain term (pTerm/nTerm), if it is present in the segment. */ - if( p->pIdxSelect==0 ){ - Fts5Config *pConfig = p->pConfig; - fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf( - "SELECT pgno FROM '%q'.'%q_idx' WHERE " - "segid=? AND term<=? ORDER BY term DESC LIMIT 1", - pConfig->zDb, pConfig->zName - )); - } + pIdxSelect = fts5IdxSelectStmt(p); if( p->rc ) return; - sqlite3_bind_int(p->pIdxSelect, 1, pSeg->iSegid); - sqlite3_bind_blob(p->pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); - if( SQLITE_ROW==sqlite3_step(p->pIdxSelect) ){ - i64 val = sqlite3_column_int(p->pIdxSelect, 0); + sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid); + sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); + if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){ + i64 val = sqlite3_column_int(pIdxSelect, 0); iPg = (int)(val>>1); bDlidx = (val & 0x0001); } - p->rc = sqlite3_reset(p->pIdxSelect); + p->rc = sqlite3_reset(pIdxSelect); if( iPgpgnoFirst ){ iPg = pSeg->pgnoFirst; bDlidx = 0; } @@ -3550,10 +3556,22 @@ for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ); } } assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT ); + + { + sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p); + if( p->rc==SQLITE_OK ){ + int rc; + u8 aBlob[2] = {0xff, 0xff}; + sqlite3_bind_int(pIdxSelect, 1, iSegid); + sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC); + assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW ); + p->rc = sqlite3_reset(pIdxSelect); + } + } #endif } } return iSegid; @@ -3795,10 +3813,13 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; Fts5PageWriter *pPage = &pWriter->writer; i64 iRowid; + +static int nCall = 0; +nCall++; assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); /* Set the szLeaf header field. */ assert( 0==fts5GetU16(&pPage->buf.p[2]) ); Index: ext/fts5/fts5_storage.c ================================================================== --- ext/fts5/fts5_storage.c +++ ext/fts5/fts5_storage.c @@ -367,10 +367,11 @@ int iUnused2 /* End offset of token */ ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; UNUSED_PARAM2(iUnused1, iUnused2); + if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); } @@ -813,10 +814,11 @@ int rc = SQLITE_OK; int iPos; int iCol; UNUSED_PARAM2(iUnused1, iUnused2); + if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } Index: ext/fts5/test/fts5simple.test ================================================================== --- ext/fts5/test/fts5simple.test +++ ext/fts5/test/fts5simple.test @@ -445,7 +445,27 @@ } {} do_execsql_test 20.2 { INSERT INTO x1(x1) VALUES('optimize'); INSERT INTO x1(x1) VALUES('integrity-check'); } {} + +#------------------------------------------------------------------------- +reset_db +set doc "a b [string repeat x 100000]" +do_execsql_test 21.0 { + CREATE VIRTUAL TABLE x1 USING fts5(x); + INSERT INTO x1(rowid, x) VALUES(11111, $doc); + INSERT INTO x1(rowid, x) VALUES(11112, $doc); +} +do_execsql_test 21.1 { + INSERT INTO x1(x1) VALUES('integrity-check'); +} +do_execsql_test 21.2 { + SELECT rowid FROM x1($doc); +} {11111 11112} +do_execsql_test 21.3 { + DELETE FROM x1 WHERE rowid=11111; + INSERT INTO x1(x1) VALUES('integrity-check'); + SELECT rowid FROM x1($doc); +} {11112} finish_test