SQLite: Check-in [0229cba696]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

Comment:	Buffer updates per-transaction rather than per-update. When many updates happened within a single transaction, a lot of encode/decode work was wasted on segment merges. This code buffers updates in memory and writes out larger level-0 segments. The buffering is only effective when documents are presented in ascending order by docid; an out-of-order docid forces the buffer to be flushed. On a test set running 100 documents per transaction, the total runtime is cut almost in half. (CVS 3751)
Downloads:	Tarball \| ZIP archive
Timelines:	family \| ancestors \| descendants \| both \| trunk
Files:	files \| file ages \| folders
SHA1:	0229cba69698ab4b44f8583ef50a87c49422f8ec
User & Date:	shess 2007-03-29 18:41:04.000

Context

2007-03-29
18:46		Add the sqlite3_clear_bindings() API to the loadable extension interface. Ticket #2135. (CVS 3752) (check-in: 3111b43ec3 user: drh tags: trunk)
18:41		Buffer updates per-transaction rather than per-update. When many updates happened within a single transaction, a lot of encode/decode work was wasted on segment merges. This code buffers updates in memory and writes out larger level-0 segments. The buffering is only effective when documents are presented in ascending order by docid; an out-of-order docid forces the buffer to be flushed. On a test set running 100 documents per transaction, the total runtime is cut almost in half. (CVS 3751) (check-in: 0229cba696 user: shess tags: trunk)
18:19		Change the name of PAGER_SECTOR_SIZE to SQLITE_DEFAULT_SECTOR_SIZE. Make the new OS-layer interface routine for finding sector size optional. (CVS 3750) (check-in: 0fb9af1d6e user: drh tags: trunk)

Changes

Changes to ext/fts2/fts2.c.

Added test/fts2k.test.

︙			︙
987 988 989 990 991 992 993 994 995 996 997 998 999 1000	/* DLCollector wraps PLWriter and DLWriter to provide a dynamically-allocated doclist area to use during tokenization. dlcNew - malloc up and initialize a collector. dlcDelete - destroy a collector and all contained items. dlcAddPos - append position and offset information. dlcAddDoclist - add the collected doclist to the given buffer. */ typedef struct DLCollector { DataBuffer b; DLWriter dlw; PLWriter plw; } DLCollector;	>	987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001	/* DLCollector wraps PLWriter and DLWriter to provide a dynamically-allocated doclist area to use during tokenization. dlcNew - malloc up and initialize a collector. dlcDelete - destroy a collector and all contained items. dlcAddPos - append position and offset information. dlcAddDoclist - add the collected doclist to the given buffer. ** dlcNext - terminate the current document and open another. */ typedef struct DLCollector { DataBuffer b; DLWriter dlw; PLWriter plw; } DLCollector;
︙			︙
1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023	if( pCollector->dlw.iType>DL_DOCIDS ){ char c[VARINT_MAX]; int n = putVarint(c, POS_END); dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n); }else{ dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData); } } static void dlcAddPos(DLCollector pCollector, int iColumn, int iPos, int iStartOffset, int iEndOffset){ plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset); } static DLCollector dlcNew(sqlite_int64 iDocid, DocListType iType){	> > > > >	1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029	if( pCollector->dlw.iType>DL_DOCIDS ){ char c[VARINT_MAX]; int n = putVarint(c, POS_END); dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n); }else{ dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData); } } static void dlcNext(DLCollector pCollector, sqlite_int64 iDocid){ plwTerminate(&pCollector->plw); plwDestroy(&pCollector->plw); plwInit(&pCollector->plw, &pCollector->dlw, iDocid); } static void dlcAddPos(DLCollector pCollector, int iColumn, int iPos, int iStartOffset, int iEndOffset){ plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset); } static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){
︙			︙
1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663	/* Precompiled statements used for segment merges. We run a ** separate select across the leaf level of each tree being merged. / sqlite3_stmt pLeafSelectStmts[MERGE_COUNT]; /* The statement used to prepare pLeafSelectStmts. / #define LEAF_SELECT \ "select block from %_segments where rowid between ? and ? order by rowid" }; / When the core wants to do a query, it create a cursor using a call to xOpen. This structure is an instance of a cursor. It ** is destroyed by xClose. */	> > > > > > > > > > > > > > >	1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684	/* Precompiled statements used for segment merges. We run a ** separate select across the leaf level of each tree being merged. / sqlite3_stmt pLeafSelectStmts[MERGE_COUNT]; /* The statement used to prepare pLeafSelectStmts. / #define LEAF_SELECT \ "select block from %_segments where rowid between ? and ? order by rowid" / These buffer pending index updates during transactions. nPendingData estimates the memory size of the pending data. It doesn't include the hash-bucket overhead, nor any malloc overhead. When nPendingData exceeds kPendingThreshold, the buffer is flushed even before the transaction closes. pendingTerms stores the data, and is only valid when nPendingData is >=0 (nPendingData<0 means pendingTerms has not been initialized). iPrevDocid is the last docid written, used to make certain we're inserting in sorted order. / int nPendingData; #define kPendingThreshold (110241024) sqlite_int64 iPrevDocid; fts2Hash pendingTerms; }; / When the core wants to do a query, it create a cursor using a call to xOpen. This structure is an instance of a cursor. It ** is destroyed by xClose. */
︙			︙
2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142	rc = sqlite3_bind_int64(s, 1, iLevel); if( rc!=SQLITE_OK ) return rc; return sql_single_step_statement(v, SEGDIR_DELETE_STMT, &s); } /* ** Free the memory used to contain a fulltext_vtab structure. / static void fulltext_vtab_destroy(fulltext_vtab v){ int iStmt, i; TRACE(("FTS2 Destroy %p\n", v));	> > > > > > > >	2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171	rc = sqlite3_bind_int64(s, 1, iLevel); if( rc!=SQLITE_OK ) return rc; return sql_single_step_statement(v, SEGDIR_DELETE_STMT, &s); } /* TODO(shess) clearPendingTerms() is far down the file because writeZeroSegment() is far down the file because LeafWriter is far down the file. Consider refactoring the code to move the non-vtab code above the vtab code so that we don't need this forward reference. / static int clearPendingTerms(fulltext_vtab v); /* ** Free the memory used to contain a fulltext_vtab structure. / static void fulltext_vtab_destroy(fulltext_vtab v){ int iStmt, i; TRACE(("FTS2 Destroy %p\n", v));
︙			︙
2154 2155 2156 2157 2158 2159 2160 ~~2161~~ 2162 2163 2164 2165 2166 2167 2168	} } if( v->pTokenizer!=NULL ){ v->pTokenizer->pModule->xDestroy(v->pTokenizer); v->pTokenizer = NULL; } free(v->azColumn); for(i = 0; i < v->nColumn; ++i) { sqlite3_free(v->azContentColumn[i]); } free(v->azContentColumn); free(v); }	\| > >	2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199	} } if( v->pTokenizer!=NULL ){ v->pTokenizer->pModule->xDestroy(v->pTokenizer); v->pTokenizer = NULL; } clearPendingTerms(v); free(v->azColumn); for(i = 0; i < v->nColumn; ++i) { sqlite3_free(v->azContentColumn[i]); } free(v->azContentColumn); free(v); }
︙			︙
2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640	schema = fulltextSchema(v->nColumn, (const charconst)v->azColumn, spec->zName); rc = sqlite3_declare_vtab(db, schema); sqlite3_free(schema); if( rc!=SQLITE_OK ) goto err; memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); *ppVTab = &v->base; TRACE(("FTS2 Connect %p\n", v)); return rc; err:	> > >	2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674	schema = fulltextSchema(v->nColumn, (const charconst)v->azColumn, spec->zName); rc = sqlite3_declare_vtab(db, schema); sqlite3_free(schema); if( rc!=SQLITE_OK ) goto err; memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); /* Indicate that the buffer is not live. / v->nPendingData = -1; ppVTab = &v->base; TRACE(("FTS2 Connect %p\n", v)); return rc; err:
︙			︙
3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217	){ DataBuffer left, right, new; int i, rc; /* No phrase search if no position info. */ assert( pQTerm->nPhrase==0 \|\| DL_DEFAULT!=DL_DOCIDS ); dataBufferInit(&left, 0); rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, 0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &left); if( rc ) return rc; for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ dataBufferInit(&right, 0); rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,	> > >	3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254	){ DataBuffer left, right, new; int i, rc; /* No phrase search if no position info. / assert( pQTerm->nPhrase==0 \|\| DL_DEFAULT!=DL_DOCIDS ); / This code should never be called with buffered updates. */ assert( v->nPendingData<0 ); dataBufferInit(&left, 0); rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, 0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &left); if( rc ) return rc; for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ dataBufferInit(&right, 0); rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
︙			︙
3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408	if( inPhrase ){ /* unmatched quote / queryClear(pQuery); return SQLITE_ERROR; } return SQLITE_OK; } / Perform a full-text query using the search expression in zInput[0..nInput-1]. Return a list of matching documents in pResult. Queries must match column iColumn. Or if iColumn>=nColumn ** they are allowed to match against any column. / static int fulltextQuery( fulltext_vtab v, /* The full text index / int iColumn, / Match against this column by default / const char zInput, /* The query string / int nInput, / Number of bytes in zInput[] / DataBuffer pResult, /* Write the result doclist here / Query pQuery /* Put parsed query string here / ){ int i, iNext, rc; DataBuffer left, right, or, new; int nNot = 0; QueryTerm aTerm; /* TODO(shess) I think that the queryClear() calls below are not ** necessary, because fulltextClose() already clears the query. */ rc = parseQuery(v, zInput, nInput, iColumn, pQuery); if( rc!=SQLITE_OK ) return rc;	> > > > > > > > > > > > > > >	3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460	if( inPhrase ){ /* unmatched quote / queryClear(pQuery); return SQLITE_ERROR; } return SQLITE_OK; } / TODO(shess) Refactor the code to remove this forward decl. / static int flushPendingTerms(fulltext_vtab v); /* Perform a full-text query using the search expression in zInput[0..nInput-1]. Return a list of matching documents in pResult. Queries must match column iColumn. Or if iColumn>=nColumn ** they are allowed to match against any column. 
/ static int fulltextQuery( fulltext_vtab v, /* The full text index / int iColumn, / Match against this column by default / const char zInput, /* The query string / int nInput, / Number of bytes in zInput[] / DataBuffer pResult, /* Write the result doclist here / Query pQuery /* Put parsed query string here / ){ int i, iNext, rc; DataBuffer left, right, or, new; int nNot = 0; QueryTerm aTerm; /* TODO(shess) Instead of flushing pendingTerms, we could query for the relevant term and merge the doclist into what we receive from the database. Wait and see if this is a common issue, first. A good reason not to flush is to not generate update-related ** error codes from here. / / Flush any buffered updates before executing the query. / rc = flushPendingTerms(v); if( rc!=SQLITE_OK ) return rc; / TODO(shess) I think that the queryClear() calls below are not ** necessary, because fulltextClose() already clears the query. */ rc = parseQuery(v, zInput, nInput, iColumn, pQuery); if( rc!=SQLITE_OK ) return rc;
︙			︙
3594 3595 3596 3597 3598 3599 3600 ~~3601 3602 3603~~ ~~3604~~ 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 ~~3629~~ 3630 3631 ~~3632~~ 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 ~~3648 3649 3650~~ 3651 3652 3653 ~~3654~~ 3655 3656 3657 3658 3659 ~~3660 3661~~ ~~3662~~ 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 ~~3673~~ 3674 3675 3676 3677 3678 3679 3680 3681 ~~3682~~ 3683 ~~3684 3685~~ 3686 3687 3688 3689 3690 ~~3691~~ 3692 3693 ~~3694 3695~~ ~~3696~~ ~~3697~~ 3698 3699 3700 3701 ~~3702~~ ~~3703~~ 3704 ~~3705~~ 3706 3707 ~~3708~~ 3709 3710 3711 3712 3713 3714 ~~3715~~ 3716 3717 3718 3719 3720 3721 3722	static int fulltextRowid(sqlite3_vtab_cursor pCursor, sqlite_int64 pRowid){ fulltext_cursor c = (fulltext_cursor ) pCursor; pRowid = sqlite3_column_int64(c->pStmt, 0); return SQLITE_OK; } ~~/ Add all terms in [zText] to ~~the giv~~en ~~hash~~ table. If [iColumn] > 0, * we also store positions and offsets in the hash table using th~~e given~~ * column number. /~~ ~~static int buildTerms(fulltext_vtab v, ~~fts2Hash terms,~~ sqlite_int64 iDocid,~~ const char zText, int iColumn){ sqlite3_tokenizer pTokenizer = v->pTokenizer; sqlite3_tokenizer_cursor pCursor; const char pToken; int nTokenBytes; int iStartOffset, iEndOffset, iPosition; int rc; rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); if( rc!=SQLITE_OK ) return rc; pCursor->pTokenizer = pTokenizer; while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor, &pToken, &nTokenBytes, &iStartOffset, &iEndOffset, &iPosition) ){ DLCollector p; /* Positions can't be negative; we use -1 as a terminator internally. 
/ if( iPosition<0 ){ pTokenizer->pModule->xClose(pCursor); return SQLITE_ERROR; } ~~p = fts2HashFind(terms, pToken, nTokenBytes);~~ if( p==NULL ){ p = dlcNew(iDocid, DL_DEFAULT); ~~fts2HashInsert(terms, pToken, nTokenBytes, p);~~ } if( iColumn>=0 ){ dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset); } } / TODO(shess) Check return? Should this be able to cause errors at this point? Actually, same question about sqlite3_finalize(), though one could argue that failure there means that the data is ** not durable. ponder / pTokenizer->pModule->xClose(pCursor); return rc; } / Add doclists for all terms in [pValues] to the ~~hash~~ table ~~[terms]~~. / static int insertTerms(fulltext_vtab v, ~~fts2Hash terms,~~ sqlite_int64 iRowid, sqlite3_value pValues){ int i; for(i = 0; i < v->nColumn ; ++i){ char zText = (char)sqlite3_value_text(pValues[i]); ~~int rc = buildTerms(v, ~~terms,~~ iRowid, zText, i);~~ if( rc!=SQLITE_OK ) return rc; } return SQLITE_OK; } ~~/ Add empty doclists for all terms in the given row's content to ~~the hash~~ * ~~tabl~~e [pTerms~~]. /~~~~ ~~static int deleteTerms(fulltext_vtab v, ~~fts2Hash pTerms,~~ sqlite_int64 iRowid){~~ const char pValues; int i, rc; / TODO(shess) Should we allow such tables at all? / if( DL_DEFAULT==DL_DOCIDS ) return SQLITE_ERROR; rc = content_select(v, iRowid, &pValues); if( rc!=SQLITE_OK ) return rc; for(i = 0 ; i < v->nColumn; ++i) { ~~rc = buildTerms(v, ~~pTerms,~~ iRowid, pValues[i], -1);~~ if( rc!=SQLITE_OK ) break; } freeStringArray(v->nColumn, pValues); return SQLITE_OK; } / Insert a row into the %_content table; set piRowid to be the ID of the new row. ~~Fill [pTerms] with new~~ doclists for t~~he %_t~~erm t~~abl~~e~~. 
/~~ static int index_insert(fulltext_vtab v, sqlite3_value pRequestRowid, ~~sqlite3_value pValues, ~~sqlite_int64 piRowid, fts2Hash pTerms){~~~~ int rc; rc = content_insert(v, pRequestRowid, pValues); / execute an SQL INSERT / if( rc!=SQLITE_OK ) return rc; piRowid = sqlite3_last_insert_rowid(v->db); ~~return insertTerms(v, ~~pTerms,~~ piRowid, pValues);~~ } ~~/ Delete a row from the %_content table; ~~fill [pTerms] with~~ empty doclists * to ~~be writt~~en ~~to the %_t~~erm ~~table. /~~~~ ~~static int index_delete(fulltext_vtab v, sqlite_int64 iRow~~, fts2Hash pTerms~~){~~ ~~~~int~~ rc = deleteTerms(v~~, pTerms~~, iRow);~~ if( rc!=SQLITE_OK ) return rc; return content_delete(v, iRow); / execute an SQL DELETE / } ~~/ Update a row in the %_content table; fil~~l [pTerms] with new~~ doclists ~~for the~~~~ * %_term table. / static int index_update(fulltext_vtab v, sqlite_int64 iRow, ~~sqlite3_value *pValues~~, fts2Hash pTerms~~){~~ /* Generate an empty doclist for each term that previously appeared in this * row. / ~~~~int~~ rc = deleteTerms(v~~, pTerms~~, iRow);~~ if( rc!=SQLITE_OK ) return rc; rc = content_update(v, pValues, iRow); / execute an SQL UPDATE / if( rc!=SQLITE_OK ) return rc; / Now add positions for terms which appear in the updated row. / ~~return insertTerms(v, ~~pTerms,~~ iRow, pValues);~~ } /*****************************************************************/ / InteriorWriter is used to collect terms and block references into interior nodes in %_segments. See commentary at top of file for format. 
*/	\| \| \| > \| > \| > \| > > > > > > > > > \| \| \| \| \| \| > \| \| > > > \| > \| < > > > > \| \| \| > \| > > > \| > \| > > \| \| > > > \| \|	3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804	static int fulltextRowid(sqlite3_vtab_cursor pCursor, sqlite_int64 pRowid){ fulltext_cursor c = (fulltext_cursor ) pCursor; pRowid = sqlite3_column_int64(c->pStmt, 0); return SQLITE_OK; } / Add all terms in [zText] to pendingTerms table. If [iColumn] > 0, we also store positions and offsets in the hash table using that column number. / static int buildTerms(fulltext_vtab v, sqlite_int64 iDocid, const char zText, int iColumn){ sqlite3_tokenizer pTokenizer = v->pTokenizer; sqlite3_tokenizer_cursor pCursor; const char pToken; int nTokenBytes; int iStartOffset, iEndOffset, iPosition; int rc; rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); if( rc!=SQLITE_OK ) return rc; pCursor->pTokenizer = pTokenizer; while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor, &pToken, &nTokenBytes, &iStartOffset, &iEndOffset, &iPosition) ){ DLCollector p; int nData; / Size of doclist before our update. / / Positions can't be negative; we use -1 as a terminator internally. 
/ if( iPosition<0 ){ pTokenizer->pModule->xClose(pCursor); return SQLITE_ERROR; } p = fts2HashFind(&v->pendingTerms, pToken, nTokenBytes); if( p==NULL ){ nData = 0; p = dlcNew(iDocid, DL_DEFAULT); fts2HashInsert(&v->pendingTerms, pToken, nTokenBytes, p); / Overhead for our hash table entry, the key, and the value. / v->nPendingData += sizeof(struct fts2HashElem)+sizeof(p)+nTokenBytes; }else{ nData = p->b.nData; if( p->dlw.iPrevDocid!=iDocid ) dlcNext(p, iDocid); } if( iColumn>=0 ){ dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset); } /* Accumulate data added by dlcNew or dlcNext, and dlcAddPos. / v->nPendingData += p->b.nData-nData; } / TODO(shess) Check return? Should this be able to cause errors at this point? Actually, same question about sqlite3_finalize(), though one could argue that failure there means that the data is ** not durable. ponder / pTokenizer->pModule->xClose(pCursor); return rc; } / Add doclists for all terms in [pValues] to pendingTerms table. / static int insertTerms(fulltext_vtab v, sqlite_int64 iRowid, sqlite3_value *pValues){ int i; for(i = 0; i < v->nColumn ; ++i){ char zText = (char)sqlite3_value_text(pValues[i]); int rc = buildTerms(v, iRowid, zText, i); if( rc!=SQLITE_OK ) return rc; } return SQLITE_OK; } / Add empty doclists for all terms in the given row's content to ** pendingTerms. / static int deleteTerms(fulltext_vtab v, sqlite_int64 iRowid){ const char *pValues; int i, rc; / TODO(shess) Should we allow such tables at all? / if( DL_DEFAULT==DL_DOCIDS ) return SQLITE_ERROR; rc = content_select(v, iRowid, &pValues); if( rc!=SQLITE_OK ) return rc; for(i = 0 ; i < v->nColumn; ++i) { rc = buildTerms(v, iRowid, pValues[i], -1); if( rc!=SQLITE_OK ) break; } freeStringArray(v->nColumn, pValues); return SQLITE_OK; } / TODO(shess) Refactor the code to remove this forward decl. / static int initPendingTerms(fulltext_vtab v, sqlite_int64 iDocid); /* Insert a row into the %_content table; set piRowid to be the ID of the * new row. 
Add doclists for terms to pendingTerms. / static int index_insert(fulltext_vtab v, sqlite3_value pRequestRowid, sqlite3_value pValues, sqlite_int64 piRowid){ int rc; rc = content_insert(v, pRequestRowid, pValues); /* execute an SQL INSERT / if( rc!=SQLITE_OK ) return rc; piRowid = sqlite3_last_insert_rowid(v->db); rc = initPendingTerms(v, piRowid); if( rc!=SQLITE_OK ) return rc; return insertTerms(v, piRowid, pValues); } /* Delete a row from the %_content table; add empty doclists for terms ** to pendingTerms. / static int index_delete(fulltext_vtab v, sqlite_int64 iRow){ int rc = initPendingTerms(v, iRow); if( rc!=SQLITE_OK ) return rc; rc = deleteTerms(v, iRow); if( rc!=SQLITE_OK ) return rc; return content_delete(v, iRow); /* execute an SQL DELETE / } / Update a row in the %_content table; add delete doclists to pendingTerms for old terms not in the new data, add insert doclists to pendingTerms for terms in the new data. / static int index_update(fulltext_vtab v, sqlite_int64 iRow, sqlite3_value *pValues){ int rc = initPendingTerms(v, iRow); if( rc!=SQLITE_OK ) return rc; / Generate an empty doclist for each term that previously appeared in this * row. / rc = deleteTerms(v, iRow); if( rc!=SQLITE_OK ) return rc; rc = content_update(v, pValues, iRow); / execute an SQL UPDATE / if( rc!=SQLITE_OK ) return rc; / Now add positions for terms which appear in the updated row. / return insertTerms(v, iRow, pValues); } /*****************************************************************/ / InteriorWriter is used to collect terms and block references into interior nodes in %_segments. See commentary at top of file for format. */
︙			︙
4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004	** read from pData will overwrite those in out). / static int loadSegmentLeaf(fulltext_vtab v, const char pData, int nData, const char pTerm, int nTerm, DataBuffer out){ LeafReader reader; assert( nData>1 ); assert( *pData=='\0' ); leafReaderInit(pData, nData, &reader); while( !leafReaderAtEnd(&reader) ){ int c = leafReaderTermCmp(&reader, pTerm, nTerm); if( c==0 ){ if( out->nData==0 ){ dataBufferReplace(out,	> > >	5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089	** read from pData will overwrite those in out). / static int loadSegmentLeaf(fulltext_vtab v, const char pData, int nData, const char pTerm, int nTerm, DataBuffer out){ LeafReader reader; assert( nData>1 ); assert( pData=='\0' ); / This code should never be called with buffered updates. */ assert( v->nPendingData<0 ); leafReaderInit(pData, nData, &reader); while( !leafReaderAtEnd(&reader) ){ int c = leafReaderTermCmp(&reader, pTerm, nTerm); if( c==0 ){ if( out->nData==0 ){ dataBufferReplace(out,
︙			︙
5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042	/ static int loadSegment(fulltext_vtab v, const char pData, int nData, const char pTerm, int nTerm, DataBuffer out){ int rc; sqlite3_stmt s = NULL; assert( nData>1 ); /* Process data as an interior node until we reach a leaf. / while( pData!='\0' ){ sqlite_int64 iBlockid; InteriorReader reader; /* Scan the node data until we find a term greater than our term.	> > >	5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130	/ static int loadSegment(fulltext_vtab v, const char pData, int nData, const char pTerm, int nTerm, DataBuffer out){ int rc; sqlite3_stmt s = NULL; assert( nData>1 ); /* This code should never be called with buffered updates. / assert( v->nPendingData<0 ); / Process data as an interior node until we reach a leaf. / while( pData!='\0' ){ sqlite_int64 iBlockid; InteriorReader reader; /* Scan the node data until we find a term greater than our term.
︙			︙
5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104	static int termSelect(fulltext_vtab v, int iColumn, const char pTerm, int nTerm, DocListType iType, DataBuffer out){ DataBuffer doclist; sqlite3_stmt s; int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); if( rc!=SQLITE_OK ) return rc; dataBufferInit(&doclist, 0); /* Traverse the segments from oldest to newest so that newer doclist ** elements for given docids overwrite older elements. */ while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){	> > >	5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195	static int termSelect(fulltext_vtab v, int iColumn, const char pTerm, int nTerm, DocListType iType, DataBuffer out){ DataBuffer doclist; sqlite3_stmt s; int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); if( rc!=SQLITE_OK ) return rc; /* This code should never be called with buffered updates. / assert( v->nPendingData<0 ); dataBufferInit(&doclist, 0); / Traverse the segments from oldest to newest so that newer doclist ** elements for given docids overwrite older elements. */ while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
︙			︙
5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 ~~5204~~ 5205 ~~5206~~ 5207 5208 ~~5209 5210 5211~~ 5212 ~~5213~~ 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 ~~5227~~ 5228 5229 5230 5231 5232 5233 5234 5235 ~~5236~~ 5237 5238 ~~5239~~ ~~5240 5241~~ ~~5242~~ ~~5243~~ ~~5244 5245~~ 5246 ~~5247~~ 5248 5249 5250 5251 5252 5253 5254 5255	err: dataBufferDestroy(&dl); free(pData); leafWriterDestroy(&writer); return rc; } /* This function implements the xUpdate callback; it's the top-level entry * point for inserting, deleting or updating a row in a full-text table. / static int fulltextUpdate(sqlite3_vtab pVtab, int nArg, sqlite3_value *ppArg, sqlite_int64 pRowid){ fulltext_vtab v = (fulltext_vtab ) pVtab; ~~fts2Hash terms; /* maps term string -> PosList /~~ int rc; ~~fts2HashElem e;~~ TRACE(("FTS2 Update %p\n", pVtab)); ~~fts2HashInit(&terms, FTS2_HASH_STRING, 1);~~ if( nArg<2 ){ ~~rc = index_delete(v, sqlite3_value_int64(ppArg[0])~~, &terms~~);~~ } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ /* An update: * ppArg[0] = old rowid * ppArg[1] = new rowid * ppArg[2..2+v->nColumn-1] = values * ppArg[2+v->nColumn] = value for magic column (we ignore this) / sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]); if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER \|\| sqlite3_value_int64(ppArg[1]) != rowid ){ rc = SQLITE_ERROR; / we don't allow changing the rowid / } else { assert( nArg==2+v->nColumn+1); ~~rc = index_update(v, rowid, &ppArg[2]~~, &terms~~);~~ } } else { / An insert: * ppArg[1] = requested rowid * ppArg[2..2+v->nColumn-1] = values * ppArg[2+v->nColumn] = value for magic column (we ignore this) / assert( nArg==2+v->nColumn+1); ~~rc = index_insert(v, ppArg[1], &ppArg[2], pRowid~~, &terms~~);~~ } ~~if( rc==SQLITE_OK ) rc = writeZeroSegment(v, &terms);~~ ~~/ clean up /~~ ~~for(e=fts2HashFirst(&terms); e; e=fts2HashNext(e)){~~ ~~~~dlcDelete(fts2H~~ashData~~(e)~~);~~ ~~} ~~fts2HashClear(&terms);~~~~ ~~return rc;~~ } / ** 
Implementation of the snippet() function for FTS2 / static void snippetFunc( sqlite3_context pContext, int argc,	> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < \| < < \| \| \| > > \| > > > \| \| > > > \| > > > \| > \| \| > > > > > \| > > > > >	5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411	err: dataBufferDestroy(&dl); free(pData); leafWriterDestroy(&writer); return rc; } /* If pendingTerms has data, free it. / static int clearPendingTerms(fulltext_vtab v){ if( v->nPendingData>=0 ){ fts2HashElem e; for(e=fts2HashFirst(&v->pendingTerms); e; e=fts2HashNext(e)){ dlcDelete(fts2HashData(e)); } fts2HashClear(&v->pendingTerms); v->nPendingData = -1; } return SQLITE_OK; } / If pendingTerms has data, flush it to a level-zero segment, and ** free it. / static int flushPendingTerms(fulltext_vtab v){ if( v->nPendingData>=0 ){ int rc = writeZeroSegment(v, &v->pendingTerms); clearPendingTerms(v); return rc; } return SQLITE_OK; } /* If pendingTerms is "too big", or docid is out of order, flush it. ** Regardless, be certain that pendingTerms is initialized for use. / static int initPendingTerms(fulltext_vtab v, sqlite_int64 iDocid){ /* TODO(shess) Explore whether partially flushing the buffer on forced-flush would provide better performance. 
I suspect that if we ordered the doclists by size and flushed the largest until the buffer was half empty, that would let the less frequent terms generate longer doclists. / if( iDocid<=v->iPrevDocid \|\| v->nPendingData>kPendingThreshold ){ int rc = flushPendingTerms(v); if( rc!=SQLITE_OK ) return rc; } if( v->nPendingData<0 ){ fts2HashInit(&v->pendingTerms, FTS2_HASH_STRING, 1); v->nPendingData = 0; } v->iPrevDocid = iDocid; return SQLITE_OK; } / This function implements the xUpdate callback; it's the top-level entry * point for inserting, deleting or updating a row in a full-text table. / static int fulltextUpdate(sqlite3_vtab pVtab, int nArg, sqlite3_value *ppArg, sqlite_int64 pRowid){ fulltext_vtab v = (fulltext_vtab ) pVtab; int rc; TRACE(("FTS2 Update %p\n", pVtab)); if( nArg<2 ){ rc = index_delete(v, sqlite3_value_int64(ppArg[0])); } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ /* An update: * ppArg[0] = old rowid * ppArg[1] = new rowid * ppArg[2..2+v->nColumn-1] = values * ppArg[2+v->nColumn] = value for magic column (we ignore this) / sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]); if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER \|\| sqlite3_value_int64(ppArg[1]) != rowid ){ rc = SQLITE_ERROR; / we don't allow changing the rowid / } else { assert( nArg==2+v->nColumn+1); rc = index_update(v, rowid, &ppArg[2]); } } else { / An insert: * ppArg[1] = requested rowid * ppArg[2..2+v->nColumn-1] = values * ppArg[2+v->nColumn] = value for magic column (we ignore this) / assert( nArg==2+v->nColumn+1); rc = index_insert(v, ppArg[1], &ppArg[2], pRowid); } return rc; } static int fulltextSync(sqlite3_vtab pVtab){ TRACE(("FTS2 xSync()\n")); return flushPendingTerms((fulltext_vtab )pVtab); } static int fulltextBegin(sqlite3_vtab pVtab){ fulltext_vtab v = (fulltext_vtab ) pVtab; TRACE(("FTS2 xBegin()\n")); /* Any buffered updates should have been cleared by the previous ** transaction. 
/ assert( v->nPendingData<0 ); return clearPendingTerms(v); } static int fulltextCommit(sqlite3_vtab pVtab){ fulltext_vtab v = (fulltext_vtab ) pVtab; TRACE(("FTS2 xCommit()\n")); /* Buffered updates should have been cleared by fulltextSync(). / assert( v->nPendingData<0 ); return clearPendingTerms(v); } static int fulltextRollback(sqlite3_vtab pVtab){ TRACE(("FTS2 xRollback()\n")); return clearPendingTerms((fulltext_vtab )pVtab); } / ** Implementation of the snippet() function for FTS2 / static void snippetFunc( sqlite3_context pContext, int argc,
︙			︙
5336 5337 5338 5339 5340 5341 5342 ~~5343 5344 5345 5346~~ 5347 5348 5349 5350 5351 5352 5353	/* xClose / fulltextClose, / xFilter / fulltextFilter, / xNext / fulltextNext, / xEof / fulltextEof, / xColumn / fulltextColumn, / xRowid / fulltextRowid, / xUpdate / fulltextUpdate, ~~/ xBegin / 0, / xSync / 0, / xCommit / 0, / xRollback / 0,~~ / xFindFunction / fulltextFindFunction, }; int sqlite3Fts2Init(sqlite3 db){ sqlite3_overload_function(db, "snippet", -1); sqlite3_overload_function(db, "offsets", -1); return sqlite3_create_module(db, "fts2", &fulltextModule, 0);	\| \| \| \|	5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509	/* xClose / fulltextClose, / xFilter / fulltextFilter, / xNext / fulltextNext, / xEof / fulltextEof, / xColumn / fulltextColumn, / xRowid / fulltextRowid, / xUpdate / fulltextUpdate, / xBegin / fulltextBegin, / xSync / fulltextSync, / xCommit / fulltextCommit, / xRollback / fulltextRollback, / xFindFunction / fulltextFindFunction, }; int sqlite3Fts2Init(sqlite3 db){ sqlite3_overload_function(db, "snippet", -1); sqlite3_overload_function(db, "offsets", -1); return sqlite3_create_module(db, "fts2", &fulltextModule, 0);
︙			︙