Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Buffer updates per-transaction rather than per-update. If lots of updates happen within a single transaction, there was a lot of wasted encode/decode overhead due to segment merges. This code buffers updates in memory and writes out larger level-0 segments. It only works when documents are presented in ascending order by docid. Comparing a test set running 100 documents per transaction, the total runtime is cut almost in half. (CVS 3751) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
0229cba69698ab4b44f8583ef50a87c4 |
User & Date: | shess 2007-03-29 18:41:04.000 |
Context
2007-03-29
| ||
18:46 | Add the sqlite3_clear_bindings() API to the loadable extension interface. Ticket #2135. (CVS 3752) (check-in: 3111b43ec3 user: drh tags: trunk) | |
18:41 | Buffer updates per-transaction rather than per-update. If lots of updates happen within a single transaction, there was a lot of wasted encode/decode overhead due to segment merges. This code buffers updates in memory and writes out larger level-0 segments. It only works when documents are presented in ascending order by docid. Comparing a test set running 100 documents per transaction, the total runtime is cut almost in half. (CVS 3751) (check-in: 0229cba696 user: shess tags: trunk) | |
18:19 | Change the name of PAGER_SECTOR_SIZE to SQLITE_DEFAULT_SECTOR_SIZE. Make the new OS-layer interface routine for finding sector size optional. (CVS 3750) (check-in: 0fb9af1d6e user: drh tags: trunk) | |
Changes
Changes to ext/fts2/fts2.c.
︙ | ︙ | |||
987 988 989 990 991 992 993 994 995 996 997 998 999 1000 | /* DLCollector wraps PLWriter and DLWriter to provide a ** dynamically-allocated doclist area to use during tokenization. ** ** dlcNew - malloc up and initialize a collector. ** dlcDelete - destroy a collector and all contained items. ** dlcAddPos - append position and offset information. ** dlcAddDoclist - add the collected doclist to the given buffer. */ typedef struct DLCollector { DataBuffer b; DLWriter dlw; PLWriter plw; } DLCollector; | > | 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 | /* DLCollector wraps PLWriter and DLWriter to provide a ** dynamically-allocated doclist area to use during tokenization. ** ** dlcNew - malloc up and initialize a collector. ** dlcDelete - destroy a collector and all contained items. ** dlcAddPos - append position and offset information. ** dlcAddDoclist - add the collected doclist to the given buffer. ** dlcNext - terminate the current document and open another. */ typedef struct DLCollector { DataBuffer b; DLWriter dlw; PLWriter plw; } DLCollector; |
︙ | ︙ | |||
1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 | if( pCollector->dlw.iType>DL_DOCIDS ){ char c[VARINT_MAX]; int n = putVarint(c, POS_END); dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n); }else{ dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData); } } static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos, int iStartOffset, int iEndOffset){ plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset); } static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){ | > > > > > | 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 | if( pCollector->dlw.iType>DL_DOCIDS ){ char c[VARINT_MAX]; int n = putVarint(c, POS_END); dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n); }else{ dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData); } } static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid){ plwTerminate(&pCollector->plw); plwDestroy(&pCollector->plw); plwInit(&pCollector->plw, &pCollector->dlw, iDocid); } static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos, int iStartOffset, int iEndOffset){ plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset); } static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){ |
︙ | ︙ | |||
1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 | /* Precompiled statements used for segment merges. We run a ** separate select across the leaf level of each tree being merged. */ sqlite3_stmt *pLeafSelectStmts[MERGE_COUNT]; /* The statement used to prepare pLeafSelectStmts. */ #define LEAF_SELECT \ "select block from %_segments where rowid between ? and ? order by rowid" }; /* ** When the core wants to do a query, it creates a cursor using a ** call to xOpen. This structure is an instance of a cursor. It ** is destroyed by xClose. */ | > > > > > > > > > > > > > > > | 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 | /* Precompiled statements used for segment merges. We run a ** separate select across the leaf level of each tree being merged. */ sqlite3_stmt *pLeafSelectStmts[MERGE_COUNT]; /* The statement used to prepare pLeafSelectStmts. */ #define LEAF_SELECT \ "select block from %_segments where rowid between ? and ? order by rowid" /* These buffer pending index updates during transactions. ** nPendingData estimates the memory size of the pending data. It ** doesn't include the hash-bucket overhead, nor any malloc ** overhead. When nPendingData exceeds kPendingThreshold, the ** buffer is flushed even before the transaction closes. ** pendingTerms stores the data, and is only valid when nPendingData ** is >=0 (nPendingData<0 means pendingTerms has not been ** initialized). iPrevDocid is the last docid written, used to make ** certain we're inserting in sorted order. */ int nPendingData; #define kPendingThreshold (1*1024*1024) sqlite_int64 iPrevDocid; fts2Hash pendingTerms; }; /* ** When the core wants to do a query, it creates a cursor using a ** call to xOpen. This structure is an instance of a cursor. It ** is destroyed by xClose. */ |
︙ | ︙ | |||
2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 | rc = sqlite3_bind_int64(s, 1, iLevel); if( rc!=SQLITE_OK ) return rc; return sql_single_step_statement(v, SEGDIR_DELETE_STMT, &s); } /* ** Free the memory used to contain a fulltext_vtab structure. */ static void fulltext_vtab_destroy(fulltext_vtab *v){ int iStmt, i; TRACE(("FTS2 Destroy %p\n", v)); | > > > > > > > > | 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 | rc = sqlite3_bind_int64(s, 1, iLevel); if( rc!=SQLITE_OK ) return rc; return sql_single_step_statement(v, SEGDIR_DELETE_STMT, &s); } /* TODO(shess) clearPendingTerms() is far down the file because ** writeZeroSegment() is far down the file because LeafWriter is far ** down the file. Consider refactoring the code to move the non-vtab ** code above the vtab code so that we don't need this forward ** reference. */ static int clearPendingTerms(fulltext_vtab *v); /* ** Free the memory used to contain a fulltext_vtab structure. */ static void fulltext_vtab_destroy(fulltext_vtab *v){ int iStmt, i; TRACE(("FTS2 Destroy %p\n", v)); |
︙ | ︙ | |||
2154 2155 2156 2157 2158 2159 2160 | } } if( v->pTokenizer!=NULL ){ v->pTokenizer->pModule->xDestroy(v->pTokenizer); v->pTokenizer = NULL; } | | > > | 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 | } } if( v->pTokenizer!=NULL ){ v->pTokenizer->pModule->xDestroy(v->pTokenizer); v->pTokenizer = NULL; } clearPendingTerms(v); free(v->azColumn); for(i = 0; i < v->nColumn; ++i) { sqlite3_free(v->azContentColumn[i]); } free(v->azContentColumn); free(v); } |
︙ | ︙ | |||
2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 | schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn, spec->zName); rc = sqlite3_declare_vtab(db, schema); sqlite3_free(schema); if( rc!=SQLITE_OK ) goto err; memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); *ppVTab = &v->base; TRACE(("FTS2 Connect %p\n", v)); return rc; err: | > > > | 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 | schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn, spec->zName); rc = sqlite3_declare_vtab(db, schema); sqlite3_free(schema); if( rc!=SQLITE_OK ) goto err; memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); /* Indicate that the buffer is not live. */ v->nPendingData = -1; *ppVTab = &v->base; TRACE(("FTS2 Connect %p\n", v)); return rc; err: |
︙ | ︙ | |||
3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 | ){ DataBuffer left, right, new; int i, rc; /* No phrase search if no position info. */ assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS ); dataBufferInit(&left, 0); rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, 0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &left); if( rc ) return rc; for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ dataBufferInit(&right, 0); rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, | > > > | 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 | ){ DataBuffer left, right, new; int i, rc; /* No phrase search if no position info. */ assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS ); /* This code should never be called with buffered updates. */ assert( v->nPendingData<0 ); dataBufferInit(&left, 0); rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, 0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &left); if( rc ) return rc; for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ dataBufferInit(&right, 0); rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, |
︙ | ︙ | |||
3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 | if( inPhrase ){ /* unmatched quote */ queryClear(pQuery); return SQLITE_ERROR; } return SQLITE_OK; } /* Perform a full-text query using the search expression in ** zInput[0..nInput-1]. Return a list of matching documents ** in pResult. ** ** Queries must match column iColumn. Or if iColumn>=nColumn ** they are allowed to match against any column. */ static int fulltextQuery( fulltext_vtab *v, /* The full text index */ int iColumn, /* Match against this column by default */ const char *zInput, /* The query string */ int nInput, /* Number of bytes in zInput[] */ DataBuffer *pResult, /* Write the result doclist here */ Query *pQuery /* Put parsed query string here */ ){ int i, iNext, rc; DataBuffer left, right, or, new; int nNot = 0; QueryTerm *aTerm; /* TODO(shess) I think that the queryClear() calls below are not ** necessary, because fulltextClose() already clears the query. */ rc = parseQuery(v, zInput, nInput, iColumn, pQuery); if( rc!=SQLITE_OK ) return rc; | > > > > > > > > > > > > > > > | 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 | if( inPhrase ){ /* unmatched quote */ queryClear(pQuery); return SQLITE_ERROR; } return SQLITE_OK; } /* TODO(shess) Refactor the code to remove this forward decl. */ static int flushPendingTerms(fulltext_vtab *v); /* Perform a full-text query using the search expression in ** zInput[0..nInput-1]. Return a list of matching documents ** in pResult. ** ** Queries must match column iColumn. Or if iColumn>=nColumn ** they are allowed to match against any column. 
*/ static int fulltextQuery( fulltext_vtab *v, /* The full text index */ int iColumn, /* Match against this column by default */ const char *zInput, /* The query string */ int nInput, /* Number of bytes in zInput[] */ DataBuffer *pResult, /* Write the result doclist here */ Query *pQuery /* Put parsed query string here */ ){ int i, iNext, rc; DataBuffer left, right, or, new; int nNot = 0; QueryTerm *aTerm; /* TODO(shess) Instead of flushing pendingTerms, we could query for ** the relevant term and merge the doclist into what we receive from ** the database. Wait and see if this is a common issue, first. ** ** A good reason not to flush is to not generate update-related ** error codes from here. */ /* Flush any buffered updates before executing the query. */ rc = flushPendingTerms(v); if( rc!=SQLITE_OK ) return rc; /* TODO(shess) I think that the queryClear() calls below are not ** necessary, because fulltextClose() already clears the query. */ rc = parseQuery(v, zInput, nInput, iColumn, pQuery); if( rc!=SQLITE_OK ) return rc; |
︙ | ︙ | |||
3594 3595 3596 3597 3598 3599 3600 | static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ fulltext_cursor *c = (fulltext_cursor *) pCursor; *pRowid = sqlite3_column_int64(c->pStmt, 0); return SQLITE_OK; } | | | | > | > | > | > > > > > > > > > | | | | | | > | | > > > | > | < > > > > | | | > | > > > | > | > > | | > > > | | | 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 | static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ fulltext_cursor *c = (fulltext_cursor *) pCursor; *pRowid = sqlite3_column_int64(c->pStmt, 0); return SQLITE_OK; } /* Add all terms in [zText] to pendingTerms table. If [iColumn] > 0, ** we also store positions and offsets in the hash table using that ** column number. 
*/ static int buildTerms(fulltext_vtab *v, sqlite_int64 iDocid, const char *zText, int iColumn){ sqlite3_tokenizer *pTokenizer = v->pTokenizer; sqlite3_tokenizer_cursor *pCursor; const char *pToken; int nTokenBytes; int iStartOffset, iEndOffset, iPosition; int rc; rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); if( rc!=SQLITE_OK ) return rc; pCursor->pTokenizer = pTokenizer; while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor, &pToken, &nTokenBytes, &iStartOffset, &iEndOffset, &iPosition) ){ DLCollector *p; int nData; /* Size of doclist before our update. */ /* Positions can't be negative; we use -1 as a terminator internally. */ if( iPosition<0 ){ pTokenizer->pModule->xClose(pCursor); return SQLITE_ERROR; } p = fts2HashFind(&v->pendingTerms, pToken, nTokenBytes); if( p==NULL ){ nData = 0; p = dlcNew(iDocid, DL_DEFAULT); fts2HashInsert(&v->pendingTerms, pToken, nTokenBytes, p); /* Overhead for our hash table entry, the key, and the value. */ v->nPendingData += sizeof(struct fts2HashElem)+sizeof(*p)+nTokenBytes; }else{ nData = p->b.nData; if( p->dlw.iPrevDocid!=iDocid ) dlcNext(p, iDocid); } if( iColumn>=0 ){ dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset); } /* Accumulate data added by dlcNew or dlcNext, and dlcAddPos. */ v->nPendingData += p->b.nData-nData; } /* TODO(shess) Check return? Should this be able to cause errors at ** this point? Actually, same question about sqlite3_finalize(), ** though one could argue that failure there means that the data is ** not durable. *ponder* */ pTokenizer->pModule->xClose(pCursor); return rc; } /* Add doclists for all terms in [pValues] to pendingTerms table. 
*/ static int insertTerms(fulltext_vtab *v, sqlite_int64 iRowid, sqlite3_value **pValues){ int i; for(i = 0; i < v->nColumn ; ++i){ char *zText = (char*)sqlite3_value_text(pValues[i]); int rc = buildTerms(v, iRowid, zText, i); if( rc!=SQLITE_OK ) return rc; } return SQLITE_OK; } /* Add empty doclists for all terms in the given row's content to ** pendingTerms. */ static int deleteTerms(fulltext_vtab *v, sqlite_int64 iRowid){ const char **pValues; int i, rc; /* TODO(shess) Should we allow such tables at all? */ if( DL_DEFAULT==DL_DOCIDS ) return SQLITE_ERROR; rc = content_select(v, iRowid, &pValues); if( rc!=SQLITE_OK ) return rc; for(i = 0 ; i < v->nColumn; ++i) { rc = buildTerms(v, iRowid, pValues[i], -1); if( rc!=SQLITE_OK ) break; } freeStringArray(v->nColumn, pValues); return SQLITE_OK; } /* TODO(shess) Refactor the code to remove this forward decl. */ static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid); /* Insert a row into the %_content table; set *piRowid to be the ID of the ** new row. Add doclists for terms to pendingTerms. */ static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid, sqlite3_value **pValues, sqlite_int64 *piRowid){ int rc; rc = content_insert(v, pRequestRowid, pValues); /* execute an SQL INSERT */ if( rc!=SQLITE_OK ) return rc; *piRowid = sqlite3_last_insert_rowid(v->db); rc = initPendingTerms(v, *piRowid); if( rc!=SQLITE_OK ) return rc; return insertTerms(v, *piRowid, pValues); } /* Delete a row from the %_content table; add empty doclists for terms ** to pendingTerms. */ static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){ int rc = initPendingTerms(v, iRow); if( rc!=SQLITE_OK ) return rc; rc = deleteTerms(v, iRow); if( rc!=SQLITE_OK ) return rc; return content_delete(v, iRow); /* execute an SQL DELETE */ } /* Update a row in the %_content table; add delete doclists to ** pendingTerms for old terms not in the new data, add insert doclists ** to pendingTerms for terms in the new data. 
*/ static int index_update(fulltext_vtab *v, sqlite_int64 iRow, sqlite3_value **pValues){ int rc = initPendingTerms(v, iRow); if( rc!=SQLITE_OK ) return rc; /* Generate an empty doclist for each term that previously appeared in this * row. */ rc = deleteTerms(v, iRow); if( rc!=SQLITE_OK ) return rc; rc = content_update(v, pValues, iRow); /* execute an SQL UPDATE */ if( rc!=SQLITE_OK ) return rc; /* Now add positions for terms which appear in the updated row. */ return insertTerms(v, iRow, pValues); } /*******************************************************************/ /* InteriorWriter is used to collect terms and block references into ** interior nodes in %_segments. See commentary at top of file for ** format. */ |
︙ | ︙ | |||
4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 | ** read from pData will overwrite those in *out). */ static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData, const char *pTerm, int nTerm, DataBuffer *out){ LeafReader reader; assert( nData>1 ); assert( *pData=='\0' ); leafReaderInit(pData, nData, &reader); while( !leafReaderAtEnd(&reader) ){ int c = leafReaderTermCmp(&reader, pTerm, nTerm); if( c==0 ){ if( out->nData==0 ){ dataBufferReplace(out, | > > > | 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 | ** read from pData will overwrite those in *out). */ static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData, const char *pTerm, int nTerm, DataBuffer *out){ LeafReader reader; assert( nData>1 ); assert( *pData=='\0' ); /* This code should never be called with buffered updates. */ assert( v->nPendingData<0 ); leafReaderInit(pData, nData, &reader); while( !leafReaderAtEnd(&reader) ){ int c = leafReaderTermCmp(&reader, pTerm, nTerm); if( c==0 ){ if( out->nData==0 ){ dataBufferReplace(out, |
︙ | ︙ | |||
5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 | */ static int loadSegment(fulltext_vtab *v, const char *pData, int nData, const char *pTerm, int nTerm, DataBuffer *out){ int rc; sqlite3_stmt *s = NULL; assert( nData>1 ); /* Process data as an interior node until we reach a leaf. */ while( *pData!='\0' ){ sqlite_int64 iBlockid; InteriorReader reader; /* Scan the node data until we find a term greater than our term. | > > > | 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 | */ static int loadSegment(fulltext_vtab *v, const char *pData, int nData, const char *pTerm, int nTerm, DataBuffer *out){ int rc; sqlite3_stmt *s = NULL; assert( nData>1 ); /* This code should never be called with buffered updates. */ assert( v->nPendingData<0 ); /* Process data as an interior node until we reach a leaf. */ while( *pData!='\0' ){ sqlite_int64 iBlockid; InteriorReader reader; /* Scan the node data until we find a term greater than our term. |
︙ | ︙ | |||
5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 | static int termSelect(fulltext_vtab *v, int iColumn, const char *pTerm, int nTerm, DocListType iType, DataBuffer *out){ DataBuffer doclist; sqlite3_stmt *s; int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); if( rc!=SQLITE_OK ) return rc; dataBufferInit(&doclist, 0); /* Traverse the segments from oldest to newest so that newer doclist ** elements for given docids overwrite older elements. */ while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){ | > > > | 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 | static int termSelect(fulltext_vtab *v, int iColumn, const char *pTerm, int nTerm, DocListType iType, DataBuffer *out){ DataBuffer doclist; sqlite3_stmt *s; int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); if( rc!=SQLITE_OK ) return rc; /* This code should never be called with buffered updates. */ assert( v->nPendingData<0 ); dataBufferInit(&doclist, 0); /* Traverse the segments from oldest to newest so that newer doclist ** elements for given docids overwrite older elements. */ while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){ |
︙ | ︙ | |||
5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 | err: dataBufferDestroy(&dl); free(pData); leafWriterDestroy(&writer); return rc; } /* This function implements the xUpdate callback; it's the top-level entry * point for inserting, deleting or updating a row in a full-text table. */ static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg, sqlite_int64 *pRowid){ fulltext_vtab *v = (fulltext_vtab *) pVtab; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < | < < | | | > > | > > > | | > > > | > > > | > | | > > > > > | > > > > > | 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 | err: dataBufferDestroy(&dl); free(pData); leafWriterDestroy(&writer); return rc; } /* If pendingTerms has data, free it. */ static int clearPendingTerms(fulltext_vtab *v){ if( v->nPendingData>=0 ){ fts2HashElem *e; for(e=fts2HashFirst(&v->pendingTerms); e; e=fts2HashNext(e)){ dlcDelete(fts2HashData(e)); } fts2HashClear(&v->pendingTerms); v->nPendingData = -1; } return SQLITE_OK; } /* If pendingTerms has data, flush it to a level-zero segment, and ** free it. */ static int flushPendingTerms(fulltext_vtab *v){ if( v->nPendingData>=0 ){ int rc = writeZeroSegment(v, &v->pendingTerms); clearPendingTerms(v); return rc; } return SQLITE_OK; } /* If pendingTerms is "too big", or docid is out of order, flush it. 
** Regardless, be certain that pendingTerms is initialized for use. */ static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid){ /* TODO(shess) Explore whether partially flushing the buffer on ** forced-flush would provide better performance. I suspect that if ** we ordered the doclists by size and flushed the largest until the ** buffer was half empty, that would let the less frequent terms ** generate longer doclists. */ if( iDocid<=v->iPrevDocid || v->nPendingData>kPendingThreshold ){ int rc = flushPendingTerms(v); if( rc!=SQLITE_OK ) return rc; } if( v->nPendingData<0 ){ fts2HashInit(&v->pendingTerms, FTS2_HASH_STRING, 1); v->nPendingData = 0; } v->iPrevDocid = iDocid; return SQLITE_OK; } /* This function implements the xUpdate callback; it's the top-level entry * point for inserting, deleting or updating a row in a full-text table. */ static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg, sqlite_int64 *pRowid){ fulltext_vtab *v = (fulltext_vtab *) pVtab; int rc; TRACE(("FTS2 Update %p\n", pVtab)); if( nArg<2 ){ rc = index_delete(v, sqlite3_value_int64(ppArg[0])); } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ /* An update: * ppArg[0] = old rowid * ppArg[1] = new rowid * ppArg[2..2+v->nColumn-1] = values * ppArg[2+v->nColumn] = value for magic column (we ignore this) */ sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]); if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER || sqlite3_value_int64(ppArg[1]) != rowid ){ rc = SQLITE_ERROR; /* we don't allow changing the rowid */ } else { assert( nArg==2+v->nColumn+1); rc = index_update(v, rowid, &ppArg[2]); } } else { /* An insert: * ppArg[1] = requested rowid * ppArg[2..2+v->nColumn-1] = values * ppArg[2+v->nColumn] = value for magic column (we ignore this) */ assert( nArg==2+v->nColumn+1); rc = index_insert(v, ppArg[1], &ppArg[2], pRowid); } return rc; } static int fulltextSync(sqlite3_vtab *pVtab){ TRACE(("FTS2 xSync()\n")); return flushPendingTerms((fulltext_vtab 
*)pVtab); } static int fulltextBegin(sqlite3_vtab *pVtab){ fulltext_vtab *v = (fulltext_vtab *) pVtab; TRACE(("FTS2 xBegin()\n")); /* Any buffered updates should have been cleared by the previous ** transaction. */ assert( v->nPendingData<0 ); return clearPendingTerms(v); } static int fulltextCommit(sqlite3_vtab *pVtab){ fulltext_vtab *v = (fulltext_vtab *) pVtab; TRACE(("FTS2 xCommit()\n")); /* Buffered updates should have been cleared by fulltextSync(). */ assert( v->nPendingData<0 ); return clearPendingTerms(v); } static int fulltextRollback(sqlite3_vtab *pVtab){ TRACE(("FTS2 xRollback()\n")); return clearPendingTerms((fulltext_vtab *)pVtab); } /* ** Implementation of the snippet() function for FTS2 */ static void snippetFunc( sqlite3_context *pContext, int argc, |
︙ | ︙ | |||
5336 5337 5338 5339 5340 5341 5342 | /* xClose */ fulltextClose, /* xFilter */ fulltextFilter, /* xNext */ fulltextNext, /* xEof */ fulltextEof, /* xColumn */ fulltextColumn, /* xRowid */ fulltextRowid, /* xUpdate */ fulltextUpdate, | | | | | | 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 | /* xClose */ fulltextClose, /* xFilter */ fulltextFilter, /* xNext */ fulltextNext, /* xEof */ fulltextEof, /* xColumn */ fulltextColumn, /* xRowid */ fulltextRowid, /* xUpdate */ fulltextUpdate, /* xBegin */ fulltextBegin, /* xSync */ fulltextSync, /* xCommit */ fulltextCommit, /* xRollback */ fulltextRollback, /* xFindFunction */ fulltextFindFunction, }; int sqlite3Fts2Init(sqlite3 *db){ sqlite3_overload_function(db, "snippet", -1); sqlite3_overload_function(db, "offsets", -1); return sqlite3_create_module(db, "fts2", &fulltextModule, 0); |
︙ | ︙ |
Added test/fts2k.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | # 2007 March 9 # # The author disclaims copyright to this source code. # #************************************************************************* # This file implements regression tests for SQLite library. These # make sure that inserted documents are visible to selects within the # transaction. # # $Id: fts2k.test,v 1.1 2007/03/29 18:41:05 shess Exp $ # set testdir [file dirname $argv0] source $testdir/tester.tcl # If SQLITE_ENABLE_FTS2 is not defined, omit this file. ifcapable !fts2 { finish_test return } db eval { CREATE VIRTUAL TABLE t1 USING fts2(content); INSERT INTO t1 (rowid, content) VALUES(1, "hello world"); INSERT INTO t1 (rowid, content) VALUES(2, "hello there"); INSERT INTO t1 (rowid, content) VALUES(3, "cruel world"); } # Test that possibly-buffered inserts went through after commit. do_test fts2k-1.1 { execsql { BEGIN TRANSACTION; INSERT INTO t1 (rowid, content) VALUES(4, "false world"); INSERT INTO t1 (rowid, content) VALUES(5, "false door"); COMMIT TRANSACTION; SELECT rowid FROM t1 WHERE t1 MATCH 'world'; } } {1 3 4} # Test that buffered inserts are seen by selects in the same # transaction. do_test fts2k-1.2 { execsql { BEGIN TRANSACTION; INSERT INTO t1 (rowid, content) VALUES(6, "another world"); INSERT INTO t1 (rowid, content) VALUES(7, "another test"); SELECT rowid FROM t1 WHERE t1 MATCH 'world'; COMMIT TRANSACTION; } } {1 3 4 6} # Test that buffered inserts are seen within a transaction. This is # really the same test as 1.2. 
do_test fts2k-1.3 { execsql { BEGIN TRANSACTION; INSERT INTO t1 (rowid, content) VALUES(8, "second world"); INSERT INTO t1 (rowid, content) VALUES(9, "second sight"); SELECT rowid FROM t1 WHERE t1 MATCH 'world'; ROLLBACK TRANSACTION; } } {1 3 4 6 8} # Double-check that the previous result doesn't persist past the # rollback! do_test fts2k-1.4 { execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'world'; } } {1 3 4 6} # Test it all together. do_test fts2k-1.5 { execsql { BEGIN TRANSACTION; INSERT INTO t1 (rowid, content) VALUES(10, "second world"); INSERT INTO t1 (rowid, content) VALUES(11, "second sight"); ROLLBACK TRANSACTION; SELECT rowid FROM t1 WHERE t1 MATCH 'world'; } } {1 3 4 6} finish_test |