Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Implement optimize() function. Backports check-in (5417) from fts3. (CVS 5458) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
c16900dc7603cab30f8729b25361bc88 |
User & Date: | shess 2008-07-22 23:49:44.000 |
Context
2008-07-22
| ||
23:54 | Be a bit more suspicious of invalid results from the tokenizer. Backports check-in (4514) from fts3. (CVS 5459) (check-in: 311aeb9c2b user: shess tags: trunk) | |
23:49 | Implement optimize() function. Backports check-in (5417) from fts3. (CVS 5458) (check-in: c16900dc76 user: shess tags: trunk) | |
23:41 | Delete all fts2 index data when the table becomes empty. Backports check-in (5413) from fts3. (CVS 5457) (check-in: 4c98179be2 user: shess tags: trunk) | |
Changes
Changes to ext/fts2/fts2.c.
︙ | ︙ | |||
1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 | SEGDIR_SET_STMT, SEGDIR_SELECT_LEVEL_STMT, SEGDIR_SPAN_STMT, SEGDIR_DELETE_STMT, SEGDIR_SELECT_SEGMENT_STMT, SEGDIR_SELECT_ALL_STMT, SEGDIR_DELETE_ALL_STMT, MAX_STMT /* Always at end! */ } fulltext_statement; /* These must exactly match the enum above. */ /* TODO(shess): Is there some risk that a statement will be used in two ** cursors at once, e.g. if a query joins a virtual table to itself? | > | 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 | SEGDIR_SET_STMT, SEGDIR_SELECT_LEVEL_STMT, SEGDIR_SPAN_STMT, SEGDIR_DELETE_STMT, SEGDIR_SELECT_SEGMENT_STMT, SEGDIR_SELECT_ALL_STMT, SEGDIR_DELETE_ALL_STMT, SEGDIR_COUNT_STMT, MAX_STMT /* Always at end! */ } fulltext_statement; /* These must exactly match the enum above. */ /* TODO(shess): Is there some risk that a statement will be used in two ** cursors at once, e.g. if a query joins a virtual table to itself? |
︙ | ︙ | |||
1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 | /* SEGDIR_SELECT_SEGMENT */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? and idx = ?", /* SEGDIR_SELECT_ALL */ "select start_block, leaves_end_block, root from %_segdir " " order by level desc, idx asc", /* SEGDIR_DELETE_ALL */ "delete from %_segdir", }; /* ** A connection to a fulltext index is an instance of the following ** structure. The xCreate and xConnect methods create an instance ** of this structure and xDestroy and xDisconnect free that instance. ** All other methods receive a pointer to the structure as one of their | > | 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 | /* SEGDIR_SELECT_SEGMENT */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? and idx = ?", /* SEGDIR_SELECT_ALL */ "select start_block, leaves_end_block, root from %_segdir " " order by level desc, idx asc", /* SEGDIR_DELETE_ALL */ "delete from %_segdir", /* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir", }; /* ** A connection to a fulltext index is an instance of the following ** structure. The xCreate and xConnect methods create an instance ** of this structure and xDestroy and xDisconnect free that instance. ** All other methods receive a pointer to the structure as one of their |
︙ | ︙ | |||
1976 1977 1978 1979 1980 1981 1982 | */ static int sql_single_step(sqlite3_stmt *s){ int rc = sqlite3_step(s); return (rc==SQLITE_DONE) ? SQLITE_OK : rc; } /* Like sql_get_statement(), but for special replicated LEAF_SELECT | | > | > > | | 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 | */ static int sql_single_step(sqlite3_stmt *s){ int rc = sqlite3_step(s); return (rc==SQLITE_DONE) ? SQLITE_OK : rc; } /* Like sql_get_statement(), but for special replicated LEAF_SELECT ** statements. idx -1 is a special case for an uncached version of ** the statement (used in the optimize implementation). */ /* TODO(shess) Write version for generic statements and then share ** that between the cached-statement functions. */ static int sql_get_leaf_statement(fulltext_vtab *v, int idx, sqlite3_stmt **ppStmt){ assert( idx>=-1 && idx<MERGE_COUNT ); if( idx==-1 ){ return sql_prepare(v->db, v->zDb, v->zName, ppStmt, LEAF_SELECT); }else if( v->pLeafSelectStmts[idx]==NULL ){ int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx], LEAF_SELECT); if( rc!=SQLITE_OK ) return rc; }else{ int rc = sqlite3_reset(v->pLeafSelectStmts[idx]); if( rc!=SQLITE_OK ) return rc; } |
︙ | ︙ | |||
2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 | if( rc!=SQLITE_OK ) return rc; rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s); if( rc!=SQLITE_OK ) return rc; return sql_single_step(s); } /* TODO(shess) clearPendingTerms() is far down the file because ** writeZeroSegment() is far down the file because LeafWriter is far ** down the file. Consider refactoring the code to move the non-vtab ** code above the vtab code so that we don't need this forward ** reference. */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 | if( rc!=SQLITE_OK ) return rc; rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s); if( rc!=SQLITE_OK ) return rc; return sql_single_step(s); } /* Returns SQLITE_OK with *pnSegments set to the number of entries in ** %_segdir and *piMaxLevel set to the highest level which has a ** segment. Otherwise returns the SQLite error which caused failure. */ static int segdir_count(fulltext_vtab *v, int *pnSegments, int *piMaxLevel){ sqlite3_stmt *s; int rc = sql_get_statement(v, SEGDIR_COUNT_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_step(s); /* TODO(shess): This case should not be possible? Should stronger ** measures be taken if it happens? */ if( rc==SQLITE_DONE ){ *pnSegments = 0; *piMaxLevel = 0; return SQLITE_OK; } if( rc!=SQLITE_ROW ) return rc; *pnSegments = sqlite3_column_int(s, 0); *piMaxLevel = sqlite3_column_int(s, 1); /* We expect only one row. We must execute another sqlite3_step() * to complete the iteration; otherwise the table will remain locked. 
*/ rc = sqlite3_step(s); if( rc==SQLITE_DONE ) return SQLITE_OK; if( rc==SQLITE_ROW ) return SQLITE_ERROR; return rc; } /* TODO(shess) clearPendingTerms() is far down the file because ** writeZeroSegment() is far down the file because LeafWriter is far ** down the file. Consider refactoring the code to move the non-vtab ** code above the vtab code so that we don't need this forward ** reference. */ |
︙ | ︙ | |||
5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 | ** this case. Probably a brittle assumption. */ static int leavesReaderReset(LeavesReader *pReader){ return sqlite3_reset(pReader->pStmt); } static void leavesReaderDestroy(LeavesReader *pReader){ leafReaderDestroy(&pReader->leafReader); dataBufferDestroy(&pReader->rootData); SCRAMBLE(pReader); } /* Initialize pReader with the given root data (if iStartBlockid==0 ** the leaf data was entirely contained in the root), or from the | > > > > > > | 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 | ** this case. Probably a brittle assumption. */ static int leavesReaderReset(LeavesReader *pReader){ return sqlite3_reset(pReader->pStmt); } static void leavesReaderDestroy(LeavesReader *pReader){ /* If idx is -1, that means we're using a non-cached statement ** handle in the optimize() case, so we need to release it. */ if( pReader->pStmt!=NULL && pReader->idx==-1 ){ sqlite3_finalize(pReader->pStmt); } leafReaderDestroy(&pReader->leafReader); dataBufferDestroy(&pReader->rootData); SCRAMBLE(pReader); } /* Initialize pReader with the given root data (if iStartBlockid==0 ** the leaf data was entirely contained in the root), or from the |
︙ | ︙ | |||
5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 | snippetAllOffsets(pCursor); snippetOffsetText(&pCursor->snippet); sqlite3_result_text(pContext, pCursor->snippet.zOffset, pCursor->snippet.nOffset, SQLITE_STATIC); } } #ifdef SQLITE_TEST /* Generate an error of the form "<prefix>: <msg>". If msg is NULL, ** pull the error from the context's db handle. */ static void generateError(sqlite3_context *pContext, const char *prefix, const char *msg){ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 
6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 | snippetAllOffsets(pCursor); snippetOffsetText(&pCursor->snippet); sqlite3_result_text(pContext, pCursor->snippet.zOffset, pCursor->snippet.nOffset, SQLITE_STATIC); } } /* OptLeavesReader is nearly identical to LeavesReader, except that ** where LeavesReader is geared towards the merging of complete ** segment levels (with exactly MERGE_COUNT segments), OptLeavesReader ** is geared towards implementation of the optimize() function, and ** can merge all segments simultaneously. This version may be ** somewhat less efficient than LeavesReader because it merges into an ** accumulator rather than doing an N-way merge, but since segment ** size grows exponentially (so segment count logrithmically) this is ** probably not an immediate problem. */ /* TODO(shess): Prove that assertion, or extend the merge code to ** merge tree fashion (like the prefix-searching code does). */ /* TODO(shess): OptLeavesReader and LeavesReader could probably be ** merged with little or no loss of performance for LeavesReader. The ** merged code would need to handle >MERGE_COUNT segments, and would ** also need to be able to optionally optimize away deletes. */ typedef struct OptLeavesReader { /* Segment number, to order readers by age. 
*/ int segment; LeavesReader reader; } OptLeavesReader; static int optLeavesReaderAtEnd(OptLeavesReader *pReader){ return leavesReaderAtEnd(&pReader->reader); } static int optLeavesReaderTermBytes(OptLeavesReader *pReader){ return leavesReaderTermBytes(&pReader->reader); } static const char *optLeavesReaderData(OptLeavesReader *pReader){ return leavesReaderData(&pReader->reader); } static int optLeavesReaderDataBytes(OptLeavesReader *pReader){ return leavesReaderDataBytes(&pReader->reader); } static const char *optLeavesReaderTerm(OptLeavesReader *pReader){ return leavesReaderTerm(&pReader->reader); } static int optLeavesReaderStep(fulltext_vtab *v, OptLeavesReader *pReader){ return leavesReaderStep(v, &pReader->reader); } static int optLeavesReaderTermCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){ return leavesReaderTermCmp(&lr1->reader, &lr2->reader); } /* Order by term ascending, segment ascending (oldest to newest), with ** exhausted readers to the end. */ static int optLeavesReaderCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){ int c = optLeavesReaderTermCmp(lr1, lr2); if( c!=0 ) return c; return lr1->segment-lr2->segment; } /* Bubble pLr[0] to appropriate place in pLr[1..nLr-1]. Assumes that ** pLr[1..nLr-1] is already sorted. */ static void optLeavesReaderReorder(OptLeavesReader *pLr, int nLr){ while( nLr>1 && optLeavesReaderCmp(pLr, pLr+1)>0 ){ OptLeavesReader tmp = pLr[0]; pLr[0] = pLr[1]; pLr[1] = tmp; nLr--; pLr++; } } /* optimize() helper function. Put the readers in order and iterate ** through them, merging doclists for matching terms into pWriter. ** Returns SQLITE_OK on success, or the SQLite error code which ** prevented success. */ static int optimizeInternal(fulltext_vtab *v, OptLeavesReader *readers, int nReaders, LeafWriter *pWriter){ int i, rc = SQLITE_OK; DataBuffer doclist, merged, tmp; /* Order the readers. 
*/ i = nReaders; while( i-- > 0 ){ optLeavesReaderReorder(&readers[i], nReaders-i); } dataBufferInit(&doclist, LEAF_MAX); dataBufferInit(&merged, LEAF_MAX); /* Exhausted readers bubble to the end, so when the first reader is ** at eof, all are at eof. */ while( !optLeavesReaderAtEnd(&readers[0]) ){ /* Figure out how many readers share the next term. */ for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){ if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break; } /* Special-case for no merge. */ if( i==1 ){ /* Trim deletions from the doclist. */ dataBufferReset(&merged); docListTrim(DL_DEFAULT, optLeavesReaderData(&readers[0]), optLeavesReaderDataBytes(&readers[0]), -1, DL_DEFAULT, &merged); }else{ DLReader dlReaders[MERGE_COUNT]; int iReader, nReaders; /* Prime the pipeline with the first reader's doclist. After ** one pass index 0 will reference the accumulated doclist. */ dlrInit(&dlReaders[0], DL_DEFAULT, optLeavesReaderData(&readers[0]), optLeavesReaderDataBytes(&readers[0])); iReader = 1; assert( iReader<i ); /* Must execute the loop at least once. */ while( iReader<i ){ /* Merge 16 inputs per pass. */ for( nReaders=1; iReader<i && nReaders<MERGE_COUNT; iReader++, nReaders++ ){ dlrInit(&dlReaders[nReaders], DL_DEFAULT, optLeavesReaderData(&readers[iReader]), optLeavesReaderDataBytes(&readers[iReader])); } /* Merge doclists and swap result into accumulator. */ dataBufferReset(&merged); docListMerge(&merged, dlReaders, nReaders); tmp = merged; merged = doclist; doclist = tmp; while( nReaders-- > 0 ){ dlrDestroy(&dlReaders[nReaders]); } /* Accumulated doclist to reader 0 for next pass. */ dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); } /* Destroy reader that was left in the pipeline. */ dlrDestroy(&dlReaders[0]); /* Trim deletions from the doclist. */ dataBufferReset(&merged); docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, -1, DL_DEFAULT, &merged); } /* Only pass doclists with hits (skip if all hits deleted). 
*/ if( merged.nData>0 ){ rc = leafWriterStep(v, pWriter, optLeavesReaderTerm(&readers[0]), optLeavesReaderTermBytes(&readers[0]), merged.pData, merged.nData); if( rc!=SQLITE_OK ) goto err; } /* Step merged readers to next term and reorder. */ while( i-- > 0 ){ rc = optLeavesReaderStep(v, &readers[i]); if( rc!=SQLITE_OK ) goto err; optLeavesReaderReorder(&readers[i], nReaders-i); } } err: dataBufferDestroy(&doclist); dataBufferDestroy(&merged); return rc; } /* Implement optimize() function for FTS3. optimize(t) merges all ** segments in the fts index into a single segment. 't' is the magic ** table-named column. */ static void optimizeFunc(sqlite3_context *pContext, int argc, sqlite3_value **argv){ fulltext_cursor *pCursor; if( argc>1 ){ sqlite3_result_error(pContext, "excess arguments to optimize()",-1); }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ sqlite3_result_error(pContext, "illegal first argument to optimize",-1); }else{ fulltext_vtab *v; int i, rc, iMaxLevel; OptLeavesReader *readers; int nReaders; LeafWriter writer; sqlite3_stmt *s; memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); v = cursor_vtab(pCursor); /* Flush any buffered updates before optimizing. */ rc = flushPendingTerms(v); if( rc!=SQLITE_OK ) goto err; rc = segdir_count(v, &nReaders, &iMaxLevel); if( rc!=SQLITE_OK ) goto err; if( nReaders==0 || nReaders==1 ){ sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC); return; } rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); if( rc!=SQLITE_OK ) goto err; readers = sqlite3_malloc(nReaders*sizeof(readers[0])); if( readers==NULL ) goto err; /* Note that there will already be a segment at this position ** until we call segdir_delete() on iMaxLevel. 
*/ leafWriterInit(iMaxLevel, 0, &writer); i = 0; while( (rc = sqlite3_step(s))==SQLITE_ROW ){ sqlite_int64 iStart = sqlite3_column_int64(s, 0); sqlite_int64 iEnd = sqlite3_column_int64(s, 1); const char *pRootData = sqlite3_column_blob(s, 2); int nRootData = sqlite3_column_bytes(s, 2); assert( i<nReaders ); rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData, &readers[i].reader); if( rc!=SQLITE_OK ) break; readers[i].segment = i; i++; } /* If we managed to succesfully read them all, optimize them. */ if( rc==SQLITE_DONE ){ assert( i==nReaders ); rc = optimizeInternal(v, readers, nReaders, &writer); } while( i-- > 0 ){ leavesReaderDestroy(&readers[i].reader); } sqlite3_free(readers); /* If we've successfully gotten to here, delete the old segments ** and flush the interior structure of the new segment. */ if( rc==SQLITE_OK ){ for( i=0; i<=iMaxLevel; i++ ){ rc = segdir_delete(v, i); if( rc!=SQLITE_OK ) break; } if( rc==SQLITE_OK ) rc = leafWriterFinalize(v, &writer); } leafWriterDestroy(&writer); if( rc!=SQLITE_OK ) goto err; sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC); return; /* TODO(shess): Error-handling needs to be improved along the ** lines of the dump_ functions. */ err: { char buf[512]; sqlite3_snprintf(sizeof(buf), buf, "Error in optimize: %s", sqlite3_errmsg(sqlite3_context_db_handle(pContext))); sqlite3_result_error(pContext, buf, -1); } } } #ifdef SQLITE_TEST /* Generate an error of the form "<prefix>: <msg>". If msg is NULL, ** pull the error from the context's db handle. */ static void generateError(sqlite3_context *pContext, const char *prefix, const char *msg){ |
︙ | ︙ | |||
6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 | ){ if( strcmp(zName,"snippet")==0 ){ *pxFunc = snippetFunc; return 1; }else if( strcmp(zName,"offsets")==0 ){ *pxFunc = snippetOffsetsFunc; return 1; #ifdef SQLITE_TEST /* NOTE(shess): These functions are present only for testing ** purposes. No particular effort is made to optimize their ** execution or how they build their results. */ }else if( strcmp(zName,"dump_terms")==0 ){ /* fprintf(stderr, "Found dump_terms\n"); */ | > > > | 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 | ){ if( strcmp(zName,"snippet")==0 ){ *pxFunc = snippetFunc; return 1; }else if( strcmp(zName,"offsets")==0 ){ *pxFunc = snippetOffsetsFunc; return 1; }else if( strcmp(zName,"optimize")==0 ){ *pxFunc = optimizeFunc; return 1; #ifdef SQLITE_TEST /* NOTE(shess): These functions are present only for testing ** purposes. No particular effort is made to optimize their ** execution or how they build their results. */ }else if( strcmp(zName,"dump_terms")==0 ){ /* fprintf(stderr, "Found dump_terms\n"); */ |
︙ | ︙ | |||
6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 | ** the two scalar functions. If this is successful, register the ** module with sqlite. */ if( SQLITE_OK==rc && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) #ifdef SQLITE_TEST && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) #endif ){ return sqlite3_create_module_v2( db, "fts2", &fts2Module, (void *)pHash, hashDestroy | > | 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 | ** the two scalar functions. If this is successful, register the ** module with sqlite. */ if( SQLITE_OK==rc && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1)) #ifdef SQLITE_TEST && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) #endif ){ return sqlite3_create_module_v2( db, "fts2", &fts2Module, (void *)pHash, hashDestroy |
︙ | ︙ |
Changes to test/fts2q.test.
1 2 3 4 5 6 7 8 9 10 11 12 13 | # 2008 June 26 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The focus # of this script is testing the FTS2 module's optimize() function. # | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | # 2008 June 26 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The focus # of this script is testing the FTS2 module's optimize() function. # # $Id: fts2q.test,v 1.2 2008/07/22 23:49:44 shess Exp $ # set testdir [file dirname $argv0] source $testdir/tester.tcl # If SQLITE_ENABLE_FTS2 is not defined, omit this file. ifcapable !fts2 { |
︙ | ︙ | |||
119 120 121 122 123 124 125 | check_terms fts2q-1.2 0 0 {a is test this} check_doclist fts2q-1.2.1 0 0 a {[1 0[2]]} check_doclist fts2q-1.2.2 0 0 is {[1 0[1]]} check_doclist fts2q-1.2.3 0 0 test {[1 0[3]]} check_doclist fts2q-1.2.4 0 0 this {[1 0[0]]} | > > > > > > > > > > > > > > > > > | > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | check_terms fts2q-1.2 0 0 {a is test this} check_doclist fts2q-1.2.1 0 0 a {[1 0[2]]} check_doclist fts2q-1.2.2 0 0 is {[1 0[1]]} check_doclist fts2q-1.2.3 0 0 test {[1 0[3]]} check_doclist fts2q-1.2.4 0 0 this {[1 0[0]]} #************************************************************************* # Test results when everything is optimized manually. 
# NOTE(shess): This is a copy of fts2c-1.3. I've pulled a copy here # because fts2q-2 and fts2q-3 should have identical results. db eval { DROP TABLE IF EXISTS t1; CREATE VIRTUAL TABLE t1 USING fts2(c); INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test'); INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test'); INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test'); DELETE FROM t1 WHERE rowid IN (1,3); DROP TABLE IF EXISTS t1old; ALTER TABLE t1 RENAME TO t1old; CREATE VIRTUAL TABLE t1 USING fts2(c); INSERT INTO t1 (rowid, c) SELECT rowid, c FROM t1old; DROP TABLE t1old; } # Should be a single optimal segment with the same logical results. do_test fts2q-2.segments { execsql { SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {0 0} do_test fts2q-2.matches { execsql { SELECT OFFSETS(t1) FROM t1 WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid; } } {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}} check_terms_all fts2q-2.1 {a test that was} check_doclist_all fts2q-2.1.1 a {[2 0[2]]} check_doclist_all fts2q-2.1.2 test {[2 0[3]]} check_doclist_all fts2q-2.1.3 that {[2 0[0]]} check_doclist_all fts2q-2.1.4 was {[2 0[1]]} check_terms fts2q-2.2 0 0 {a test that was} check_doclist fts2q-2.2.1 0 0 a {[2 0[2]]} check_doclist fts2q-2.2.2 0 0 test {[2 0[3]]} check_doclist fts2q-2.2.3 0 0 that {[2 0[0]]} check_doclist fts2q-2.2.4 0 0 was {[2 0[1]]} #************************************************************************* # Test results when everything is optimized via optimize(). db eval { DROP TABLE IF EXISTS t1; CREATE VIRTUAL TABLE t1 USING fts2(c); INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test'); INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test'); INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test'); DELETE FROM t1 WHERE rowid IN (1,3); SELECT OPTIMIZE(t1) FROM t1 LIMIT 1; } # Should be a single optimal segment with the same logical results. 
do_test fts2q-3.segments { execsql { SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {0 0} do_test fts2q-3.matches { execsql { SELECT OFFSETS(t1) FROM t1 WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid; } } {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}} check_terms_all fts2q-3.1 {a test that was} check_doclist_all fts2q-3.1.1 a {[2 0[2]]} check_doclist_all fts2q-3.1.2 test {[2 0[3]]} check_doclist_all fts2q-3.1.3 that {[2 0[0]]} check_doclist_all fts2q-3.1.4 was {[2 0[1]]} check_terms fts2q-3.2 0 0 {a test that was} check_doclist fts2q-3.2.1 0 0 a {[2 0[2]]} check_doclist fts2q-3.2.2 0 0 test {[2 0[3]]} check_doclist fts2q-3.2.3 0 0 that {[2 0[0]]} check_doclist fts2q-3.2.4 0 0 was {[2 0[1]]} #************************************************************************* # Test optimize() against a table involving segment merges. # NOTE(shess): Since there's no transaction, each of the INSERT/UPDATE # statements generates a segment. db eval { DROP TABLE IF EXISTS t1; CREATE VIRTUAL TABLE t1 USING fts2(c); INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test'); INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test'); INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test'); UPDATE t1 SET c = 'This is a test one' WHERE rowid = 1; UPDATE t1 SET c = 'That was a test one' WHERE rowid = 2; UPDATE t1 SET c = 'This is a test one' WHERE rowid = 3; UPDATE t1 SET c = 'This is a test two' WHERE rowid = 1; UPDATE t1 SET c = 'That was a test two' WHERE rowid = 2; UPDATE t1 SET c = 'This is a test two' WHERE rowid = 3; UPDATE t1 SET c = 'This is a test three' WHERE rowid = 1; UPDATE t1 SET c = 'That was a test three' WHERE rowid = 2; UPDATE t1 SET c = 'This is a test three' WHERE rowid = 3; UPDATE t1 SET c = 'This is a test four' WHERE rowid = 1; UPDATE t1 SET c = 'That was a test four' WHERE rowid = 2; UPDATE t1 SET c = 'This is a test four' WHERE rowid = 3; UPDATE t1 SET c = 'This is a test' WHERE rowid = 1; UPDATE t1 SET c = 'That was a test' WHERE rowid = 2; 
UPDATE t1 SET c = 'This is a test' WHERE rowid = 3; } # 2 segments in level 0, 1 in level 1 (18 segments created, 16 # merged). do_test fts2q-4.segments { execsql { SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {0 0 0 1 1 0} do_test fts2q-4.matches { execsql { SELECT OFFSETS(t1) FROM t1 WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid; } } [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \ {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \ {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}] check_terms_all fts2q-4.1 {a four is one test that this three two was} check_doclist_all fts2q-4.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]} check_doclist_all fts2q-4.1.2 four {} check_doclist_all fts2q-4.1.3 is {[1 0[1]] [3 0[1]]} check_doclist_all fts2q-4.1.4 one {} check_doclist_all fts2q-4.1.5 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist_all fts2q-4.1.6 that {[2 0[0]]} check_doclist_all fts2q-4.1.7 this {[1 0[0]] [3 0[0]]} check_doclist_all fts2q-4.1.8 three {} check_doclist_all fts2q-4.1.9 two {} check_doclist_all fts2q-4.1.10 was {[2 0[1]]} check_terms fts2q-4.2 0 0 {a four test that was} check_doclist fts2q-4.2.1 0 0 a {[2 0[2]]} check_doclist fts2q-4.2.2 0 0 four {[2]} check_doclist fts2q-4.2.3 0 0 test {[2 0[3]]} check_doclist fts2q-4.2.4 0 0 that {[2 0[0]]} check_doclist fts2q-4.2.5 0 0 was {[2 0[1]]} check_terms fts2q-4.3 0 1 {a four is test this} check_doclist fts2q-4.3.1 0 1 a {[3 0[2]]} check_doclist fts2q-4.3.2 0 1 four {[3]} check_doclist fts2q-4.3.3 0 1 is {[3 0[1]]} check_doclist fts2q-4.3.4 0 1 test {[3 0[3]]} check_doclist fts2q-4.3.5 0 1 this {[3 0[0]]} check_terms fts2q-4.4 1 0 {a four is one test that this three two was} check_doclist fts2q-4.4.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]} check_doclist fts2q-4.4.2 1 0 four {[1] [2 0[4]] [3 0[4]]} check_doclist fts2q-4.4.3 1 0 is {[1 0[1]] [3 0[1]]} check_doclist fts2q-4.4.4 1 0 one {[1] [2] [3]} check_doclist fts2q-4.4.5 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist fts2q-4.4.6 1 0 that {[2 0[0]]} check_doclist 
fts2q-4.4.7 1 0 this {[1 0[0]] [3 0[0]]} check_doclist fts2q-4.4.8 1 0 three {[1] [2] [3]} check_doclist fts2q-4.4.9 1 0 two {[1] [2] [3]} check_doclist fts2q-4.4.10 1 0 was {[2 0[1]]} # Optimize should leave the result in the level of the highest-level # prior segment. do_test fts2q-4.5 { execsql { SELECT OPTIMIZE(t1) FROM t1 LIMIT 1; SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {{Index optimized} 1 0} # Identical to fts2q-4.matches. do_test fts2q-4.5.matches { execsql { SELECT OFFSETS(t1) FROM t1 WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid; } } [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \ {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \ {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}] check_terms_all fts2q-4.5.1 {a is test that this was} check_doclist_all fts2q-4.5.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]} check_doclist_all fts2q-4.5.1.2 is {[1 0[1]] [3 0[1]]} check_doclist_all fts2q-4.5.1.3 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist_all fts2q-4.5.1.4 that {[2 0[0]]} check_doclist_all fts2q-4.5.1.5 this {[1 0[0]] [3 0[0]]} check_doclist_all fts2q-4.5.1.6 was {[2 0[1]]} check_terms fts2q-4.5.2 1 0 {a is test that this was} check_doclist fts2q-4.5.2.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]} check_doclist fts2q-4.5.2.2 1 0 is {[1 0[1]] [3 0[1]]} check_doclist fts2q-4.5.2.3 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist fts2q-4.5.2.4 1 0 that {[2 0[0]]} check_doclist fts2q-4.5.2.5 1 0 this {[1 0[0]] [3 0[0]]} check_doclist fts2q-4.5.2.6 1 0 was {[2 0[1]]} # Re-optimizing does nothing. do_test fts2q-5.0 { execsql { SELECT OPTIMIZE(t1) FROM t1 LIMIT 1; SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {{Index already optimal} 1 0} # Even if we move things around, still does nothing. do_test fts2q-5.1 { execsql { UPDATE t1_segdir SET level = 2 WHERE level = 1 AND idx = 0; SELECT OPTIMIZE(t1) FROM t1 LIMIT 1; SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {{Index already optimal} 2 0} finish_test |