Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Implement optimize() function. This merges all segments in the fts index into a single segment, including dropping delete cookies. (CVS 5417) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
b22e187bc2b38bd219dd0feba19b9727 |
User & Date: | shess 2008-07-15 21:32:07.000 |
Context
2008-07-15
| ||
22:59 | Work around bugs in older versions of the OS/2 conversion library by trying to minimize calls to UniCreateUconvObject() etc. Use global uconv objects instead. (CVS 5418) (check-in: 80e4218306 user: pweilbacher tags: trunk) | |
21:32 | Implement optimize() function. This merges all segments in the fts index into a single segment, including dropping delete cookies. (CVS 5417) (check-in: b22e187bc2 user: shess tags: trunk) | |
20:56 | Update column naming rules. Ticket #3221. Rules for column naming are still subject to change (except for the AS rule which we promise to keep the same) but are more consistent now. And the rules are tested using a new test script. (CVS 5416) (check-in: 61f6e19755 user: drh tags: trunk) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 | SEGDIR_SET_STMT, SEGDIR_SELECT_LEVEL_STMT, SEGDIR_SPAN_STMT, SEGDIR_DELETE_STMT, SEGDIR_SELECT_SEGMENT_STMT, SEGDIR_SELECT_ALL_STMT, SEGDIR_DELETE_ALL_STMT, MAX_STMT /* Always at end! */ } fulltext_statement; /* These must exactly match the enum above. */ /* TODO(shess): Is there some risk that a statement will be used in two ** cursors at once, e.g. if a query joins a virtual table to itself? | > | 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 | SEGDIR_SET_STMT, SEGDIR_SELECT_LEVEL_STMT, SEGDIR_SPAN_STMT, SEGDIR_DELETE_STMT, SEGDIR_SELECT_SEGMENT_STMT, SEGDIR_SELECT_ALL_STMT, SEGDIR_DELETE_ALL_STMT, SEGDIR_COUNT_STMT, MAX_STMT /* Always at end! */ } fulltext_statement; /* These must exactly match the enum above. */ /* TODO(shess): Is there some risk that a statement will be used in two ** cursors at once, e.g. if a query joins a virtual table to itself? |
︙ | ︙ | |||
1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 | /* SEGDIR_SELECT_SEGMENT */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? and idx = ?", /* SEGDIR_SELECT_ALL */ "select start_block, leaves_end_block, root from %_segdir " " order by level desc, idx asc", /* SEGDIR_DELETE_ALL */ "delete from %_segdir", }; /* ** A connection to a fulltext index is an instance of the following ** structure. The xCreate and xConnect methods create an instance ** of this structure and xDestroy and xDisconnect free that instance. ** All other methods receive a pointer to the structure as one of their | > | 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 | /* SEGDIR_SELECT_SEGMENT */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? and idx = ?", /* SEGDIR_SELECT_ALL */ "select start_block, leaves_end_block, root from %_segdir " " order by level desc, idx asc", /* SEGDIR_DELETE_ALL */ "delete from %_segdir", /* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir", }; /* ** A connection to a fulltext index is an instance of the following ** structure. The xCreate and xConnect methods create an instance ** of this structure and xDestroy and xDisconnect free that instance. ** All other methods receive a pointer to the structure as one of their |
︙ | ︙ | |||
2122 2123 2124 2125 2126 2127 2128 | */ static int sql_single_step(sqlite3_stmt *s){ int rc = sqlite3_step(s); return (rc==SQLITE_DONE) ? SQLITE_OK : rc; } /* Like sql_get_statement(), but for special replicated LEAF_SELECT | | > | > > | | 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 | */ static int sql_single_step(sqlite3_stmt *s){ int rc = sqlite3_step(s); return (rc==SQLITE_DONE) ? SQLITE_OK : rc; } /* Like sql_get_statement(), but for special replicated LEAF_SELECT ** statements. idx -1 is a special case for an uncached version of ** the statement (used in the optimize implementation). */ /* TODO(shess) Write version for generic statements and then share ** that between the cached-statement functions. */ static int sql_get_leaf_statement(fulltext_vtab *v, int idx, sqlite3_stmt **ppStmt){ assert( idx>=-1 && idx<MERGE_COUNT ); if( idx==-1 ){ return sql_prepare(v->db, v->zDb, v->zName, ppStmt, LEAF_SELECT); }else if( v->pLeafSelectStmts[idx]==NULL ){ int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx], LEAF_SELECT); if( rc!=SQLITE_OK ) return rc; }else{ int rc = sqlite3_reset(v->pLeafSelectStmts[idx]); if( rc!=SQLITE_OK ) return rc; } |
︙ | ︙ | |||
2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 | if( rc!=SQLITE_OK ) return rc; rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s); if( rc!=SQLITE_OK ) return rc; return sql_single_step(s); } /* TODO(shess) clearPendingTerms() is far down the file because ** writeZeroSegment() is far down the file because LeafWriter is far ** down the file. Consider refactoring the code to move the non-vtab ** code above the vtab code so that we don't need this forward ** reference. */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 | if( rc!=SQLITE_OK ) return rc; rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s); if( rc!=SQLITE_OK ) return rc; return sql_single_step(s); } /* Returns SQLITE_OK with *pnSegments set to the number of entries in ** %_segdir and *piMaxLevel set to the highest level which has a ** segment. Otherwise returns the SQLite error which caused failure. */ static int segdir_count(fulltext_vtab *v, int *pnSegments, int *piMaxLevel){ sqlite3_stmt *s; int rc = sql_get_statement(v, SEGDIR_COUNT_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_step(s); /* TODO(shess): This case should not be possible? Should stronger ** measures be taken if it happens? */ if( rc==SQLITE_DONE ){ *pnSegments = 0; *piMaxLevel = 0; return SQLITE_OK; } if( rc!=SQLITE_ROW ) return rc; *pnSegments = sqlite3_column_int(s, 0); *piMaxLevel = sqlite3_column_int(s, 1); /* We expect only one row. We must execute another sqlite3_step() * to complete the iteration; otherwise the table will remain locked. 
*/ rc = sqlite3_step(s); if( rc==SQLITE_DONE ) return SQLITE_OK; if( rc==SQLITE_ROW ) return SQLITE_ERROR; return rc; } /* TODO(shess) clearPendingTerms() is far down the file because ** writeZeroSegment() is far down the file because LeafWriter is far ** down the file. Consider refactoring the code to move the non-vtab ** code above the vtab code so that we don't need this forward ** reference. */ |
︙ | ︙ | |||
5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 | ** this case. Probably a brittle assumption. */ static int leavesReaderReset(LeavesReader *pReader){ return sqlite3_reset(pReader->pStmt); } static void leavesReaderDestroy(LeavesReader *pReader){ leafReaderDestroy(&pReader->leafReader); dataBufferDestroy(&pReader->rootData); SCRAMBLE(pReader); } /* Initialize pReader with the given root data (if iStartBlockid==0 ** the leaf data was entirely contained in the root), or from the | > > > > > > | 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 | ** this case. Probably a brittle assumption. */ static int leavesReaderReset(LeavesReader *pReader){ return sqlite3_reset(pReader->pStmt); } static void leavesReaderDestroy(LeavesReader *pReader){ /* If idx is -1, that means we're using a non-cached statement ** handle in the optimize() case, so we need to release it. */ if( pReader->pStmt!=NULL && pReader->idx==-1 ){ sqlite3_finalize(pReader->pStmt); } leafReaderDestroy(&pReader->leafReader); dataBufferDestroy(&pReader->rootData); SCRAMBLE(pReader); } /* Initialize pReader with the given root data (if iStartBlockid==0 ** the leaf data was entirely contained in the root), or from the |
︙ | ︙ | |||
6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 | snippetAllOffsets(pCursor); snippetOffsetText(&pCursor->snippet); sqlite3_result_text(pContext, pCursor->snippet.zOffset, pCursor->snippet.nOffset, SQLITE_STATIC); } } #ifdef SQLITE_TEST /* Generate an error of the form "<prefix>: <msg>". If msg is NULL, ** pull the error from the context's db handle. */ static void generateError(sqlite3_context *pContext, const char *prefix, const char *msg){ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 
6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 | snippetAllOffsets(pCursor); snippetOffsetText(&pCursor->snippet); sqlite3_result_text(pContext, pCursor->snippet.zOffset, pCursor->snippet.nOffset, SQLITE_STATIC); } } /* OptLeavesReader is nearly identical to LeavesReader, except that ** where LeavesReader is geared towards the merging of complete ** segment levels (with exactly MERGE_COUNT segments), OptLeavesReader ** is geared towards implementation of the optimize() function, and ** can merge all segments simultaneously. This version may be ** somewhat less efficient than LeavesReader because it merges into an ** accumulator rather than doing an N-way merge, but since segment ** size grows exponentially (so segment count logarithmically) this is ** probably not an immediate problem. */ /* TODO(shess): Prove that assertion, or extend the merge code to ** merge tree fashion (like the prefix-searching code does). */ /* TODO(shess): OptLeavesReader and LeavesReader could probably be ** merged with little or no loss of performance for LeavesReader. The ** merged code would need to handle >MERGE_COUNT segments, and would ** also need to be able to optionally optimize away deletes. */ typedef struct OptLeavesReader { /* Segment number, to order readers by age. 
*/ int segment; LeavesReader reader; } OptLeavesReader; static int optLeavesReaderAtEnd(OptLeavesReader *pReader){ return leavesReaderAtEnd(&pReader->reader); } static int optLeavesReaderTermBytes(OptLeavesReader *pReader){ return leavesReaderTermBytes(&pReader->reader); } static const char *optLeavesReaderData(OptLeavesReader *pReader){ return leavesReaderData(&pReader->reader); } static int optLeavesReaderDataBytes(OptLeavesReader *pReader){ return leavesReaderDataBytes(&pReader->reader); } static const char *optLeavesReaderTerm(OptLeavesReader *pReader){ return leavesReaderTerm(&pReader->reader); } static int optLeavesReaderStep(fulltext_vtab *v, OptLeavesReader *pReader){ return leavesReaderStep(v, &pReader->reader); } static int optLeavesReaderTermCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){ return leavesReaderTermCmp(&lr1->reader, &lr2->reader); } /* Order by term ascending, segment ascending (oldest to newest), with ** exhausted readers to the end. */ static int optLeavesReaderCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){ int c = optLeavesReaderTermCmp(lr1, lr2); if( c!=0 ) return c; return lr1->segment-lr2->segment; } /* Bubble pLr[0] to appropriate place in pLr[1..nLr-1]. Assumes that ** pLr[1..nLr-1] is already sorted. */ static void optLeavesReaderReorder(OptLeavesReader *pLr, int nLr){ while( nLr>1 && optLeavesReaderCmp(pLr, pLr+1)>0 ){ OptLeavesReader tmp = pLr[0]; pLr[0] = pLr[1]; pLr[1] = tmp; nLr--; pLr++; } } /* optimize() helper function. Put the readers in order and iterate ** through them, merging doclists for matching terms into pWriter. ** Returns SQLITE_OK on success, or the SQLite error code which ** prevented success. */ static int optimizeInternal(fulltext_vtab *v, OptLeavesReader *readers, int nReaders, LeafWriter *pWriter){ int i, rc = SQLITE_OK; DataBuffer doclist, merged, tmp; /* Order the readers. 
*/ i = nReaders; while( i-- > 0 ){ optLeavesReaderReorder(&readers[i], nReaders-i); } dataBufferInit(&doclist, LEAF_MAX); dataBufferInit(&merged, LEAF_MAX); /* Exhausted readers bubble to the end, so when the first reader is ** at eof, all are at eof. */ while( !optLeavesReaderAtEnd(&readers[0]) ){ /* Figure out how many readers share the next term. */ for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){ if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break; } /* Special-case for no merge. */ if( i==1 ){ /* Trim deletions from the doclist. */ dataBufferReset(&merged); docListTrim(DL_DEFAULT, optLeavesReaderData(&readers[0]), optLeavesReaderDataBytes(&readers[0]), -1, DL_DEFAULT, &merged); }else{ DLReader dlReaders[MERGE_COUNT]; int iReader, nReaders; /* Prime the pipeline with the first reader's doclist. After ** one pass index 0 will reference the accumulated doclist. */ dlrInit(&dlReaders[0], DL_DEFAULT, optLeavesReaderData(&readers[0]), optLeavesReaderDataBytes(&readers[0])); iReader = 1; assert( iReader<i ); /* Must execute the loop at least once. */ while( iReader<i ){ /* Merge 16 inputs per pass. */ for( nReaders=1; iReader<i && nReaders<MERGE_COUNT; iReader++, nReaders++ ){ dlrInit(&dlReaders[nReaders], DL_DEFAULT, optLeavesReaderData(&readers[iReader]), optLeavesReaderDataBytes(&readers[iReader])); } /* Merge doclists and swap result into accumulator. */ dataBufferReset(&merged); docListMerge(&merged, dlReaders, nReaders); tmp = merged; merged = doclist; doclist = tmp; while( nReaders-- > 0 ){ dlrDestroy(&dlReaders[nReaders]); } /* Accumulated doclist to reader 0 for next pass. */ dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); } /* Destroy reader that was left in the pipeline. */ dlrDestroy(&dlReaders[0]); /* Trim deletions from the doclist. */ dataBufferReset(&merged); docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, -1, DL_DEFAULT, &merged); } /* Only pass doclists with hits (skip if all hits deleted). 
*/ if( merged.nData>0 ){ rc = leafWriterStep(v, pWriter, optLeavesReaderTerm(&readers[0]), optLeavesReaderTermBytes(&readers[0]), merged.pData, merged.nData); if( rc!=SQLITE_OK ) goto err; } /* Step merged readers to next term and reorder. */ while( i-- > 0 ){ rc = optLeavesReaderStep(v, &readers[i]); if( rc!=SQLITE_OK ) goto err; optLeavesReaderReorder(&readers[i], nReaders-i); } } err: dataBufferDestroy(&doclist); dataBufferDestroy(&merged); return rc; } /* Implement optimize() function for FTS3. optimize(t) merges all ** segments in the fts index into a single segment. 't' is the magic ** table-named column. */ static void optimizeFunc(sqlite3_context *pContext, int argc, sqlite3_value **argv){ fulltext_cursor *pCursor; if( argc>1 ){ sqlite3_result_error(pContext, "excess arguments to optimize()",-1); }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ sqlite3_result_error(pContext, "illegal first argument to optimize",-1); }else{ fulltext_vtab *v; int i, rc, iMaxLevel; OptLeavesReader *readers; int nReaders; LeafWriter writer; sqlite3_stmt *s; memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); v = cursor_vtab(pCursor); /* Flush any buffered updates before optimizing. */ rc = flushPendingTerms(v); if( rc!=SQLITE_OK ) goto err; rc = segdir_count(v, &nReaders, &iMaxLevel); if( rc!=SQLITE_OK ) goto err; if( nReaders==0 || nReaders==1 ){ sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC); return; } rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); if( rc!=SQLITE_OK ) goto err; readers = sqlite3_malloc(nReaders*sizeof(readers[0])); if( readers==NULL ) goto err; /* Note that there will already be a segment at this position ** until we call segdir_delete() on iMaxLevel. 
*/ leafWriterInit(iMaxLevel, 0, &writer); i = 0; while( (rc = sqlite3_step(s))==SQLITE_ROW ){ sqlite_int64 iStart = sqlite3_column_int64(s, 0); sqlite_int64 iEnd = sqlite3_column_int64(s, 1); const char *pRootData = sqlite3_column_blob(s, 2); int nRootData = sqlite3_column_bytes(s, 2); assert( i<nReaders ); rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData, &readers[i].reader); if( rc!=SQLITE_OK ) break; readers[i].segment = i; i++; } /* If we managed to successfully read them all, optimize them. */ if( rc==SQLITE_DONE ){ assert( i==nReaders ); rc = optimizeInternal(v, readers, nReaders, &writer); } while( i-- > 0 ){ leavesReaderDestroy(&readers[i].reader); } sqlite3_free(readers); /* If we've successfully gotten to here, delete the old segments ** and flush the interior structure of the new segment. */ if( rc==SQLITE_OK ){ for( i=0; i<=iMaxLevel; i++ ){ rc = segdir_delete(v, i); if( rc!=SQLITE_OK ) break; } if( rc==SQLITE_OK ) rc = leafWriterFinalize(v, &writer); } leafWriterDestroy(&writer); if( rc!=SQLITE_OK ) goto err; sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC); return; /* TODO(shess): Error-handling needs to be improved along the ** lines of the dump_ functions. */ err: { char buf[512]; sqlite3_snprintf(sizeof(buf), buf, "Error in optimize: %s", sqlite3_errmsg(sqlite3_context_db_handle(pContext))); sqlite3_result_error(pContext, buf, -1); } } } #ifdef SQLITE_TEST /* Generate an error of the form "<prefix>: <msg>". If msg is NULL, ** pull the error from the context's db handle. */ static void generateError(sqlite3_context *pContext, const char *prefix, const char *msg){ |
︙ | ︙ | |||
6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 | ){ if( strcmp(zName,"snippet")==0 ){ *pxFunc = snippetFunc; return 1; }else if( strcmp(zName,"offsets")==0 ){ *pxFunc = snippetOffsetsFunc; return 1; #ifdef SQLITE_TEST /* NOTE(shess): These functions are present only for testing ** purposes. No particular effort is made to optimize their ** execution or how they build their results. */ }else if( strcmp(zName,"dump_terms")==0 ){ /* fprintf(stderr, "Found dump_terms\n"); */ | > > > | 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 | ){ if( strcmp(zName,"snippet")==0 ){ *pxFunc = snippetFunc; return 1; }else if( strcmp(zName,"offsets")==0 ){ *pxFunc = snippetOffsetsFunc; return 1; }else if( strcmp(zName,"optimize")==0 ){ *pxFunc = optimizeFunc; return 1; #ifdef SQLITE_TEST /* NOTE(shess): These functions are present only for testing ** purposes. No particular effort is made to optimize their ** execution or how they build their results. */ }else if( strcmp(zName,"dump_terms")==0 ){ /* fprintf(stderr, "Found dump_terms\n"); */ |
︙ | ︙ | |||
6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 | ** the two scalar functions. If this is successful, register the ** module with sqlite. */ if( SQLITE_OK==rc && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer")) && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) #ifdef SQLITE_TEST && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) #endif ){ return sqlite3_create_module_v2( db, "fts3", &fts3Module, (void *)pHash, hashDestroy | > | 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 | ** the two scalar functions. If this is successful, register the ** module with sqlite. */ if( SQLITE_OK==rc && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer")) && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1)) #ifdef SQLITE_TEST && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) #endif ){ return sqlite3_create_module_v2( db, "fts3", &fts3Module, (void *)pHash, hashDestroy |
︙ | ︙ |
Changes to test/fts3d.test.
1 2 3 4 5 6 7 8 9 10 11 12 13 | # 2008 June 26 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The focus # of this script is testing the FTS3 module's optimize() function. # | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | # 2008 June 26 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The focus # of this script is testing the FTS3 module's optimize() function. # # $Id: fts3d.test,v 1.2 2008/07/15 21:32:07 shess Exp $ # set testdir [file dirname $argv0] source $testdir/tester.tcl # If SQLITE_ENABLE_FTS3 is not defined, omit this file. ifcapable !fts3 { |
︙ | ︙ | |||
119 120 121 122 123 124 125 | check_terms fts3d-1.2 0 0 {a is test this} check_doclist fts3d-1.2.1 0 0 a {[1 0[2]]} check_doclist fts3d-1.2.2 0 0 is {[1 0[1]]} check_doclist fts3d-1.2.3 0 0 test {[1 0[3]]} check_doclist fts3d-1.2.4 0 0 this {[1 0[0]]} | > > > > > > > > > > > > > > > > > | > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | check_terms fts3d-1.2 0 0 {a is test this} check_doclist fts3d-1.2.1 0 0 a {[1 0[2]]} check_doclist fts3d-1.2.2 0 0 is {[1 0[1]]} check_doclist fts3d-1.2.3 0 0 test {[1 0[3]]} check_doclist fts3d-1.2.4 0 0 this {[1 0[0]]} #************************************************************************* # Test results when everything is optimized manually. 
# NOTE(shess): This is a copy of fts3c-1.3. I've pulled a copy here # because fts3d-2 and fts3d-3 should have identical results. db eval { DROP TABLE IF EXISTS t1; CREATE VIRTUAL TABLE t1 USING fts3(c); INSERT INTO t1 (docid, c) VALUES (1, 'This is a test'); INSERT INTO t1 (docid, c) VALUES (2, 'That was a test'); INSERT INTO t1 (docid, c) VALUES (3, 'This is a test'); DELETE FROM t1 WHERE docid IN (1,3); DROP TABLE IF EXISTS t1old; ALTER TABLE t1 RENAME TO t1old; CREATE VIRTUAL TABLE t1 USING fts3(c); INSERT INTO t1 (docid, c) SELECT docid, c FROM t1old; DROP TABLE t1old; } # Should be a single optimal segment with the same logical results. do_test fts3d-2.segments { execsql { SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {0 0} do_test fts3d-2.matches { execsql { SELECT OFFSETS(t1) FROM t1 WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid; } } {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}} check_terms_all fts3d-2.1 {a test that was} check_doclist_all fts3d-2.1.1 a {[2 0[2]]} check_doclist_all fts3d-2.1.2 test {[2 0[3]]} check_doclist_all fts3d-2.1.3 that {[2 0[0]]} check_doclist_all fts3d-2.1.4 was {[2 0[1]]} check_terms fts3d-2.2 0 0 {a test that was} check_doclist fts3d-2.2.1 0 0 a {[2 0[2]]} check_doclist fts3d-2.2.2 0 0 test {[2 0[3]]} check_doclist fts3d-2.2.3 0 0 that {[2 0[0]]} check_doclist fts3d-2.2.4 0 0 was {[2 0[1]]} #************************************************************************* # Test results when everything is optimized via optimize(). db eval { DROP TABLE IF EXISTS t1; CREATE VIRTUAL TABLE t1 USING fts3(c); INSERT INTO t1 (docid, c) VALUES (1, 'This is a test'); INSERT INTO t1 (docid, c) VALUES (2, 'That was a test'); INSERT INTO t1 (docid, c) VALUES (3, 'This is a test'); DELETE FROM t1 WHERE docid IN (1,3); SELECT OPTIMIZE(t1) FROM t1 LIMIT 1; } # Should be a single optimal segment with the same logical results. 
do_test fts3d-3.segments { execsql { SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {0 0} do_test fts3d-3.matches { execsql { SELECT OFFSETS(t1) FROM t1 WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid; } } {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}} check_terms_all fts3d-3.1 {a test that was} check_doclist_all fts3d-3.1.1 a {[2 0[2]]} check_doclist_all fts3d-3.1.2 test {[2 0[3]]} check_doclist_all fts3d-3.1.3 that {[2 0[0]]} check_doclist_all fts3d-3.1.4 was {[2 0[1]]} check_terms fts3d-3.2 0 0 {a test that was} check_doclist fts3d-3.2.1 0 0 a {[2 0[2]]} check_doclist fts3d-3.2.2 0 0 test {[2 0[3]]} check_doclist fts3d-3.2.3 0 0 that {[2 0[0]]} check_doclist fts3d-3.2.4 0 0 was {[2 0[1]]} #************************************************************************* # Test optimize() against a table involving segment merges. # NOTE(shess): Since there's no transaction, each of the INSERT/UPDATE # statements generates a segment. db eval { DROP TABLE IF EXISTS t1; CREATE VIRTUAL TABLE t1 USING fts3(c); INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test'); INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test'); INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test'); UPDATE t1 SET c = 'This is a test one' WHERE rowid = 1; UPDATE t1 SET c = 'That was a test one' WHERE rowid = 2; UPDATE t1 SET c = 'This is a test one' WHERE rowid = 3; UPDATE t1 SET c = 'This is a test two' WHERE rowid = 1; UPDATE t1 SET c = 'That was a test two' WHERE rowid = 2; UPDATE t1 SET c = 'This is a test two' WHERE rowid = 3; UPDATE t1 SET c = 'This is a test three' WHERE rowid = 1; UPDATE t1 SET c = 'That was a test three' WHERE rowid = 2; UPDATE t1 SET c = 'This is a test three' WHERE rowid = 3; UPDATE t1 SET c = 'This is a test four' WHERE rowid = 1; UPDATE t1 SET c = 'That was a test four' WHERE rowid = 2; UPDATE t1 SET c = 'This is a test four' WHERE rowid = 3; UPDATE t1 SET c = 'This is a test' WHERE rowid = 1; UPDATE t1 SET c = 'That was a test' WHERE rowid = 2; 
UPDATE t1 SET c = 'This is a test' WHERE rowid = 3; } # 2 segments in level 0, 1 in level 1 (18 segments created, 16 # merged). do_test fts3d-4.segments { execsql { SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {0 0 0 1 1 0} do_test fts3d-4.matches { execsql { SELECT OFFSETS(t1) FROM t1 WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid; } } [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \ {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \ {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}] check_terms_all fts3d-4.1 {a four is one test that this three two was} check_doclist_all fts3d-4.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]} check_doclist_all fts3d-4.1.2 four {} check_doclist_all fts3d-4.1.3 is {[1 0[1]] [3 0[1]]} check_doclist_all fts3d-4.1.4 one {} check_doclist_all fts3d-4.1.5 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist_all fts3d-4.1.6 that {[2 0[0]]} check_doclist_all fts3d-4.1.7 this {[1 0[0]] [3 0[0]]} check_doclist_all fts3d-4.1.8 three {} check_doclist_all fts3d-4.1.9 two {} check_doclist_all fts3d-4.1.10 was {[2 0[1]]} check_terms fts3d-4.2 0 0 {a four test that was} check_doclist fts3d-4.2.1 0 0 a {[2 0[2]]} check_doclist fts3d-4.2.2 0 0 four {[2]} check_doclist fts3d-4.2.3 0 0 test {[2 0[3]]} check_doclist fts3d-4.2.4 0 0 that {[2 0[0]]} check_doclist fts3d-4.2.5 0 0 was {[2 0[1]]} check_terms fts3d-4.3 0 1 {a four is test this} check_doclist fts3d-4.3.1 0 1 a {[3 0[2]]} check_doclist fts3d-4.3.2 0 1 four {[3]} check_doclist fts3d-4.3.3 0 1 is {[3 0[1]]} check_doclist fts3d-4.3.4 0 1 test {[3 0[3]]} check_doclist fts3d-4.3.5 0 1 this {[3 0[0]]} check_terms fts3d-4.4 1 0 {a four is one test that this three two was} check_doclist fts3d-4.4.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]} check_doclist fts3d-4.4.2 1 0 four {[1] [2 0[4]] [3 0[4]]} check_doclist fts3d-4.4.3 1 0 is {[1 0[1]] [3 0[1]]} check_doclist fts3d-4.4.4 1 0 one {[1] [2] [3]} check_doclist fts3d-4.4.5 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist fts3d-4.4.6 1 0 that {[2 0[0]]} check_doclist 
fts3d-4.4.7 1 0 this {[1 0[0]] [3 0[0]]} check_doclist fts3d-4.4.8 1 0 three {[1] [2] [3]} check_doclist fts3d-4.4.9 1 0 two {[1] [2] [3]} check_doclist fts3d-4.4.10 1 0 was {[2 0[1]]} # Optimize should leave the result in the level of the highest-level # prior segment. do_test fts3d-4.5 { execsql { SELECT OPTIMIZE(t1) FROM t1 LIMIT 1; SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {{Index optimized} 1 0} # Identical to fts3d-4.matches. do_test fts3d-4.5.matches { execsql { SELECT OFFSETS(t1) FROM t1 WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid; } } [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \ {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \ {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}] check_terms_all fts3d-4.5.1 {a is test that this was} check_doclist_all fts3d-4.5.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]} check_doclist_all fts3d-4.5.1.2 is {[1 0[1]] [3 0[1]]} check_doclist_all fts3d-4.5.1.3 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist_all fts3d-4.5.1.4 that {[2 0[0]]} check_doclist_all fts3d-4.5.1.5 this {[1 0[0]] [3 0[0]]} check_doclist_all fts3d-4.5.1.6 was {[2 0[1]]} check_terms fts3d-4.5.2 1 0 {a is test that this was} check_doclist fts3d-4.5.2.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]} check_doclist fts3d-4.5.2.2 1 0 is {[1 0[1]] [3 0[1]]} check_doclist fts3d-4.5.2.3 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist fts3d-4.5.2.4 1 0 that {[2 0[0]]} check_doclist fts3d-4.5.2.5 1 0 this {[1 0[0]] [3 0[0]]} check_doclist fts3d-4.5.2.6 1 0 was {[2 0[1]]} # Re-optimizing does nothing. do_test fts3d-5.0 { execsql { SELECT OPTIMIZE(t1) FROM t1 LIMIT 1; SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {{Index already optimal} 1 0} # Even if we move things around, still does nothing. do_test fts3d-5.1 { execsql { UPDATE t1_segdir SET level = 2 WHERE level = 1 AND idx = 0; SELECT OPTIMIZE(t1) FROM t1 LIMIT 1; SELECT level, idx FROM t1_segdir ORDER BY level, idx; } } {{Index already optimal} 2 0} finish_test |