Check-in [b22e187b]

Overview
Comment: Implement optimize() function. This merges all segments in the fts index into a single segment, including dropping delete cookies. (CVS 5417)
SHA1: b22e187bc2b38bd219dd0feba19b97279bd83089
User & Date: shess 2008-07-15 21:32:07
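
For reference, the new scalar function is invoked through the magic table-named column, the same way snippet() and offsets() are. A minimal usage sketch, assuming an fts3 table named t1 (table name and contents are illustrative; the call itself follows the tests added in this check-in):

    CREATE VIRTUAL TABLE t1 USING fts3(c);
    INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
    INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
    DELETE FROM t1 WHERE docid = 1;

    -- Merge all index segments into one and drop delete cookies.
    -- Returns 'Index optimized', or 'Index already optimal' when the
    -- index already consists of at most one segment.
    SELECT optimize(t1) FROM t1 LIMIT 1;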
Context
2008-07-15
22:59
Work around bugs in older versions of the OS/2 conversion library by trying to minimize calls to UniCreateUconvObject() etc. Use global uconv objects instead. (CVS 5418) check-in: 80e42183 user: pweilbacher tags: trunk
21:32
Implement optimize() function. This merges all segments in the fts index into a single segment, including dropping delete cookies. (CVS 5417) check-in: b22e187b user: shess tags: trunk
20:56
Update column naming rules. Ticket #3221. Rules for column naming are still subject to change (except for the AS rule which we promise to keep the same) but are more consistent now. And the rules are tested using a new test script. (CVS 5416) check-in: 61f6e197 user: drh tags: trunk
Changes

Changes to ext/fts3/fts3.c.

  1916   1916     SEGDIR_SET_STMT,
  1917   1917     SEGDIR_SELECT_LEVEL_STMT,
  1918   1918     SEGDIR_SPAN_STMT,
  1919   1919     SEGDIR_DELETE_STMT,
  1920   1920     SEGDIR_SELECT_SEGMENT_STMT,
  1921   1921     SEGDIR_SELECT_ALL_STMT,
  1922   1922     SEGDIR_DELETE_ALL_STMT,
         1923  +  SEGDIR_COUNT_STMT,
  1923   1924   
  1924   1925     MAX_STMT                     /* Always at end! */
  1925   1926   } fulltext_statement;
  1926   1927   
  1927   1928   /* These must exactly match the enum above. */
  1928   1929   /* TODO(shess): Is there some risk that a statement will be used in two
  1929   1930   ** cursors at once, e.g.  if a query joins a virtual table to itself?
................................................................................
  1958   1959     /* SEGDIR_SELECT_SEGMENT */
  1959   1960     "select start_block, leaves_end_block, root from %_segdir "
  1960   1961     " where level = ? and idx = ?",
  1961   1962     /* SEGDIR_SELECT_ALL */
  1962   1963     "select start_block, leaves_end_block, root from %_segdir "
  1963   1964     " order by level desc, idx asc",
  1964   1965     /* SEGDIR_DELETE_ALL */ "delete from %_segdir",
         1966  +  /* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir",
  1965   1967   };
  1966   1968   
  1967   1969   /*
  1968   1970   ** A connection to a fulltext index is an instance of the following
  1969   1971   ** structure.  The xCreate and xConnect methods create an instance
  1970   1972   ** of this structure and xDestroy and xDisconnect free that instance.
  1971   1973   ** All other methods receive a pointer to the structure as one of their
................................................................................
  2122   2124   */
  2123   2125   static int sql_single_step(sqlite3_stmt *s){
  2124   2126     int rc = sqlite3_step(s);
  2125   2127     return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
  2126   2128   }
  2127   2129   
  2128   2130   /* Like sql_get_statement(), but for special replicated LEAF_SELECT
  2129         -** statements.
         2131  +** statements.  idx -1 is a special case for an uncached version of
         2132  +** the statement (used in the optimize implementation).
  2130   2133   */
  2131   2134   /* TODO(shess) Write version for generic statements and then share
  2132   2135   ** that between the cached-statement functions.
  2133   2136   */
  2134   2137   static int sql_get_leaf_statement(fulltext_vtab *v, int idx,
  2135   2138                                     sqlite3_stmt **ppStmt){
  2136         -  assert( idx>=0 && idx<MERGE_COUNT );
  2137         -  if( v->pLeafSelectStmts[idx]==NULL ){
         2139  +  assert( idx>=-1 && idx<MERGE_COUNT );
         2140  +  if( idx==-1 ){
         2141  +    return sql_prepare(v->db, v->zDb, v->zName, ppStmt, LEAF_SELECT);
         2142  +  }else if( v->pLeafSelectStmts[idx]==NULL ){
  2138   2143       int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx],
  2139   2144                            LEAF_SELECT);
  2140   2145       if( rc!=SQLITE_OK ) return rc;
  2141   2146     }else{
  2142   2147       int rc = sqlite3_reset(v->pLeafSelectStmts[idx]);
  2143   2148       if( rc!=SQLITE_OK ) return rc;
  2144   2149     }
................................................................................
  2460   2465     if( rc!=SQLITE_OK ) return rc;
  2461   2466   
  2462   2467     rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s);
  2463   2468     if( rc!=SQLITE_OK ) return rc;
  2464   2469   
  2465   2470     return sql_single_step(s);
  2466   2471   }
         2472  +
         2473  +/* Returns SQLITE_OK with *pnSegments set to the number of entries in
         2474  +** %_segdir and *piMaxLevel set to the highest level which has a
         2475  +** segment.  Otherwise returns the SQLite error which caused failure.
         2476  +*/
         2477  +static int segdir_count(fulltext_vtab *v, int *pnSegments, int *piMaxLevel){
         2478  +  sqlite3_stmt *s;
         2479  +  int rc = sql_get_statement(v, SEGDIR_COUNT_STMT, &s);
         2480  +  if( rc!=SQLITE_OK ) return rc;
         2481  +
         2482  +  rc = sqlite3_step(s);
         2483  +  /* TODO(shess): This case should not be possible?  Should stronger
         2484  +  ** measures be taken if it happens?
         2485  +  */
         2486  +  if( rc==SQLITE_DONE ){
         2487  +    *pnSegments = 0;
         2488  +    *piMaxLevel = 0;
         2489  +    return SQLITE_OK;
         2490  +  }
         2491  +  if( rc!=SQLITE_ROW ) return rc;
         2492  +
         2493  +  *pnSegments = sqlite3_column_int(s, 0);
         2494  +  *piMaxLevel = sqlite3_column_int(s, 1);
         2495  +
         2496  +  /* We expect only one row.  We must execute another sqlite3_step()
         2497  +   * to complete the iteration; otherwise the table will remain locked. */
         2498  +  rc = sqlite3_step(s);
         2499  +  if( rc==SQLITE_DONE ) return SQLITE_OK;
         2500  +  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
         2501  +  return rc;
         2502  +}
  2467   2503   
  2468   2504   /* TODO(shess) clearPendingTerms() is far down the file because
  2469   2505   ** writeZeroSegment() is far down the file because LeafWriter is far
  2470   2506   ** down the file.  Consider refactoring the code to move the non-vtab
  2471   2507   ** code above the vtab code so that we don't need this forward
  2472   2508   ** reference.
  2473   2509   */
................................................................................
  5336   5372   ** this case.  Probably a brittle assumption.
  5337   5373   */
  5338   5374   static int leavesReaderReset(LeavesReader *pReader){
  5339   5375     return sqlite3_reset(pReader->pStmt);
  5340   5376   }
  5341   5377   
  5342   5378   static void leavesReaderDestroy(LeavesReader *pReader){
         5379  +  /* If idx is -1, that means we're using a non-cached statement
         5380  +  ** handle in the optimize() case, so we need to release it.
         5381  +  */
         5382  +  if( pReader->pStmt!=NULL && pReader->idx==-1 ){
         5383  +    sqlite3_finalize(pReader->pStmt);
         5384  +  }
  5343   5385     leafReaderDestroy(&pReader->leafReader);
  5344   5386     dataBufferDestroy(&pReader->rootData);
  5345   5387     SCRAMBLE(pReader);
  5346   5388   }
  5347   5389   
  5348   5390   /* Initialize pReader with the given root data (if iStartBlockid==0
  5349   5391   ** the leaf data was entirely contained in the root), or from the
................................................................................
  6301   6343       snippetAllOffsets(pCursor);
  6302   6344       snippetOffsetText(&pCursor->snippet);
  6303   6345       sqlite3_result_text(pContext,
  6304   6346                           pCursor->snippet.zOffset, pCursor->snippet.nOffset,
  6305   6347                           SQLITE_STATIC);
  6306   6348     }
  6307   6349   }
         6350  +
         6351  +/* OptLeavesReader is nearly identical to LeavesReader, except that
         6352  +** where LeavesReader is geared towards the merging of complete
         6353  +** segment levels (with exactly MERGE_COUNT segments), OptLeavesReader
         6354  +** is geared towards implementation of the optimize() function, and
         6355  +** can merge all segments simultaneously.  This version may be
         6356  +** somewhat less efficient than LeavesReader because it merges into an
         6357  +** accumulator rather than doing an N-way merge, but since segment
         6358  +** size grows exponentially (so segment count logarithmically) this is
         6359  +** probably not an immediate problem.
         6360  +*/
         6361  +/* TODO(shess): Prove that assertion, or extend the merge code to
         6362  +** merge tree fashion (like the prefix-searching code does).
         6363  +*/
         6364  +/* TODO(shess): OptLeavesReader and LeavesReader could probably be
         6365  +** merged with little or no loss of performance for LeavesReader.  The
         6366  +** merged code would need to handle >MERGE_COUNT segments, and would
         6367  +** also need to be able to optionally optimize away deletes.
         6368  +*/
         6369  +typedef struct OptLeavesReader {
         6370  +  /* Segment number, to order readers by age. */
         6371  +  int segment;
         6372  +  LeavesReader reader;
         6373  +} OptLeavesReader;
         6374  +
         6375  +static int optLeavesReaderAtEnd(OptLeavesReader *pReader){
         6376  +  return leavesReaderAtEnd(&pReader->reader);
         6377  +}
         6378  +static int optLeavesReaderTermBytes(OptLeavesReader *pReader){
         6379  +  return leavesReaderTermBytes(&pReader->reader);
         6380  +}
         6381  +static const char *optLeavesReaderData(OptLeavesReader *pReader){
         6382  +  return leavesReaderData(&pReader->reader);
         6383  +}
         6384  +static int optLeavesReaderDataBytes(OptLeavesReader *pReader){
         6385  +  return leavesReaderDataBytes(&pReader->reader);
         6386  +}
         6387  +static const char *optLeavesReaderTerm(OptLeavesReader *pReader){
         6388  +  return leavesReaderTerm(&pReader->reader);
         6389  +}
         6390  +static int optLeavesReaderStep(fulltext_vtab *v, OptLeavesReader *pReader){
         6391  +  return leavesReaderStep(v, &pReader->reader);
         6392  +}
         6393  +static int optLeavesReaderTermCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
         6394  +  return leavesReaderTermCmp(&lr1->reader, &lr2->reader);
         6395  +}
         6396  +/* Order by term ascending, segment ascending (oldest to newest), with
         6397  +** exhausted readers to the end.
         6398  +*/
         6399  +static int optLeavesReaderCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
         6400  +  int c = optLeavesReaderTermCmp(lr1, lr2);
         6401  +  if( c!=0 ) return c;
         6402  +  return lr1->segment-lr2->segment;
         6403  +}
         6404  +/* Bubble pLr[0] to appropriate place in pLr[1..nLr-1].  Assumes that
         6405  +** pLr[1..nLr-1] is already sorted.
         6406  +*/
         6407  +static void optLeavesReaderReorder(OptLeavesReader *pLr, int nLr){
         6408  +  while( nLr>1 && optLeavesReaderCmp(pLr, pLr+1)>0 ){
         6409  +    OptLeavesReader tmp = pLr[0];
         6410  +    pLr[0] = pLr[1];
         6411  +    pLr[1] = tmp;
         6412  +    nLr--;
         6413  +    pLr++;
         6414  +  }
         6415  +}
         6416  +
         6417  +/* optimize() helper function.  Put the readers in order and iterate
         6418  +** through them, merging doclists for matching terms into pWriter.
         6419  +** Returns SQLITE_OK on success, or the SQLite error code which
         6420  +** prevented success.
         6421  +*/
         6422  +static int optimizeInternal(fulltext_vtab *v,
         6423  +                            OptLeavesReader *readers, int nReaders,
         6424  +                            LeafWriter *pWriter){
         6425  +  int i, rc = SQLITE_OK;
         6426  +  DataBuffer doclist, merged, tmp;
         6427  +
         6428  +  /* Order the readers. */
         6429  +  i = nReaders;
         6430  +  while( i-- > 0 ){
         6431  +    optLeavesReaderReorder(&readers[i], nReaders-i);
         6432  +  }
         6433  +
         6434  +  dataBufferInit(&doclist, LEAF_MAX);
         6435  +  dataBufferInit(&merged, LEAF_MAX);
         6436  +
         6437  +  /* Exhausted readers bubble to the end, so when the first reader is
         6438  +  ** at eof, all are at eof.
         6439  +  */
         6440  +  while( !optLeavesReaderAtEnd(&readers[0]) ){
         6441  +
         6442  +    /* Figure out how many readers share the next term. */
         6443  +    for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){
         6444  +      if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break;
         6445  +    }
         6446  +
         6447  +    /* Special-case for no merge. */
         6448  +    if( i==1 ){
         6449  +      /* Trim deletions from the doclist. */
         6450  +      dataBufferReset(&merged);
         6451  +      docListTrim(DL_DEFAULT,
         6452  +                  optLeavesReaderData(&readers[0]),
         6453  +                  optLeavesReaderDataBytes(&readers[0]),
         6454  +                  -1, DL_DEFAULT, &merged);
         6455  +    }else{
         6456  +      DLReader dlReaders[MERGE_COUNT];
         6457  +      int iReader, nReaders;
         6458  +
         6459  +      /* Prime the pipeline with the first reader's doclist.  After
         6460  +      ** one pass index 0 will reference the accumulated doclist.
         6461  +      */
         6462  +      dlrInit(&dlReaders[0], DL_DEFAULT,
         6463  +              optLeavesReaderData(&readers[0]),
         6464  +              optLeavesReaderDataBytes(&readers[0]));
         6465  +      iReader = 1;
         6466  +
         6467  +      assert( iReader<i );  /* Must execute the loop at least once. */
         6468  +      while( iReader<i ){
         6469  +        /* Merge 16 inputs per pass. */
         6470  +        for( nReaders=1; iReader<i && nReaders<MERGE_COUNT;
         6471  +             iReader++, nReaders++ ){
         6472  +          dlrInit(&dlReaders[nReaders], DL_DEFAULT,
         6473  +                  optLeavesReaderData(&readers[iReader]),
         6474  +                  optLeavesReaderDataBytes(&readers[iReader]));
         6475  +        }
         6476  +
         6477  +        /* Merge doclists and swap result into accumulator. */
         6478  +        dataBufferReset(&merged);
         6479  +        docListMerge(&merged, dlReaders, nReaders);
         6480  +        tmp = merged;
         6481  +        merged = doclist;
         6482  +        doclist = tmp;
         6483  +
         6484  +        while( nReaders-- > 0 ){
         6485  +          dlrDestroy(&dlReaders[nReaders]);
         6486  +        }
         6487  +
         6488  +        /* Accumulated doclist to reader 0 for next pass. */
         6489  +        dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData);
         6490  +      }
         6491  +
         6492  +      /* Destroy reader that was left in the pipeline. */
         6493  +      dlrDestroy(&dlReaders[0]);
         6494  +
         6495  +      /* Trim deletions from the doclist. */
         6496  +      dataBufferReset(&merged);
         6497  +      docListTrim(DL_DEFAULT, doclist.pData, doclist.nData,
         6498  +                  -1, DL_DEFAULT, &merged);
         6499  +    }
         6500  +
         6501  +    /* Only pass doclists with hits (skip if all hits deleted). */
         6502  +    if( merged.nData>0 ){
         6503  +      rc = leafWriterStep(v, pWriter,
         6504  +                          optLeavesReaderTerm(&readers[0]),
         6505  +                          optLeavesReaderTermBytes(&readers[0]),
         6506  +                          merged.pData, merged.nData);
         6507  +      if( rc!=SQLITE_OK ) goto err;
         6508  +    }
         6509  +
         6510  +    /* Step merged readers to next term and reorder. */
         6511  +    while( i-- > 0 ){
         6512  +      rc = optLeavesReaderStep(v, &readers[i]);
         6513  +      if( rc!=SQLITE_OK ) goto err;
         6514  +
         6515  +      optLeavesReaderReorder(&readers[i], nReaders-i);
         6516  +    }
         6517  +  }
         6518  +
         6519  + err:
         6520  +  dataBufferDestroy(&doclist);
         6521  +  dataBufferDestroy(&merged);
         6522  +  return rc;
         6523  +}
         6524  +
         6525  +/* Implement optimize() function for FTS3.  optimize(t) merges all
         6526  +** segments in the fts index into a single segment.  't' is the magic
         6527  +** table-named column.
         6528  +*/
         6529  +static void optimizeFunc(sqlite3_context *pContext,
         6530  +                         int argc, sqlite3_value **argv){
         6531  +  fulltext_cursor *pCursor;
         6532  +  if( argc>1 ){
         6533  +    sqlite3_result_error(pContext, "excess arguments to optimize()",-1);
         6534  +  }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
         6535  +            sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
         6536  +    sqlite3_result_error(pContext, "illegal first argument to optimize",-1);
         6537  +  }else{
         6538  +    fulltext_vtab *v;
         6539  +    int i, rc, iMaxLevel;
         6540  +    OptLeavesReader *readers;
         6541  +    int nReaders;
         6542  +    LeafWriter writer;
         6543  +    sqlite3_stmt *s;
         6544  +
         6545  +    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
         6546  +    v = cursor_vtab(pCursor);
         6547  +
         6548  +    /* Flush any buffered updates before optimizing. */
         6549  +    rc = flushPendingTerms(v);
         6550  +    if( rc!=SQLITE_OK ) goto err;
         6551  +
         6552  +    rc = segdir_count(v, &nReaders, &iMaxLevel);
         6553  +    if( rc!=SQLITE_OK ) goto err;
         6554  +    if( nReaders==0 || nReaders==1 ){
         6555  +      sqlite3_result_text(pContext, "Index already optimal", -1,
         6556  +                          SQLITE_STATIC);
         6557  +      return;
         6558  +    }
         6559  +
         6560  +    rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
         6561  +    if( rc!=SQLITE_OK ) goto err;
         6562  +
         6563  +    readers = sqlite3_malloc(nReaders*sizeof(readers[0]));
         6564  +    if( readers==NULL ) goto err;
         6565  +
         6566  +    /* Note that there will already be a segment at this position
         6567  +    ** until we call segdir_delete() on iMaxLevel.
         6568  +    */
         6569  +    leafWriterInit(iMaxLevel, 0, &writer);
         6570  +
         6571  +    i = 0;
         6572  +    while( (rc = sqlite3_step(s))==SQLITE_ROW ){
         6573  +      sqlite_int64 iStart = sqlite3_column_int64(s, 0);
         6574  +      sqlite_int64 iEnd = sqlite3_column_int64(s, 1);
         6575  +      const char *pRootData = sqlite3_column_blob(s, 2);
         6576  +      int nRootData = sqlite3_column_bytes(s, 2);
         6577  +
         6578  +      assert( i<nReaders );
         6579  +      rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData,
         6580  +                            &readers[i].reader);
         6581  +      if( rc!=SQLITE_OK ) break;
         6582  +
         6583  +      readers[i].segment = i;
         6584  +      i++;
         6585  +    }
         6586  +
         6587  +    /* If we managed to successfully read them all, optimize them. */
         6588  +    if( rc==SQLITE_DONE ){
         6589  +      assert( i==nReaders );
         6590  +      rc = optimizeInternal(v, readers, nReaders, &writer);
         6591  +    }
         6592  +
         6593  +    while( i-- > 0 ){
         6594  +      leavesReaderDestroy(&readers[i].reader);
         6595  +    }
         6596  +    sqlite3_free(readers);
         6597  +
         6598  +    /* If we've successfully gotten to here, delete the old segments
         6599  +    ** and flush the interior structure of the new segment.
         6600  +    */
         6601  +    if( rc==SQLITE_OK ){
         6602  +      for( i=0; i<=iMaxLevel; i++ ){
         6603  +        rc = segdir_delete(v, i);
         6604  +        if( rc!=SQLITE_OK ) break;
         6605  +      }
         6606  +
         6607  +      if( rc==SQLITE_OK ) rc = leafWriterFinalize(v, &writer);
         6608  +    }
         6609  +
         6610  +    leafWriterDestroy(&writer);
         6611  +
         6612  +    if( rc!=SQLITE_OK ) goto err;
         6613  +
         6614  +    sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
         6615  +    return;
         6616  +
         6617  +    /* TODO(shess): Error-handling needs to be improved along the
         6618  +    ** lines of the dump_ functions.
         6619  +    */
         6620  + err:
         6621  +    {
         6622  +      char buf[512];
         6623  +      sqlite3_snprintf(sizeof(buf), buf, "Error in optimize: %s",
         6624  +                       sqlite3_errmsg(sqlite3_context_db_handle(pContext)));
         6625  +      sqlite3_result_error(pContext, buf, -1);
         6626  +    }
         6627  +  }
         6628  +}
  6308   6629   
  6309   6630   #ifdef SQLITE_TEST
  6310   6631   /* Generate an error of the form "<prefix>: <msg>".  If msg is NULL,
  6311   6632   ** pull the error from the context's db handle.
  6312   6633   */
  6313   6634   static void generateError(sqlite3_context *pContext,
  6314   6635                             const char *prefix, const char *msg){
................................................................................
  6699   7020   ){
  6700   7021     if( strcmp(zName,"snippet")==0 ){
  6701   7022       *pxFunc = snippetFunc;
  6702   7023       return 1;
  6703   7024     }else if( strcmp(zName,"offsets")==0 ){
  6704   7025       *pxFunc = snippetOffsetsFunc;
  6705   7026       return 1;
         7027  +  }else if( strcmp(zName,"optimize")==0 ){
         7028  +    *pxFunc = optimizeFunc;
         7029  +    return 1;
  6706   7030   #ifdef SQLITE_TEST
  6707   7031       /* NOTE(shess): These functions are present only for testing
  6708   7032       ** purposes.  No particular effort is made to optimize their
  6709   7033       ** execution or how they build their results.
  6710   7034       */
  6711   7035     }else if( strcmp(zName,"dump_terms")==0 ){
  6712   7036       /* fprintf(stderr, "Found dump_terms\n"); */
................................................................................
  6832   7156     ** the two scalar functions. If this is successful, register the
  6833   7157     ** module with sqlite.
  6834   7158     */
  6835   7159     if( SQLITE_OK==rc 
  6836   7160      && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
  6837   7161      && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
  6838   7162      && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
         7163  +   && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1))
  6839   7164   #ifdef SQLITE_TEST
  6840   7165      && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1))
  6841   7166      && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1))
  6842   7167   #endif
  6843   7168     ){
  6844   7169       return sqlite3_create_module_v2(
  6845   7170           db, "fts3", &fts3Module, (void *)pHash, hashDestroy

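The merged segment is written at the highest level that previously held a segment, and the old %_segdir rows are then deleted. A sketch of how this can be observed through the segment-directory shadow table, assuming the t1 table built by the fts3d-4 test below (the values shown are those the test expects):

    -- Before: several segments across levels 0 and 1.
    SELECT level, idx FROM t1_segdir ORDER BY level, idx;  -- 0 0, 0 1, 1 0
    SELECT optimize(t1) FROM t1 LIMIT 1;                   -- 'Index optimized'
    -- After: a single segment at the previous maximum level.
    SELECT level, idx FROM t1_segdir ORDER BY level, idx;  -- 1 0
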
Changes to test/fts3d.test.

     7      7   #    May you find forgiveness for yourself and forgive others.
     8      8   #    May you share freely, never taking more than you give.
     9      9   #
    10     10   #*************************************************************************
    11     11   # This file implements regression tests for SQLite library.  The focus
    12     12   # of this script is testing the FTS3 module's optimize() function.
    13     13   #
    14         -# $Id: fts3d.test,v 1.1 2008/07/14 20:43:15 shess Exp $
           14  +# $Id: fts3d.test,v 1.2 2008/07/15 21:32:07 shess Exp $
    15     15   #
    16     16   
    17     17   set testdir [file dirname $argv0]
    18     18   source $testdir/tester.tcl
    19     19   
    20     20   # If SQLITE_ENABLE_FTS3 is not defined, omit this file.
    21     21   ifcapable !fts3 {
................................................................................
   119    119   
   120    120   check_terms   fts3d-1.2   0 0 {a is test this}
   121    121   check_doclist fts3d-1.2.1 0 0 a {[1 0[2]]}
   122    122   check_doclist fts3d-1.2.2 0 0 is {[1 0[1]]}
   123    123   check_doclist fts3d-1.2.3 0 0 test {[1 0[3]]}
   124    124   check_doclist fts3d-1.2.4 0 0 this {[1 0[0]]}
   125    125   
   126         -# TODO(shess): optimize() tests here.
          126  +#*************************************************************************
          127  +# Test results when everything is optimized manually.
          128  +# NOTE(shess): This is a copy of fts3c-1.3.  I've pulled a copy here
          129  +# because fts3d-2 and fts3d-3 should have identical results.
          130  +db eval {
          131  +  DROP TABLE IF EXISTS t1;
          132  +  CREATE VIRTUAL TABLE t1 USING fts3(c);
          133  +  INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
          134  +  INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
          135  +  INSERT INTO t1 (docid, c) VALUES (3, 'This is a test');
          136  +  DELETE FROM t1 WHERE docid IN (1,3);
          137  +  DROP TABLE IF EXISTS t1old;
          138  +  ALTER TABLE t1 RENAME TO t1old;
          139  +  CREATE VIRTUAL TABLE t1 USING fts3(c);
          140  +  INSERT INTO t1 (docid, c) SELECT docid, c FROM t1old;
          141  +  DROP TABLE t1old;
          142  +}
          143  +
          144  +# Should be a single optimal segment with the same logical results.
          145  +do_test fts3d-2.segments {
          146  +  execsql {
          147  +    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
          148  +  }
          149  +} {0 0}
          150  +do_test fts3d-2.matches {
          151  +  execsql {
          152  +    SELECT OFFSETS(t1) FROM t1
          153  +     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
          154  +  }
          155  +} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}
          156  +
          157  +check_terms_all fts3d-2.1 {a test that was}
          158  +check_doclist_all fts3d-2.1.1 a {[2 0[2]]}
          159  +check_doclist_all fts3d-2.1.2 test {[2 0[3]]}
          160  +check_doclist_all fts3d-2.1.3 that {[2 0[0]]}
          161  +check_doclist_all fts3d-2.1.4 was {[2 0[1]]}
          162  +
          163  +check_terms fts3d-2.2 0 0 {a test that was}
          164  +check_doclist fts3d-2.2.1 0 0 a {[2 0[2]]}
          165  +check_doclist fts3d-2.2.2 0 0 test {[2 0[3]]}
          166  +check_doclist fts3d-2.2.3 0 0 that {[2 0[0]]}
          167  +check_doclist fts3d-2.2.4 0 0 was {[2 0[1]]}
          168  +
          169  +#*************************************************************************
          170  +# Test results when everything is optimized via optimize().
          171  +db eval {
          172  +  DROP TABLE IF EXISTS t1;
          173  +  CREATE VIRTUAL TABLE t1 USING fts3(c);
          174  +  INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
          175  +  INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
          176  +  INSERT INTO t1 (docid, c) VALUES (3, 'This is a test');
          177  +  DELETE FROM t1 WHERE docid IN (1,3);
          178  +  SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
          179  +}
          180  +
          181  +# Should be a single optimal segment with the same logical results.
          182  +do_test fts3d-3.segments {
          183  +  execsql {
          184  +    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
          185  +  }
          186  +} {0 0}
          187  +do_test fts3d-3.matches {
          188  +  execsql {
          189  +    SELECT OFFSETS(t1) FROM t1
          190  +     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
          191  +  }
          192  +} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}
          193  +
          194  +check_terms_all fts3d-3.1 {a test that was}
          195  +check_doclist_all fts3d-3.1.1 a {[2 0[2]]}
          196  +check_doclist_all fts3d-3.1.2 test {[2 0[3]]}
          197  +check_doclist_all fts3d-3.1.3 that {[2 0[0]]}
          198  +check_doclist_all fts3d-3.1.4 was {[2 0[1]]}
          199  +
          200  +check_terms fts3d-3.2 0 0 {a test that was}
          201  +check_doclist fts3d-3.2.1 0 0 a {[2 0[2]]}
          202  +check_doclist fts3d-3.2.2 0 0 test {[2 0[3]]}
          203  +check_doclist fts3d-3.2.3 0 0 that {[2 0[0]]}
          204  +check_doclist fts3d-3.2.4 0 0 was {[2 0[1]]}
          205  +
          206  +#*************************************************************************
          207  +# Test optimize() against a table involving segment merges.
          208  +# NOTE(shess): Since there's no transaction, each of the INSERT/UPDATE
          209  +# statements generates a segment.
          210  +db eval {
          211  +  DROP TABLE IF EXISTS t1;
          212  +  CREATE VIRTUAL TABLE t1 USING fts3(c);
          213  +
          214  +  INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test');
          215  +  INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test');
          216  +  INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test');
          217  +
          218  +  UPDATE t1 SET c = 'This is a test one' WHERE rowid = 1;
          219  +  UPDATE t1 SET c = 'That was a test one' WHERE rowid = 2;
          220  +  UPDATE t1 SET c = 'This is a test one' WHERE rowid = 3;
          221  +
          222  +  UPDATE t1 SET c = 'This is a test two' WHERE rowid = 1;
          223  +  UPDATE t1 SET c = 'That was a test two' WHERE rowid = 2;
          224  +  UPDATE t1 SET c = 'This is a test two' WHERE rowid = 3;
          225  +
          226  +  UPDATE t1 SET c = 'This is a test three' WHERE rowid = 1;
          227  +  UPDATE t1 SET c = 'That was a test three' WHERE rowid = 2;
          228  +  UPDATE t1 SET c = 'This is a test three' WHERE rowid = 3;
          229  +
          230  +  UPDATE t1 SET c = 'This is a test four' WHERE rowid = 1;
          231  +  UPDATE t1 SET c = 'That was a test four' WHERE rowid = 2;
          232  +  UPDATE t1 SET c = 'This is a test four' WHERE rowid = 3;
          233  +
          234  +  UPDATE t1 SET c = 'This is a test' WHERE rowid = 1;
          235  +  UPDATE t1 SET c = 'That was a test' WHERE rowid = 2;
          236  +  UPDATE t1 SET c = 'This is a test' WHERE rowid = 3;
          237  +}
          238  +
          239  +# 2 segments in level 0, 1 in level 1 (18 segments created, 16
          240  +# merged).
          241  +do_test fts3d-4.segments {
          242  +  execsql {
          243  +    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
          244  +  }
          245  +} {0 0 0 1 1 0}
          246  +
          247  +do_test fts3d-4.matches {
          248  +  execsql {
          249  +    SELECT OFFSETS(t1) FROM t1
          250  +     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
          251  +  }
          252  +} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \
          253  +        {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
          254  +        {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]
          255  +
          256  +check_terms_all fts3d-4.1      {a four is one test that this three two was}
          257  +check_doclist_all fts3d-4.1.1  a {[1 0[2]] [2 0[2]] [3 0[2]]}
          258  +check_doclist_all fts3d-4.1.2  four {}
          259  +check_doclist_all fts3d-4.1.3  is {[1 0[1]] [3 0[1]]}
          260  +check_doclist_all fts3d-4.1.4  one {}
          261  +check_doclist_all fts3d-4.1.5  test {[1 0[3]] [2 0[3]] [3 0[3]]}
          262  +check_doclist_all fts3d-4.1.6  that {[2 0[0]]}
          263  +check_doclist_all fts3d-4.1.7  this {[1 0[0]] [3 0[0]]}
          264  +check_doclist_all fts3d-4.1.8  three {}
          265  +check_doclist_all fts3d-4.1.9  two {}
          266  +check_doclist_all fts3d-4.1.10 was {[2 0[1]]}
          267  +
          268  +check_terms fts3d-4.2     0 0 {a four test that was}
          269  +check_doclist fts3d-4.2.1 0 0 a {[2 0[2]]}
          270  +check_doclist fts3d-4.2.2 0 0 four {[2]}
          271  +check_doclist fts3d-4.2.3 0 0 test {[2 0[3]]}
          272  +check_doclist fts3d-4.2.4 0 0 that {[2 0[0]]}
          273  +check_doclist fts3d-4.2.5 0 0 was {[2 0[1]]}
          274  +
          275  +check_terms fts3d-4.3     0 1 {a four is test this}
          276  +check_doclist fts3d-4.3.1 0 1 a {[3 0[2]]}
          277  +check_doclist fts3d-4.3.2 0 1 four {[3]}
          278  +check_doclist fts3d-4.3.3 0 1 is {[3 0[1]]}
          279  +check_doclist fts3d-4.3.4 0 1 test {[3 0[3]]}
          280  +check_doclist fts3d-4.3.5 0 1 this {[3 0[0]]}
          281  +
          282  +check_terms fts3d-4.4      1 0 {a four is one test that this three two was}
          283  +check_doclist fts3d-4.4.1  1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]}
          284  +check_doclist fts3d-4.4.2  1 0 four {[1] [2 0[4]] [3 0[4]]}
          285  +check_doclist fts3d-4.4.3  1 0 is {[1 0[1]] [3 0[1]]}
          286  +check_doclist fts3d-4.4.4  1 0 one {[1] [2] [3]}
          287  +check_doclist fts3d-4.4.5  1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]}
          288  +check_doclist fts3d-4.4.6  1 0 that {[2 0[0]]}
          289  +check_doclist fts3d-4.4.7  1 0 this {[1 0[0]] [3 0[0]]}
          290  +check_doclist fts3d-4.4.8  1 0 three {[1] [2] [3]}
          291  +check_doclist fts3d-4.4.9  1 0 two {[1] [2] [3]}
          292  +check_doclist fts3d-4.4.10 1 0 was {[2 0[1]]}
          293  +
          294  +# Optimize should leave the result in the level of the highest-level
          295  +# prior segment.
          296  +do_test fts3d-4.5 {
          297  +  execsql {
          298  +    SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
          299  +    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
          300  +  }
          301  +} {{Index optimized} 1 0}
          302  +
          303  +# Identical to fts3d-4.matches.
          304  +do_test fts3d-4.5.matches {
          305  +  execsql {
          306  +    SELECT OFFSETS(t1) FROM t1
          307  +     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
          308  +  }
          309  +} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \
          310  +        {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
          311  +        {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]
          312  +
          313  +check_terms_all fts3d-4.5.1     {a is test that this was}
          314  +check_doclist_all fts3d-4.5.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]}
          315  +check_doclist_all fts3d-4.5.1.2 is {[1 0[1]] [3 0[1]]}
          316  +check_doclist_all fts3d-4.5.1.3 test {[1 0[3]] [2 0[3]] [3 0[3]]}
          317  +check_doclist_all fts3d-4.5.1.4 that {[2 0[0]]}
          318  +check_doclist_all fts3d-4.5.1.5 this {[1 0[0]] [3 0[0]]}
          319  +check_doclist_all fts3d-4.5.1.6 was {[2 0[1]]}
          320  +
          321  +check_terms fts3d-4.5.2     1 0 {a is test that this was}
          322  +check_doclist fts3d-4.5.2.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]}
          323  +check_doclist fts3d-4.5.2.2 1 0 is {[1 0[1]] [3 0[1]]}
          324  +check_doclist fts3d-4.5.2.3 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]}
          325  +check_doclist fts3d-4.5.2.4 1 0 that {[2 0[0]]}
          326  +check_doclist fts3d-4.5.2.5 1 0 this {[1 0[0]] [3 0[0]]}
          327  +check_doclist fts3d-4.5.2.6 1 0 was {[2 0[1]]}
          328  +
          329  +# Re-optimizing does nothing.
          330  +do_test fts3d-5.0 {
          331  +  execsql {
          332  +    SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
          333  +    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
          334  +  }
          335  +} {{Index already optimal} 1 0}
          336  +
          337  +# Even if we move things around, still does nothing.
          338  +do_test fts3d-5.1 {
          339  +  execsql {
          340  +    UPDATE t1_segdir SET level = 2 WHERE level = 1 AND idx = 0;
          341  +    SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
          342  +    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
          343  +  }
          344  +} {{Index already optimal} 2 0}
   127    345   
   128    346   finish_test