/ Check-in [b22e187b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Implement optimize() function. This merges all segments in the fts index into a single segment, including dropping delete cookies. (CVS 5417)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:b22e187bc2b38bd219dd0feba19b97279bd83089
User & Date: shess 2008-07-15 21:32:07
Context
2008-07-15
22:59
Work around bugs in older versions of the OS/2 conversion library by trying to minimize calls to UniCreateUconvObject() etc. Use global uconv objects instead. (CVS 5418) check-in: 80e42183 user: pweilbacher tags: trunk
21:32
Implement optimize() function. This merges all segments in the fts index into a single segment, including dropping delete cookies. (CVS 5417) check-in: b22e187b user: shess tags: trunk
20:56
Update column naming rules. Ticket #3221. Rules for column naming are still subject to change (except for the AS rule which we promise to keep the same) but are more consistent now. And the rules are tested using a new test script. (CVS 5416) check-in: 61f6e197 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

1916
1917
1918
1919
1920
1921
1922

1923
1924
1925
1926
1927
1928
1929
....
1958
1959
1960
1961
1962
1963
1964

1965
1966
1967
1968
1969
1970
1971
....
2122
2123
2124
2125
2126
2127
2128
2129

2130
2131
2132
2133
2134
2135
2136


2137
2138
2139
2140
2141
2142
2143
2144
....
2460
2461
2462
2463
2464
2465
2466































2467
2468
2469
2470
2471
2472
2473
....
5336
5337
5338
5339
5340
5341
5342






5343
5344
5345
5346
5347
5348
5349
....
6301
6302
6303
6304
6305
6306
6307























































































































































































































































































6308
6309
6310
6311
6312
6313
6314
....
6699
6700
6701
6702
6703
6704
6705



6706
6707
6708
6709
6710
6711
6712
....
6832
6833
6834
6835
6836
6837
6838

6839
6840
6841
6842
6843
6844
6845
  SEGDIR_SET_STMT,
  SEGDIR_SELECT_LEVEL_STMT,
  SEGDIR_SPAN_STMT,
  SEGDIR_DELETE_STMT,
  SEGDIR_SELECT_SEGMENT_STMT,
  SEGDIR_SELECT_ALL_STMT,
  SEGDIR_DELETE_ALL_STMT,


  MAX_STMT                     /* Always at end! */
} fulltext_statement;

/* These must exactly match the enum above. */
/* TODO(shess): Is there some risk that a statement will be used in two
** cursors at once, e.g.  if a query joins a virtual table to itself?
................................................................................
  /* SEGDIR_SELECT_SEGMENT */
  "select start_block, leaves_end_block, root from %_segdir "
  " where level = ? and idx = ?",
  /* SEGDIR_SELECT_ALL */
  "select start_block, leaves_end_block, root from %_segdir "
  " order by level desc, idx asc",
  /* SEGDIR_DELETE_ALL */ "delete from %_segdir",

};

/*
** A connection to a fulltext index is an instance of the following
** structure.  The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
................................................................................
*/
static int sql_single_step(sqlite3_stmt *s){
  int rc = sqlite3_step(s);
  return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
}

/* Like sql_get_statement(), but for special replicated LEAF_SELECT
** statements.

*/
/* TODO(shess) Write version for generic statements and then share
** that between the cached-statement functions.
*/
static int sql_get_leaf_statement(fulltext_vtab *v, int idx,
                                  sqlite3_stmt **ppStmt){
  assert( idx>=0 && idx<MERGE_COUNT );


  if( v->pLeafSelectStmts[idx]==NULL ){
    int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx],
                         LEAF_SELECT);
    if( rc!=SQLITE_OK ) return rc;
  }else{
    int rc = sqlite3_reset(v->pLeafSelectStmts[idx]);
    if( rc!=SQLITE_OK ) return rc;
  }
................................................................................
  if( rc!=SQLITE_OK ) return rc;

  rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  return sql_single_step(s);
}
































/* TODO(shess) clearPendingTerms() is far down the file because
** writeZeroSegment() is far down the file because LeafWriter is far
** down the file.  Consider refactoring the code to move the non-vtab
** code above the vtab code so that we don't need this forward
** reference.
*/
................................................................................
** this case.  Probably a brittle assumption.
*/
static int leavesReaderReset(LeavesReader *pReader){
  return sqlite3_reset(pReader->pStmt);
}

static void leavesReaderDestroy(LeavesReader *pReader){






  leafReaderDestroy(&pReader->leafReader);
  dataBufferDestroy(&pReader->rootData);
  SCRAMBLE(pReader);
}

/* Initialize pReader with the given root data (if iStartBlockid==0
** the leaf data was entirely contained in the root), or from the
................................................................................
    snippetAllOffsets(pCursor);
    snippetOffsetText(&pCursor->snippet);
    sqlite3_result_text(pContext,
                        pCursor->snippet.zOffset, pCursor->snippet.nOffset,
                        SQLITE_STATIC);
  }
}
























































































































































































































































































#ifdef SQLITE_TEST
/* Generate an error of the form "<prefix>: <msg>".  If msg is NULL,
** pull the error from the context's db handle.
*/
static void generateError(sqlite3_context *pContext,
                          const char *prefix, const char *msg){
................................................................................
){
  if( strcmp(zName,"snippet")==0 ){
    *pxFunc = snippetFunc;
    return 1;
  }else if( strcmp(zName,"offsets")==0 ){
    *pxFunc = snippetOffsetsFunc;
    return 1;



#ifdef SQLITE_TEST
    /* NOTE(shess): These functions are present only for testing
    ** purposes.  No particular effort is made to optimize their
    ** execution or how they build their results.
    */
  }else if( strcmp(zName,"dump_terms")==0 ){
    /* fprintf(stderr, "Found dump_terms\n"); */
................................................................................
  ** the two scalar functions. If this is successful, register the
  ** module with sqlite.
  */
  if( SQLITE_OK==rc 
   && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))

#ifdef SQLITE_TEST
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1))
#endif
  ){
    return sqlite3_create_module_v2(
        db, "fts3", &fts3Module, (void *)pHash, hashDestroy







>







 







>







 







|
>






|
>
>
|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>







 







>







1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
....
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
....
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
....
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
....
5372
5373
5374
5375
5376
5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
5387
5388
5389
5390
5391
....
6343
6344
6345
6346
6347
6348
6349
6350
6351
6352
6353
6354
6355
6356
6357
6358
6359
6360
6361
6362
6363
6364
6365
6366
6367
6368
6369
6370
6371
6372
6373
6374
6375
6376
6377
6378
6379
6380
6381
6382
6383
6384
6385
6386
6387
6388
6389
6390
6391
6392
6393
6394
6395
6396
6397
6398
6399
6400
6401
6402
6403
6404
6405
6406
6407
6408
6409
6410
6411
6412
6413
6414
6415
6416
6417
6418
6419
6420
6421
6422
6423
6424
6425
6426
6427
6428
6429
6430
6431
6432
6433
6434
6435
6436
6437
6438
6439
6440
6441
6442
6443
6444
6445
6446
6447
6448
6449
6450
6451
6452
6453
6454
6455
6456
6457
6458
6459
6460
6461
6462
6463
6464
6465
6466
6467
6468
6469
6470
6471
6472
6473
6474
6475
6476
6477
6478
6479
6480
6481
6482
6483
6484
6485
6486
6487
6488
6489
6490
6491
6492
6493
6494
6495
6496
6497
6498
6499
6500
6501
6502
6503
6504
6505
6506
6507
6508
6509
6510
6511
6512
6513
6514
6515
6516
6517
6518
6519
6520
6521
6522
6523
6524
6525
6526
6527
6528
6529
6530
6531
6532
6533
6534
6535
6536
6537
6538
6539
6540
6541
6542
6543
6544
6545
6546
6547
6548
6549
6550
6551
6552
6553
6554
6555
6556
6557
6558
6559
6560
6561
6562
6563
6564
6565
6566
6567
6568
6569
6570
6571
6572
6573
6574
6575
6576
6577
6578
6579
6580
6581
6582
6583
6584
6585
6586
6587
6588
6589
6590
6591
6592
6593
6594
6595
6596
6597
6598
6599
6600
6601
6602
6603
6604
6605
6606
6607
6608
6609
6610
6611
6612
6613
6614
6615
6616
6617
6618
6619
6620
6621
6622
6623
6624
6625
6626
6627
6628
6629
6630
6631
6632
6633
6634
6635
....
7020
7021
7022
7023
7024
7025
7026
7027
7028
7029
7030
7031
7032
7033
7034
7035
7036
....
7156
7157
7158
7159
7160
7161
7162
7163
7164
7165
7166
7167
7168
7169
7170
  SEGDIR_SET_STMT,
  SEGDIR_SELECT_LEVEL_STMT,
  SEGDIR_SPAN_STMT,
  SEGDIR_DELETE_STMT,
  SEGDIR_SELECT_SEGMENT_STMT,
  SEGDIR_SELECT_ALL_STMT,
  SEGDIR_DELETE_ALL_STMT,
  SEGDIR_COUNT_STMT,

  MAX_STMT                     /* Always at end! */
} fulltext_statement;

/* These must exactly match the enum above. */
/* TODO(shess): Is there some risk that a statement will be used in two
** cursors at once, e.g.  if a query joins a virtual table to itself?
................................................................................
  /* SEGDIR_SELECT_SEGMENT */
  "select start_block, leaves_end_block, root from %_segdir "
  " where level = ? and idx = ?",
  /* SEGDIR_SELECT_ALL */
  "select start_block, leaves_end_block, root from %_segdir "
  " order by level desc, idx asc",
  /* SEGDIR_DELETE_ALL */ "delete from %_segdir",
  /* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir",
};

/*
** A connection to a fulltext index is an instance of the following
** structure.  The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
................................................................................
*/
static int sql_single_step(sqlite3_stmt *s){
  int rc = sqlite3_step(s);
  return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
}

/* Like sql_get_statement(), but for special replicated LEAF_SELECT
** statements.  idx -1 is a special case for an uncached version of
** the statement (used in the optimize implementation).
*/
/* TODO(shess) Write version for generic statements and then share
** that between the cached-statement functions.
*/
static int sql_get_leaf_statement(fulltext_vtab *v, int idx,
                                  sqlite3_stmt **ppStmt){
  assert( idx>=-1 && idx<MERGE_COUNT );
  if( idx==-1 ){
    return sql_prepare(v->db, v->zDb, v->zName, ppStmt, LEAF_SELECT);
  }else if( v->pLeafSelectStmts[idx]==NULL ){
    int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx],
                         LEAF_SELECT);
    if( rc!=SQLITE_OK ) return rc;
  }else{
    int rc = sqlite3_reset(v->pLeafSelectStmts[idx]);
    if( rc!=SQLITE_OK ) return rc;
  }
................................................................................
  if( rc!=SQLITE_OK ) return rc;

  rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  return sql_single_step(s);
}

/* Returns SQLITE_OK with *pnSegments set to the number of entries in
** %_segdir and *piMaxLevel set to the highest level which has a
** segment.  Otherwise returns the SQLite error which caused failure.
*/
static int segdir_count(fulltext_vtab *v, int *pnSegments, int *piMaxLevel){
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, SEGDIR_COUNT_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_step(s);
  /* TODO(shess): This case should not be possible?  Should stronger
  ** measures be taken if it happens?
  */
  if( rc==SQLITE_DONE ){
    *pnSegments = 0;
    *piMaxLevel = 0;
    return SQLITE_OK;
  }
  if( rc!=SQLITE_ROW ) return rc;

  *pnSegments = sqlite3_column_int(s, 0);
  *piMaxLevel = sqlite3_column_int(s, 1);

  /* We expect only one row.  We must execute another sqlite3_step()
   * to complete the iteration; otherwise the table will remain locked. */
  rc = sqlite3_step(s);
  if( rc==SQLITE_DONE ) return SQLITE_OK;
  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
  return rc;
}

/* TODO(shess) clearPendingTerms() is far down the file because
** writeZeroSegment() is far down the file because LeafWriter is far
** down the file.  Consider refactoring the code to move the non-vtab
** code above the vtab code so that we don't need this forward
** reference.
*/
................................................................................
** this case.  Probably a brittle assumption.
*/
static int leavesReaderReset(LeavesReader *pReader){
  return sqlite3_reset(pReader->pStmt);
}

static void leavesReaderDestroy(LeavesReader *pReader){
  /* If idx is -1, that means we're using a non-cached statement
  ** handle in the optimize() case, so we need to release it.
  */
  if( pReader->pStmt!=NULL && pReader->idx==-1 ){
    sqlite3_finalize(pReader->pStmt);
  }
  leafReaderDestroy(&pReader->leafReader);
  dataBufferDestroy(&pReader->rootData);
  SCRAMBLE(pReader);
}

/* Initialize pReader with the given root data (if iStartBlockid==0
** the leaf data was entirely contained in the root), or from the
................................................................................
    snippetAllOffsets(pCursor);
    snippetOffsetText(&pCursor->snippet);
    sqlite3_result_text(pContext,
                        pCursor->snippet.zOffset, pCursor->snippet.nOffset,
                        SQLITE_STATIC);
  }
}

/* OptLeavesReader is nearly identical to LeavesReader, except that
** where LeavesReader is geared towards the merging of complete
** segment levels (with exactly MERGE_COUNT segments), OptLeavesReader
** is geared towards implementation of the optimize() function, and
** can merge all segments simultaneously.  This version may be
** somewhat less efficient than LeavesReader because it merges into an
** accumulator rather than doing an N-way merge, but since segment
** size grows exponentially (so segment count logrithmically) this is
** probably not an immediate problem.
*/
/* TODO(shess): Prove that assertion, or extend the merge code to
** merge tree fashion (like the prefix-searching code does).
*/
/* TODO(shess): OptLeavesReader and LeavesReader could probably be
** merged with little or no loss of performance for LeavesReader.  The
** merged code would need to handle >MERGE_COUNT segments, and would
** also need to be able to optionally optimize away deletes.
*/
typedef struct OptLeavesReader {
  /* Segment number, to order readers by age. */
  int segment;
  LeavesReader reader;
} OptLeavesReader;

static int optLeavesReaderAtEnd(OptLeavesReader *pReader){
  return leavesReaderAtEnd(&pReader->reader);
}
static int optLeavesReaderTermBytes(OptLeavesReader *pReader){
  return leavesReaderTermBytes(&pReader->reader);
}
static const char *optLeavesReaderData(OptLeavesReader *pReader){
  return leavesReaderData(&pReader->reader);
}
static int optLeavesReaderDataBytes(OptLeavesReader *pReader){
  return leavesReaderDataBytes(&pReader->reader);
}
static const char *optLeavesReaderTerm(OptLeavesReader *pReader){
  return leavesReaderTerm(&pReader->reader);
}
static int optLeavesReaderStep(fulltext_vtab *v, OptLeavesReader *pReader){
  return leavesReaderStep(v, &pReader->reader);
}
static int optLeavesReaderTermCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
  return leavesReaderTermCmp(&lr1->reader, &lr2->reader);
}
/* Order by term ascending, segment ascending (oldest to newest), with
** exhausted readers to the end.
*/
static int optLeavesReaderCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
  int c = optLeavesReaderTermCmp(lr1, lr2);
  if( c!=0 ) return c;
  return lr1->segment-lr2->segment;
}
/* Bubble pLr[0] to appropriate place in pLr[1..nLr-1].  Assumes that
** pLr[1..nLr-1] is already sorted.
*/
static void optLeavesReaderReorder(OptLeavesReader *pLr, int nLr){
  while( nLr>1 && optLeavesReaderCmp(pLr, pLr+1)>0 ){
    OptLeavesReader tmp = pLr[0];
    pLr[0] = pLr[1];
    pLr[1] = tmp;
    nLr--;
    pLr++;
  }
}

/* optimize() helper function.  Put the readers in order and iterate
** through them, merging doclists for matching terms into pWriter.
** Returns SQLITE_OK on success, or the SQLite error code which
** prevented success.
*/
static int optimizeInternal(fulltext_vtab *v,
                            OptLeavesReader *readers, int nReaders,
                            LeafWriter *pWriter){
  int i, rc = SQLITE_OK;
  DataBuffer doclist, merged, tmp;

  /* Order the readers. */
  i = nReaders;
  while( i-- > 0 ){
    optLeavesReaderReorder(&readers[i], nReaders-i);
  }

  dataBufferInit(&doclist, LEAF_MAX);
  dataBufferInit(&merged, LEAF_MAX);

  /* Exhausted readers bubble to the end, so when the first reader is
  ** at eof, all are at eof.
  */
  while( !optLeavesReaderAtEnd(&readers[0]) ){

    /* Figure out how many readers share the next term. */
    for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){
      if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break;
    }

    /* Special-case for no merge. */
    if( i==1 ){
      /* Trim deletions from the doclist. */
      dataBufferReset(&merged);
      docListTrim(DL_DEFAULT,
                  optLeavesReaderData(&readers[0]),
                  optLeavesReaderDataBytes(&readers[0]),
                  -1, DL_DEFAULT, &merged);
    }else{
      DLReader dlReaders[MERGE_COUNT];
      int iReader, nReaders;

      /* Prime the pipeline with the first reader's doclist.  After
      ** one pass index 0 will reference the accumulated doclist.
      */
      dlrInit(&dlReaders[0], DL_DEFAULT,
              optLeavesReaderData(&readers[0]),
              optLeavesReaderDataBytes(&readers[0]));
      iReader = 1;

      assert( iReader<i );  /* Must execute the loop at least once. */
      while( iReader<i ){
        /* Merge 16 inputs per pass. */
        for( nReaders=1; iReader<i && nReaders<MERGE_COUNT;
             iReader++, nReaders++ ){
          dlrInit(&dlReaders[nReaders], DL_DEFAULT,
                  optLeavesReaderData(&readers[iReader]),
                  optLeavesReaderDataBytes(&readers[iReader]));
        }

        /* Merge doclists and swap result into accumulator. */
        dataBufferReset(&merged);
        docListMerge(&merged, dlReaders, nReaders);
        tmp = merged;
        merged = doclist;
        doclist = tmp;

        while( nReaders-- > 0 ){
          dlrDestroy(&dlReaders[nReaders]);
        }

        /* Accumulated doclist to reader 0 for next pass. */
        dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData);
      }

      /* Destroy reader that was left in the pipeline. */
      dlrDestroy(&dlReaders[0]);

      /* Trim deletions from the doclist. */
      dataBufferReset(&merged);
      docListTrim(DL_DEFAULT, doclist.pData, doclist.nData,
                  -1, DL_DEFAULT, &merged);
    }

    /* Only pass doclists with hits (skip if all hits deleted). */
    if( merged.nData>0 ){
      rc = leafWriterStep(v, pWriter,
                          optLeavesReaderTerm(&readers[0]),
                          optLeavesReaderTermBytes(&readers[0]),
                          merged.pData, merged.nData);
      if( rc!=SQLITE_OK ) goto err;
    }

    /* Step merged readers to next term and reorder. */
    while( i-- > 0 ){
      rc = optLeavesReaderStep(v, &readers[i]);
      if( rc!=SQLITE_OK ) goto err;

      optLeavesReaderReorder(&readers[i], nReaders-i);
    }
  }

 err:
  dataBufferDestroy(&doclist);
  dataBufferDestroy(&merged);
  return rc;
}

/* Implement optimize() function for FTS3.  optimize(t) merges all
** segments in the fts index into a single segment.  't' is the magic
** table-named column.
*/
static void optimizeFunc(sqlite3_context *pContext,
                         int argc, sqlite3_value **argv){
  fulltext_cursor *pCursor;
  if( argc>1 ){
    sqlite3_result_error(pContext, "excess arguments to optimize()",-1);
  }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
            sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
    sqlite3_result_error(pContext, "illegal first argument to optimize",-1);
  }else{
    fulltext_vtab *v;
    int i, rc, iMaxLevel;
    OptLeavesReader *readers;
    int nReaders;
    LeafWriter writer;
    sqlite3_stmt *s;

    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
    v = cursor_vtab(pCursor);

    /* Flush any buffered updates before optimizing. */
    rc = flushPendingTerms(v);
    if( rc!=SQLITE_OK ) goto err;

    rc = segdir_count(v, &nReaders, &iMaxLevel);
    if( rc!=SQLITE_OK ) goto err;
    if( nReaders==0 || nReaders==1 ){
      sqlite3_result_text(pContext, "Index already optimal", -1,
                          SQLITE_STATIC);
      return;
    }

    rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
    if( rc!=SQLITE_OK ) goto err;

    readers = sqlite3_malloc(nReaders*sizeof(readers[0]));
    if( readers==NULL ) goto err;

    /* Note that there will already be a segment at this position
    ** until we call segdir_delete() on iMaxLevel.
    */
    leafWriterInit(iMaxLevel, 0, &writer);

    i = 0;
    while( (rc = sqlite3_step(s))==SQLITE_ROW ){
      sqlite_int64 iStart = sqlite3_column_int64(s, 0);
      sqlite_int64 iEnd = sqlite3_column_int64(s, 1);
      const char *pRootData = sqlite3_column_blob(s, 2);
      int nRootData = sqlite3_column_bytes(s, 2);

      assert( i<nReaders );
      rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData,
                            &readers[i].reader);
      if( rc!=SQLITE_OK ) break;

      readers[i].segment = i;
      i++;
    }

    /* If we managed to succesfully read them all, optimize them. */
    if( rc==SQLITE_DONE ){
      assert( i==nReaders );
      rc = optimizeInternal(v, readers, nReaders, &writer);
    }

    while( i-- > 0 ){
      leavesReaderDestroy(&readers[i].reader);
    }
    sqlite3_free(readers);

    /* If we've successfully gotten to here, delete the old segments
    ** and flush the interior structure of the new segment.
    */
    if( rc==SQLITE_OK ){
      for( i=0; i<=iMaxLevel; i++ ){
        rc = segdir_delete(v, i);
        if( rc!=SQLITE_OK ) break;
      }

      if( rc==SQLITE_OK ) rc = leafWriterFinalize(v, &writer);
    }

    leafWriterDestroy(&writer);

    if( rc!=SQLITE_OK ) goto err;

    sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
    return;

    /* TODO(shess): Error-handling needs to be improved along the
    ** lines of the dump_ functions.
    */
 err:
    {
      char buf[512];
      sqlite3_snprintf(sizeof(buf), buf, "Error in optimize: %s",
                       sqlite3_errmsg(sqlite3_context_db_handle(pContext)));
      sqlite3_result_error(pContext, buf, -1);
    }
  }
}

#ifdef SQLITE_TEST
/* Generate an error of the form "<prefix>: <msg>".  If msg is NULL,
** pull the error from the context's db handle.
*/
static void generateError(sqlite3_context *pContext,
                          const char *prefix, const char *msg){
................................................................................
){
  if( strcmp(zName,"snippet")==0 ){
    *pxFunc = snippetFunc;
    return 1;
  }else if( strcmp(zName,"offsets")==0 ){
    *pxFunc = snippetOffsetsFunc;
    return 1;
  }else if( strcmp(zName,"optimize")==0 ){
    *pxFunc = optimizeFunc;
    return 1;
#ifdef SQLITE_TEST
    /* NOTE(shess): These functions are present only for testing
    ** purposes.  No particular effort is made to optimize their
    ** execution or how they build their results.
    */
  }else if( strcmp(zName,"dump_terms")==0 ){
    /* fprintf(stderr, "Found dump_terms\n"); */
................................................................................
  ** the two scalar functions. If this is successful, register the
  ** module with sqlite.
  */
  if( SQLITE_OK==rc 
   && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1))
#ifdef SQLITE_TEST
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1))
#endif
  ){
    return sqlite3_create_module_v2(
        db, "fts3", &fts3Module, (void *)pHash, hashDestroy

Changes to test/fts3d.test.

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
119
120
121
122
123
124
125
126















127











































































































































































































128
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The focus
# of this script is testing the FTS3 module's optimize() function.
#
# $Id: fts3d.test,v 1.1 2008/07/14 20:43:15 shess Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
................................................................................

check_terms   fts3d-1.2   0 0 {a is test this}
check_doclist fts3d-1.2.1 0 0 a {[1 0[2]]}
check_doclist fts3d-1.2.2 0 0 is {[1 0[1]]}
check_doclist fts3d-1.2.3 0 0 test {[1 0[3]]}
check_doclist fts3d-1.2.4 0 0 this {[1 0[0]]}

# TODO(shess): optimize() tests here.



























































































































































































































finish_test







|







 







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The focus
# of this script is testing the FTS3 module's optimize() function.
#
# $Id: fts3d.test,v 1.2 2008/07/15 21:32:07 shess Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
................................................................................

check_terms   fts3d-1.2   0 0 {a is test this}
check_doclist fts3d-1.2.1 0 0 a {[1 0[2]]}
check_doclist fts3d-1.2.2 0 0 is {[1 0[1]]}
check_doclist fts3d-1.2.3 0 0 test {[1 0[3]]}
check_doclist fts3d-1.2.4 0 0 this {[1 0[0]]}

#*************************************************************************
# Test results when everything is optimized manually.
# NOTE(shess): This is a copy of fts3c-1.3.  I've pulled a copy here
# because fts3d-2 and fts3d-3 should have identical results.
db eval {
  DROP TABLE IF EXISTS t1;
  CREATE VIRTUAL TABLE t1 USING fts3(c);
  INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
  INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
  INSERT INTO t1 (docid, c) VALUES (3, 'This is a test');
  DELETE FROM t1 WHERE docid IN (1,3);
  DROP TABLE IF EXISTS t1old;
  ALTER TABLE t1 RENAME TO t1old;
  CREATE VIRTUAL TABLE t1 USING fts3(c);
  INSERT INTO t1 (docid, c) SELECT docid, c FROM t1old;
  DROP TABLE t1old;
}

# Should be a single optimal segment with the same logical results.
do_test fts3d-2.segments {
  execsql {
    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
  }
} {0 0}
do_test fts3d-2.matches {
  execsql {
    SELECT OFFSETS(t1) FROM t1
     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
  }
} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}

check_terms_all fts3d-2.1 {a test that was}
check_doclist_all fts3d-2.1.1 a {[2 0[2]]}
check_doclist_all fts3d-2.1.2 test {[2 0[3]]}
check_doclist_all fts3d-2.1.3 that {[2 0[0]]}
check_doclist_all fts3d-2.1.4 was {[2 0[1]]}

check_terms fts3d-2.2 0 0 {a test that was}
check_doclist fts3d-2.2.1 0 0 a {[2 0[2]]}
check_doclist fts3d-2.2.2 0 0 test {[2 0[3]]}
check_doclist fts3d-2.2.3 0 0 that {[2 0[0]]}
check_doclist fts3d-2.2.4 0 0 was {[2 0[1]]}

#*************************************************************************
# Test results when everything is optimized via optimize().
db eval {
  DROP TABLE IF EXISTS t1;
  CREATE VIRTUAL TABLE t1 USING fts3(c);
  INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
  INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
  INSERT INTO t1 (docid, c) VALUES (3, 'This is a test');
  DELETE FROM t1 WHERE docid IN (1,3);
  SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
}

# Should be a single optimal segment with the same logical results.
do_test fts3d-3.segments {
  execsql {
    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
  }
} {0 0}
do_test fts3d-3.matches {
  execsql {
    SELECT OFFSETS(t1) FROM t1
     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
  }
} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}

check_terms_all fts3d-3.1 {a test that was}
check_doclist_all fts3d-3.1.1 a {[2 0[2]]}
check_doclist_all fts3d-3.1.2 test {[2 0[3]]}
check_doclist_all fts3d-3.1.3 that {[2 0[0]]}
check_doclist_all fts3d-3.1.4 was {[2 0[1]]}

check_terms fts3d-3.2 0 0 {a test that was}
check_doclist fts3d-3.2.1 0 0 a {[2 0[2]]}
check_doclist fts3d-3.2.2 0 0 test {[2 0[3]]}
check_doclist fts3d-3.2.3 0 0 that {[2 0[0]]}
check_doclist fts3d-3.2.4 0 0 was {[2 0[1]]}

#*************************************************************************
# Test optimize() against a table involving segment merges.
# NOTE(shess): Since there's no transaction, each of the INSERT/UPDATE
# statements generates a segment.
db eval {
  DROP TABLE IF EXISTS t1;
  CREATE VIRTUAL TABLE t1 USING fts3(c);

  INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test');
  INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test');
  INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test');

  UPDATE t1 SET c = 'This is a test one' WHERE rowid = 1;
  UPDATE t1 SET c = 'That was a test one' WHERE rowid = 2;
  UPDATE t1 SET c = 'This is a test one' WHERE rowid = 3;

  UPDATE t1 SET c = 'This is a test two' WHERE rowid = 1;
  UPDATE t1 SET c = 'That was a test two' WHERE rowid = 2;
  UPDATE t1 SET c = 'This is a test two' WHERE rowid = 3;

  UPDATE t1 SET c = 'This is a test three' WHERE rowid = 1;
  UPDATE t1 SET c = 'That was a test three' WHERE rowid = 2;
  UPDATE t1 SET c = 'This is a test three' WHERE rowid = 3;

  UPDATE t1 SET c = 'This is a test four' WHERE rowid = 1;
  UPDATE t1 SET c = 'That was a test four' WHERE rowid = 2;
  UPDATE t1 SET c = 'This is a test four' WHERE rowid = 3;

  UPDATE t1 SET c = 'This is a test' WHERE rowid = 1;
  UPDATE t1 SET c = 'That was a test' WHERE rowid = 2;
  UPDATE t1 SET c = 'This is a test' WHERE rowid = 3;
}

# 2 segments in level 0, 1 in level 1 (18 segments created, 16
# merged).
do_test fts3d-4.segments {
  execsql {
    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
  }
} {0 0 0 1 1 0}

do_test fts3d-4.matches {
  execsql {
    SELECT OFFSETS(t1) FROM t1
     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
  }
} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \
        {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
        {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]

check_terms_all fts3d-4.1      {a four is one test that this three two was}
check_doclist_all fts3d-4.1.1  a {[1 0[2]] [2 0[2]] [3 0[2]]}
check_doclist_all fts3d-4.1.2  four {}
check_doclist_all fts3d-4.1.3  is {[1 0[1]] [3 0[1]]}
check_doclist_all fts3d-4.1.4  one {}
check_doclist_all fts3d-4.1.5  test {[1 0[3]] [2 0[3]] [3 0[3]]}
check_doclist_all fts3d-4.1.6  that {[2 0[0]]}
check_doclist_all fts3d-4.1.7  this {[1 0[0]] [3 0[0]]}
check_doclist_all fts3d-4.1.8  three {}
check_doclist_all fts3d-4.1.9  two {}
check_doclist_all fts3d-4.1.10 was {[2 0[1]]}

check_terms fts3d-4.2     0 0 {a four test that was}
check_doclist fts3d-4.2.1 0 0 a {[2 0[2]]}
check_doclist fts3d-4.2.2 0 0 four {[2]}
check_doclist fts3d-4.2.3 0 0 test {[2 0[3]]}
check_doclist fts3d-4.2.4 0 0 that {[2 0[0]]}
check_doclist fts3d-4.2.5 0 0 was {[2 0[1]]}

check_terms fts3d-4.3     0 1 {a four is test this}
check_doclist fts3d-4.3.1 0 1 a {[3 0[2]]}
check_doclist fts3d-4.3.2 0 1 four {[3]}
check_doclist fts3d-4.3.3 0 1 is {[3 0[1]]}
check_doclist fts3d-4.3.4 0 1 test {[3 0[3]]}
check_doclist fts3d-4.3.5 0 1 this {[3 0[0]]}

check_terms fts3d-4.4      1 0 {a four is one test that this three two was}
check_doclist fts3d-4.4.1  1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]}
check_doclist fts3d-4.4.2  1 0 four {[1] [2 0[4]] [3 0[4]]}
check_doclist fts3d-4.4.3  1 0 is {[1 0[1]] [3 0[1]]}
check_doclist fts3d-4.4.4  1 0 one {[1] [2] [3]}
check_doclist fts3d-4.4.5  1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]}
check_doclist fts3d-4.4.6  1 0 that {[2 0[0]]}
check_doclist fts3d-4.4.7  1 0 this {[1 0[0]] [3 0[0]]}
check_doclist fts3d-4.4.8  1 0 three {[1] [2] [3]}
check_doclist fts3d-4.4.9  1 0 two {[1] [2] [3]}
check_doclist fts3d-4.4.10 1 0 was {[2 0[1]]}

# Optimize should leave the result in the level of the highest-level
# prior segment.
do_test fts3d-4.5 {
  execsql {
    SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
  }
} {{Index optimized} 1 0}

# Identical to fts3d-4.matches.
do_test fts3d-4.5.matches {
  execsql {
    SELECT OFFSETS(t1) FROM t1
     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
  }
} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \
        {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
        {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]

check_terms_all fts3d-4.5.1     {a is test that this was}
check_doclist_all fts3d-4.5.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]}
check_doclist_all fts3d-4.5.1.2 is {[1 0[1]] [3 0[1]]}
check_doclist_all fts3d-4.5.1.3 test {[1 0[3]] [2 0[3]] [3 0[3]]}
check_doclist_all fts3d-4.5.1.4 that {[2 0[0]]}
check_doclist_all fts3d-4.5.1.5 this {[1 0[0]] [3 0[0]]}
check_doclist_all fts3d-4.5.1.6 was {[2 0[1]]}

check_terms fts3d-4.5.2     1 0 {a is test that this was}
check_doclist fts3d-4.5.2.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]}
check_doclist fts3d-4.5.2.2 1 0 is {[1 0[1]] [3 0[1]]}
check_doclist fts3d-4.5.2.3 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]}
check_doclist fts3d-4.5.2.4 1 0 that {[2 0[0]]}
check_doclist fts3d-4.5.2.5 1 0 this {[1 0[0]] [3 0[0]]}
check_doclist fts3d-4.5.2.6 1 0 was {[2 0[1]]}

# Re-optimizing does nothing.
do_test fts3d-5.0 {
  execsql {
    SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
  }
} {{Index already optimal} 1 0}

# Even if we move things around, still does nothing.
do_test fts3d-5.1 {
  execsql {
    UPDATE t1_segdir SET level = 2 WHERE level = 1 AND idx = 0;
    SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
  }
} {{Index already optimal} 2 0}

finish_test