/ Check-in [0fc61f99]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:The FTS3 amalgamation can now be appended to the SQLite amalgamation to generate a single source file that contains both components. (CVS 4558)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 0fc61f99b54bd269fcc011f448b9b971e902cb01
User & Date: drh 2007-11-24 00:41:52
Context
2007-11-24
10:23
Declare the invalidateCursorsOnModifiedBtrees function to be static. Ticket #2792. (CVS 4559) check-in: 94f25fc1 user: drh tags: trunk
00:41
The FTS3 amalgamation can now be appended to the SQLite amalgamation to generate a single source file that contains both components. (CVS 4558) check-in: 0fc61f99 user: drh tags: trunk
2007-11-23
18:19
Fix a typo in a change to all.test from earlier today. (CVS 4557) check-in: 8c0b2157 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
...
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
...
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
...
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
...
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
...
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
...
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
...
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
...
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
...
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
...
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
...
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
...
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
....
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
....
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
....
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
....
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
....
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
....
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
....
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
....
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
....
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
....
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
....
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
....
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
....
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
....
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
....
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
....
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
....
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
....
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
....
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
....
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
....
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
....
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
....
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
4953
....
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
....
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
....
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
....
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
....
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
....
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
6005
6006
....
6046
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059
6060
6061
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
6073
6074
6075
6076
6077
6078
6079
6080
6081
6082
6083
6084
6085
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "fts3.h"
#include "fts3_hash.h"
#include "fts3_tokenizer.h"
#include "sqlite3.h"
#ifndef SQLITE_CORE 
  #include "sqlite3ext.h"
  SQLITE_EXTENSION_INIT1
#endif


/* TODO(shess) MAN, this thing needs some refactoring.  At minimum, it
................................................................................
**  - table query functions
**
** Put the query functions last because they're likely to reference
** typedefs or functions from the table update section.
*/

#if 0
# define TRACE(A)  printf A; fflush(stdout)
#else
# define TRACE(A)
#endif

/*
** Default span for NEAR operators.
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10

................................................................................

/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
#define VARINT_MAX 10

/* Write a 64-bit variable-length integer to memory starting at p[0].
 * The length of data written will be between 1 and VARINT_MAX bytes.
 * The number of bytes written is returned. */
static int putVarint(char *p, sqlite_int64 v){
  unsigned char *q = (unsigned char *) p;
  sqlite_uint64 vu = v;
  do{
    *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
    vu >>= 7;
  }while( vu!=0 );
  q[-1] &= 0x7f;  /* turn off high bit in final byte */
................................................................................
  assert( q - (unsigned char *)p <= VARINT_MAX );
  return (int) (q - (unsigned char *)p);
}

/* Read a 64-bit variable-length integer from memory starting at p[0].
 * Return the number of bytes read, or 0 on error.
 * The value is stored in *v. */
static int getVarint(const char *p, sqlite_int64 *v){
  const unsigned char *q = (const unsigned char *) p;
  sqlite_uint64 x = 0, y = 1;
  while( (*q & 0x80) == 0x80 ){
    x += y * (*q++ & 0x7f);
    y <<= 7;
    if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
      assert( 0 );
................................................................................
    }
  }
  x += y * (*q++);
  *v = (sqlite_int64) x;
  return (int) (q - (unsigned char *)p);
}

static int getVarint32(const char *p, int *pi){
 sqlite_int64 i;
 int ret = getVarint(p, &i);
 *pi = (int) i;
 assert( *pi==i );
 return ret;
}

/*******************************************************************/
/* DataBuffer is used to collect data into a buffer in piecemeal
................................................................................
}
/* TODO(shess) Consider adding a field to track iDocid varint length
** to make these two functions faster.  This might matter (a tiny bit)
** for queries.
*/
static const char *dlrPosData(DLReader *pReader){
  sqlite_int64 iDummy;
  int n = getVarint(pReader->pData, &iDummy);
  assert( !dlrAtEnd(pReader) );
  return pReader->pData+n;
}
static int dlrPosDataLen(DLReader *pReader){
  sqlite_int64 iDummy;
  int n = getVarint(pReader->pData, &iDummy);
  assert( !dlrAtEnd(pReader) );
  return pReader->nElement-n;
}
static void dlrStep(DLReader *pReader){
  assert( !dlrAtEnd(pReader) );

  /* Skip past current doclist element. */
................................................................................
  assert( pReader->nElement<=pReader->nData );
  pReader->pData += pReader->nElement;
  pReader->nData -= pReader->nElement;

  /* If there is more data, read the next doclist element. */
  if( pReader->nData!=0 ){
    sqlite_int64 iDocidDelta;
    int iDummy, n = getVarint(pReader->pData, &iDocidDelta);
    pReader->iDocid += iDocidDelta;
    if( pReader->iType>=DL_POSITIONS ){
      assert( n<pReader->nData );
      while( 1 ){
        n += getVarint32(pReader->pData+n, &iDummy);
        assert( n<=pReader->nData );
        if( iDummy==POS_END ) break;
        if( iDummy==POS_COLUMN ){
          n += getVarint32(pReader->pData+n, &iDummy);
          assert( n<pReader->nData );
        }else if( pReader->iType==DL_POSITIONS_OFFSETS ){
          n += getVarint32(pReader->pData+n, &iDummy);
          n += getVarint32(pReader->pData+n, &iDummy);
          assert( n<pReader->nData );
        }
      }
    }
    pReader->nElement = n;
    assert( pReader->nElement<=pReader->nData );
  }
................................................................................
                            sqlite_int64 *pLastDocid){
  sqlite_int64 iPrevDocid = 0;
  assert( nData>0 );
  assert( pData!=0 );
  assert( pData+nData>pData );
  while( nData!=0 ){
    sqlite_int64 iDocidDelta;
    int n = getVarint(pData, &iDocidDelta);
    iPrevDocid += iDocidDelta;
    if( iType>DL_DOCIDS ){
      int iDummy;
      while( 1 ){
        n += getVarint32(pData+n, &iDummy);
        if( iDummy==POS_END ) break;
        if( iDummy==POS_COLUMN ){
          n += getVarint32(pData+n, &iDummy);
        }else if( iType>DL_POSITIONS ){
          n += getVarint32(pData+n, &iDummy);
          n += getVarint32(pData+n, &iDummy);
        }
        assert( n<=nData );
      }
    }
    assert( n<=nData );
    pData += n;
    nData -= n;
................................................................................
  char c[VARINT_MAX];
  int nFirstOld, nFirstNew;     /* Old and new varint len of first docid. */
#ifndef NDEBUG
  sqlite_int64 iLastDocidDelta;
#endif

  /* Recode the initial docid as delta from iPrevDocid. */
  nFirstOld = getVarint(pData, &iDocid);
  assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) );
  nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid);

  /* Verify that the incoming doclist is valid AND that it ends with
  ** the expected docid.  This is essential because we'll trust this
  ** docid in future delta-encoding.
  */
  ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta);
  assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta );
................................................................................
}
static void dlwCopy(DLWriter *pWriter, DLReader *pReader){
  dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader),
            dlrDocid(pReader), dlrDocid(pReader));
}
static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){
  char c[VARINT_MAX];
  int n = putVarint(c, iDocid-pWriter->iPrevDocid);

  /* Docids must ascend. */
  assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid );
  assert( pWriter->iType==DL_DOCIDS );

  dataBufferAppend(pWriter->b, c, n);
  pWriter->iPrevDocid = iDocid;
................................................................................
  assert( !plrAtEnd(pReader) );

  if( pReader->nData==0 ){
    pReader->pData = NULL;
    return;
  }

  n = getVarint32(pReader->pData, &i);
  if( i==POS_COLUMN ){
    n += getVarint32(pReader->pData+n, &pReader->iColumn);
    pReader->iPosition = 0;
    pReader->iStartOffset = 0;
    n += getVarint32(pReader->pData+n, &i);
  }
  /* Should never see adjacent column changes. */
  assert( i!=POS_COLUMN );

  if( i==POS_END ){
    pReader->nData = 0;
    pReader->pData = NULL;
    return;
  }

  pReader->iPosition += i-POS_BASE;
  if( pReader->iType==DL_POSITIONS_OFFSETS ){
    n += getVarint32(pReader->pData+n, &i);
    pReader->iStartOffset += i;
    n += getVarint32(pReader->pData+n, &i);
    pReader->iEndOffset = pReader->iStartOffset+i;
  }
  assert( n<=pReader->nData );
  pReader->pData += n;
  pReader->nData -= n;
}

................................................................................

  /* Ban plwAdd() after plwTerminate(). */
  assert( pWriter->iPos!=-1 );

  if( pWriter->dlw->iType==DL_DOCIDS ) return;

  if( iColumn!=pWriter->iColumn ){
    n += putVarint(c+n, POS_COLUMN);
    n += putVarint(c+n, iColumn);
    pWriter->iColumn = iColumn;
    pWriter->iPos = 0;
    pWriter->iOffset = 0;
  }
  assert( iPos>=pWriter->iPos );
  n += putVarint(c+n, POS_BASE+(iPos-pWriter->iPos));
  pWriter->iPos = iPos;
  if( pWriter->dlw->iType==DL_POSITIONS_OFFSETS ){
    assert( iStartOffset>=pWriter->iOffset );
    n += putVarint(c+n, iStartOffset-pWriter->iOffset);
    pWriter->iOffset = iStartOffset;
    assert( iEndOffset>=iStartOffset );
    n += putVarint(c+n, iEndOffset-iStartOffset);
  }
  dataBufferAppend(pWriter->dlw->b, c, n);
}
static void plwCopy(PLWriter *pWriter, PLReader *pReader){
  plwAdd(pWriter, plrColumn(pReader), plrPosition(pReader),
         plrStartOffset(pReader), plrEndOffset(pReader));
}
................................................................................
  char c[VARINT_MAX];
  int n;

  pWriter->dlw = dlw;

  /* Docids must ascend. */
  assert( !pWriter->dlw->has_iPrevDocid || iDocid>pWriter->dlw->iPrevDocid );
  n = putVarint(c, iDocid-pWriter->dlw->iPrevDocid);
  dataBufferAppend(pWriter->dlw->b, c, n);
  pWriter->dlw->iPrevDocid = iDocid;
#ifndef NDEBUG
  pWriter->dlw->has_iPrevDocid = 1;
#endif

  pWriter->iColumn = 0;
................................................................................
** terminator, so that the output is always correct.  But that would
** add incremental work to the common case with the only benefit being
** API elegance.  Punt for now.
*/
static void plwTerminate(PLWriter *pWriter){
  if( pWriter->dlw->iType>DL_DOCIDS ){
    char c[VARINT_MAX];
    int n = putVarint(c, POS_END);
    dataBufferAppend(pWriter->dlw->b, c, n);
  }
#ifndef NDEBUG
  /* Mark as terminated for assert in plwAdd(). */
  pWriter->iPos = -1;
#endif
}
................................................................................
** percentage of the plwTerminate() calls will cause a realloc), so
** this might be worth revisiting if the DataBuffer implementation
** changes.
*/
static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){
  if( pCollector->dlw.iType>DL_DOCIDS ){
    char c[VARINT_MAX];
    int n = putVarint(c, POS_END);
    dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n);
  }else{
    dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData);
  }
}
static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid){
  plwTerminate(&pCollector->plw);
................................................................................
  return result;
}

static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
                    const char *zFormat){
  char *zCommand = string_format(zFormat, zDb, zName);
  int rc;
  TRACE(("FTS3 sql: %s\n", zCommand));
  rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
  sqlite3_free(zCommand);
  return rc;
}

static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
                       sqlite3_stmt **ppStmt, const char *zFormat){
  char *zCommand = string_format(zFormat, zDb, zName);
  int rc;
  TRACE(("FTS3 prepare: %s\n", zCommand));
  rc = sqlite3_prepare_v2(db, zCommand, -1, ppStmt, NULL);
  sqlite3_free(zCommand);
  return rc;
}

/* end utility functions */

................................................................................

/*
** Free the memory used to contain a fulltext_vtab structure.
*/
static void fulltext_vtab_destroy(fulltext_vtab *v){
  int iStmt, i;

  TRACE(("FTS3 Destroy %p\n", v));
  for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
    if( v->pFulltextStatements[iStmt]!=NULL ){
      sqlite3_finalize(v->pFulltextStatements[iStmt]);
      v->pFulltextStatements[iStmt] = NULL;
    }
  }

................................................................................
#define TOKEN_SPACE       1    /* Any kind of whitespace */
#define TOKEN_ID          2    /* An identifier */
#define TOKEN_STRING      3    /* A string literal */
#define TOKEN_PUNCT       4    /* A single punctuation character */

/*
** If X is a character that can be used in an identifier then
** IdChar(X) will be true.  Otherwise it is false.
**
** For ASCII, any character with the high-order bit set is
** allowed in an identifier.  For 7-bit characters, 
** sqlite3IsIdChar[X] must be 1.
**
** Ticket #1066.  the SQL standard does not allow '$' in the
** middle of identfiers.  But many SQL implementations do. 
** SQLite will allow '$' in identifiers for compatibility.
** But the feature is undocumented.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};
#define IdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))


/*
** Return the length of the token that begins at z[0]. 
** Store the token type in *tokenType before returning.
*/
static int getToken(const char *z, int *tokenType){
  int i, c;
  switch( *z ){
    case 0: {
      *tokenType = TOKEN_EOF;
      return 0;
    }
    case ' ': case '\t': case '\n': case '\f': case '\r': {
................................................................................
    }
    case '[': {
      for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
      *tokenType = TOKEN_ID;
      return i;
    }
    default: {
      if( !IdChar(*z) ){
        break;
      }
      for(i=1; IdChar(z[i]); i++){}
      *tokenType = TOKEN_ID;
      return i;
    }
  }
  *tokenType = TOKEN_PUNCT;
  return 1;
}

/*
** A token extracted from a string is an instance of the following
** structure.
*/
typedef struct Token {
  const char *z;       /* Pointer to token text.  Not '\000' terminated */
  short int n;         /* Length of the token text in bytes. */
} Token;

/*
** Given a input string (which is really one of the argv[] parameters
** passed into xConnect or xCreate) split the string up into tokens.
** Return an array of pointers to '\000' terminated strings, one string
** for each non-whitespace token.
**
................................................................................
** Space to hold the returned array is obtained from a single
** malloc and should be freed by passing the return value to free().
** The individual strings within the token list are all a part of
** the single memory allocation and will all be freed at once.
*/
static char **tokenizeString(const char *z, int *pnToken){
  int nToken = 0;
  Token *aToken = sqlite3_malloc( strlen(z) * sizeof(aToken[0]) );
  int n = 1;
  int e, i;
  int totalSize = 0;
  char **azToken;
  char *zCopy;
  while( n>0 ){
    n = getToken(z, &e);
    if( e!=TOKEN_SPACE ){
      aToken[nToken].z = z;
      aToken[nToken].n = n;
      nToken++;
      totalSize += n+1;
    }
    z += n;
................................................................................
** Find the first alphanumeric token in the string zIn.  Null-terminate
** this token.  Remove any quotation marks.  And return a pointer to
** the result.
*/
static char *firstToken(char *zIn, char **pzTail){
  int n, ttype;
  while(1){
    n = getToken(zIn, &ttype);
    if( ttype==TOKEN_SPACE ){
      zIn += n;
    }else if( ttype==TOKEN_EOF ){
      *pzTail = zIn;
      return 0;
    }else{
      zIn[n] = 0;
................................................................................

  memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));

  /* Indicate that the buffer is not live. */
  v->nPendingData = -1;

  *ppVTab = &v->base;
  TRACE(("FTS3 Connect %p\n", v));

  return rc;

err:
  fulltext_vtab_destroy(v);
  return rc;
}
................................................................................
*/
static int fulltextCreate(sqlite3 *db, void *pAux,
                          int argc, const char * const *argv,
                          sqlite3_vtab **ppVTab, char **pzErr){
  int rc;
  TableSpec spec;
  StringBuffer schema;
  TRACE(("FTS3 Create\n"));

  rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc!=SQLITE_OK ) return rc;

  initStringBuffer(&schema);
  append(&schema, "CREATE TABLE %_content(");
  append(&schema, "  docid INTEGER PRIMARY KEY,");
................................................................................
  return rc;
}

/* Decide how to handle an SQL query. */
static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  fulltext_vtab *v = (fulltext_vtab *)pVTab;
  int i;
  TRACE(("FTS3 BestIndex\n"));

  for(i=0; i<pInfo->nConstraint; ++i){
    const struct sqlite3_index_constraint *pConstraint;
    pConstraint = &pInfo->aConstraint[i];
    if( pConstraint->usable ) {
      if( (pConstraint->iColumn==-1 || pConstraint->iColumn==v->nColumn+1) &&
          pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
        pInfo->idxNum = QUERY_DOCID;      /* lookup by docid */
        TRACE(("FTS3 QUERY_DOCID\n"));
      } else if( pConstraint->iColumn>=0 && pConstraint->iColumn<=v->nColumn &&
                 pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
        /* full-text search */
        pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
        TRACE(("FTS3 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
      } else continue;

      pInfo->aConstraintUsage[i].argvIndex = 1;
      pInfo->aConstraintUsage[i].omit = 1;

      /* An arbitrary value for now.
       * TODO: Perhaps docid matches should be considered cheaper than
................................................................................
    }
  }
  pInfo->idxNum = QUERY_GENERIC;
  return SQLITE_OK;
}

static int fulltextDisconnect(sqlite3_vtab *pVTab){
  TRACE(("FTS3 Disconnect %p\n", pVTab));
  fulltext_vtab_destroy((fulltext_vtab *)pVTab);
  return SQLITE_OK;
}

static int fulltextDestroy(sqlite3_vtab *pVTab){
  fulltext_vtab *v = (fulltext_vtab *)pVTab;
  int rc;

  TRACE(("FTS3 Destroy %p\n", pVTab));
  rc = sql_exec(v->db, v->zDb, v->zName,
                "drop table if exists %_content;"
                "drop table if exists %_segments;"
                "drop table if exists %_segdir;"
                );
  if( rc!=SQLITE_OK ) return rc;

................................................................................
  fulltext_cursor *c;

  c = (fulltext_cursor *) sqlite3_malloc(sizeof(fulltext_cursor));
  if( c ){
    memset(c, 0, sizeof(fulltext_cursor));
    /* sqlite will initialize c->base */
    *ppCursor = &c->base;
    TRACE(("FTS3 Open %p: %p\n", pVTab, c));
    return SQLITE_OK;
  }else{
    return SQLITE_NOMEM;
  }
}


................................................................................

/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  TRACE(("FTS3 Close %p\n", c));
  sqlite3_finalize(c->pStmt);
  queryClear(&c->q);
  snippetClear(&c->snippet);
  if( c->result.nData!=0 ) dlrDestroy(&c->reader);
  dataBufferDestroy(&c->result);
  sqlite3_free(c);
  return SQLITE_OK;
}

static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  int rc;

  TRACE(("FTS3 Next %p\n", pCursor));
  snippetClear(&c->snippet);
  if( c->iCursorType < QUERY_FULLTEXT ){
    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
    rc = sqlite3_step(c->pStmt);
    switch( rc ){
      case SQLITE_ROW:
        c->eof = 0;
................................................................................
  int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  fulltext_vtab *v = cursor_vtab(c);
  int rc;
  StringBuffer sb;

  TRACE(("FTS3 Filter %p\n",pCursor));

  initStringBuffer(&sb);
  append(&sb, "SELECT docid, ");
  appendList(&sb, v->nColumn, v->azContentColumn);
  append(&sb, " FROM %_content");
  if( idxNum!=QUERY_GENERIC ) append(&sb, " WHERE docid = ?");
  sqlite3_finalize(c->pStmt);
................................................................................
  int n;

  if( block ){
    memset(block, 0, sizeof(*block));
    dataBufferInit(&block->term, 0);
    dataBufferReplace(&block->term, pTerm, nTerm);

    n = putVarint(c, iHeight);
    n += putVarint(c+n, iChildBlock);
    dataBufferInit(&block->data, INTERIOR_MAX);
    dataBufferReplace(&block->data, c, n);
  }
  return block;
}

#ifndef NDEBUG
................................................................................
  sqlite_int64 iBlockid;

  assert( nData>0 );
  assert( pData!=0 );
  assert( pData+nData>pData );

  /* Must lead with height of node as a varint(n), n>0 */
  n = getVarint32(pData, &iDummy);
  assert( n>0 );
  assert( iDummy>0 );
  assert( n<nData );
  pData += n;
  nData -= n;

  /* Must contain iBlockid. */
  n = getVarint(pData, &iBlockid);
  assert( n>0 );
  assert( n<=nData );
  pData += n;
  nData -= n;

  /* Zero or more terms of positive length */
  if( nData!=0 ){
    /* First term is not delta-encoded. */
    n = getVarint32(pData, &iDummy);
    assert( n>0 );
    assert( iDummy>0 );
    assert( n+iDummy>0);
    assert( n+iDummy<=nData );
    pData += n+iDummy;
    nData -= n+iDummy;

    /* Following terms delta-encoded. */
    while( nData!=0 ){
      /* Length of shared prefix. */
      n = getVarint32(pData, &iDummy);
      assert( n>0 );
      assert( iDummy>=0 );
      assert( n<nData );
      pData += n;
      nData -= n;

      /* Length and data of distinct suffix. */
      n = getVarint32(pData, &iDummy);
      assert( n>0 );
      assert( iDummy>0 );
      assert( n+iDummy>0);
      assert( n+iDummy<=nData );
      pData += n+iDummy;
      nData -= n+iDummy;
    }
................................................................................
  /* The first term written into an interior node is actually
  ** associated with the second child added (the first child was added
  ** in interiorWriterInit, or in the if clause at the bottom of this
  ** function).  That term gets encoded straight up, with nPrefix left
  ** at 0.
  */
  if( pWriter->term.nData==0 ){
    n = putVarint(c, nTerm);
  }else{
    while( nPrefix<pWriter->term.nData &&
           pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
      nPrefix++;
    }

    n = putVarint(c, nPrefix);
    n += putVarint(c+n, nTerm-nPrefix);
  }

#ifndef NDEBUG
  pWriter->iLastChildBlock++;
#endif
  assert( pWriter->iLastChildBlock==iChildBlock );

................................................................................
  /* Require at least the leading flag byte */
  assert( nData>0 );
  assert( pData[0]!='\0' );

  CLEAR(pReader);

  /* Decode the base blockid, and set the cursor to the first term. */
  n = getVarint(pData+1, &pReader->iBlockid);
  assert( 1+n<=nData );
  pReader->pData = pData+1+n;
  pReader->nData = nData-(1+n);

  /* A single-child interior node (such as when a leaf node was too
  ** large for the segment directory) won't have any terms.
  ** Otherwise, decode the first term.
  */
  if( pReader->nData==0 ){
    dataBufferInit(&pReader->term, 0);
  }else{
    n = getVarint32(pReader->pData, &nTerm);
    dataBufferInit(&pReader->term, nTerm);
    dataBufferReplace(&pReader->term, pReader->pData+n, nTerm);
    assert( n+nTerm<=pReader->nData );
    pReader->pData += n+nTerm;
    pReader->nData -= n+nTerm;
  }
}
................................................................................
  ** next term.
  */
  if( pReader->nData==0 ){
    dataBufferReset(&pReader->term);
  }else{
    int n, nPrefix, nSuffix;

    n = getVarint32(pReader->pData, &nPrefix);
    n += getVarint32(pReader->pData+n, &nSuffix);

    /* Truncate the current term and append suffix data. */
    pReader->term.nData = nPrefix;
    dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix);

    assert( n+nSuffix<=pReader->nData );
    pReader->pData += n+nSuffix;
................................................................................

  if( nData==0 ) return;
  assert( nData>0 );
  assert( pData!=0 );
  assert( pData+nData>pData );

  /* Must lead with a varint(0) */
  n = getVarint32(pData, &iDummy);
  assert( iDummy==0 );
  assert( n>0 );
  assert( n<nData );
  pData += n;
  nData -= n;

  /* Leading term length and data must fit in buffer. */
  n = getVarint32(pData, &iDummy);
  assert( n>0 );
  assert( iDummy>0 );
  assert( n+iDummy>0 );
  assert( n+iDummy<nData );
  pData += n+iDummy;
  nData -= n+iDummy;

  /* Leading term's doclist length and data must fit. */
  n = getVarint32(pData, &iDummy);
  assert( n>0 );
  assert( iDummy>0 );
  assert( n+iDummy>0 );
  assert( n+iDummy<=nData );
  ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL);
  pData += n+iDummy;
  nData -= n+iDummy;

  /* Verify that trailing terms and doclists also are readable. */
  while( nData!=0 ){
    n = getVarint32(pData, &iDummy);
    assert( n>0 );
    assert( iDummy>=0 );
    assert( n<nData );
    pData += n;
    nData -= n;
    n = getVarint32(pData, &iDummy);
    assert( n>0 );
    assert( iDummy>0 );
    assert( n+iDummy>0 );
    assert( n+iDummy<nData );
    pData += n+iDummy;
    nData -= n+iDummy;

    n = getVarint32(pData, &iDummy);
    assert( n>0 );
    assert( iDummy>0 );
    assert( n+iDummy>0 );
    assert( n+iDummy<=nData );
    ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL);
    pData += n+iDummy;
    nData -= n+iDummy;
................................................................................
  rc = block_insert(v, pWriter->data.pData+iData, nData, &iBlockid);
  if( rc!=SQLITE_OK ) return rc;
  assert( iBlockid!=0 );

  /* Reconstruct the first term in the leaf for purposes of building
  ** the interior node.
  */
  n = getVarint32(pWriter->data.pData+iData+1, &nStartingTerm);
  pStartingTerm = pWriter->data.pData+iData+1+n;
  assert( pWriter->data.nData>iData+1+n+nStartingTerm );
  assert( pWriter->nTermDistinct>0 );
  assert( pWriter->nTermDistinct<=nStartingTerm );
  nStartingTerm = pWriter->nTermDistinct;

  if( pWriter->has_parent ){
................................................................................

  if( pWriter->data.nData==0 ){
    /* Encode the node header and leading term as:
    **  varint(0)
    **  varint(nTerm)
    **  char pTerm[nTerm]
    */
    n = putVarint(c, '\0');
    n += putVarint(c+n, nTerm);
    dataBufferAppend2(&pWriter->data, c, n, pTerm, nTerm);
  }else{
    /* Delta-encode the term as:
    **  varint(nPrefix)
    **  varint(nSuffix)
    **  char pTermSuffix[nSuffix]
    */
    n = putVarint(c, nPrefix);
    n += putVarint(c+n, nTerm-nPrefix);
    dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix);
  }
  dataBufferReplace(&pWriter->term, pTerm, nTerm);

  return nPrefix+1;
}

................................................................................
** iDoclistData and flushes the resulting complete node using
** leafWriterInternalFlush().
*/
static int leafWriterInlineFlush(fulltext_vtab *v, LeafWriter *pWriter,
                                 const char *pTerm, int nTerm,
                                 int iDoclistData){
  char c[VARINT_MAX+VARINT_MAX];
  int iData, n = putVarint(c, 0);
  n += putVarint(c+n, nTerm);

  /* There should always be room for the header.  Even if pTerm shared
  ** a substantial prefix with the previous term, the entire prefix
  ** could be constructed from earlier data in the doclist, so there
  ** should be room.
  */
  assert( iDoclistData>=n+nTerm );
................................................................................

  /* Estimate the length of the merged doclist so we can leave space
  ** to encode it.
  */
  for(i=0, nData=0; i<nReaders; i++){
    nData += dlrAllDataBytes(&pReaders[i]);
  }
  n = putVarint(c, nData);
  dataBufferAppend(&pWriter->data, c, n);

  docListMerge(&pWriter->data, pReaders, nReaders);
  ASSERT_VALID_DOCLIST(DL_DEFAULT,
                       pWriter->data.pData+iDoclistData+n,
                       pWriter->data.nData-iDoclistData-n, NULL);

  /* The actual amount of doclist data at this point could be smaller
  ** than the length we encoded.  Additionally, the space required to
  ** encode this length could be smaller.  For small doclists, this is
  ** not a big deal, we can just use memmove() to adjust things.
  */
  nActualData = pWriter->data.nData-(iDoclistData+n);
  nActual = putVarint(c, nActualData);
  assert( nActualData<=nData );
  assert( nActual<=n );

  /* If the new doclist is big enough for force a standalone leaf
  ** node, we can immediately flush it inline without doing the
  ** memmove().
  */
................................................................................
    /* Flush out the leading data as a node */
    rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
    if( rc!=SQLITE_OK ) return rc;

    pWriter->nTermDistinct = nTermDistinct;

    /* Rebuild header using the current term */
    n = putVarint(pWriter->data.pData, 0);
    n += putVarint(pWriter->data.pData+n, nTerm);
    memcpy(pWriter->data.pData+n, pTerm, nTerm);
    n += nTerm;

    /* There should always be room, because the previous encoding
    ** included all data necessary to construct the term.
    */
    assert( n<iDoclistData );
................................................................................
  return pReader->term.pData;
}

/* Access the doclist data for the current term. */
static int leafReaderDataBytes(LeafReader *pReader){
  int nData;
  assert( pReader->term.nData>0 );
  getVarint32(pReader->pData, &nData);
  return nData;
}
static const char *leafReaderData(LeafReader *pReader){
  int n, nData;
  assert( pReader->term.nData>0 );
  n = getVarint32(pReader->pData, &nData);
  return pReader->pData+n;
}

static void leafReaderInit(const char *pData, int nData,
                           LeafReader *pReader){
  int nTerm, n;

  assert( nData>0 );
  assert( pData[0]=='\0' );

  CLEAR(pReader);

  /* Read the first term, skipping the header byte. */
  n = getVarint32(pData+1, &nTerm);
  dataBufferInit(&pReader->term, nTerm);
  dataBufferReplace(&pReader->term, pData+1+n, nTerm);

  /* Position after the first term. */
  assert( 1+n+nTerm<nData );
  pReader->pData = pData+1+n+nTerm;
  pReader->nData = nData-1-n-nTerm;
................................................................................

/* Step the reader forward to the next term. */
static void leafReaderStep(LeafReader *pReader){
  int n, nData, nPrefix, nSuffix;
  assert( !leafReaderAtEnd(pReader) );

  /* Skip previous entry's data block. */
  n = getVarint32(pReader->pData, &nData);
  assert( n+nData<=pReader->nData );
  pReader->pData += n+nData;
  pReader->nData -= n+nData;

  if( !leafReaderAtEnd(pReader) ){
    /* Construct the new term using a prefix from the old term plus a
    ** suffix from the leaf data.
    */
    n = getVarint32(pReader->pData, &nPrefix);
    n += getVarint32(pReader->pData+n, &nSuffix);
    assert( n+nSuffix<pReader->nData );
    pReader->term.nData = nPrefix;
    dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix);

    pReader->pData += n+nSuffix;
    pReader->nData -= n+nSuffix;
  }
................................................................................
/* This function implements the xUpdate callback; it's the top-level entry
 * point for inserting, deleting or updating a row in a full-text table. */
static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
                          sqlite_int64 *pRowid){
  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  int rc;

  TRACE(("FTS3 Update %p\n", pVtab));

  if( nArg<2 ){
    rc = index_delete(v, sqlite3_value_int64(ppArg[0]));
  } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
    /* An update:
     * ppArg[0] = old rowid
     * ppArg[1] = new rowid
................................................................................
    }
  }

  return rc;
}

static int fulltextSync(sqlite3_vtab *pVtab){
  TRACE(("FTS3 xSync()\n"));
  return flushPendingTerms((fulltext_vtab *)pVtab);
}

static int fulltextBegin(sqlite3_vtab *pVtab){
  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  TRACE(("FTS3 xBegin()\n"));

  /* Any buffered updates should have been cleared by the previous
  ** transaction.
  */
  assert( v->nPendingData<0 );
  return clearPendingTerms(v);
}

static int fulltextCommit(sqlite3_vtab *pVtab){
  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  TRACE(("FTS3 xCommit()\n"));

  /* Buffered updates should have been cleared by fulltextSync(). */
  assert( v->nPendingData<0 );
  return clearPendingTerms(v);
}

static int fulltextRollback(sqlite3_vtab *pVtab){
  TRACE(("FTS3 xRollback()\n"));
  return clearPendingTerms((fulltext_vtab *)pVtab);
}

/*
** Implementation of the snippet() function for FTS3
*/
static void snippetFunc(







<







 







|

|







 







|







 







|







 







|

|







 







|





|







 







|




|



|


|
|







 







|




|


|

|
|







 







|

|







 







|







 







|

|


|












|

|







 







|
|





|



|


|







 







|







 







|







 







|







 







|









|







 







|







 







|



|






|








|






|







 







|


|












|


|







 







|






|







 







|







 







|







 







|







 







|








|




|







 







|








|







 







|







 







|













|







 







|







 







|
|







 







|







|








|










|







|







 







|






|
|







 







|











|







 







|
|







 







|







|








|










|





|







|







 







|







 







|
|







|
|







 







|
|







 







|













|







 







|
|







 







|





|













|







 







|








|
|







 







|







 







|





|










|







|







283
284
285
286
287
288
289

290
291
292
293
294
295
296
...
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
...
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
...
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
...
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
...
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
...
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
...
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
...
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
...
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
...
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
...
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
...
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
....
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
....
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
....
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
....
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
....
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
....
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
....
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
....
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
....
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
....
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
....
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
....
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
....
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
....
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
....
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
....
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
....
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
....
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
....
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
....
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
....
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
....
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
....
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
....
4955
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
....
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
....
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
....
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
....
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
....
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
6005
....
6045
6046
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059
6060
6061
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
6073
6074
6075
6076
6077
6078
6079
6080
6081
6082
6083
6084
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "fts3.h"
#include "fts3_hash.h"
#include "fts3_tokenizer.h"

#ifndef SQLITE_CORE 
  #include "sqlite3ext.h"
  SQLITE_EXTENSION_INIT1
#endif


/* TODO(shess) MAN, this thing needs some refactoring.  At minimum, it
................................................................................
**  - table query functions
**
** Put the query functions last because they're likely to reference
** typedefs or functions from the table update section.
*/

#if 0
# define FTSTRACE(A)  printf A; fflush(stdout)
#else
# define FTSTRACE(A)
#endif

/*
** Default span for NEAR operators.
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10

................................................................................

/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
#define VARINT_MAX 10

/* Write a 64-bit variable-length integer to memory starting at p[0].
 * The length of data written will be between 1 and VARINT_MAX bytes.
 * The number of bytes written is returned. */
static int fts3PutVarint(char *p, sqlite_int64 v){
  unsigned char *q = (unsigned char *) p;
  sqlite_uint64 vu = v;
  do{
    *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
    vu >>= 7;
  }while( vu!=0 );
  q[-1] &= 0x7f;  /* turn off high bit in final byte */
................................................................................
  assert( q - (unsigned char *)p <= VARINT_MAX );
  return (int) (q - (unsigned char *)p);
}

/* Read a 64-bit variable-length integer from memory starting at p[0].
 * Return the number of bytes read, or 0 on error.
 * The value is stored in *v. */
static int fts3GetVarint(const char *p, sqlite_int64 *v){
  const unsigned char *q = (const unsigned char *) p;
  sqlite_uint64 x = 0, y = 1;
  while( (*q & 0x80) == 0x80 ){
    x += y * (*q++ & 0x7f);
    y <<= 7;
    if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
      assert( 0 );
................................................................................
    }
  }
  x += y * (*q++);
  *v = (sqlite_int64) x;
  return (int) (q - (unsigned char *)p);
}

static int fts3GetVarint32(const char *p, int *pi){
 sqlite_int64 i;
 int ret = fts3GetVarint(p, &i);
 *pi = (int) i;
 assert( *pi==i );
 return ret;
}

/*******************************************************************/
/* DataBuffer is used to collect data into a buffer in piecemeal
................................................................................
}
/* TODO(shess) Consider adding a field to track iDocid varint length
** to make these two functions faster.  This might matter (a tiny bit)
** for queries.
*/
static const char *dlrPosData(DLReader *pReader){
  sqlite_int64 iDummy;
  int n = fts3GetVarint(pReader->pData, &iDummy);
  assert( !dlrAtEnd(pReader) );
  return pReader->pData+n;
}
static int dlrPosDataLen(DLReader *pReader){
  sqlite_int64 iDummy;
  int n = fts3GetVarint(pReader->pData, &iDummy);
  assert( !dlrAtEnd(pReader) );
  return pReader->nElement-n;
}
static void dlrStep(DLReader *pReader){
  assert( !dlrAtEnd(pReader) );

  /* Skip past current doclist element. */
................................................................................
  assert( pReader->nElement<=pReader->nData );
  pReader->pData += pReader->nElement;
  pReader->nData -= pReader->nElement;

  /* If there is more data, read the next doclist element. */
  if( pReader->nData!=0 ){
    sqlite_int64 iDocidDelta;
    int iDummy, n = fts3GetVarint(pReader->pData, &iDocidDelta);
    pReader->iDocid += iDocidDelta;
    if( pReader->iType>=DL_POSITIONS ){
      assert( n<pReader->nData );
      while( 1 ){
        n += fts3GetVarint32(pReader->pData+n, &iDummy);
        assert( n<=pReader->nData );
        if( iDummy==POS_END ) break;
        if( iDummy==POS_COLUMN ){
          n += fts3GetVarint32(pReader->pData+n, &iDummy);
          assert( n<pReader->nData );
        }else if( pReader->iType==DL_POSITIONS_OFFSETS ){
          n += fts3GetVarint32(pReader->pData+n, &iDummy);
          n += fts3GetVarint32(pReader->pData+n, &iDummy);
          assert( n<pReader->nData );
        }
      }
    }
    pReader->nElement = n;
    assert( pReader->nElement<=pReader->nData );
  }
................................................................................
                            sqlite_int64 *pLastDocid){
  sqlite_int64 iPrevDocid = 0;
  assert( nData>0 );
  assert( pData!=0 );
  assert( pData+nData>pData );
  while( nData!=0 ){
    sqlite_int64 iDocidDelta;
    int n = fts3GetVarint(pData, &iDocidDelta);
    iPrevDocid += iDocidDelta;
    if( iType>DL_DOCIDS ){
      int iDummy;
      while( 1 ){
        n += fts3GetVarint32(pData+n, &iDummy);
        if( iDummy==POS_END ) break;
        if( iDummy==POS_COLUMN ){
          n += fts3GetVarint32(pData+n, &iDummy);
        }else if( iType>DL_POSITIONS ){
          n += fts3GetVarint32(pData+n, &iDummy);
          n += fts3GetVarint32(pData+n, &iDummy);
        }
        assert( n<=nData );
      }
    }
    assert( n<=nData );
    pData += n;
    nData -= n;
................................................................................
  char c[VARINT_MAX];
  int nFirstOld, nFirstNew;     /* Old and new varint len of first docid. */
#ifndef NDEBUG
  sqlite_int64 iLastDocidDelta;
#endif

  /* Recode the initial docid as delta from iPrevDocid. */
  nFirstOld = fts3GetVarint(pData, &iDocid);
  assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) );
  nFirstNew = fts3PutVarint(c, iFirstDocid-pWriter->iPrevDocid);

  /* Verify that the incoming doclist is valid AND that it ends with
  ** the expected docid.  This is essential because we'll trust this
  ** docid in future delta-encoding.
  */
  ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta);
  assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta );
................................................................................
}
static void dlwCopy(DLWriter *pWriter, DLReader *pReader){
  dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader),
            dlrDocid(pReader), dlrDocid(pReader));
}
static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){
  char c[VARINT_MAX];
  int n = fts3PutVarint(c, iDocid-pWriter->iPrevDocid);

  /* Docids must ascend. */
  assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid );
  assert( pWriter->iType==DL_DOCIDS );

  dataBufferAppend(pWriter->b, c, n);
  pWriter->iPrevDocid = iDocid;
................................................................................
  assert( !plrAtEnd(pReader) );

  if( pReader->nData==0 ){
    pReader->pData = NULL;
    return;
  }

  n = fts3GetVarint32(pReader->pData, &i);
  if( i==POS_COLUMN ){
    n += fts3GetVarint32(pReader->pData+n, &pReader->iColumn);
    pReader->iPosition = 0;
    pReader->iStartOffset = 0;
    n += fts3GetVarint32(pReader->pData+n, &i);
  }
  /* Should never see adjacent column changes. */
  assert( i!=POS_COLUMN );

  if( i==POS_END ){
    pReader->nData = 0;
    pReader->pData = NULL;
    return;
  }

  pReader->iPosition += i-POS_BASE;
  if( pReader->iType==DL_POSITIONS_OFFSETS ){
    n += fts3GetVarint32(pReader->pData+n, &i);
    pReader->iStartOffset += i;
    n += fts3GetVarint32(pReader->pData+n, &i);
    pReader->iEndOffset = pReader->iStartOffset+i;
  }
  assert( n<=pReader->nData );
  pReader->pData += n;
  pReader->nData -= n;
}

................................................................................

  /* Ban plwAdd() after plwTerminate(). */
  assert( pWriter->iPos!=-1 );

  if( pWriter->dlw->iType==DL_DOCIDS ) return;

  if( iColumn!=pWriter->iColumn ){
    n += fts3PutVarint(c+n, POS_COLUMN);
    n += fts3PutVarint(c+n, iColumn);
    pWriter->iColumn = iColumn;
    pWriter->iPos = 0;
    pWriter->iOffset = 0;
  }
  assert( iPos>=pWriter->iPos );
  n += fts3PutVarint(c+n, POS_BASE+(iPos-pWriter->iPos));
  pWriter->iPos = iPos;
  if( pWriter->dlw->iType==DL_POSITIONS_OFFSETS ){
    assert( iStartOffset>=pWriter->iOffset );
    n += fts3PutVarint(c+n, iStartOffset-pWriter->iOffset);
    pWriter->iOffset = iStartOffset;
    assert( iEndOffset>=iStartOffset );
    n += fts3PutVarint(c+n, iEndOffset-iStartOffset);
  }
  dataBufferAppend(pWriter->dlw->b, c, n);
}
static void plwCopy(PLWriter *pWriter, PLReader *pReader){
  plwAdd(pWriter, plrColumn(pReader), plrPosition(pReader),
         plrStartOffset(pReader), plrEndOffset(pReader));
}
................................................................................
  char c[VARINT_MAX];
  int n;

  pWriter->dlw = dlw;

  /* Docids must ascend. */
  assert( !pWriter->dlw->has_iPrevDocid || iDocid>pWriter->dlw->iPrevDocid );
  n = fts3PutVarint(c, iDocid-pWriter->dlw->iPrevDocid);
  dataBufferAppend(pWriter->dlw->b, c, n);
  pWriter->dlw->iPrevDocid = iDocid;
#ifndef NDEBUG
  pWriter->dlw->has_iPrevDocid = 1;
#endif

  pWriter->iColumn = 0;
................................................................................
** terminator, so that the output is always correct.  But that would
** add incremental work to the common case with the only benefit being
** API elegance.  Punt for now.
*/
static void plwTerminate(PLWriter *pWriter){
  if( pWriter->dlw->iType>DL_DOCIDS ){
    char c[VARINT_MAX];
    int n = fts3PutVarint(c, POS_END);
    dataBufferAppend(pWriter->dlw->b, c, n);
  }
#ifndef NDEBUG
  /* Mark as terminated for assert in plwAdd(). */
  pWriter->iPos = -1;
#endif
}
................................................................................
** percentage of the plwTerminate() calls will cause a realloc), so
** this might be worth revisiting if the DataBuffer implementation
** changes.
*/
static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){
  if( pCollector->dlw.iType>DL_DOCIDS ){
    char c[VARINT_MAX];
    int n = fts3PutVarint(c, POS_END);
    dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n);
  }else{
    dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData);
  }
}
static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid){
  plwTerminate(&pCollector->plw);
................................................................................
  return result;
}

static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
                    const char *zFormat){
  char *zCommand = string_format(zFormat, zDb, zName);
  int rc;
  FTSTRACE(("FTS3 sql: %s\n", zCommand));
  rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
  sqlite3_free(zCommand);
  return rc;
}

static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
                       sqlite3_stmt **ppStmt, const char *zFormat){
  char *zCommand = string_format(zFormat, zDb, zName);
  int rc;
  FTSTRACE(("FTS3 prepare: %s\n", zCommand));
  rc = sqlite3_prepare_v2(db, zCommand, -1, ppStmt, NULL);
  sqlite3_free(zCommand);
  return rc;
}

/* end utility functions */

................................................................................

/*
** Free the memory used to contain a fulltext_vtab structure.
*/
static void fulltext_vtab_destroy(fulltext_vtab *v){
  int iStmt, i;

  FTSTRACE(("FTS3 Destroy %p\n", v));
  for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
    if( v->pFulltextStatements[iStmt]!=NULL ){
      sqlite3_finalize(v->pFulltextStatements[iStmt]);
      v->pFulltextStatements[iStmt] = NULL;
    }
  }

................................................................................
#define TOKEN_SPACE       1    /* Any kind of whitespace */
#define TOKEN_ID          2    /* An identifier */
#define TOKEN_STRING      3    /* A string literal */
#define TOKEN_PUNCT       4    /* A single punctuation character */

/*
** If X is a character that can be used in an identifier then
** ftsIdChar(X) will be true.  Otherwise it is false.
**
** For ASCII, any character with the high-order bit set is
** allowed in an identifier.  For 7-bit characters, 
** isFtsIdChar[X] must be 1.
**
** Ticket #1066.  the SQL standard does not allow '$' in the
** middle of identfiers.  But many SQL implementations do. 
** SQLite will allow '$' in identifiers for compatibility.
** But the feature is undocumented.
*/
static const char isFtsIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};
#define ftsIdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isFtsIdChar[c-0x20]))


/*
** Return the length of the token that begins at z[0]. 
** Store the token type in *tokenType before returning.
*/
static int ftsGetToken(const char *z, int *tokenType){
  int i, c;
  switch( *z ){
    case 0: {
      *tokenType = TOKEN_EOF;
      return 0;
    }
    case ' ': case '\t': case '\n': case '\f': case '\r': {
................................................................................
    }
    case '[': {
      for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
      *tokenType = TOKEN_ID;
      return i;
    }
    default: {
      if( !ftsIdChar(*z) ){
        break;
      }
      for(i=1; ftsIdChar(z[i]); i++){}
      *tokenType = TOKEN_ID;
      return i;
    }
  }
  *tokenType = TOKEN_PUNCT;
  return 1;
}

/*
** A token extracted from a string is an instance of the following
** structure.
*/
typedef struct FtsToken {
  const char *z;       /* Pointer to token text.  Not '\000' terminated */
  short int n;         /* Length of the token text in bytes. */
} FtsToken;

/*
** Given a input string (which is really one of the argv[] parameters
** passed into xConnect or xCreate) split the string up into tokens.
** Return an array of pointers to '\000' terminated strings, one string
** for each non-whitespace token.
**
................................................................................
** Space to hold the returned array is obtained from a single
** malloc and should be freed by passing the return value to free().
** The individual strings within the token list are all a part of
** the single memory allocation and will all be freed at once.
*/
static char **tokenizeString(const char *z, int *pnToken){
  int nToken = 0;
  FtsToken *aToken = sqlite3_malloc( strlen(z) * sizeof(aToken[0]) );
  int n = 1;
  int e, i;
  int totalSize = 0;
  char **azToken;
  char *zCopy;
  while( n>0 ){
    n = ftsGetToken(z, &e);
    if( e!=TOKEN_SPACE ){
      aToken[nToken].z = z;
      aToken[nToken].n = n;
      nToken++;
      totalSize += n+1;
    }
    z += n;
................................................................................
** Find the first alphanumeric token in the string zIn.  Null-terminate
** this token.  Remove any quotation marks.  And return a pointer to
** the result.
*/
static char *firstToken(char *zIn, char **pzTail){
  int n, ttype;
  while(1){
    n = ftsGetToken(zIn, &ttype);
    if( ttype==TOKEN_SPACE ){
      zIn += n;
    }else if( ttype==TOKEN_EOF ){
      *pzTail = zIn;
      return 0;
    }else{
      zIn[n] = 0;
................................................................................

  memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));

  /* Indicate that the buffer is not live. */
  v->nPendingData = -1;

  *ppVTab = &v->base;
  FTSTRACE(("FTS3 Connect %p\n", v));

  return rc;

err:
  fulltext_vtab_destroy(v);
  return rc;
}
................................................................................
*/
static int fulltextCreate(sqlite3 *db, void *pAux,
                          int argc, const char * const *argv,
                          sqlite3_vtab **ppVTab, char **pzErr){
  int rc;
  TableSpec spec;
  StringBuffer schema;
  FTSTRACE(("FTS3 Create\n"));

  rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc!=SQLITE_OK ) return rc;

  initStringBuffer(&schema);
  append(&schema, "CREATE TABLE %_content(");
  append(&schema, "  docid INTEGER PRIMARY KEY,");
................................................................................
  return rc;
}

/* Decide how to handle an SQL query. */
static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  fulltext_vtab *v = (fulltext_vtab *)pVTab;
  int i;
  FTSTRACE(("FTS3 BestIndex\n"));

  for(i=0; i<pInfo->nConstraint; ++i){
    const struct sqlite3_index_constraint *pConstraint;
    pConstraint = &pInfo->aConstraint[i];
    if( pConstraint->usable ) {
      if( (pConstraint->iColumn==-1 || pConstraint->iColumn==v->nColumn+1) &&
          pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
        pInfo->idxNum = QUERY_DOCID;      /* lookup by docid */
        FTSTRACE(("FTS3 QUERY_DOCID\n"));
      } else if( pConstraint->iColumn>=0 && pConstraint->iColumn<=v->nColumn &&
                 pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
        /* full-text search */
        pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
        FTSTRACE(("FTS3 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
      } else continue;

      pInfo->aConstraintUsage[i].argvIndex = 1;
      pInfo->aConstraintUsage[i].omit = 1;

      /* An arbitrary value for now.
       * TODO: Perhaps docid matches should be considered cheaper than
................................................................................
    }
  }
  pInfo->idxNum = QUERY_GENERIC;
  return SQLITE_OK;
}

static int fulltextDisconnect(sqlite3_vtab *pVTab){
  FTSTRACE(("FTS3 Disconnect %p\n", pVTab));
  fulltext_vtab_destroy((fulltext_vtab *)pVTab);
  return SQLITE_OK;
}

static int fulltextDestroy(sqlite3_vtab *pVTab){
  fulltext_vtab *v = (fulltext_vtab *)pVTab;
  int rc;

  FTSTRACE(("FTS3 Destroy %p\n", pVTab));
  rc = sql_exec(v->db, v->zDb, v->zName,
                "drop table if exists %_content;"
                "drop table if exists %_segments;"
                "drop table if exists %_segdir;"
                );
  if( rc!=SQLITE_OK ) return rc;

................................................................................
  fulltext_cursor *c;

  c = (fulltext_cursor *) sqlite3_malloc(sizeof(fulltext_cursor));
  if( c ){
    memset(c, 0, sizeof(fulltext_cursor));
    /* sqlite will initialize c->base */
    *ppCursor = &c->base;
    FTSTRACE(("FTS3 Open %p: %p\n", pVTab, c));
    return SQLITE_OK;
  }else{
    return SQLITE_NOMEM;
  }
}


................................................................................

/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  FTSTRACE(("FTS3 Close %p\n", c));
  sqlite3_finalize(c->pStmt);
  queryClear(&c->q);
  snippetClear(&c->snippet);
  if( c->result.nData!=0 ) dlrDestroy(&c->reader);
  dataBufferDestroy(&c->result);
  sqlite3_free(c);
  return SQLITE_OK;
}

static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  int rc;

  FTSTRACE(("FTS3 Next %p\n", pCursor));
  snippetClear(&c->snippet);
  if( c->iCursorType < QUERY_FULLTEXT ){
    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
    rc = sqlite3_step(c->pStmt);
    switch( rc ){
      case SQLITE_ROW:
        c->eof = 0;
................................................................................
  int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  fulltext_vtab *v = cursor_vtab(c);
  int rc;
  StringBuffer sb;

  FTSTRACE(("FTS3 Filter %p\n",pCursor));

  initStringBuffer(&sb);
  append(&sb, "SELECT docid, ");
  appendList(&sb, v->nColumn, v->azContentColumn);
  append(&sb, " FROM %_content");
  if( idxNum!=QUERY_GENERIC ) append(&sb, " WHERE docid = ?");
  sqlite3_finalize(c->pStmt);
................................................................................
  int n;

  if( block ){
    memset(block, 0, sizeof(*block));
    dataBufferInit(&block->term, 0);
    dataBufferReplace(&block->term, pTerm, nTerm);

    n = fts3PutVarint(c, iHeight);
    n += fts3PutVarint(c+n, iChildBlock);
    dataBufferInit(&block->data, INTERIOR_MAX);
    dataBufferReplace(&block->data, c, n);
  }
  return block;
}

#ifndef NDEBUG
................................................................................
  sqlite_int64 iBlockid;

  assert( nData>0 );
  assert( pData!=0 );
  assert( pData+nData>pData );

  /* Must lead with height of node as a varint(n), n>0 */
  n = fts3GetVarint32(pData, &iDummy);
  assert( n>0 );
  assert( iDummy>0 );
  assert( n<nData );
  pData += n;
  nData -= n;

  /* Must contain iBlockid. */
  n = fts3GetVarint(pData, &iBlockid);
  assert( n>0 );
  assert( n<=nData );
  pData += n;
  nData -= n;

  /* Zero or more terms of positive length */
  if( nData!=0 ){
    /* First term is not delta-encoded. */
    n = fts3GetVarint32(pData, &iDummy);
    assert( n>0 );
    assert( iDummy>0 );
    assert( n+iDummy>0);
    assert( n+iDummy<=nData );
    pData += n+iDummy;
    nData -= n+iDummy;

    /* Following terms delta-encoded. */
    while( nData!=0 ){
      /* Length of shared prefix. */
      n = fts3GetVarint32(pData, &iDummy);
      assert( n>0 );
      assert( iDummy>=0 );
      assert( n<nData );
      pData += n;
      nData -= n;

      /* Length and data of distinct suffix. */
      n = fts3GetVarint32(pData, &iDummy);
      assert( n>0 );
      assert( iDummy>0 );
      assert( n+iDummy>0);
      assert( n+iDummy<=nData );
      pData += n+iDummy;
      nData -= n+iDummy;
    }
................................................................................
  /* The first term written into an interior node is actually
  ** associated with the second child added (the first child was added
  ** in interiorWriterInit, or in the if clause at the bottom of this
  ** function).  That term gets encoded straight up, with nPrefix left
  ** at 0.
  */
  if( pWriter->term.nData==0 ){
    n = fts3PutVarint(c, nTerm);
  }else{
    while( nPrefix<pWriter->term.nData &&
           pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
      nPrefix++;
    }

    n = fts3PutVarint(c, nPrefix);
    n += fts3PutVarint(c+n, nTerm-nPrefix);
  }

#ifndef NDEBUG
  pWriter->iLastChildBlock++;
#endif
  assert( pWriter->iLastChildBlock==iChildBlock );

................................................................................
  /* Require at least the leading flag byte */
  assert( nData>0 );
  assert( pData[0]!='\0' );

  CLEAR(pReader);

  /* Decode the base blockid, and set the cursor to the first term. */
  n = fts3GetVarint(pData+1, &pReader->iBlockid);
  assert( 1+n<=nData );
  pReader->pData = pData+1+n;
  pReader->nData = nData-(1+n);

  /* A single-child interior node (such as when a leaf node was too
  ** large for the segment directory) won't have any terms.
  ** Otherwise, decode the first term.
  */
  if( pReader->nData==0 ){
    dataBufferInit(&pReader->term, 0);
  }else{
    n = fts3GetVarint32(pReader->pData, &nTerm);
    dataBufferInit(&pReader->term, nTerm);
    dataBufferReplace(&pReader->term, pReader->pData+n, nTerm);
    assert( n+nTerm<=pReader->nData );
    pReader->pData += n+nTerm;
    pReader->nData -= n+nTerm;
  }
}
................................................................................
  ** next term.
  */
  if( pReader->nData==0 ){
    dataBufferReset(&pReader->term);
  }else{
    int n, nPrefix, nSuffix;

    n = fts3GetVarint32(pReader->pData, &nPrefix);
    n += fts3GetVarint32(pReader->pData+n, &nSuffix);

    /* Truncate the current term and append suffix data. */
    pReader->term.nData = nPrefix;
    dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix);

    assert( n+nSuffix<=pReader->nData );
    pReader->pData += n+nSuffix;
................................................................................

  if( nData==0 ) return;
  assert( nData>0 );
  assert( pData!=0 );
  assert( pData+nData>pData );

  /* Must lead with a varint(0) */
  n = fts3GetVarint32(pData, &iDummy);
  assert( iDummy==0 );
  assert( n>0 );
  assert( n<nData );
  pData += n;
  nData -= n;

  /* Leading term length and data must fit in buffer. */
  n = fts3GetVarint32(pData, &iDummy);
  assert( n>0 );
  assert( iDummy>0 );
  assert( n+iDummy>0 );
  assert( n+iDummy<nData );
  pData += n+iDummy;
  nData -= n+iDummy;

  /* Leading term's doclist length and data must fit. */
  n = fts3GetVarint32(pData, &iDummy);
  assert( n>0 );
  assert( iDummy>0 );
  assert( n+iDummy>0 );
  assert( n+iDummy<=nData );
  ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL);
  pData += n+iDummy;
  nData -= n+iDummy;

  /* Verify that trailing terms and doclists also are readable. */
  while( nData!=0 ){
    n = fts3GetVarint32(pData, &iDummy);
    assert( n>0 );
    assert( iDummy>=0 );
    assert( n<nData );
    pData += n;
    nData -= n;
    n = fts3GetVarint32(pData, &iDummy);
    assert( n>0 );
    assert( iDummy>0 );
    assert( n+iDummy>0 );
    assert( n+iDummy<nData );
    pData += n+iDummy;
    nData -= n+iDummy;

    n = fts3GetVarint32(pData, &iDummy);
    assert( n>0 );
    assert( iDummy>0 );
    assert( n+iDummy>0 );
    assert( n+iDummy<=nData );
    ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL);
    pData += n+iDummy;
    nData -= n+iDummy;
................................................................................
  rc = block_insert(v, pWriter->data.pData+iData, nData, &iBlockid);
  if( rc!=SQLITE_OK ) return rc;
  assert( iBlockid!=0 );

  /* Reconstruct the first term in the leaf for purposes of building
  ** the interior node.
  */
  n = fts3GetVarint32(pWriter->data.pData+iData+1, &nStartingTerm);
  pStartingTerm = pWriter->data.pData+iData+1+n;
  assert( pWriter->data.nData>iData+1+n+nStartingTerm );
  assert( pWriter->nTermDistinct>0 );
  assert( pWriter->nTermDistinct<=nStartingTerm );
  nStartingTerm = pWriter->nTermDistinct;

  if( pWriter->has_parent ){
................................................................................

  if( pWriter->data.nData==0 ){
    /* Encode the node header and leading term as:
    **  varint(0)
    **  varint(nTerm)
    **  char pTerm[nTerm]
    */
    n = fts3PutVarint(c, '\0');
    n += fts3PutVarint(c+n, nTerm);
    dataBufferAppend2(&pWriter->data, c, n, pTerm, nTerm);
  }else{
    /* Delta-encode the term as:
    **  varint(nPrefix)
    **  varint(nSuffix)
    **  char pTermSuffix[nSuffix]
    */
    n = fts3PutVarint(c, nPrefix);
    n += fts3PutVarint(c+n, nTerm-nPrefix);
    dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix);
  }
  dataBufferReplace(&pWriter->term, pTerm, nTerm);

  return nPrefix+1;
}

................................................................................
** iDoclistData and flushes the resulting complete node using
** leafWriterInternalFlush().
*/
static int leafWriterInlineFlush(fulltext_vtab *v, LeafWriter *pWriter,
                                 const char *pTerm, int nTerm,
                                 int iDoclistData){
  char c[VARINT_MAX+VARINT_MAX];
  int iData, n = fts3PutVarint(c, 0);
  n += fts3PutVarint(c+n, nTerm);

  /* There should always be room for the header.  Even if pTerm shared
  ** a substantial prefix with the previous term, the entire prefix
  ** could be constructed from earlier data in the doclist, so there
  ** should be room.
  */
  assert( iDoclistData>=n+nTerm );
................................................................................

  /* Estimate the length of the merged doclist so we can leave space
  ** to encode it.
  */
  for(i=0, nData=0; i<nReaders; i++){
    nData += dlrAllDataBytes(&pReaders[i]);
  }
  n = fts3PutVarint(c, nData);
  dataBufferAppend(&pWriter->data, c, n);

  docListMerge(&pWriter->data, pReaders, nReaders);
  ASSERT_VALID_DOCLIST(DL_DEFAULT,
                       pWriter->data.pData+iDoclistData+n,
                       pWriter->data.nData-iDoclistData-n, NULL);

  /* The actual amount of doclist data at this point could be smaller
  ** than the length we encoded.  Additionally, the space required to
  ** encode this length could be smaller.  For small doclists, this is
  ** not a big deal, we can just use memmove() to adjust things.
  */
  nActualData = pWriter->data.nData-(iDoclistData+n);
  nActual = fts3PutVarint(c, nActualData);
  assert( nActualData<=nData );
  assert( nActual<=n );

  /* If the new doclist is big enough for force a standalone leaf
  ** node, we can immediately flush it inline without doing the
  ** memmove().
  */
................................................................................
    /* Flush out the leading data as a node */
    rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
    if( rc!=SQLITE_OK ) return rc;

    pWriter->nTermDistinct = nTermDistinct;

    /* Rebuild header using the current term */
    n = fts3PutVarint(pWriter->data.pData, 0);
    n += fts3PutVarint(pWriter->data.pData+n, nTerm);
    memcpy(pWriter->data.pData+n, pTerm, nTerm);
    n += nTerm;

    /* There should always be room, because the previous encoding
    ** included all data necessary to construct the term.
    */
    assert( n<iDoclistData );
................................................................................
  return pReader->term.pData;
}

/* Access the doclist data for the current term. */
static int leafReaderDataBytes(LeafReader *pReader){
  int nData;
  assert( pReader->term.nData>0 );
  fts3GetVarint32(pReader->pData, &nData);
  return nData;
}
static const char *leafReaderData(LeafReader *pReader){
  int n, nData;
  assert( pReader->term.nData>0 );
  n = fts3GetVarint32(pReader->pData, &nData);
  return pReader->pData+n;
}

static void leafReaderInit(const char *pData, int nData,
                           LeafReader *pReader){
  int nTerm, n;

  assert( nData>0 );
  assert( pData[0]=='\0' );

  CLEAR(pReader);

  /* Read the first term, skipping the header byte. */
  n = fts3GetVarint32(pData+1, &nTerm);
  dataBufferInit(&pReader->term, nTerm);
  dataBufferReplace(&pReader->term, pData+1+n, nTerm);

  /* Position after the first term. */
  assert( 1+n+nTerm<nData );
  pReader->pData = pData+1+n+nTerm;
  pReader->nData = nData-1-n-nTerm;
................................................................................

/* Step the reader forward to the next term. */
static void leafReaderStep(LeafReader *pReader){
  int n, nData, nPrefix, nSuffix;
  assert( !leafReaderAtEnd(pReader) );

  /* Skip previous entry's data block. */
  n = fts3GetVarint32(pReader->pData, &nData);
  assert( n+nData<=pReader->nData );
  pReader->pData += n+nData;
  pReader->nData -= n+nData;

  if( !leafReaderAtEnd(pReader) ){
    /* Construct the new term using a prefix from the old term plus a
    ** suffix from the leaf data.
    */
    n = fts3GetVarint32(pReader->pData, &nPrefix);
    n += fts3GetVarint32(pReader->pData+n, &nSuffix);
    assert( n+nSuffix<pReader->nData );
    pReader->term.nData = nPrefix;
    dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix);

    pReader->pData += n+nSuffix;
    pReader->nData -= n+nSuffix;
  }
................................................................................
/* This function implements the xUpdate callback; it's the top-level entry
 * point for inserting, deleting or updating a row in a full-text table. */
static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
                          sqlite_int64 *pRowid){
  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  int rc;

  FTSTRACE(("FTS3 Update %p\n", pVtab));

  if( nArg<2 ){
    rc = index_delete(v, sqlite3_value_int64(ppArg[0]));
  } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
    /* An update:
     * ppArg[0] = old rowid
     * ppArg[1] = new rowid
................................................................................
    }
  }

  return rc;
}

static int fulltextSync(sqlite3_vtab *pVtab){
  FTSTRACE(("FTS3 xSync()\n"));
  return flushPendingTerms((fulltext_vtab *)pVtab);
}

static int fulltextBegin(sqlite3_vtab *pVtab){
  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  FTSTRACE(("FTS3 xBegin()\n"));

  /* Any buffered updates should have been cleared by the previous
  ** transaction.
  */
  assert( v->nPendingData<0 );
  return clearPendingTerms(v);
}

static int fulltextCommit(sqlite3_vtab *pVtab){
  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  FTSTRACE(("FTS3 xCommit()\n"));

  /* Buffered updates should have been cleared by fulltextSync(). */
  assert( v->nPendingData<0 );
  return clearPendingTerms(v);
}

static int fulltextRollback(sqlite3_vtab *pVtab){
  FTSTRACE(("FTS3 xRollback()\n"));
  return clearPendingTerms((fulltext_vtab *)pVtab);
}

/*
** Implementation of the snippet() function for FTS3
*/
static void snippetFunc(

Changes to ext/fts3/fts3_hash.c.

127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
...
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
...
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
...
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
...
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
...
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332

/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamilar to some 
** programmers, so we provide the following additional explanation:
**
** The name of the function is "hashFunction".  The function takes a
** single parameter "keyClass".  The return value of hashFunction()
** is a pointer to another function.  Specifically, the return value
** of hashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*hashFunction(int keyClass))(const void*,int){
  if( keyClass==FTS3_HASH_STRING ){
    return &fts3StrHash;
  }else{
    assert( keyClass==FTS3_HASH_BINARY );
    return &fts3BinHash;
  }
}
................................................................................

/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreted the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
  if( keyClass==FTS3_HASH_STRING ){
    return &fts3StrCompare;
  }else{
    assert( keyClass==FTS3_HASH_BINARY );
    return &fts3BinCompare;
  }
}
................................................................................

  assert( (new_size & (new_size-1))==0 );
  new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) );
  if( new_ht==0 ) return;
  fts3HashFree(pH->ht);
  pH->ht = new_ht;
  pH->htsize = new_size;
  xHash = hashFunction(pH->keyClass);
  for(elem=pH->first, pH->first=0; elem; elem = next_elem){
    int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
    next_elem = elem->next;
    fts3HashInsertElement(pH, &new_ht[h], elem);
  }
}

................................................................................
  int count;                     /* Number of elements left to test */
  int (*xCompare)(const void*,int,const void*,int);  /* comparison function */

  if( pH->ht ){
    struct _fts3ht *pEntry = &pH->ht[h];
    elem = pEntry->chain;
    count = pEntry->count;
    xCompare = compareFunction(pH->keyClass);
    while( count-- && elem ){
      if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ 
        return elem;
      }
      elem = elem->next;
    }
  }
................................................................................
*/
void *sqlite3Fts3HashFind(const fts3Hash *pH, const void *pKey, int nKey){
  int h;                 /* A hash on key */
  fts3HashElem *elem;    /* The element that matches key */
  int (*xHash)(const void*,int);  /* The hash function */

  if( pH==0 || pH->ht==0 ) return 0;
  xHash = hashFunction(pH->keyClass);
  assert( xHash!=0 );
  h = (*xHash)(pKey,nKey);
  assert( (pH->htsize & (pH->htsize-1))==0 );
  elem = fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1));
  return elem ? elem->data : 0;
}

................................................................................
  int hraw;                 /* Raw hash value of the key */
  int h;                    /* the hash of the key modulo hash table size */
  fts3HashElem *elem;       /* Used to loop thru the element list */
  fts3HashElem *new_elem;   /* New element added to the pH */
  int (*xHash)(const void*,int);  /* The hash function */

  assert( pH!=0 );
  xHash = hashFunction(pH->keyClass);
  assert( xHash!=0 );
  hraw = (*xHash)(pKey, nKey);
  assert( (pH->htsize & (pH->htsize-1))==0 );
  h = hraw & (pH->htsize-1);
  elem = fts3FindElementByHash(pH,pKey,nKey,h);
  if( elem ){
    void *old_data = elem->data;







|
|

|


|







 







|







 







|







 







|







 







|







 







|







127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
...
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
...
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
...
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
...
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
...
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332

/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamilar to some 
** programmers, so we provide the following additional explanation:
**
** The name of the function is "ftsHashFunction".  The function takes a
** single parameter "keyClass".  The return value of ftsHashFunction()
** is a pointer to another function.  Specifically, the return value
** of ftsHashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*ftsHashFunction(int keyClass))(const void*,int){
  if( keyClass==FTS3_HASH_STRING ){
    return &fts3StrHash;
  }else{
    assert( keyClass==FTS3_HASH_BINARY );
    return &fts3BinHash;
  }
}
................................................................................

/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreted the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){
  if( keyClass==FTS3_HASH_STRING ){
    return &fts3StrCompare;
  }else{
    assert( keyClass==FTS3_HASH_BINARY );
    return &fts3BinCompare;
  }
}
................................................................................

  assert( (new_size & (new_size-1))==0 );
  new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) );
  if( new_ht==0 ) return;
  fts3HashFree(pH->ht);
  pH->ht = new_ht;
  pH->htsize = new_size;
  xHash = ftsHashFunction(pH->keyClass);
  for(elem=pH->first, pH->first=0; elem; elem = next_elem){
    int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
    next_elem = elem->next;
    fts3HashInsertElement(pH, &new_ht[h], elem);
  }
}

................................................................................
  int count;                     /* Number of elements left to test */
  int (*xCompare)(const void*,int,const void*,int);  /* comparison function */

  if( pH->ht ){
    struct _fts3ht *pEntry = &pH->ht[h];
    elem = pEntry->chain;
    count = pEntry->count;
    xCompare = ftsCompareFunction(pH->keyClass);
    while( count-- && elem ){
      if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ 
        return elem;
      }
      elem = elem->next;
    }
  }
................................................................................
*/
void *sqlite3Fts3HashFind(const fts3Hash *pH, const void *pKey, int nKey){
  int h;                 /* A hash on key */
  fts3HashElem *elem;    /* The element that matches key */
  int (*xHash)(const void*,int);  /* The hash function */

  if( pH==0 || pH->ht==0 ) return 0;
  xHash = ftsHashFunction(pH->keyClass);
  assert( xHash!=0 );
  h = (*xHash)(pKey,nKey);
  assert( (pH->htsize & (pH->htsize-1))==0 );
  elem = fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1));
  return elem ? elem->data : 0;
}

................................................................................
  int hraw;                 /* Raw hash value of the key */
  int h;                    /* the hash of the key modulo hash table size */
  fts3HashElem *elem;       /* Used to loop thru the element list */
  fts3HashElem *new_elem;   /* New element added to the pH */
  int (*xHash)(const void*,int);  /* The hash function */

  assert( pH!=0 );
  xHash = ftsHashFunction(pH->keyClass);
  assert( xHash!=0 );
  hraw = (*xHash)(pKey, nKey);
  assert( (pH->htsize & (pH->htsize-1))==0 );
  h = hraw & (pH->htsize-1);
  elem = fts3FindElementByHash(pH,pKey,nKey,h);
  if( elem ){
    void *old_data = elem->data;

Changes to ext/fts3/fts3_tokenizer.c.

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
**       (in which case SQLITE_CORE is not defined), or
**
**     * The FTS3 module is being built into the core of
**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)


#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1

#include "fts3_hash.h"
#include "fts3_tokenizer.h"
#include <assert.h>








<
<







21
22
23
24
25
26
27


28
29
30
31
32
33
34
**       (in which case SQLITE_CORE is not defined), or
**
**     * The FTS3 module is being built into the core of
**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)



#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1

#include "fts3_hash.h"
#include "fts3_tokenizer.h"
#include <assert.h>