SQLite

Check-in [617e2fac1c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Replace the hash table borrowed from fts3.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 617e2fac1c128212254f71b1a8fddaf0d1d90262
User & Date: dan 2014-08-11 19:44:52.686
Context
2014-08-11
20:26
Simplify the way position lists are copied when merging data. (check-in: 9f8d678a0e user: dan tags: fts5)
19:44
Replace the hash table borrowed from fts3. (check-in: 617e2fac1c user: dan tags: fts5)
2014-08-09
18:22
Fix an uninitialized variable causing a problem during fts5 table initialization. (check-in: a14fa876f0 user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5Int.h.
278
279
280
281
282
283
284









































285
286
287
288
289
290
291
** this connection since it was created.
*/
int sqlite3Fts5IndexReads(Fts5Index *p);

/*
** End of interface to code in fts5_index.c.
**************************************************************************/










































/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains contains 
** code to access the data stored in the %_content and %_docsize tables.
*/

#define FTS5_STMT_SCAN_ASC  0     /* SELECT rowid, * FROM ... ORDER BY 1 ASC */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
** this connection since it was created.
*/
int sqlite3Fts5IndexReads(Fts5Index *p);

/*
** End of interface to code in fts5_index.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_hash.c. 
*/
typedef struct Fts5Hash Fts5Hash;

/*
** Create a hash table, free a hash table.
*/
int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize);
void sqlite3Fts5HashFree(Fts5Hash*);

int sqlite3Fts5HashWrite(
  Fts5Hash*,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
);

/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash*);

/*
** Iterate through the contents of the hash table.
*/
int sqlite3Fts5HashIterate(
  Fts5Hash*,
  void *pCtx,
  int (*xTerm)(void*, const char*, int),
  int (*xEntry)(void*, i64, const u8*, int),
  int (*xTermDone)(void*)
);



/*
** End of interface to code in fts5_hash.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains contains 
** code to access the data stored in the %_content and %_docsize tables.
*/

#define FTS5_STMT_SCAN_ASC  0     /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
Added ext/fts5/fts5_hash.c.






































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
/*
** 2014 August 11
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
*/

#include "fts5Int.h"

typedef struct Fts5HashEntry Fts5HashEntry;

/*
** This file contains the implementation of an in-memory hash table used
** to accumuluate "term -> doclist" content before it is flused to a level-0
** segment.
*/


struct Fts5Hash {
  int *pnByte;                    /* Pointer to bytes counter */
  int nEntry;                     /* Number of entries currently in hash */
  int nSlot;                      /* Size of aSlot[] array */
  Fts5HashEntry **aSlot;          /* Array of hash slots */
};

/*
** Each entry in the hash table is represented by an object of the 
** following type. Each object, its key (zKey[]) and its current data
** are stored in a single memory allocation. The position list data 
** immediately follows the key data in memory.
**
** The data that follows the key is in a similar, but not identical format
** to the doclist data stored in the database. It is:
**
**   * Rowid, as a varint
**   * Position list, without 0x00 terminator.
**   * Size of previous position list and rowid, as a 4 byte
**     big-endian integer.
**
** iRowidOff:
**   Offset of last rowid written to data area. Relative to first byte of
**   structure.
**
** nData:
**   Bytes of data written since iRowidOff.
*/
struct Fts5HashEntry {
  Fts5HashEntry *pNext;           /* Next hash entry with same hash-key */
  
  int nAlloc;                     /* Total size of allocation */
  int iRowidOff;                  /* Offset of last rowid written */
  int nData;                      /* Total bytes of data (incl. structure) */

  int iCol;                       /* Column of last value written */
  int iPos;                       /* Position of last value written */
  i64 iRowid;                     /* Rowid of last value written */
  char zKey[0];                   /* Nul-terminated entry key */
};


/*
** Allocate a new hash table.
*/
int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){
  int rc = SQLITE_OK;
  Fts5Hash *pNew;

  *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
  if( pNew==0 ){
    rc = SQLITE_NOMEM;
  }else{
    int nByte;
    memset(pNew, 0, sizeof(Fts5Hash));
    pNew->pnByte = pnByte;

    pNew->nSlot = 1024;
    nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
    pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte);
    if( pNew->aSlot==0 ){
      sqlite3_free(pNew);
      *ppNew = 0;
      rc = SQLITE_NOMEM;
    }else{
      memset(pNew->aSlot, 0, nByte);
    }
  }
  return rc;
}

/*
** Free a hash table object.
*/
void sqlite3Fts5HashFree(Fts5Hash *pHash){
  if( pHash ){
    sqlite3Fts5HashClear(pHash);
    sqlite3_free(pHash->aSlot);
    sqlite3_free(pHash);
  }
}

/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash *pHash){
  int i;
  for(i=0; i<pHash->nSlot; i++){
    if( pHash->aSlot[i] ){
      sqlite3_free(pHash->aSlot[i]);
      pHash->aSlot[i] = 0;
    }
  }
}

static unsigned int fts5HashKey(Fts5Hash *pHash, const char *p, int n){
  int i;
  unsigned int h = 13;
  for(i=n-1; i>=0; i--){
    h = (h << 3) ^ h ^ p[i];
  }
  return (h % pHash->nSlot);
}

/*
** Store the 32-bit integer passed as the second argument in buffer p.
*/
static int fts5PutNativeInt(u8 *p, int i){
  assert( sizeof(i)==4 );
  memcpy(p, &i, sizeof(i));
  return sizeof(i);
}

/*
** Read and return the 32-bit integer stored in buffer p.
*/
static int fts5GetNativeU32(u8 *p){
  int i;
  assert( sizeof(i)==4 );
  memcpy(&i, p, sizeof(i));
  return i;
}

int sqlite3Fts5HashWrite(
  Fts5Hash *pHash,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
){
  unsigned int iHash = fts5HashKey(pHash, pToken, nToken);
  Fts5HashEntry *p;
  u8 *pPtr;
  int nIncr = 0;                  /* Amount to increment (*pHash->pnByte) by */

  /* Attempt to locate an existing hash object */
  for(p=pHash->aSlot[iHash]; p; p=p->pNext){
    if( memcmp(p->zKey, pToken, nToken)==0 && p->zKey[nToken]==0 ) break;
  }

  /* If an existing hash entry cannot be found, create a new one. */
  if( p==0 ){
    int nByte = sizeof(Fts5HashEntry) + nToken + 1 + 64;
    if( nByte<128 ) nByte = 128;

    p = (Fts5HashEntry*)sqlite3_malloc(nByte);
    if( !p ) return SQLITE_NOMEM;
    memset(p, 0, sizeof(Fts5HashEntry));
    p->nAlloc = nByte;
    memcpy(p->zKey, pToken, nToken);
    p->zKey[nToken] = '\0';
    p->iRowidOff = p->nData = nToken + 1 + sizeof(Fts5HashEntry);
    p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid);
    p->iRowid = iRowid;
    p->pNext = pHash->aSlot[iHash];
    pHash->aSlot[iHash] = p;

    nIncr += p->nData;
  }

  /* Check there is enough space to append a new entry. Worst case scenario
  ** is:
  **
  **     + 4 bytes for the previous entry size field,
  **     + 9 bytes for a new rowid,
  **     + 1 byte for a "new column" byte,
  **     + 3 bytes for a new column number (16-bit max) as a varint,
  **     + 5 bytes for the new position offset (32-bit max).
  */
  if( (p->nAlloc - p->nData) < (4 + 9 + 1 + 3 + 5) ){
    int nNew = p->nAlloc * 2;
    Fts5HashEntry *pNew;
    Fts5HashEntry **pp;
    pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
    if( pNew==0 ) return SQLITE_NOMEM;
    pNew->nAlloc = nNew;
    for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pNext);
    *pp = pNew;
    p = pNew;
  }
  pPtr = (u8*)p;
  nIncr -= p->nData;

  /* If this is a new rowid, append the 4-byte size field for the previous
  ** entry, and the new rowid for this entry.  */
  if( iRowid!=p->iRowid ){
    p->nData += fts5PutNativeInt(&pPtr[p->nData], p->nData - p->iRowidOff);
    p->iRowidOff = p->nData;
    p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid);
    p->iCol = 0;
    p->iPos = 0;
    p->iRowid = iRowid;
  }

  if( iCol>=0 ){
    /* Append a new column value, if necessary */
    assert( iCol>=p->iCol );
    if( iCol!=p->iCol ){
      pPtr[p->nData++] = 0x01;
      p->nData += sqlite3PutVarint(&pPtr[p->nData], iCol);
      p->iCol = iCol;
      p->iPos = 0;
    }

    /* Append the new position offset */
    p->nData += sqlite3PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
    p->iPos = iPos;
  }
  nIncr += p->nData;

  *pHash->pnByte += nIncr;
  return SQLITE_OK;
}


/*
** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
** each sorted in key order. This function merges the two lists into a
** single list and returns a pointer to its first element.
*/
static Fts5HashEntry *fts5HashEntryMerge(
  Fts5HashEntry *pLeft,
  Fts5HashEntry *pRight
){
  Fts5HashEntry *p1 = pLeft;
  Fts5HashEntry *p2 = pRight;
  Fts5HashEntry *pRet = 0;
  Fts5HashEntry **ppOut = &pRet;

  while( p1 || p2 ){
    if( p1==0 ){
      *ppOut = p2;
      p2 = 0;
    }else if( p2==0 ){
      *ppOut = p1;
      p1 = 0;
    }else{
      int i = 0;
      while( p1->zKey[i]==p2->zKey[i] ) i++;

      if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){
        /* p2 is smaller */
        *ppOut = p2;
        ppOut = &p2->pNext;
        p2 = p2->pNext;
      }else{
        /* p1 is smaller */
        *ppOut = p1;
        ppOut = &p1->pNext;
        p1 = p1->pNext;
      }
      *ppOut = 0;
    }
  }

  return pRet;
}

/*
** Extract all tokens from hash table iHash and link them into a list
** in sorted order. The hash table is cleared before returning. It is
** the responsibility of the caller to free the elements of the returned
** list.
*/
static int fts5HashEntrySort(Fts5Hash *pHash, Fts5HashEntry **ppSorted){
  const int nMergeSlot = 32;
  Fts5HashEntry **ap;
  Fts5HashEntry *pList;
  int iSlot;
  int i;

  *ppSorted = 0;
  ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot);
  if( !ap ) return SQLITE_NOMEM;
  memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);

  for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
    while( pHash->aSlot[iSlot] ){
      Fts5HashEntry *pEntry = pHash->aSlot[iSlot];
      pHash->aSlot[iSlot] = pEntry->pNext;
      pEntry->pNext = 0;
      for(i=0; ap[i]; i++){
        pEntry = fts5HashEntryMerge(pEntry, ap[i]);
        ap[i] = 0;
      }
      ap[i] = pEntry;
    }
  }

  pList = 0;
  for(i=0; i<nMergeSlot; i++){
    pList = fts5HashEntryMerge(pList, ap[i]);
  }

  sqlite3_free(ap);
  *ppSorted = pList;
  return SQLITE_OK;
}

int sqlite3Fts5HashIterate(
  Fts5Hash *pHash,
  void *pCtx,
  int (*xTerm)(void*, const char*, int),
  int (*xEntry)(void*, i64, const u8*, int),
  int (*xTermDone)(void*)
){
  Fts5HashEntry *pList;
  int rc;

  rc = fts5HashEntrySort(pHash, &pList);
  if( rc==SQLITE_OK ){
    while( pList ){
      Fts5HashEntry *pNext = pList->pNext;
      if( rc==SQLITE_OK ){
        u8 *pPtr = (u8*)pList;
        int nKey = strlen(pList->zKey);
        int iOff = pList->iRowidOff;
        int iEnd = sizeof(Fts5HashEntry) + nKey + 1;
        int nByte = pList->nData - pList->iRowidOff;

        rc = xTerm(pCtx, pList->zKey, nKey);
        while( rc==SQLITE_OK && iOff ){
          int nVarint;
          i64 iRowid;
          nVarint = getVarint(&pPtr[iOff], (u64*)&iRowid);
          rc = xEntry(pCtx, iRowid, &pPtr[iOff+nVarint], nByte-nVarint);
          if( iOff==iEnd ){
            iOff = 0;
          }else{
            nByte = fts5GetNativeU32(&pPtr[iOff-sizeof(int)]);
            iOff = iOff - sizeof(int) - nByte;
          }
        }
        if( rc==SQLITE_OK ){
          rc = xTermDone(pCtx);
        }
      }
      sqlite3_free(pList);
      pList = pNext;
    }
  }
  return rc;
}



Changes to ext/fts5/fts5_index.c.
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
** Low level access to the FTS index stored in the database file. The 
** routines in this file file implement all read and write access to the
** %_data table. Other parts of the system access this functionality via
** the interface defined in fts5Int.h.
*/

#include "fts5Int.h"
#include "fts3_hash.h"

/*
** Overview:
**
** The %_data table contains all the FTS indexes for an FTS5 virtual table.
** As well as the main term index, there may be up to 31 prefix indexes.
** The format is similar to FTS3/4, except that:







<







13
14
15
16
17
18
19

20
21
22
23
24
25
26
** Low level access to the FTS index stored in the database file. The 
** routines in this file file implement all read and write access to the
** %_data table. Other parts of the system access this functionality via
** the interface defined in fts5Int.h.
*/

#include "fts5Int.h"


/*
** Overview:
**
** The %_data table contains all the FTS indexes for an FTS5 virtual table.
** As well as the main term index, there may be up to 31 prefix indexes.
** The format is similar to FTS3/4, except that:
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
typedef struct Fts5ChunkIter Fts5ChunkIter;
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5PendingDoclist Fts5PendingDoclist;
typedef struct Fts5PendingPoslist Fts5PendingPoslist;
typedef struct Fts5PosIter Fts5PosIter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;







<
<







271
272
273
274
275
276
277


278
279
280
281
282
283
284
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
typedef struct Fts5ChunkIter Fts5ChunkIter;
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;


typedef struct Fts5PosIter Fts5PosIter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
  int nMinMerge;                  /* Minimum input segments in a merge */
  int nWorkUnit;                  /* Leaf pages in a "unit" of work */

  /*
  ** Variables related to the accumulation of tokens and doclists within the
  ** in-memory hash tables before they are flushed to disk.
  */
  Fts3Hash *aHash;                /* One hash for terms, one for each prefix */
  int nMaxPendingData;            /* Max pending data before flush to disk */
  int nPendingData;               /* Current bytes of pending data */
  i64 iWriteRowid;                /* Rowid for current doc being written */

  /* Error state. */
  int rc;                         /* Current error code */








|







293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
  int nMinMerge;                  /* Minimum input segments in a merge */
  int nWorkUnit;                  /* Leaf pages in a "unit" of work */

  /*
  ** Variables related to the accumulation of tokens and doclists within the
  ** in-memory hash tables before they are flushed to disk.
  */
  Fts5Hash **apHash;              /* Array of hash tables */
  int nMaxPendingData;            /* Max pending data before flush to disk */
  int nPendingData;               /* Current bytes of pending data */
  i64 iWriteRowid;                /* Rowid for current doc being written */

  /* Error state. */
  int rc;                         /* Current error code */

343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
*/
struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int n;                          /* Size of record in bytes */
  int nRef;                       /* Ref count */
};

/*
** Before it is flushed to a level-0 segment, term data is collected in
** the hash tables in the Fts5Index.aHash[] array. Hash table keys are
** terms (or, for prefix indexes, term prefixes) and values are instances
** of type Fts5PendingDoclist.
*/
struct Fts5PendingDoclist {
  u8 *pTerm;                      /* Term for this entry */
  int nTerm;                      /* Bytes of data at pTerm */
  Fts5PendingPoslist *pPoslist;   /* Linked list of position lists */
  int iCol;                       /* Column for last entry in pPending */
  int iPos;                       /* Pos value for last entry in pPending */
  Fts5PendingDoclist *pNext;      /* Used during merge sort */
};
struct Fts5PendingPoslist {
  i64 iRowid;                     /* Rowid for this doclist entry */
  Fts5Buffer buf;                 /* Current doclist contents */
  Fts5PendingPoslist *pNext;      /* Previous poslist for same term */
};

/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the 
** other Fts5StructureXXX types as components.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







340
341
342
343
344
345
346




















347
348
349
350
351
352
353
*/
struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int n;                          /* Size of record in bytes */
  int nRef;                       /* Ref count */
};





















/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the 
** other Fts5StructureXXX types as components.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
** Return true if the position iterator passed as the second argument is
** at EOF. Or if an error has already occurred. Otherwise, return false.
*/
static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){
  return (p->rc || pIter->chunk.pLeaf==0);
}


/*
** Allocate memory. The difference between this function and fts5IdxMalloc()
** is that this increments the Fts5Index.nPendingData variable by the
** number of bytes allocated. It should be used for all allocations used
** to store pending-data within the in-memory hash tables.
*/
static void *fts5PendingMalloc(Fts5Index *p, int nByte){
  p->nPendingData += nByte;
  return fts5IdxMalloc(p, nByte);
}

/*
** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken)
** in hash table for index iIdx. If iIdx is zero, this is the main terms 
** index. Values of 1 and greater for iIdx are prefix indexes.
**
** If an OOM error is encountered, set the Fts5Index.rc error code 
** accordingly.
*/
static void fts5AddTermToHash(
  Fts5Index *p,                   /* Index object to write to */
  int iIdx,                       /* Entry in p->aHash[] to update */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
){
  Fts5Config *pConfig = p->pConfig;
  Fts3Hash *pHash;
  Fts5PendingDoclist *pDoclist;
  Fts5PendingPoslist *pPoslist;
  i64 iRowid = p->iWriteRowid;     /* Rowid associated with these tokens */

  /* If an error has already occured this call is a no-op. */
  if( p->rc!=SQLITE_OK ) return;

  /* Find the hash table to use. It has already been allocated. */
  assert( iIdx<=pConfig->nPrefix );
  assert( iIdx==0 || nToken==pConfig->aPrefix[iIdx-1] );
  pHash = &p->aHash[iIdx];

  /* Find the doclist to append to. Allocate a new doclist object if
  ** required. */
  pDoclist = (Fts5PendingDoclist*)fts3HashFind(pHash, pToken, nToken);
  if( pDoclist==0 ){
    Fts5PendingDoclist *pDel;
    pDoclist = fts5PendingMalloc(p, sizeof(Fts5PendingDoclist) + nToken);
    if( pDoclist==0 ) return;
    pDoclist->pTerm = (u8*)&pDoclist[1];
    pDoclist->nTerm = nToken;
    memcpy(pDoclist->pTerm, pToken, nToken);
    pDel = fts3HashInsert(pHash, pDoclist->pTerm, nToken, pDoclist);
    if( pDel ){
      assert( pDoclist==pDel );
      sqlite3_free(pDel);
      p->rc = SQLITE_NOMEM;
      return;
    }
  }

  /* Find the poslist to append to. Allocate a new object if required. */
  pPoslist = pDoclist->pPoslist;
  if( pPoslist==0 || pPoslist->iRowid!=iRowid ){
    pPoslist = fts5PendingMalloc(p, sizeof(Fts5PendingPoslist));
    if( pPoslist==0 ) return;
    pPoslist->pNext = pDoclist->pPoslist;
    pPoslist->iRowid = iRowid;
    pDoclist->pPoslist = pPoslist;
    pDoclist->iCol = 0;
    pDoclist->iPos = 0;
  }

  /* Append the values to the position list. */
  if( iCol>=0 ){
    p->nPendingData -= pPoslist->buf.nSpace;
    if( iCol!=pDoclist->iCol ){
      fts5BufferAppendVarint(&p->rc, &pPoslist->buf, 1);
      fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iCol);
      pDoclist->iCol = iCol;
      pDoclist->iPos = 0;
    }
    fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iPos + 2 - pDoclist->iPos);
    p->nPendingData += pPoslist->buf.nSpace;
    pDoclist->iPos = iPos;
  }
}

/*
** Free the pending-doclist object passed as the only argument.
*/
static void fts5FreePendingDoclist(Fts5PendingDoclist *p){
  Fts5PendingPoslist *pPoslist;
  Fts5PendingPoslist *pNext;
  for(pPoslist=p->pPoslist; pPoslist; pPoslist=pNext){
    pNext = pPoslist->pNext;
    fts5BufferFree(&pPoslist->buf);
    sqlite3_free(pPoslist);
  }
  sqlite3_free(p);
}

/*
** Insert or remove data to or from the index. Each time a document is 
** added to or removed from the index, this function is called one or more
** times.
**







<
<
<
<
<
<
<
<
<
<
<
<















<
<
<
<
<
<
<
|
|
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







2431
2432
2433
2434
2435
2436
2437












2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452







2453
2454




2455


















2456










2457




























2458
2459
2460
2461
2462
2463
2464
** Return true if the position iterator passed as the second argument is
** at EOF. Or if an error has already occurred. Otherwise, return false.
*/
static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){
  return (p->rc || pIter->chunk.pLeaf==0);
}













/*
** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken)
** in hash table for index iIdx. If iIdx is zero, this is the main terms 
** index. Values of 1 and greater for iIdx are prefix indexes.
**
** If an OOM error is encountered, set the Fts5Index.rc error code 
** accordingly.
*/
static void fts5AddTermToHash(
  Fts5Index *p,                   /* Index object to write to */
  int iIdx,                       /* Entry in p->aHash[] to update */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
){







  if( p->rc==SQLITE_OK ){
    p->rc = sqlite3Fts5HashWrite(




        p->apHash[iIdx], p->iWriteRowid, iCol, iPos, pToken, nToken


















    );










  }




























}

/*
** Insert or remove data to or from the index. Each time a document is 
** added to or removed from the index, this function is called one or more
** times.
**
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
  int i;                          /* Used to iterate through indexes */
  Fts5Config *pConfig = p->pConfig;

  /* If an error has already occured this call is a no-op. */
  if( p->rc!=SQLITE_OK ) return;

  /* Allocate hash tables if they have not already been allocated */
  if( p->aHash==0 ){
    int nHash = pConfig->nPrefix + 1;
    p->aHash = (Fts3Hash*)sqlite3_malloc(sizeof(Fts3Hash) * nHash);
    if( p->aHash==0 ){
      p->rc = SQLITE_NOMEM;
    }else{
      for(i=0; i<nHash; i++){
        fts3HashInit(&p->aHash[i], FTS3_HASH_STRING, 0);
      }
    }
  }

  /* Add the new token to the main terms hash table. And to each of the
  ** prefix hash tables that it is large enough for. */
  fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken);
  for(i=0; i<pConfig->nPrefix; i++){







|

|
<
<
<
|
|
<







2476
2477
2478
2479
2480
2481
2482
2483
2484
2485



2486
2487

2488
2489
2490
2491
2492
2493
2494
  int i;                          /* Used to iterate through indexes */
  Fts5Config *pConfig = p->pConfig;

  /* If an error has already occured this call is a no-op. */
  if( p->rc!=SQLITE_OK ) return;

  /* Allocate hash tables if they have not already been allocated */
  if( p->apHash==0 ){
    int nHash = pConfig->nPrefix + 1;
    p->apHash = (Fts5Hash**)fts5IdxMalloc(p, sizeof(Fts5Hash*) * nHash);



    for(i=0; p->rc==SQLITE_OK && i<nHash; i++){
      p->rc = sqlite3Fts5HashNew(&p->apHash[i], &p->nPendingData);

    }
  }

  /* Add the new token to the main terms hash table. And to each of the
  ** prefix hash tables that it is large enough for. */
  fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken);
  for(i=0; i<pConfig->nPrefix; i++){
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
    }
    if( iSegid ) return iSegid;
  }

  p->rc = SQLITE_ERROR;
  return 0;
}

static Fts5PendingDoclist *fts5PendingMerge(
  Fts5Index *p, 
  Fts5PendingDoclist *pLeft,
  Fts5PendingDoclist *pRight
){
  Fts5PendingDoclist *p1 = pLeft;
  Fts5PendingDoclist *p2 = pRight;
  Fts5PendingDoclist *pRet = 0;
  Fts5PendingDoclist **ppOut = &pRet;

  while( p1 || p2 ){
    if( p1==0 ){
      *ppOut = p2;
      p2 = 0;
    }else if( p2==0 ){
      *ppOut = p1;
      p1 = 0;
    }else{
      int nCmp = MIN(p1->nTerm, p2->nTerm);
      int res = memcmp(p1->pTerm, p2->pTerm, nCmp);
      if( res==0 ) res = p1->nTerm - p2->nTerm;

      if( res>0 ){
        /* p2 is smaller */
        *ppOut = p2;
        ppOut = &p2->pNext;
        p2 = p2->pNext;
      }else{
        /* p1 is smaller */
        *ppOut = p1;
        ppOut = &p1->pNext;
        p1 = p1->pNext;
      }
      *ppOut = 0;
    }
  }

  return pRet;
}

/*
** Extract all tokens from hash table iHash and link them into a list
** in sorted order. The hash table is cleared before returning. It is
** the responsibility of the caller to free the elements of the returned
** list.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
** already occurred, this function is a no-op.
*/
static Fts5PendingDoclist *fts5PendingList(Fts5Index *p, int iHash){
  const int nMergeSlot = 32;
  Fts3Hash *pHash;
  Fts3HashElem *pE;               /* Iterator variable */
  Fts5PendingDoclist **ap;
  Fts5PendingDoclist *pList;
  int i;

  ap = fts5IdxMalloc(p, sizeof(Fts5PendingDoclist*) * nMergeSlot);
  if( !ap ) return 0;

  pHash = &p->aHash[iHash];
  for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){
    int i;
    Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE);
    assert( pDoclist->pNext==0 );
    for(i=0; ap[i]; i++){
      pDoclist = fts5PendingMerge(p, pDoclist, ap[i]);
      ap[i] = 0;
    }
    ap[i] = pDoclist;
  }

  pList = 0;
  for(i=0; i<nMergeSlot; i++){
    pList = fts5PendingMerge(p, pList, ap[i]);
  }

  sqlite3_free(ap);
  fts3HashClear(pHash);
  return pList;
}


/*
** Discard all data currently cached in the hash-tables.
*/
static void fts5IndexDiscardData(Fts5Index *p){
  Fts5Config *pConfig = p->pConfig;
  int i;
  for(i=0; i<=pConfig->nPrefix; i++){
    Fts3Hash *pHash = &p->aHash[i];
    Fts3HashElem *pE;               /* Iterator variable */
    for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){
      Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE);
      fts5FreePendingDoclist(pDoclist);
    }
    fts3HashClear(pHash);
  }
  p->nPendingData = 0;
}

/*
** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares
** with buffer (nOld/pOld).








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<






<
<
<
<
<
<
|







2524
2525
2526
2527
2528
2529
2530
2531








































2532











































2533
2534
2535
2536
2537
2538






2539
2540
2541
2542
2543
2544
2545
2546
    }
    if( iSegid ) return iSegid;
  }

  p->rc = SQLITE_ERROR;
  return 0;
}









































/*











































** Discard all data currently cached in the hash-tables.
*/
static void fts5IndexDiscardData(Fts5Index *p){
  Fts5Config *pConfig = p->pConfig;
  int i;
  for(i=0; i<=pConfig->nPrefix; i++){






    sqlite3Fts5HashClear(p->apHash[i]);
  }
  p->nPendingData = 0;
}

/*
** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares
** with buffer (nOld/pOld).
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
  }
}

static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
  fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0);
}

/*
** Write the contents of pending-doclist object pDoclist to writer pWriter.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
** already occurred, this function is a no-op.
*/
static void fts5WritePendingDoclist(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Write to this writer object */
  Fts5PendingDoclist *pDoclist    /* Doclist to write to pWriter */
){
  Fts5PendingPoslist *pPoslist;   /* Used to iterate through the doclist */

  /* Append the term */
  fts5WriteAppendTerm(p, pWriter, pDoclist->nTerm, pDoclist->pTerm);

  /* Append the position list for each rowid */
  for(pPoslist=pDoclist->pPoslist; pPoslist; pPoslist=pPoslist->pNext){
    int i = 0;

    /* Append the rowid itself */
    fts5WriteAppendRowid(p, pWriter, pPoslist->iRowid);

    /* Append the size of the position list in bytes */
    fts5WriteAppendPoslistInt(p, pWriter, pPoslist->buf.n);

    /* Copy the position list to the output segment */
    while( i<pPoslist->buf.n){
      int iVal;
      i += getVarint32(&pPoslist->buf.p[i], iVal);
      fts5WriteAppendPoslistInt(p, pWriter, iVal);
    }
  }

  /* Write the doclist terminator */
  fts5WriteAppendZerobyte(p, pWriter);
}

/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer.
*/
static void fts5WriteFinish(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,         /* Writer object */







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







2813
2814
2815
2816
2817
2818
2819






































2820
2821
2822
2823
2824
2825
2826
  }
}

static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
  fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0);
}







































/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer.
*/
static void fts5WriteFinish(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,         /* Writer object */
3381
3382
3383
3384
3385
3386
3387















































3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413


3414
3415
3416
3417
3418
3419
3420
3421
3422
3423




3424
3425
3426
3427
3428
3429
3430
3431
    fts5StructureExtendLevel(&p->rc, pStruct, iBestLvl+1, 1, 0);
    fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem);
    fts5StructurePromote(p, iBestLvl+1, pStruct);
    assert( nRem==0 || p->rc==SQLITE_OK );
    *ppStruct = pStruct;
  }
}
















































/*
** Flush the contents of in-memory hash table iHash to a new level-0 
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p, iHash);
  iSegid = fts5AllocateSegid(p, pStruct);

  if( iSegid ){
    Fts5SegWriter writer;
    Fts5PendingDoclist *pList;
    Fts5PendingDoclist *pIter;
    Fts5PendingDoclist *pNext;

    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    int nHeight;                  /* Height of new segment b-tree */



    pList = fts5PendingList(p, iHash);
    assert( pList!=0 || p->rc!=SQLITE_OK );
    fts5WriteInit(p, &writer, iHash, iSegid);

    for(pIter=pList; pIter; pIter=pNext){
      pNext = pIter->pNext;
      fts5WritePendingDoclist(p, &writer, pIter);
      fts5FreePendingDoclist(pIter);
    }




    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);

    /* Edit the Fts5Structure and write it back to the database. */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



















<
<
<
<
<


>
>

<
<
|
|
<
<
<
<
|
>
>
>
>
|







3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220





3221
3222
3223
3224
3225


3226
3227




3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
    fts5StructureExtendLevel(&p->rc, pStruct, iBestLvl+1, 1, 0);
    fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem);
    fts5StructurePromote(p, iBestLvl+1, pStruct);
    assert( nRem==0 || p->rc==SQLITE_OK );
    *ppStruct = pStruct;
  }
}

typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;
  Fts5SegWriter writer; 
};

static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){
  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  int rc = SQLITE_OK;
  fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm);
  return rc;
}

static int fts5FlushTermDone(void *pCtx){
  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  int rc = SQLITE_OK;
  /* Write the doclist terminator */
  fts5WriteAppendZerobyte(p->pIdx, &p->writer);
  return rc;
}

static int fts5FlushNewEntry(
  void *pCtx, 
  i64 iRowid, 
  const u8 *aPoslist, 
  int nPoslist
){
  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  int rc = SQLITE_OK;
  int i = 0;

  /* Append the rowid itself */
  fts5WriteAppendRowid(p->pIdx, &p->writer, iRowid);

  /* Append the size of the position list in bytes */
  fts5WriteAppendPoslistInt(p->pIdx, &p->writer, nPoslist);

  /* Copy the position list to the output segment */
  while( i<nPoslist ){
    int iVal;
    i += getVarint32(&aPoslist[i], iVal);
    fts5WriteAppendPoslistInt(p->pIdx, &p->writer, iVal);
  }

  return rc;
}

/*
** Flush the contents of in-memory hash table iHash to a new level-0 
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p, iHash);
  iSegid = fts5AllocateSegid(p, pStruct);

  if( iSegid ){





    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    int nHeight;                  /* Height of new segment b-tree */
    int rc;
    Fts5FlushCtx ctx;



    fts5WriteInit(p, &ctx.writer, iHash, iSegid);
    ctx.pIdx = p;





    rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, 
        fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone
    );
    if( p->rc==SQLITE_OK ) p->rc = rc;
    fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast);

    /* Edit the Fts5Structure and write it back to the database. */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
static void fts5IndexFlush(Fts5Index *p){
  Fts5Config *pConfig = p->pConfig;
  int i;                          /* Used to iterate through indexes */
  int nLeaf = 0;                  /* Number of leaves written */

  /* If an error has already occured this call is a no-op. */
  if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return;
  assert( p->aHash );

  /* Flush the terms and each prefix index to disk */
  for(i=0; i<=pConfig->nPrefix; i++){
    fts5FlushOneHash(p, i, &nLeaf);
  }
  p->nPendingData = 0;
}







|







3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
static void fts5IndexFlush(Fts5Index *p){
  Fts5Config *pConfig = p->pConfig;
  int i;                          /* Used to iterate through indexes */
  int nLeaf = 0;                  /* Number of leaves written */

  /* If an error has already occured this call is a no-op. */
  if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return;
  assert( p->apHash );

  /* Flush the terms and each prefix index to disk */
  for(i=0; i<=pConfig->nPrefix; i++){
    fts5FlushOneHash(p, i, &nLeaf);
  }
  p->nPendingData = 0;
}
3551
3552
3553
3554
3555
3556
3557





3558

3559
3560
3561
3562
3563
3564
3565
  int rc = SQLITE_OK;
  if( bDestroy ){
    rc = sqlite3Fts5DropTable(p->pConfig, "data");
  }
  assert( p->pReader==0 );
  sqlite3_finalize(p->pWriter);
  sqlite3_finalize(p->pDeleter);





  sqlite3_free(p->aHash);

  sqlite3_free(p->zDataTbl);
  sqlite3_free(p);
  return rc;
}

/*
** Return a simple checksum value based on the arguments.







>
>
>
>
>
|
>







3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
  int rc = SQLITE_OK;
  if( bDestroy ){
    rc = sqlite3Fts5DropTable(p->pConfig, "data");
  }
  assert( p->pReader==0 );
  sqlite3_finalize(p->pWriter);
  sqlite3_finalize(p->pDeleter);
  if( p->apHash ){
    int i;
    for(i=0; i<=p->pConfig->nPrefix; i++){
      sqlite3Fts5HashFree(p->apHash[i]);
    }
    sqlite3_free(p->apHash);
  }
  sqlite3_free(p->zDataTbl);
  sqlite3_free(p);
  return rc;
}

/*
** Return a simple checksum value based on the arguments.
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325

  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p, 0);

  if( aBuf && pStruct ){
    Fts5DoclistIter *pDoclist;
    int i;
    i64 iLastRowid;
    Fts5MultiSegIter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Buffer doclist;

    memset(&doclist, 0, sizeof(doclist));
    for(fts5MultiIterNew(p, pStruct, 0, 1, pToken, nToken, -1, 0, &p1);
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext(p, p1, 0, 0)







|







4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140

  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p, 0);

  if( aBuf && pStruct ){
    Fts5DoclistIter *pDoclist;
    int i;
    i64 iLastRowid = 0;
    Fts5MultiSegIter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Buffer doclist;

    memset(&doclist, 0, sizeof(doclist));
    for(fts5MultiIterNew(p, pStruct, 0, 1, pToken, nToken, -1, 0, &p1);
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext(p, p1, 0, 0)
Changes to main.mk.
73
74
75
76
77
78
79

80
81
82
83
84
85
86
	 vdbetrace.o wal.o walker.o where.o utf.o vtab.o

LIBOBJ += fts5.o
LIBOBJ += fts5_aux.o
LIBOBJ += fts5_buffer.o
LIBOBJ += fts5_config.o
LIBOBJ += fts5_expr.o

LIBOBJ += fts5_index.o
LIBOBJ += fts5_storage.o
LIBOBJ += fts5parse.o



# All of the source code files.







>







73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
	 vdbetrace.o wal.o walker.o where.o utf.o vtab.o

LIBOBJ += fts5.o
LIBOBJ += fts5_aux.o
LIBOBJ += fts5_buffer.o
LIBOBJ += fts5_config.o
LIBOBJ += fts5_expr.o
LIBOBJ += fts5_hash.o
LIBOBJ += fts5_index.o
LIBOBJ += fts5_storage.o
LIBOBJ += fts5parse.o



# All of the source code files.
228
229
230
231
232
233
234

235
236
237
238
239
240
241
   $(TOP)/ext/fts5/fts5.h \
   $(TOP)/ext/fts5/fts5Int.h \
   $(TOP)/ext/fts5/fts5_aux.c \
   $(TOP)/ext/fts5/fts5_buffer.c \
   $(TOP)/ext/fts5/fts5.c \
   $(TOP)/ext/fts5/fts5_config.c \
   $(TOP)/ext/fts5/fts5_expr.c \

   $(TOP)/ext/fts5/fts5_index.c \
   fts5parse.c \
   $(TOP)/ext/fts5/fts5_storage.c 


# Generated source code files
#







>







229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
   $(TOP)/ext/fts5/fts5.h \
   $(TOP)/ext/fts5/fts5Int.h \
   $(TOP)/ext/fts5/fts5_aux.c \
   $(TOP)/ext/fts5/fts5_buffer.c \
   $(TOP)/ext/fts5/fts5.c \
   $(TOP)/ext/fts5/fts5_config.c \
   $(TOP)/ext/fts5/fts5_expr.c \
   $(TOP)/ext/fts5/fts5_hash.c \
   $(TOP)/ext/fts5/fts5_index.c \
   fts5parse.c \
   $(TOP)/ext/fts5/fts5_storage.c 


# Generated source code files
#
594
595
596
597
598
599
600



601
602
603
604
605
606
607
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_buffer.c

fts5_config.o:	$(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_config.c

fts5_expr.o:	$(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c




fts5_index.o:	$(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_index.c

fts5_storage.o:	$(TOP)/ext/fts5/fts5_storage.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_storage.c








>
>
>







596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_buffer.c

fts5_config.o:	$(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_config.c

fts5_expr.o:	$(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c

fts5_hash.o:	$(TOP)/ext/fts5/fts5_hash.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_hash.c

fts5_index.o:	$(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_index.c

fts5_storage.o:	$(TOP)/ext/fts5/fts5_storage.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_storage.c

Changes to tool/loadfts.c.
65
66
67
68
69
70
71

72
73
74
75
76
77
78
"  the contents of each file into the fts table. All files are assumed to\n"
"  contain UTF-8 text.\n"
"\n"
"Switches are:\n"
"  -fts [345]       FTS version to use (default=5)\n"
"  -idx [01]        Create a mapping from filename to rowid (default=0)\n"
"  -dir <path>      Root of directory tree to load data from (default=.)\n"

, zArgv0
);
  exit(1);
}

/*
** Exit with a message based on the argument and the current value of errno.







>







65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"  the contents of each file into the fts table. All files are assumed to\n"
"  contain UTF-8 text.\n"
"\n"
"Switches are:\n"
"  -fts [345]       FTS version to use (default=5)\n"
"  -idx [01]        Create a mapping from filename to rowid (default=0)\n"
"  -dir <path>      Root of directory tree to load data from (default=.)\n"
"  -trans <integer> Number of inserts per transaction (default=1)\n"
, zArgv0
);
  exit(1);
}

/*
** Exit with a message based on the argument and the current value of errno.
92
93
94
95
96
97
98

99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114

115





116
117
118
119
120
121
122
}

/*
** Context object for visit_file().
*/
typedef struct VisitContext VisitContext;
struct VisitContext {

  sqlite3 *db;                    /* Database handle */
  sqlite3_stmt *pInsert;          /* INSERT INTO fts VALUES(readtext(:1)) */
};

/*
** Callback used with traverse(). The first argument points to an object
** of type VisitContext. This function inserts the contents of the text
** file zPath into the FTS table.
*/
void visit_file(void *pCtx, const char *zPath){
  int rc;
  VisitContext *p = (VisitContext*)pCtx;
  /* printf("%s\n", zPath); */
  sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC);
  sqlite3_step(p->pInsert);
  rc = sqlite3_reset(p->pInsert);

  if( rc!=SQLITE_OK ) sqlite_error_out("insert", p->db);





}

/*
** Recursively traverse directory zDir. For each file that is not a 
** directory, invoke the supplied callback with its path.
*/
static void traverse(







>
















>
|
>
>
>
>
>







93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
}

/*
** Context object for visit_file().
*/
typedef struct VisitContext VisitContext;
struct VisitContext {
  int nRowPerTrans;
  sqlite3 *db;                    /* Database handle */
  sqlite3_stmt *pInsert;          /* INSERT INTO fts VALUES(readtext(:1)) */
};

/*
** Callback used with traverse(). The first argument points to an object
** of type VisitContext. This function inserts the contents of the text
** file zPath into the FTS table.
*/
void visit_file(void *pCtx, const char *zPath){
  int rc;
  VisitContext *p = (VisitContext*)pCtx;
  /* printf("%s\n", zPath); */
  sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC);
  sqlite3_step(p->pInsert);
  rc = sqlite3_reset(p->pInsert);
  if( rc!=SQLITE_OK ){
    sqlite_error_out("insert", p->db);
  }else if( p->nRowPerTrans>0 
         && (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0 
  ){
    sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0);
  }
}

/*
** Recursively traverse directory zDir. For each file that is not a 
** directory, invoke the supplied callback with its path.
*/
static void traverse(
146
147
148
149
150
151
152

153
154
155
156
157
158
159
160
161
162
163
164



165
166
167
168
169
170
171

int main(int argc, char **argv){
  int iFts = 5;                   /* Value of -fts option */
  int bMap = 0;                   /* True to create mapping table */
  const char *zDir = ".";         /* Directory to scan */
  int i;
  int rc;

  sqlite3 *db;
  char *zSql;
  VisitContext sCtx;

  if( argc % 2 ) showHelp(argv[0]);

  for(i=1; i<(argc-1); i+=2){
    char *zOpt = argv[i];
    char *zArg = argv[i+1];
    if( strcmp(zOpt, "-fts")==0 ){
      iFts = atoi(zArg);
      if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]);



    }
    else if( strcmp(zOpt, "-idx")==0 ){
      bMap = atoi(zArg);
      if( bMap!=0 && bMap!=1 ) showHelp(argv[0]);
    }
    else if( strcmp(zOpt, "-dir")==0 ){
      zDir = zArg;







>












>
>
>







154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183

int main(int argc, char **argv){
  int iFts = 5;                   /* Value of -fts option */
  int bMap = 0;                   /* True to create mapping table */
  const char *zDir = ".";         /* Directory to scan */
  int i;
  int rc;
  int nRowPerTrans = 0;
  sqlite3 *db;
  char *zSql;
  VisitContext sCtx;

  if( argc % 2 ) showHelp(argv[0]);

  for(i=1; i<(argc-1); i+=2){
    char *zOpt = argv[i];
    char *zArg = argv[i+1];
    if( strcmp(zOpt, "-fts")==0 ){
      iFts = atoi(zArg);
      if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]);
    }
    if( strcmp(zOpt, "-trans")==0 ){
      nRowPerTrans = atoi(zArg);
    }
    else if( strcmp(zOpt, "-idx")==0 ){
      bMap = atoi(zArg);
      if( bMap!=0 && bMap!=1 ) showHelp(argv[0]);
    }
    else if( strcmp(zOpt, "-dir")==0 ){
      zDir = zArg;
185
186
187
188
189
190
191

192
193
194
195
196
197

198

199
200
201
202
203
204
  rc = sqlite3_exec(db, zSql, 0, 0, 0);
  if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
  sqlite3_free(zSql);

  /* Compile the INSERT statement to write data to the FTS table. */
  memset(&sCtx, 0, sizeof(VisitContext));
  sCtx.db = db;

  rc = sqlite3_prepare_v2(db, 
      "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0
  );
  if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db);

  /* Load all files in the directory hierarchy into the FTS table. */

  traverse(zDir, (void*)&sCtx, visit_file);


  /* Clean up and exit. */
  sqlite3_finalize(sCtx.pInsert);
  sqlite3_close(db);
  return 0;
}







>






>

>






197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
  rc = sqlite3_exec(db, zSql, 0, 0, 0);
  if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
  sqlite3_free(zSql);

  /* Compile the INSERT statement to write data to the FTS table. */
  memset(&sCtx, 0, sizeof(VisitContext));
  sCtx.db = db;
  sCtx.nRowPerTrans = nRowPerTrans;
  rc = sqlite3_prepare_v2(db, 
      "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0
  );
  if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db);

  /* Load all files in the directory hierarchy into the FTS table. */
  if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0);
  traverse(zDir, (void*)&sCtx, visit_file);
  if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0);

  /* Clean up and exit. */
  sqlite3_finalize(sCtx.pInsert);
  sqlite3_close(db);
  return 0;
}