SQLite

Check-in [aa34bf666c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change fts5 doclist-index structures to be trees instead of flat lists. This only makes a difference for databases that contain millions of instances of the same token.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: aa34bf666c384cf32a8d8166ab6d9afbca26a256
User & Date: dan 2015-05-13 17:15:32.981
Context
2015-05-13
18:12
Merge latest trunk changes with this branch. (check-in: b5f0e8c5b4 user: dan tags: fts5)
17:15
Change fts5 doclist-index structures to be trees instead of flat lists. This only makes a difference for databases that contain millions of instances of the same token. (check-in: aa34bf666c user: dan tags: fts5)
2015-05-09
18:28
Allow the fts5vocab table to optionally provide data on a per-column basis. (check-in: 3922276135 user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5.c.
13
14
15
16
17
18
19








20
21
22
23
24
25
26
** This is an SQLite module implementing full-text search.
*/

#if defined(SQLITE_ENABLE_FTS5)

#include "fts5Int.h"










typedef struct Fts5Table Fts5Table;
typedef struct Fts5Cursor Fts5Cursor;
typedef struct Fts5Global Fts5Global;
typedef struct Fts5Auxiliary Fts5Auxiliary;
typedef struct Fts5Auxdata Fts5Auxdata;








>
>
>
>
>
>
>
>







13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
** This is an SQLite module implementing full-text search.
*/

#if defined(SQLITE_ENABLE_FTS5)

#include "fts5Int.h"

/*
** This variable is set to true when running corruption tests. Otherwise
** false. If it is false, extra assert() conditions in the fts5 code are
** activated - conditions that are only true if it is guaranteed that the
** fts5 database is not corrupt.
*/
int sqlite3_fts5_may_be_corrupt = 0;


typedef struct Fts5Table Fts5Table;
typedef struct Fts5Cursor Fts5Cursor;
typedef struct Fts5Global Fts5Global;
typedef struct Fts5Auxiliary Fts5Auxiliary;
typedef struct Fts5Auxdata Fts5Auxdata;

Changes to ext/fts5/fts5Int.h.
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#endif

/*
** The assert_nc() macro is similar to the assert() macro, except that it
** is used for assert() conditions that are true only if it can be 
** guaranteed that the database is not corrupt.
*/
#ifdef SQLITE_TEST
extern int sqlite3_fts5_may_be_corrupt;
# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
#else
# define assert_nc(x) assert(x)
#endif

typedef struct Fts5Global Fts5Global;







|







40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#endif

/*
** The assert_nc() macro is similar to the assert() macro, except that it
** is used for assert() conditions that are true only if it can be 
** guaranteed that the database is not corrupt.
*/
#ifdef SQLITE_DEBUG
extern int sqlite3_fts5_may_be_corrupt;
# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
#else
# define assert_nc(x) assert(x)
#endif

typedef struct Fts5Global Fts5Global;
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
  char *zRankArgs;                /* Arguments to rank function */

  /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
  char **pzErrmsg;
};

/* Current expected value of %_config table 'version' field */
#define FTS5_CURRENT_VERSION 1

#define FTS5_CONTENT_NORMAL   0
#define FTS5_CONTENT_NONE     1
#define FTS5_CONTENT_EXTERNAL 2










|







111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
  char *zRankArgs;                /* Arguments to rank function */

  /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
  char **pzErrmsg;
};

/* Current expected value of %_config table 'version' field */
#define FTS5_CURRENT_VERSION 2

#define FTS5_CONTENT_NORMAL   0
#define FTS5_CONTENT_NONE     1
#define FTS5_CONTENT_EXTERNAL 2



Changes to ext/fts5/fts5_index.c.
198
199
200
201
202
203
204











205
206
207
208
209
210
211

212






213


214
215
216
217
218
219
220
**
**     * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there 
**       is an associated index-by-rowid record.
**     * the number of zero-term leaves as a varint.
**
** 5. Segment doclist indexes:
**











**   A list of varints. If the first termless page contains at least one
**   docid, the list begins with that docid as a varint followed by the
**   value 1 (0x01). Or, if the first termless page contains no docids,
**   a varint containing the last docid stored on the term page followed
**   by a 0 (0x00) value.
**
**   For each subsequent page in the doclist, either a 0x00 byte if the

**   page contains no terms, or a delta-encoded docid (always +ve) 






**   representing the first docid on the page otherwise.


*/

/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID   10    /* The structure record */







>
>
>
>
>
>
>
>
>
>
>
|
<
|
<
|

|
>
|
>
>
>
>
>
>
|
>
>







198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

217

218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
**
**     * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there 
**       is an associated index-by-rowid record.
**     * the number of zero-term leaves as a varint.
**
** 5. Segment doclist indexes:
**
**   Doclist indexes are themselves b-trees, however they usually consist of
**   a single leaf record only. The format of each doclist index leaf page 
**   is:
**
**     * Flags byte. Bits are:
**         0x01: Clear if leaf is also the root page, otherwise set.
**
**     * Page number of fts index leaf page. As a varint.
**
**     * First docid on page indicated by previous field. As a varint.
**
**     * A list of varints, one for each subsequent termless page. A 

**       positive delta if the termless page contains at least one docid, 

**       or an 0x00 byte otherwise.
**
**   Internal doclist index nodes are:
**
**     * Flags byte. Bits are:
**         0x01: Clear for root page, otherwise set.
**
**     * Page number of first child page. As a varint.
**
**     * Copy of first docid on page indicated by previous field. As a varint.
**
**     * A list of delta-encoded varints - the first docid on each subsequent
**       child page. 
**
*/

/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID   10    /* The structure record */
236
237
238
239
240
241
242

243
244
245
246

247
248
249
250
251




252
253
254
255
256
257

258
259
260
261
262
263

264
265
266
267
268
269

270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287


288
289
290
291
292
293
294
**
** The rowid for a node is then found using the FTS5_SEGMENT_ROWID() macro
** below. The FTS5_DATA_*_B macros define the number of bits used
** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns
** SQLITE_FULL and fails the current operation if they ever prove too small.
*/
#define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */

#define FTS5_DATA_HEIGHT_B  5     /* Max b-tree height of 32 */
#define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483648 */

#define FTS5_SEGMENT_ROWID(segid, height, pgno) (                         \

 ((i64)(segid)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)





/*
** The height of segment b-trees is actually limited to one less than 
** (1<<HEIGHT_BITS). This is because the rowid address space for nodes
** with such a height is used by doclist indexes.
*/
#define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_DATA_HEIGHT_B)-1)


/*
** Maximum segments permitted in a single index 
*/
#define FTS5_MAX_SEGMENT 2000


/*
** The rowid for the doclist index associated with leaf page pgno of segment
** segid in index idx.
*/
#define FTS5_DOCLIST_IDX_ROWID(segid, pgno) \
        FTS5_SEGMENT_ROWID(segid, FTS5_SEGMENT_MAX_HEIGHT, pgno)


#ifdef SQLITE_DEBUG
/* Debug-build helper: returns SQLITE_CORRUPT_VTAB. Declared with (void) so
** that the definition is a proper prototype rather than an old-style
** declarator with an unspecified parameter list. */
int sqlite3Fts5Corrupt(void) { return SQLITE_CORRUPT_VTAB; }
#endif


/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
*/
#define FTS5_DATA_ZERO_PADDING 8

typedef struct Fts5BtreeIter Fts5BtreeIter;
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
typedef struct Fts5ChunkIter Fts5ChunkIter;
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;


typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5PosIter Fts5PosIter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;







>



|
>
|




>
>
>
>






>






>




|

>


















>
>







254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
**
** The rowid for a node is then found using the FTS5_SEGMENT_ROWID() macro
** below. The FTS5_DATA_*_B macros define the number of bits used
** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns
** SQLITE_FULL and fails the current operation if they ever prove too small.
*/
#define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* 5 bits: height values 0..31 */
#define FTS5_DATA_PAGE_B   31     /* 31 bits: max page number 2147483647 */

/*
** Compose a %_data rowid from its four components. Reading from the least
** significant bit upwards, the fields are laid out as:
**
**     pgno    FTS5_DATA_PAGE_B bits
**     height  FTS5_DATA_HEIGHT_B bits
**     dlidx   FTS5_DATA_DLI_B bit (1 for doclist-index pages, else 0)
**     segid   all remaining high-order bits
**
** The fields are combined by addition, so each argument must already fit
** within its own field width. Nothing here checks that.
*/
#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

/* Rowid of a segment b-tree page (dlidx flag clear) and of a doclist-index
** page (dlidx flag set), respectively. */
#define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno)
#define FTS5_DLIDX_ROWID(segid, height, pgno)   fts5_dri(segid, 1, height, pgno)

#if 0
/*
** The height of segment b-trees is actually limited to one less than 
** (1<<HEIGHT_BITS). This is because the rowid address space for nodes
** with such a height is used by doclist indexes.
*/
#define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_DATA_HEIGHT_B)-1)
#endif

/*
** Maximum segments permitted in a single index 
*/
#define FTS5_MAX_SEGMENT 2000

#if 0
/*
** The rowid for the doclist index associated with leaf page pgno of segment
** segid in index idx.
*/
#define FTS5_DOCLIST_IDX_ROWID(segid, height, pgno) \
        FTS5_SEGMENT_ROWID(segid, FTS5_SEGMENT_MAX_HEIGHT, pgno)
#endif

#ifdef SQLITE_DEBUG
/* Debug-build helper: returns SQLITE_CORRUPT_VTAB. Declared with (void) so
** that the definition is a proper prototype rather than an old-style
** declarator with an unspecified parameter list. */
int sqlite3Fts5Corrupt(void) { return SQLITE_CORRUPT_VTAB; }
#endif


/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
*/
#define FTS5_DATA_ZERO_PADDING 8

typedef struct Fts5BtreeIter Fts5BtreeIter;
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
typedef struct Fts5ChunkIter Fts5ChunkIter;
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5PosIter Fts5PosIter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
380
381
382
383
384
385
386






387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
** An object of type Fts5SegWriter is used to write to segments.
*/
/* Per-page write state: the page number, the page content accumulated so
** far, and the previous term written to the page. */
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  Fts5Buffer buf;                 /* Buffer containing page data */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};






/* State used while writing one segment. In this version the doclist index
** is accumulated as a single flat buffer (cdlidx); iDlidxPrev and
** bDlidxPrevValid track the last rowid appended to it. */
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  int nWriter;                    /* Number of entries in aWriter */
  Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
  i64 iPrevRowid;                 /* Previous docid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */
  Fts5Buffer cdlidx;               /* Doclist index */
  i64 iDlidxPrev;                 /* Previous rowid appended to dlidx */
  int bDlidxPrevValid;            /* True if iDlidxPrev is valid */
};

/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/docid pair in the merged data.
**
** nSeg is always a power of two greater than or equal to the number of







>
>
>
>
>
>










|
|
|







409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
** An object of type Fts5SegWriter is used to write to segments.
*/
/* Per-page write state: the page number, the page content accumulated so
** far, and the previous term written to the page. */
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  Fts5Buffer buf;                 /* Buffer containing page data */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
/* Write state for one doclist-index page. Fts5SegWriter.aDlidx[] holds an
** array of these objects. */
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous docid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
/* State used while writing one segment. Doclist-index pages under
** construction are tracked by the aDlidx[] array of Fts5DlidxWriter
** objects. */
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  int nWriter;                    /* Number of entries in aWriter */
  Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
  i64 iPrevRowid;                 /* Previous docid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */
};

/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/docid pair in the merged data.
**
** nSeg is always a power of two greater than or equal to the number of
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573






574
575
576
577
578
579
580
**
** bEof:
**   Set to true once iterator has reached EOF.
**
** iOff:
**   Set to the current offset within record pData.
*/
/* Iterator over a doclist index stored as a single flat record (pData). */
struct Fts5DlidxIter {
  Fts5Data *pData;              /* Data for doclist index, if any */
  int iOff;                     /* Current offset into pData */
  int bEof;                     /* At EOF already */
  int iFirstOff;                /* Used by reverse iterators only */

  /* Output variables */
  int iLeafPgno;                /* Page number of current leaf page */
  i64 iRowid;                   /* First rowid on leaf iLeafPgno */
};








/*
** An Fts5BtreeIter object is used to iterate through all entries in the
** b-tree hierarchy belonging to a single fts5 segment. In this case the
** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the
** b-tree hierarchy consists of the following:







|
|
|

|





>
>
>
>
>
>







592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
**
** bEof:
**   Set to true once iterator has reached EOF.
**
** iOff:
**   Set to the current offset within record pData.
*/
/* State for one level of a doclist-index iterator: the page currently
** loaded for that level and the position within it. An iOff value of zero
** means the page header has not been decoded yet (see fts5DlidxLvlNext()). */
struct Fts5DlidxLvl {
  Fts5Data *pData;              /* Data for current page of this level */
  int iOff;                     /* Current offset into pData */
  int bEof;                     /* At EOF already */
  int iFirstOff;                /* Used by reverse iterators */

  /* Output variables */
  int iLeafPgno;                /* Page number of current leaf page */
  i64 iRowid;                   /* First rowid on leaf iLeafPgno */
};
/* Iterator over a doclist-index tree. The object is allocated with room for
** nLvl entries in aLvl[]; the index into aLvl[] is the height used when
** building page rowids, and aLvl[0] supplies the iterator's outputs. */
struct Fts5DlidxIter {
  int nLvl;                     /* Number of valid entries in aLvl[] */
  int iSegid;                   /* Segment id used to build page rowids */
  Fts5DlidxLvl aLvl[1];         /* Per-level state (over-allocated) */
};



/*
** An Fts5BtreeIter object is used to iterate through all entries in the
** b-tree hierarchy belonging to a single fts5 segment. In this case the
** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the
** b-tree hierarchy consists of the following:
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425

1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444

1445
1446

1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464



1465






1466
1467

1468
1469
1470
1471
1472

1473
1474
1475
1476
1477
















1478
1479
1480

1481
1482
1483
1484
1485

1486



1487
1488
1489







1490
1491





1492
1493
1494
1495
1496
1497
1498
1499
1500
1501


1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516



1517






1518







1519


1520

1521
1522
1523












1524
1525



1526
















1527
1528
1529
1530
1531
1532
1533
1534
1535


1536

1537
1538
1539
1540

1541
1542
1543
1544











1545


1546
1547
1548
1549
1550
1551





1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563


1564
1565
1566
1567
1568
1569
1570
** Free any memory allocated by the iterator object.
*/
static void fts5NodeIterFree(Fts5NodeIter *pIter){
  /* Only the term buffer is released; pIter itself is not freed here. */
  fts5BufferFree(&pIter->term);
}

/*
** Position a doclist-index iterator on its first entry. Before calling,
** the caller must have set:
**
**   pData:     the doclist-index record,
**   iLeafPgno: the page number of the leaf that the doclist-index is
**              associated with (the leaf that features the term).
**
** The record begins with a rowid varint followed by a single "present"
** byte. If that byte is zero, any further 0x00 bytes are skipped and the
** next varint (a delta) is added to the rowid. Each skipped position
** advances iLeafPgno by one. If the record ends while zeros are being
** skipped, the iterator is flagged as being at EOF.
**
** Returns the value of pIter->bEof.
*/
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  Fts5Data *pRec = pIter->pData;
  int nPage = 1;                  /* Number of pages to step iLeafPgno by */

  assert( pIter->pData );
  assert( pIter->iLeafPgno>0 );

  /* Initial rowid, then the "present" flag byte. */
  pIter->iOff += getVarint(&pRec->p[0], (u64*)&pIter->iRowid);
  if( pRec->p[pIter->iOff++]==0 ){
    /* First page is not present. Step over it and over every page that
    ** is represented by a 0x00 byte. */
    nPage = 2;
    while( pIter->iOff<pRec->n && pRec->p[pIter->iOff]==0 ){
      pIter->iOff++;
      nPage++;
    }

    if( pIter->iOff>=pRec->n ){
      pIter->bEof = 1;
    }else{
      i64 iDelta;
      pIter->iOff += getVarint(&pRec->p[pIter->iOff], (u64*)&iDelta);
      pIter->iRowid += iDelta;
    }
  }

  pIter->iLeafPgno += nPage;
  pIter->iFirstOff = pIter->iOff;
  return pIter->bEof;
}

/*
** Step the doclist-index iterator forward by one entry. Any 0x00 bytes at
** the current offset are skipped, each one counting as a page, and the
** varint that follows is added to the current rowid. If the record is
** exhausted first, the EOF flag is set.
**
** Returns the value of pIter->bEof.
*/
static int fts5DlidxIterNext(Fts5DlidxIter *pIter){
  Fts5Data *pRec = pIter->pData;
  int iScan = pIter->iOff;

  while( iScan<pRec->n && pRec->p[iScan]==0 ){
    iScan++;
  }

  if( iScan>=pRec->n ){
    pIter->bEof = 1;
  }else{
    i64 iDelta;
    pIter->iLeafPgno += (iScan - pIter->iOff) + 1;
    iScan += getVarint(&pRec->p[iScan], (u64*)&iDelta);
    pIter->iRowid += iDelta;
    pIter->iOff = iScan;
  }

  return pIter->bEof;
}


static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  /* The Fts5Index handle is not consulted; only the iterator flag is. */
  (void)p;
  return pIter->bEof;
}

static void fts5DlidxIterLast(Fts5DlidxIter *pIter){
  /* Nothing to do if the index is already at EOF after the first entry. */
  if( fts5DlidxIterFirst(pIter) ) return;

  /* Walk forward until Next() reports EOF, then clear the flag so that the
  ** iterator is left pointing at the final entry visited. */
  for(;;){
    if( fts5DlidxIterNext(pIter) ) break;
  }
  pIter->bEof = 0;
}






static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){
  int iCur = pIter->iOff;

  assert( pIter->bEof==0 );
  if( iCur<=pIter->iFirstOff ){
    /* Already on the first entry. */
    pIter->bEof = 1;
  }else{
    u8 *aRec = pIter->pData->p;
    i64 iDelta;
    int iStop = iCur - 9;
    if( iStop<0 ) iStop = 0;

    /* iCur is the offset just past a varint. Walk backwards to the first
    ** byte of that varint, never looking more than 9 bytes back and never
    ** reading from before the start of the buffer. */
    iCur--;
    while( iCur>iStop && (aRec[iCur-1] & 0x80)!=0 ){
      iCur--;
    }

    getVarint(&aRec[iCur], (u64*)&iDelta);
    pIter->iRowid -= iDelta;
    pIter->iLeafPgno--;

    /* Skip backwards past any 0x00 bytes. */
    while( iCur>pIter->iFirstOff
        && aRec[iCur-1]==0x00 && (aRec[iCur-2] & 0x80)==0
    ){
      iCur--;
      pIter->iLeafPgno--;
    }
    pIter->iOff = iCur;
  }

  return pIter->bEof;
}

static Fts5DlidxIter *fts5DlidxIterInit(
  Fts5Index *p,                   /* Fts5 Backend to iterate within */
  int bRev,                       /* True for ORDER BY ASC */
  int iSegid,                     /* Segment id */
  int iLeafPg                     /* Leaf page number to load dlidx for */
){
  Fts5DlidxIter *pNew;

  pNew = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter));
  if( pNew==0 ) return 0;

  /* Load the doclist-index record. If this fails, discard the iterator. */
  pNew->pData = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iSegid, iLeafPg));
  if( pNew->pData==0 ){
    sqlite3_free(pNew);
    return 0;
  }

  /* Position on the last entry when bRev is set, otherwise on the first. */
  pNew->iLeafPgno = iLeafPg;
  if( bRev ){
    fts5DlidxIterLast(pNew);
  }else{
    fts5DlidxIterFirst(pNew);
  }
  return pNew;
}

/*
** Release a doclist-index iterator returned by fts5DlidxIterInit(), along
** with the record it holds. Passing NULL is a no-op.
*/
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  if( pIter==0 ) return;
  fts5DataRelease(pIter->pData);
  sqlite3_free(pIter);
}

static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){
  /* Two 16-bit values are read with fts5GetU16(): the one at byte offset 0
  ** is returned via *piRowid and the one at byte offset 2 via *piTerm. */
  u8 *aHdr = pLeaf->p;
  *piRowid = (int)fts5GetU16(&aHdr[0]);
  *piTerm = (int)fts5GetU16(&aHdr[2]);
}








|
<
<
<
|
<
<
<
<

|
|
<
<

>
|
|
|
<
|
<
<
|

|
|
|
<


<
<
|

>
|
|
>

|


<

<
|





|
|
<

|
>
>
>
|
>
>
>
>
>
>
|
|
>
|
<
<
<
|
>
|
<
|
|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|


>

|


|
>
|
>
>
>
|
|
|
>
>
>
>
>
>
>
|
|
>
>
>
>
>
|
|

|
|
|

|


>
>











|
|

|
>
>
>
|
>
>
>
>
>
>
|
>
>
>
>
>
>
>
|
>
>
|
>
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>








|
>
>

>
|
|

<
>
|
<
|
|
>
>
>
>
>
>
>
>
>
>
>
|
>
>



|


>
>
>
>
>




<
<
<
|
|
<
<
|
>
>







1445
1446
1447
1448
1449
1450
1451
1452



1453




1454
1455
1456


1457
1458
1459
1460
1461

1462


1463
1464
1465
1466
1467

1468
1469


1470
1471
1472
1473
1474
1475
1476
1477
1478
1479

1480

1481
1482
1483
1484
1485
1486
1487
1488

1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504



1505
1506
1507

1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660

1661
1662

1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693



1694
1695


1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
** Free any memory allocated by the iterator object.
*/
static void fts5NodeIterFree(Fts5NodeIter *pIter){
  /* Only the term buffer is released; pIter itself is not freed here. */
  fts5BufferFree(&pIter->term);
}

/*
** Advance a single doclist-index level by one entry within its current
** page. Returns non-zero (pLvl->bEof) once the end of the page is reached.
**
** The first call made for a freshly loaded page (iOff==0) decodes the page
** header: the byte at offset 0 is stepped over, then a varint page number
** and a varint rowid are read. The offset that follows the header is
** remembered in iFirstOff.
**
** Later calls skip any 0x00 bytes (each one accounts for one page), then
** read a varint delta and add it to the current rowid.
*/
static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
  Fts5Data *pPage = pLvl->pData;
  u8 *aPage = pPage->p;
  int iScan;

  if( pLvl->iOff==0 ){
    int iHdr = 1;
    assert( pLvl->bEof==0 );
    iHdr += fts5GetVarint32(&aPage[1], pLvl->iLeafPgno);
    iHdr += getVarint(&aPage[iHdr], (u64*)&pLvl->iRowid);
    pLvl->iOff = iHdr;
    pLvl->iFirstOff = iHdr;
    return pLvl->bEof;
  }

  iScan = pLvl->iOff;
  while( iScan<pPage->n && aPage[iScan]==0 ){
    iScan++;
  }

  if( iScan>=pPage->n ){
    pLvl->bEof = 1;
  }else{
    i64 iDelta;
    pLvl->iLeafPgno += (iScan - pLvl->iOff) + 1;
    iScan += getVarint(&aPage[iScan], (u64*)&iDelta);
    pLvl->iRowid += iDelta;
    pLvl->iOff = iScan;
  }

  return pLvl->bEof;
}

/*
** Advance level iLvl of the iterator by one entry. If the page loaded at
** that level is exhausted and a higher level exists, the higher level is
** advanced recursively. Provided the higher level is not then at EOF, the
** page it now refers to is loaded into level iLvl and positioned on its
** first entry.
**
** Returns non-zero if level 0 is at EOF.
*/
static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pCur = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlNext(pCur)==0 ){
    return pIter->aLvl[0].bEof;
  }
  if( (iLvl+1)>=pIter->nLvl ){
    /* No higher level to consult. */
    return pIter->aLvl[0].bEof;
  }

  fts5DlidxIterNextR(p, pIter, iLvl+1);
  if( pCur[1].bEof==0 ){
    i64 iPage = FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pCur[1].iLeafPgno);
    fts5DataRelease(pCur->pData);
    memset(pCur, 0, sizeof(Fts5DlidxLvl));
    pCur->pData = fts5DataRead(p, iPage);
    if( pCur->pData ){
      fts5DlidxLvlNext(pCur);
    }
  }

  return pIter->aLvl[0].bEof;
}
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
  /* Iteration is always driven from level 0. */
  const int iStartLvl = 0;
  return fts5DlidxIterNextR(p, pIter, iStartLvl);
}

/*
** Position the iterator on the first entry of the doclist-index. Every
** level in aLvl[] must already have its pData page loaded. Each level is
** stepped once with fts5DlidxLvlNext(), in order from level 0 upwards.
**
** Returns non-zero if level 0 is at EOF afterwards.
*/
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  int iLvl = 0;
  while( iLvl<pIter->nLvl ){
    fts5DlidxLvlNext(&pIter->aLvl[iLvl]);
    iLvl++;
  }
  return pIter->aLvl[0].bEof;
}


static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  /* A pending error on the Fts5Index handle is reported as EOF too. */
  if( p->rc!=SQLITE_OK ) return 1;
  return pIter->aLvl[0].bEof!=0;
}

static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
  int iLvl = pIter->nLvl - 1;

  /* Work from the highest level down. At each level move to the last entry
  ** on the loaded page, then replace the page held by the level below with
  ** the page that entry refers to. Stops early if p->rc is set. */
  while( p->rc==SQLITE_OK && iLvl>=0 ){
    Fts5DlidxLvl *pCur = &pIter->aLvl[iLvl];

    for(;;){
      if( fts5DlidxLvlNext(pCur) ) break;
    }
    pCur->bEof = 0;

    if( iLvl>0 ){
      Fts5DlidxLvl *pBelow = &pIter->aLvl[iLvl-1];
      i64 iPage = FTS5_DLIDX_ROWID(pIter->iSegid, iLvl-1, pCur->iLeafPgno);
      fts5DataRelease(pBelow->pData);
      memset(pBelow, 0, sizeof(Fts5DlidxLvl));
      pBelow->pData = fts5DataRead(p, iPage);
    }
    iLvl--;
  }
}

/*
** Move the iterator passed as the only argument to the previous entry
** on its current page.
**
** If the level is already on the first entry of the page (iOff is not
** greater than iFirstOff), the bEof flag is set instead. Otherwise the
** delta varint that ends at iOff is subtracted from iRowid, and iLeafPgno
** is reduced by one plus the number of 0x00 page markers found directly
** before that varint.
**
** Returns the value of pLvl->bEof.
*/
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
  int iOff = pLvl->iOff;

  assert( pLvl->bEof==0 );
  if( iOff<=pLvl->iFirstOff ){
    pLvl->bEof = 1;
  }else{
    u8 *a = pLvl->pData->p;
    i64 iVal;
    int iLimit;
    int ii;
    int nZero = 0;

    /* Currently iOff points to the first byte of a varint. This block 
    ** decrements iOff until it points to the first byte of the previous 
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }

    /* Undo the effect of stepping forward over this entry. */
    getVarint(&a[iOff], (u64*)&iVal);
    pLvl->iRowid -= iVal;
    pLvl->iLeafPgno--;

    /* Skip backwards past any 0x00 varints. nZero counts the 0x00 bytes
    ** found; ii is left at the first byte before them that is not 0x00
    ** (or just before iFirstOff). */
    for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
      nZero++;
    }
    if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
      /* The byte immediately before the last 0x00 byte has the 0x80 bit
      ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
      ** bytes before a[ii]. */
      int bZero = 0;              /* True if last 0x00 counts */
      if( (ii-8)>=pLvl->iFirstOff ){
        int j;
        for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
        bZero = (j>8);
      }
      /* Otherwise that 0x00 is the final byte of the preceding varint and
      ** must not be counted as a page marker. */
      if( bZero==0 ) nZero--;
    }
    pLvl->iLeafPgno -= nZero;
    pLvl->iOff = iOff - nZero;
  }

  return pLvl->bEof;
}

/*
** Step level iLvl of the iterator back by one entry. If that level is
** already on the first entry of its page and a higher level exists, the
** higher level is stepped back recursively. Provided the higher level is
** not then at EOF, the page it now refers to is loaded into level iLvl and
** positioned on its last entry.
**
** Returns non-zero if level 0 is at EOF.
*/
static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pCur = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlPrev(pCur)==0 ){
    return pIter->aLvl[0].bEof;
  }
  if( (iLvl+1)>=pIter->nLvl ){
    /* No higher level to consult. */
    return pIter->aLvl[0].bEof;
  }

  fts5DlidxIterPrevR(p, pIter, iLvl+1);
  if( pCur[1].bEof==0 ){
    i64 iPage = FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pCur[1].iLeafPgno);
    fts5DataRelease(pCur->pData);
    memset(pCur, 0, sizeof(Fts5DlidxLvl));
    pCur->pData = fts5DataRead(p, iPage);
    if( pCur->pData ){
      /* Run to the end of the new page, then clear the EOF flag so the
      ** level rests on the page's last entry. */
      for(;;){
        if( fts5DlidxLvlNext(pCur) ) break;
      }
      pCur->bEof = 0;
    }
  }

  return pIter->aLvl[0].bEof;
}
static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
  /* Reverse iteration is always driven from level 0. */
  const int iStartLvl = 0;
  return fts5DlidxIterPrevR(p, pIter, iStartLvl);
}

/*
** Release a doclist-index iterator returned by fts5DlidxIterInit(): the
** page held by each of its nLvl levels, then the object itself. Passing
** NULL is a no-op.
*/
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  int iLvl;
  if( pIter==0 ) return;
  iLvl = 0;
  while( iLvl<pIter->nLvl ){
    fts5DataRelease(pIter->aLvl[iLvl].pData);
    iLvl++;
  }
  sqlite3_free(pIter);
}

/*
** Allocate and position a doclist-index iterator for the doclist-index
** associated with leaf page iLeafPg of segment iSegid.
**
** Pages are read for heights 0, 1, 2, ... (one aLvl[] slot each, growing
** the allocation every time) until a page is found whose first byte has
** bit 0x01 clear. The iterator is then positioned on the first entry, or
** on the last entry if bRev is true.
**
** Returns NULL if p->rc is set on entry or becomes set while building the
** iterator (p->rc is set to SQLITE_NOMEM if the allocation fails). The
** returned object is released with fts5DlidxIterFree().
*/
static Fts5DlidxIter *fts5DlidxIterInit(
  Fts5Index *p,                   /* Fts5 Backend to iterate within */
  int bRev,                       /* True to start on the last entry */
  int iSegid,                     /* Segment id */
  int iLeafPg                     /* Leaf page number to load dlidx for */
){
  Fts5DlidxIter *pIter = 0;
  int i;
  int bDone = 0;

  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    /* Fts5DlidxIter already contains one Fts5DlidxLvl, so level i needs i
    ** additional slots. */
    int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
    Fts5DlidxIter *pNew;


    pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte);
    if( pNew==0 ){

      /* pIter still refers to the previous allocation and is released by
      ** the cleanup at the end of this function. */
      p->rc = SQLITE_NOMEM;
    }else{
      i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
      Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
      pIter = pNew;
      memset(pLvl, 0, sizeof(Fts5DlidxLvl));
      pLvl->pData = fts5DataRead(p, iRowid);
      /* A clear 0x01 flag bit marks the last level to be loaded. */
      if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
        bDone = 1;
      }
      pIter->nLvl = i+1;
    }
  }

  if( p->rc==SQLITE_OK ){
    pIter->iSegid = iSegid;
    if( bRev==0 ){
      fts5DlidxIterFirst(pIter);
    }else{
      fts5DlidxIterLast(p, pIter);
    }
  }

  /* Covers errors raised by the loop above and by fts5DlidxIterLast(). */
  if( p->rc!=SQLITE_OK ){
    fts5DlidxIterFree(pIter);
    pIter = 0;
  }

  return pIter;
}




static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
  /* The iterator's current rowid is the one held by level 0. */
  Fts5DlidxLvl *pLeafLvl = &pIter->aLvl[0];
  return pLeafLvl->iRowid;
}
static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
  /* The iterator's current leaf page number is the one held by level 0. */
  Fts5DlidxLvl *pLeafLvl = &pIter->aLvl[0];
  return pLeafLvl->iLeafPgno;
}

/*
** Decode the two 16-bit fields found at byte offsets 0 and 2 of leaf
** record pLeaf. The first is stored in *piRowid and the second in *piTerm.
*/
static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){
  int iRowidField = (int)fts5GetU16(&pLeaf->p[0]);
  int iTermField = (int)fts5GetU16(&pLeaf->p[2]);
  *piRowid = iRowidField;
  *piTerm = iTermField;
}

1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
  int pgnoLast = 0;

  if( pDlidx ){
    /* If the doclist-iterator is already at EOF, then the current doclist
    ** contains no entries except those on the current page. */
    if( fts5DlidxIterEof(p, pDlidx)==0 ){
      int iSegid = pIter->pSeg->iSegid;
      pgnoLast = pDlidx->iLeafPgno;
      pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast));
    }else{
      pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
    }
  }else{
    int iOff;                               /* Byte offset within pLeaf */
    Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */







|







2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
  int pgnoLast = 0;

  if( pDlidx ){
    /* If the doclist-iterator is already at EOF, then the current doclist
    ** contains no entries except those on the current page. */
    if( fts5DlidxIterEof(p, pDlidx)==0 ){
      int iSegid = pIter->pSeg->iSegid;
      pgnoLast = fts5DlidxIterPgno(pDlidx);
      pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast));
    }else{
      pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
    }
  }else{
    int iOff;                               /* Byte offset within pLeaf */
    Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356



2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372

2373
2374
2375
2376
2377
2378
2379

  pRes->iFirst = iRes;
  return 0;
}

/*
** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
** It is an error if leaf iLeafPgno does not exist or contains no rowids. In
** either case p->rc is set to FTS5_CORRUPT and the iterator should not be
** used further.
**
** iLeafPgno must be greater than the page the iterator currently points to.
*/
static void fts5SegIterGotoPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int iLeafPgno                   /* Leaf page to jump to */
){
  assert( iLeafPgno>pIter->iLeafPgno );

  if( iLeafPgno>pIter->pSeg->pgnoLast ){
    /* The requested page lies beyond the last leaf of the segment. Report
    ** corruption instead of going on to dereference a leaf that may not
    ** have been loaded. */
    p->rc = FTS5_CORRUPT;
  }else{
    /* Position on the preceding page, then step forward onto iLeafPgno. */
    pIter->iLeafPgno = iLeafPgno-1;
    fts5SegIterNextPage(p, pIter);
    assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );

    if( p->rc==SQLITE_OK ){
      int iOff;
      u8 *a = pIter->pLeaf->p;
      int n = pIter->pLeaf->n;

      /* The first two bytes of the leaf hold the offset of its first rowid.
      ** That offset must lie past the 4-byte header and inside the page. */
      iOff = fts5GetU16(&a[0]);
      if( iOff<4 || iOff>=n ){
        p->rc = FTS5_CORRUPT;
      }else{
        iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid);
        pIter->iLeafOffset = iOff;
        fts5SegIterLoadNPos(p, pIter);
      }
    }
  }
}

/*
** Advance the iterator passed as the second argument until it is at or 
** past rowid iFrom. Regardless of the value of iFrom, the iterator is







|







>
>
>
|
|
|

|
|
|
|

|
|
|
|
|
|
|
>







2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518

  pRes->iFirst = iRes;
  return 0;
}

/*
** Reposition seg-iter pIter on the first rowid of leaf page iLeafPgno,
** which must be a later page than the one the iterator is currently on.
**
** p->rc is set to FTS5_CORRUPT if iLeafPgno is greater than the last page
** number of the segment, or if the first-rowid offset stored in the leaf
** header is less than 4 or not smaller than the size of the leaf.
*/
static void fts5SegIterGotoPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int iLeafPgno
){
  u8 *aLeaf;                      /* Content of the new leaf */
  int nLeaf;                      /* Size of aLeaf[] in bytes */
  int iFirst;                     /* Offset of first rowid on the leaf */

  assert( iLeafPgno>pIter->iLeafPgno );

  if( iLeafPgno>pIter->pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  /* Step onto the target page from the page immediately before it. */
  pIter->iLeafPgno = iLeafPgno-1;
  fts5SegIterNextPage(p, pIter);
  assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
  if( p->rc!=SQLITE_OK ) return;

  aLeaf = pIter->pLeaf->p;
  nLeaf = pIter->pLeaf->n;
  iFirst = fts5GetU16(&aLeaf[0]);
  if( iFirst<4 || iFirst>=nLeaf ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  iFirst += getVarint(&aLeaf[iFirst], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iFirst;
  fts5SegIterLoadNPos(p, pIter);
}

/*
** Advance the iterator passed as the second argument until it is at or 
** past rowid iFrom. Regardless of the value of iFrom, the iterator is
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
  int bMove = 1;

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );

  if( bRev==0 ){
    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){
      iLeafPgno = pDlidx->iLeafPgno;
      fts5DlidxIterNext(pDlidx);
    }
    assert( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{
    assert( iMatch<pIter->iRowid );
    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){
      fts5DlidxIterPrev(pDlidx);
    }
    iLeafPgno = pDlidx->iLeafPgno;

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );

    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;







|
|
|

|






|
|

|







2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
  int bMove = 1;

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );

  if( bRev==0 ){
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
      iLeafPgno = fts5DlidxIterPgno(pDlidx);
      fts5DlidxIterNext(p, pDlidx);
    }
    assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{
    assert( iMatch<pIter->iRowid );
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
      fts5DlidxIterPrev(p, pDlidx);
    }
    iLeafPgno = fts5DlidxIterPgno(pDlidx);

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );

    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
2799
2800
2801
2802
2803
2804
2805















































2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823

2824
2825
2826


2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
  int i;
  assert( fts5BlobCompare(pOld, nOld, pNew, nNew)<0 );
  for(i=0; i<nOld; i++){
    if( pOld[i]!=pNew[i] ) break;
  }
  return i;
}
















































/*
** If pWriter->nEmpty is non-zero, append an "nEmpty" record (a flag varint
** followed by the nEmpty count) to the page buffer of pWriter->aWriter[1]
** and reset the counter. If the count has reached FTS5_MIN_DLIDX_SIZE, the
** accumulated doclist-index is written to the database first and the flag
** is set to 1.
**
** The doclist-index buffer is cleared on every call, whether or not it was
** written out.
*/
static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
  if( pWriter->nEmpty!=0 ){
    Fts5PageWriter *pParent = &pWriter->aWriter[1];
    int bHasDlidx = 0;

    if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
      i64 iDlidxKey = FTS5_DOCLIST_IDX_ROWID(
          pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
      );
      assert( pWriter->cdlidx.n>0 );
      fts5DataWrite(p, iDlidxKey, pWriter->cdlidx.p, pWriter->cdlidx.n);
      bHasDlidx = 1;
    }

    fts5BufferAppendVarint(&p->rc, &pParent->buf, bHasDlidx);
    fts5BufferAppendVarint(&p->rc, &pParent->buf, pWriter->nEmpty);
    pWriter->nEmpty = 0;
  }

  /* Start the next doclist-index from scratch. */
  sqlite3Fts5BufferZero(&pWriter->cdlidx);
  pWriter->bDlidxPrevValid = 0;
}

static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *aNew;
    Fts5PageWriter *pNew;
    int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1);







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>










|
|
|
<
|
<


>



>
>


<
<
|
|







2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004

3005

3006
3007
3008
3009
3010
3011
3012
3013
3014
3015


3016
3017
3018
3019
3020
3021
3022
3023
3024
  int i;
  assert( fts5BlobCompare(pOld, nOld, pNew, nNew)<0 );
  for(i=0; i<nOld; i++){
    if( pOld[i]!=pNew[i] ) break;
  }
  return i;
}

/*
** Reset the doclist-index writer state held in pWriter->aDlidx[].
**
** Levels are visited from 0 upwards, stopping at the first one whose
** buffer is empty. For each visited level the buffer is emptied and the
** bPrevValid flag cleared. If bFlush is true, the content of each visited
** level is first written to the database under the rowid
** FTS5_DLIDX_ROWID(iSegid, <level>, <level pgno>).
*/
static void fts5WriteDlidxClear(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int bFlush                      /* If true, write dlidx to disk */
){
  int iLvl = 0;
  assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
  while( iLvl<pWriter->nDlidx && pWriter->aDlidx[iLvl].buf.n!=0 ){
    Fts5DlidxWriter *pLvl = &pWriter->aDlidx[iLvl];
    if( bFlush ){
      i64 iKey;
      assert( pLvl->pgno!=0 );
      iKey = FTS5_DLIDX_ROWID(pWriter->iSegid, iLvl, pLvl->pgno);
      fts5DataWrite(p, iKey, pLvl->buf.p, pLvl->buf.n);
    }
    sqlite3Fts5BufferZero(&pLvl->buf);
    pLvl->bPrevValid = 0;
    iLvl++;
  }
}

/*
** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
** Any new array elements are zeroed before returning.
**
** This is a no-op if p->rc already holds an error code, or if the array
** already has nLvl or more elements. If the reallocation fails, p->rc is
** set to SQLITE_NOMEM and the existing array is left in place.
**
** The final value of p->rc is returned.
*/
static int fts5WriteDlidxGrow(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int nLvl
){
  /* Only reallocate when the array is actually too small. An array that is
  ** already nLvl elements in size needs no work, and asking
  ** sqlite3_realloc() for zero bytes would free the array and return NULL,
  ** which must not be mistaken for an out-of-memory condition. */
  if( p->rc==SQLITE_OK && nLvl>pWriter->nDlidx ){
    Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc(
        pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
    );
    if( aDlidx==0 ){
      p->rc = SQLITE_NOMEM;
    }else{
      /* Zero only the elements added by this call. */
      int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
      memset(&aDlidx[pWriter->nDlidx], 0, nByte);
      pWriter->aDlidx = aDlidx;
      pWriter->nDlidx = nLvl;
    }
  }
  return p->rc;
}

/*
** If pWriter->nEmpty is non-zero, append an "nEmpty" record (a flag varint
** followed by the count) to the page buffer of pWriter->aWriter[1] and
** reset the counter to zero.
**
** The flag is 1, and the doclist-index is written to disk, only if the
** level-0 doclist-index buffer is non-empty and at least
** FTS5_MIN_DLIDX_SIZE empty leaves were counted. In all cases the
** doclist-index writer state is cleared via fts5WriteDlidxClear().
*/
static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
  /* True if the doclist-index should be flushed to disk. aDlidx[0] is only
  ** inspected when there is an nEmpty record to write. */
  int bWriteDlidx = (
      pWriter->nEmpty 
   && pWriter->aDlidx[0].buf.n>0 
   && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE
  );

  fts5WriteDlidxClear(p, pWriter, bWriteDlidx);

  if( pWriter->nEmpty ){
    Fts5PageWriter *pParent = &pWriter->aWriter[1];
    fts5BufferAppendVarint(&p->rc, &pParent->buf, bWriteDlidx);
    fts5BufferAppendVarint(&p->rc, &pParent->buf, pWriter->nEmpty);
    pWriter->nEmpty = 0;
  }

  assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].buf.n==0 );
  assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].bPrevValid==0 );
}

static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *aNew;
    Fts5PageWriter *pNew;
    int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1);
2896
2897
2898
2899
2900
2901
2902




2903
2904
2905
2906


2907
2908
2909
2910
2911
2912

2913

2914
2915
2916



2917
2918



2919
2920
2921
2922

2923
2924
2925
2926
2927
2928
2929




2930


































2931
2932
2933


2934

2935
2936

2937
2938
2939

2940
2941
2942
2943
2944
2945
2946
      fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre);
      fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
      break;
    }
  }
}





/*
** Account for a leaf page that is being counted in pWriter->nEmpty.
**
** If the bFirstRowidInPage flag is still set, a 0x00 varint is appended to
** the doclist-index buffer. If that buffer has no previous rowid recorded
** yet, pWriter->iPrevRowid is appended ahead of the 0x00 and remembered as
** the previous doclist-index rowid. The nEmpty counter is always
** incremented.
*/
static void fts5WriteBtreeNoTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter          /* Writer object */
){
  if( pWriter->bFirstRowidInPage ){
    if( !pWriter->bDlidxPrevValid ){
      /* Doclist-index not started yet. Seed it with the previous rowid. */
      sqlite3Fts5BufferAppendVarint(
          &p->rc, &pWriter->cdlidx, pWriter->iPrevRowid
      );
      pWriter->iDlidxPrev = pWriter->iPrevRowid;
      pWriter->bDlidxPrevValid = 1;
    }
    sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, 0);
  }
  pWriter->nEmpty += 1;
}

/*
** Rowid iRowid has just been appended to the current leaf page, where it
** is the first rowid on the page. Add the matching entry to the current
** doclist-index buffer.
**
** If a previous doclist-index rowid is on record, the entry is the
** difference between iRowid and that rowid. Otherwise iRowid itself is
** appended, followed by the value 1. Either way iRowid becomes the new
** "previous" rowid.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  i64 iRowid
){
  i64 iEntry = 1;                 /* Value of the varint appended last */

  if( pWriter->bDlidxPrevValid ){
    iEntry = iRowid - pWriter->iDlidxPrev;
  }else{
    sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid);
  }

  sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iEntry);
  pWriter->iDlidxPrev = iRowid;
  pWriter->bDlidxPrevValid = 1;
}

static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->aWriter[0];
  i64 iRowid;








>
>
>
>




>
>
|
<
<
|
|
|
>
|
>
|
|
|
>
>
>
|
<
>
>
>



|
>
|






>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
>
>
|
>
|
|
>
|
|
|
>







3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098


3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111

3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
      fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre);
      fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
      break;
    }
  }
}

/*
** Called when a leaf page that contains no terms at all is flushed to
** disk.
**
** If the page contained no rowids either (bFirstRowidInPage still set) and
** the level-0 doclist-index buffer already holds data, a 0x00 varint is
** appended to that buffer. The count of consecutive term-less leaves
** (pWriter->nEmpty) is incremented in every case.
*/
static void fts5WriteBtreeNoTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter          /* Writer object */
){
  if( pWriter->bFirstRowidInPage ){
    Fts5DlidxWriter *pLvl0 = &pWriter->aDlidx[0];
    if( pLvl0->buf.n>0 ){
      assert( pLvl0->bPrevValid );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pLvl0->buf, 0);
    }
  }

  pWriter->nEmpty += 1;
}

/*
** Buffer pBuf holds a doclist-index node: one leading byte, then a varint,
** then a second varint. Return the value of the second varint, which
** (judging by the function name and its caller) is the first rowid stored
** in the node.
*/
static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
  i64 iVal;                       /* Varint decoded most recently */
  int nSkip;                      /* Size of the first varint in bytes */

  /* Decode, and thereby measure, the varint that follows the first byte. */
  nSkip = getVarint(&pBuf->p[1], (u64*)&iVal);

  /* The value of interest comes immediately after it. */
  getVarint(&pBuf->p[1 + nSkip], (u64*)&iVal);
  return iVal;
}

/*
** Rowid iRowid has just been appended to the current leaf page. It is the
** first on the page. This function appends an appropriate entry to the current
** doclist-index.
**
** The doclist-index is kept as an array of per-level buffers,
** pWriter->aDlidx[]. The entry is appended to level 0. If the level-0
** buffer had already reached the configured page size it is first written
** to disk and emptied, and iRowid is then also appended to level 1, and so
** on upwards for as long as the level just processed had to be flushed.
**
** The loop also terminates as soon as p->rc is set to an error code.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  i64 iRowid
){
  int i;
  int bDone = 0;

  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    i64 iVal;
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];

    if( pDlidx->buf.n>=p->pConfig->pgsz ){
      /* The current doclist-index page is full. Write it to disk and push
      ** a copy of iRowid (which will become the first rowid on the next
      ** doclist-index leaf page) up into the next level of the b-tree 
      ** hierarchy. If the node being flushed is currently the root node,
      ** also push its first rowid upwards. */
      pDlidx->buf.p[0] = 0x01;    /* Not the root node */
      fts5DataWrite(p, 
          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
          pDlidx->buf.p, pDlidx->buf.n
      );
      /* Make sure aDlidx[i+1] exists. The array may be reallocated by
      ** this call, so pDlidx is recomputed afterwards. */
      fts5WriteDlidxGrow(p, pWriter, i+2);
      pDlidx = &pWriter->aDlidx[i];
      if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
        i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);

        /* This was the root node. Push its first rowid up to the new root. */
        /* The new parent starts with a 0 flag byte, the page number of the
        ** node just flushed and that node's first rowid. */
        pDlidx[1].pgno = pDlidx->pgno;
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
        pDlidx[1].bPrevValid = 1;
        pDlidx[1].iPrev = iFirst;
      }

      /* Begin a fresh, empty node on this level with the next page number.
      ** bDone is left at 0 so that the next level is processed as well. */
      sqlite3Fts5BufferZero(&pDlidx->buf);
      pDlidx->bPrevValid = 0;
      pDlidx->pgno++;
    }else{
      bDone = 1;
    }

    if( pDlidx->bPrevValid ){
      /* Node already has an entry: store the delta from the previous rowid */
      iVal = iRowid - pDlidx->iPrev;
    }else{
      /* First entry of an empty node. Write the node header - a flag that is
      ** 1 if this level was just flushed (!bDone) and 0 otherwise, then a
      ** page number (the current leaf page at level 0, else the page number
      ** of the level below) - followed by the rowid itself. */
      i64 iPgno = (i==0 ? pWriter->aWriter[0].pgno : pDlidx[-1].pgno);
      assert( pDlidx->buf.n==0 );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
      iVal = iRowid;
    }

    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
    pDlidx->bPrevValid = 1;
    pDlidx->iPrev = iRowid;
  }
}

static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->aWriter[0];
  i64 iRowid;

3029
3030
3031
3032
3033
3034
3035



3036
3037
3038
3039
3040
3041
3042

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  pWriter->bFirstTermInPage = 0;

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;




  /* If the current leaf page is full, flush it to disk. */
  if( pPage->buf.n>=p->pConfig->pgsz ){
    fts5WriteFlushLeaf(p, pWriter);
  }
}








>
>
>







3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  pWriter->bFirstTermInPage = 0;

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pPage->pgno;

  /* If the current leaf page is full, flush it to disk. */
  if( pPage->buf.n>=p->pConfig->pgsz ){
    fts5WriteFlushLeaf(p, pWriter);
  }
}

3167
3168
3169
3170
3171
3172
3173


3174


3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185

3186
3187

3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200

3201
3202
3203
3204

3205
3206
3207
3208
3209
3210
3211
  }
  for(i=0; i<pWriter->nWriter; i++){
    Fts5PageWriter *pPg = &pWriter->aWriter[i];
    fts5BufferFree(&pPg->term);
    fts5BufferFree(&pPg->buf);
  }
  sqlite3_free(pWriter->aWriter);


  sqlite3Fts5BufferFree(&pWriter->cdlidx);


}

/*
** Prepare segment writer pWriter for writing segment iSegid.
**
** The structure is zeroed, the segment id recorded and a one-element page
** writer array allocated with fts5IdxMalloc(). If that allocation returns
** NULL the function returns with the array pointer left NULL and nWriter
** left at zero. Otherwise the first page writer starts at page number 1
** and bFirstTermInPage is set.
*/
static void fts5WriteInit(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  int iSegid
){
  Fts5PageWriter *aPg;

  memset(pWriter, 0, sizeof(*pWriter));
  pWriter->iSegid = iSegid;

  aPg = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter));
  pWriter->aWriter = aPg;
  if( aPg ){
    pWriter->nWriter = 1;
    aPg[0].pgno = 1;
    pWriter->bFirstTermInPage = 1;
  }
}

static void fts5WriteInitForAppend(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer to initialize */
  Fts5StructureSegment *pSeg      /* Segment object to append to */
){
  int nByte = pSeg->nHeight * sizeof(Fts5PageWriter);
  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = pSeg->iSegid;
  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte);


  if( p->rc==SQLITE_OK ){
    int pgno = 1;
    int i;

    pWriter->nWriter = pSeg->nHeight;
    pWriter->aWriter[0].pgno = pSeg->pgnoLast+1;
    for(i=pSeg->nHeight-1; i>0; i--){
      i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pgno);
      Fts5PageWriter *pPg = &pWriter->aWriter[i];
      pPg->pgno = pgno;
      fts5DataBuffer(p, &pPg->buf, iRowid);







>
>
|
>
>










|
>
|

>













>




>







3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
  }
  for(i=0; i<pWriter->nWriter; i++){
    Fts5PageWriter *pPg = &pWriter->aWriter[i];
    fts5BufferFree(&pPg->term);
    fts5BufferFree(&pPg->buf);
  }
  sqlite3_free(pWriter->aWriter);

  for(i=0; i<pWriter->nDlidx; i++){
    sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
  }
  sqlite3_free(pWriter->aDlidx);
}

/*
** Prepare segment writer pWriter for writing segment iSegid.
**
** The structure is zeroed and the segment id recorded. Two one-element
** arrays are then allocated with fts5IdxMalloc(): the page writer array
** (aWriter) and the doclist-index writer array (aDlidx). If either
** allocation returns NULL the function returns early with nWriter and
** nDlidx left at zero. Whatever was allocated stays attached to pWriter
** for the usual cleanup code to release.
**
** On success the first page writer starts at page number 1 and the
** bFirstTermInPage flag is set.
*/
static void fts5WriteInit(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  int iSegid
){
  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = iSegid;

  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter));
  pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter));

  /* aWriter[0] is dereferenced below, so both allocations must be checked
  ** here, not just the second one. */
  if( pWriter->aWriter==0 || pWriter->aDlidx==0 ) return;

  pWriter->nWriter = 1;
  pWriter->nDlidx = 1;
  pWriter->aWriter[0].pgno = 1;
  pWriter->bFirstTermInPage = 1;
}

static void fts5WriteInitForAppend(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer to initialize */
  Fts5StructureSegment *pSeg      /* Segment object to append to */
){
  int nByte = pSeg->nHeight * sizeof(Fts5PageWriter);
  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = pSeg->iSegid;
  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte);
  pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter));

  if( p->rc==SQLITE_OK ){
    int pgno = 1;
    int i;
    pWriter->nDlidx = 1;
    pWriter->nWriter = pSeg->nHeight;
    pWriter->aWriter[0].pgno = pSeg->pgnoLast+1;
    for(i=pSeg->nHeight-1; i>0; i--){
      i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pgno);
      Fts5PageWriter *pPg = &pWriter->aWriter[i];
      pPg->pgno = pgno;
      fts5DataBuffer(p, &pPg->buf, iRowid);
3579
3580
3581
3582
3583
3584
3585
3586

3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614


3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631






3632
3633
3634
3635
3636
3637
3638

    /* Pre-allocate the buffer used to assemble leaf pages to the target
    ** page size.  */
    assert( pgsz>0 );
    pBuf = &writer.aWriter[0].buf;
    fts5BufferGrow(&p->rc, pBuf, pgsz + 20);

    /* Begin scanning through hash table entries. */

    if( p->rc==SQLITE_OK ){
      memset(pBuf->p, 0, 4);
      pBuf->n = 4;
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }

    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;
      int nTerm;
      const u8 *pDoclist;
      int nDoclist;
      int nSuffix;                /* Size of term suffix */

      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term will fit on the current leaf. If it will not, 
      ** flush the leaf to disk here.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
        fts5WriteFlushLeaf(p, &writer);
        pBuf = &writer.aWriter[0].buf;
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
          if( p->rc ) break;
        }
      }

      /* Write the term to the leaf. And push it up into the b-tree hierarchy */


      if( writer.bFirstTermInPage==0 ){
        int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
        pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre);
        nSuffix = nTerm - nPre;
      }else{
        fts5PutU16(&pBuf->p[2], pBuf->n);
        writer.bFirstTermInPage = 0;
        if( writer.aWriter[0].pgno!=1 ){
          int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
          fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm);
          pBuf = &writer.aWriter[0].buf;
          assert( nPre<nTerm );
        }
        nSuffix = nTerm;
      }
      pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix);
      fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix);







      if( pgsz>=(pBuf->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;







|
>





<

|
|
|
|
















|
>
>

















>
>
>
>
>
>







3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843

3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897

    /* Pre-allocate the buffer used to assemble leaf pages to the target
    ** page size.  */
    assert( pgsz>0 );
    pBuf = &writer.aWriter[0].buf;
    fts5BufferGrow(&p->rc, pBuf, pgsz + 20);

    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){
      memset(pBuf->p, 0, 4);
      pBuf->n = 4;
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }

    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;          /* Buffer containing term */
      int nTerm;                  /* Size of zTerm in bytes */
      const u8 *pDoclist;         /* Pointer to doclist for this term */
      int nDoclist;               /* Size of doclist in bytes */
      int nSuffix;                /* Size of term suffix */

      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term will fit on the current leaf. If it will not, 
      ** flush the leaf to disk here.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
        fts5WriteFlushLeaf(p, &writer);
        pBuf = &writer.aWriter[0].buf;
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
          if( p->rc ) break;
        }
      }

      /* Write the term to the leaf. And if it is the first on the leaf, and
      ** the leaf is not page number 1, push it up into the b-tree hierarchy 
      ** as well.  */
      if( writer.bFirstTermInPage==0 ){
        int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
        pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre);
        nSuffix = nTerm - nPre;
      }else{
        fts5PutU16(&pBuf->p[2], pBuf->n);
        writer.bFirstTermInPage = 0;
        if( writer.aWriter[0].pgno!=1 ){
          int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
          fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm);
          pBuf = &writer.aWriter[0].buf;
          assert( nPre<nTerm );
        }
        nSuffix = nTerm;
      }
      pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix);
      fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix);

      /* We just wrote a term into page writer.aWriter[0].pgno. If a 
      ** doclist-index is to be generated for this doclist, it will be
      ** associated with this page. */
      assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 );
      writer.aDlidx[0].pgno = writer.aWriter[0].pgno;

      if( pgsz>=(pBuf->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
  int bSz,
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){
    Fts5ChunkIter iter;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );
    static int nCall = 0;
    nCall++;

    fts5ChunkIterInit(p, pSeg, &iter);

    if( fts5ChunkIterEof(p, &iter)==0 ){
      if( bSz ){
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2);







<
<







4080
4081
4082
4083
4084
4085
4086


4087
4088
4089
4090
4091
4092
4093
  int bSz,
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){
    Fts5ChunkIter iter;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );



    fts5ChunkIterInit(p, pSeg, &iter);

    if( fts5ChunkIterEof(p, &iter)==0 ){
      if( bSz ){
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2);
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
}

/*
** Return the current term.
**
** The buffer returned by fts5MultiIterTerm() is advanced by one byte before
** being handed to the caller, and the size written to *pn is reduced by one
** to match. The leading byte is therefore not treated as part of the term.
*/
const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){
  int n = 0;
  /* Cast explicitly so that no implicit conversion between pointer types
  ** is relied upon, whatever the helper's return type is. */
  const char *z = (const char*)fts5MultiIterTerm(pIter->pMulti, &n);
  *pn = n-1;
  return &z[1];
}


/*
** Return a pointer to a buffer containing a copy of the position list for







|







4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
}

/*
** Return a pointer to the current term of iterator pIter and store its
** size in bytes in *pn.
**
** The buffer obtained from fts5MultiIterTerm() is advanced by one byte
** before being returned, and the reported size is reduced by one to match.
*/
const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){
  int nFull;                      /* Size including the leading byte */
  const char *zFull;              /* Term including the leading byte */

  zFull = (const char*)fts5MultiIterTerm(pIter->pMulti, &nFull);
  *pn = nFull - 1;
  return zFull + 1;
}


/*
** Return a pointer to a buffer containing a copy of the position list for
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663


4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675

4676

4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
#ifdef SQLITE_DEBUG
/*
** Debug-only self test. Iterate through the doclist-index for leaf iLeaf of
** segment iSegid once forwards and once backwards, folding the page number
** and rowid of each entry into a checksum with XOR. If no error has
** occurred and the two checksums differ, p->rc is set to FTS5_CORRUPT.
*/
static void fts5DlidxIterTestReverse(
  Fts5Index *p, 
  int iSegid,                     /* Segment id to load from */
  int iLeaf                       /* Load doclist-index for this leaf */
){
  Fts5DlidxIter *pFwd;            /* Forwards iterator */
  Fts5DlidxIter *pRev;            /* Backwards iterator */
  i64 cksumFwd = 13;
  i64 cksumRev = 13;

  pFwd = fts5DlidxIterInit(p, 0, iSegid, iLeaf);
  while( fts5DlidxIterEof(p, pFwd)==0 ){
    assert( pFwd->iLeafPgno>iLeaf );
    cksumFwd ^= ((i64)(pFwd->iLeafPgno) << 32);
    cksumFwd ^= pFwd->iRowid;
    fts5DlidxIterNext(pFwd);
  }
  fts5DlidxIterFree(pFwd);

  pRev = fts5DlidxIterInit(p, 1, iSegid, iLeaf);
  while( fts5DlidxIterEof(p, pRev)==0 ){
    assert( pRev->iLeafPgno>iLeaf );
    cksumRev ^= ((i64)(pRev->iLeafPgno) << 32);
    cksumRev ^= pRev->iRowid;
    fts5DlidxIterPrev(pRev);
  }
  fts5DlidxIterFree(pRev);

  if( p->rc==SQLITE_OK && cksumFwd!=cksumRev ) p->rc = FTS5_CORRUPT; 
}
#else
# define fts5DlidxIterTestReverse(x,y,z)
#endif

static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */







|
|



|

>
>
|
<
|






|

|
>
|
>
|




|







4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923

4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
#ifdef SQLITE_DEBUG
/*
** Debug-only self test. Iterate through the doclist-index for leaf iLeaf of
** segment iSegid once forwards and once backwards, summing a value derived
** from the (rowid, page number) pair of each entry. If no error has
** occurred and the two sums differ, p->rc is set to FTS5_CORRUPT.
**
** The sums are accumulated entirely in unsigned 64-bit arithmetic. Rowids
** may take any 64-bit value, so adding them as signed integers could
** overflow, which is undefined behaviour. Unsigned arithmetic wraps in a
** well-defined way and still yields order-independent totals.
*/
static void fts5DlidxIterTestReverse(
  Fts5Index *p, 
  int iSegid,                     /* Segment id to load from */
  int iLeaf                       /* Load doclist-index for this leaf */
){
  Fts5DlidxIter *pDlidx = 0;
  u64 cksum1 = 13;
  u64 cksum2 = 13;

  /* Forwards pass */
  for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
      fts5DlidxIterEof(p, pDlidx)==0;
      fts5DlidxIterNext(p, pDlidx)
  ){
    i64 iRowid = fts5DlidxIterRowid(pDlidx);
    int pgno = fts5DlidxIterPgno(pDlidx);
    assert( pgno>iLeaf );

    cksum1 += (u64)iRowid + ((u64)pgno<<32);
  }
  fts5DlidxIterFree(pDlidx);
  pDlidx = 0;

  /* Backwards pass */
  for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
      fts5DlidxIterEof(p, pDlidx)==0;
      fts5DlidxIterPrev(p, pDlidx)
  ){
    i64 iRowid = fts5DlidxIterRowid(pDlidx);
    int pgno = fts5DlidxIterPgno(pDlidx);
    assert( pgno>iLeaf );

    cksum2 += (u64)iRowid + ((u64)pgno<<32);
  }
  fts5DlidxIterFree(pDlidx);
  pDlidx = 0;

  if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
}
#else
# define fts5DlidxIterTestReverse(x,y,z)
#endif

static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
      int iPrevLeaf = iter.iLeaf;
      int iSegid = pSeg->iSegid;
      int iPg;
      i64 iKey;

      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<pDlidx->iLeafPgno; iPg++){
          iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = pDlidx->iLeafPgno;

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, 0, pDlidx->iLeafPgno);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5GetU16(&pLeaf->p[0]);
          getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
          if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT;
          fts5DataRelease(pLeaf);
        }

      }

      for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){
        iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;







|



|







|



|





|


<







5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035

5036
5037
5038
5039
5040
5041
5042
      int iPrevLeaf = iter.iLeaf;
      int iSegid = pSeg->iSegid;
      int iPg;
      i64 iKey;

      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(p, pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
          iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = fts5DlidxIterPgno(pDlidx);

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5GetU16(&pLeaf->p[0]);
          getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
          if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
          fts5DataRelease(pLeaf);
        }

      }

      for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){
        iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
4990
4991
4992
4993
4994
4995
4996

4997
4998
4999
5000
5001
5002
5003
5004



5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
/*
** Split a rowid taken from the %_data table into its component fields.
** This is the inverse of the FTS5_SEGMENT_ROWID() macro.
**
** The fields are extracted starting from the least significant bits of
** iRowid: first the page number (FTS5_DATA_PAGE_B bits), then the height
** (FTS5_DATA_HEIGHT_B bits) and finally the segment id (FTS5_DATA_ID_B
** bits).
*/
static void fts5DecodeRowid(
  i64 iRowid,                     /* Rowid from %_data table */
  int *piSegid,                   /* OUT: Segment id */
  int *piHeight,                  /* OUT: Height */
  int *piPgno                     /* OUT: Page number */
){
  const i64 mPgno = ((i64)1 << FTS5_DATA_PAGE_B) - 1;
  const i64 mHeight = ((i64)1 << FTS5_DATA_HEIGHT_B) - 1;
  const i64 mSegid = ((i64)1 << FTS5_DATA_ID_B) - 1;
  i64 iRest = iRowid;             /* Bits not yet consumed */

  *piPgno = (int)(iRest & mPgno);
  iRest >>= FTS5_DATA_PAGE_B;

  *piHeight = (int)(iRest & mHeight);
  iRest >>= FTS5_DATA_HEIGHT_B;

  *piSegid = (int)(iRest & mSegid);
}

/*
** Append a short human-readable description of %_data rowid iKey to
** buffer pBuf. pRc is handed through to sqlite3Fts5BufferAppendPrintf().
**
** The text produced depends on the fields decoded from iKey:
**
**   * segment id 0 and iKey==FTS5_AVERAGES_ROWID:  "(averages) "
**   * segment id 0, any other key:                 "{structure idx=N}",
**                                                  where N is iKey-10
**   * height==FTS5_SEGMENT_MAX_HEIGHT:             "(dlidx segid=.. pgno=..)"
**   * anything else:                               "(segid=.. h=.. pgno=..)"
*/
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int iSegid, iHeight, iPgno;     /* Rowid components */

  fts5DecodeRowid(iKey, &iSegid, &iHeight, &iPgno);

  if( iSegid!=0 ){
    /* A record that belongs to a segment. */
    if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx segid=%d pgno=%d)",
          iSegid, iPgno
      );
    }else{
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(segid=%d h=%d pgno=%d)",
          iSegid, iHeight, iPgno
      );
    }
    return;
  }

  /* Segment id 0: either the averages record or a structure record. */
  if( iKey==FTS5_AVERAGES_ROWID ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) ");
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
        "{structure idx=%d}", (int)(iKey-10)
    );
  }
}

static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,







>








>
>
>





|
|





|
<
<


<
<
<
<
|
|
|







5249
5250
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
5274
5275
5276
5277
5278
5279
5280


5281
5282




5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
/*
** Split a rowid taken from the %_data table into its component fields.
** This is the inverse of the FTS5_SEGMENT_ROWID() macro.
**
** The fields are extracted starting from the least significant bits of
** iRowid: first the page number (FTS5_DATA_PAGE_B bits), then the height
** (FTS5_DATA_HEIGHT_B bits), then the doclist-index flag (the lowest
** remaining bit, after which FTS5_DATA_DLI_B bits are discarded) and
** finally the segment id (FTS5_DATA_ID_B bits).
*/
static void fts5DecodeRowid(
  i64 iRowid,                     /* Rowid from %_data table */
  int *piSegid,                   /* OUT: Segment id */
  int *pbDlidx,                   /* OUT: Dlidx flag */
  int *piHeight,                  /* OUT: Height */
  int *piPgno                     /* OUT: Page number */
){
  const i64 mPgno = ((i64)1 << FTS5_DATA_PAGE_B) - 1;
  const i64 mHeight = ((i64)1 << FTS5_DATA_HEIGHT_B) - 1;
  const i64 mSegid = ((i64)1 << FTS5_DATA_ID_B) - 1;
  i64 iRest = iRowid;             /* Bits not yet consumed */

  *piPgno = (int)(iRest & mPgno);
  iRest >>= FTS5_DATA_PAGE_B;

  *piHeight = (int)(iRest & mHeight);
  iRest >>= FTS5_DATA_HEIGHT_B;

  *pbDlidx = (int)(iRest & 0x0001);
  iRest >>= FTS5_DATA_DLI_B;

  *piSegid = (int)(iRest & mSegid);
}

/*
** Append a short human-readable description of %_data rowid iKey to
** buffer pBuf. pRc is handed through to sqlite3Fts5BufferAppendPrintf().
**
** The text produced depends on the fields decoded from iKey:
**
**   * segment id 0 and iKey==FTS5_AVERAGES_ROWID:  "(averages) "
**   * segment id 0, any other key:                 "(structure)"
**   * anything else:   "(segid=.. h=.. pgno=..)", with "dlidx " inserted
**                      after the opening parenthesis if the dlidx flag
**                      decoded from iKey is set
*/
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int iSegid, iHeight, iPgno, bDlidx;       /* Rowid components */

  fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);

  if( iSegid!=0 ){
    /* A record that belongs to a segment. */
    const char *zDlidx = bDlidx ? "dlidx " : "";
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)",
        zDlidx, iSegid, iHeight, iPgno
    );
    return;
  }

  /* Segment id 0: either the averages record or the structure record. */
  if( iKey==FTS5_AVERAGES_ROWID ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) ");
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)");
  }
}

static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
*/
static void fts5DecodeFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (always 2) */
  sqlite3_value **apVal           /* Function arguments */
){
  i64 iRowid;                     /* Rowid for record being decoded */
  int iSegid,iHeight,iPgno;       /* Rowid components */
  const u8 *aBlob; int n;         /* Record to decode */
  u8 *a = 0;
  Fts5Buffer s;                   /* Build up text to return here */
  int rc = SQLITE_OK;             /* Return code */
  int nSpace = 0;

  assert( nArg==2 );
  memset(&s, 0, sizeof(Fts5Buffer));
  iRowid = sqlite3_value_int64(apVal[0]);
  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);

  nSpace = n + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  memcpy(a, aBlob, n);
  fts5DecodeRowid(iRowid, &iSegid, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
    Fts5Data dlidx;
    Fts5DlidxIter iter;

    dlidx.p = a;
    dlidx.n = n;
    dlidx.nRef = 2;

    memset(&iter, 0, sizeof(Fts5DlidxIter));
    iter.pData = &dlidx;
    iter.iLeafPgno = iPgno;

    for(fts5DlidxIterFirst(&iter); iter.bEof==0; fts5DlidxIterNext(&iter)){
      sqlite3Fts5BufferAppendPrintf(&rc, &s, 
          " %d(%lld)", iter.iLeafPgno, iter.iRowid
      );
    }
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);







|
















|


|

|





|
|
|

|

|







5388
5389
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399
5400
5401
5402
5403
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
5420
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432
5433
5434
5435
5436
*/
static void fts5DecodeFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (always 2) */
  sqlite3_value **apVal           /* Function arguments */
){
  i64 iRowid;                     /* Rowid for record being decoded */
  int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
  const u8 *aBlob; int n;         /* Record to decode */
  u8 *a = 0;
  Fts5Buffer s;                   /* Build up text to return here */
  int rc = SQLITE_OK;             /* Return code */
  int nSpace = 0;

  assert( nArg==2 );
  memset(&s, 0, sizeof(Fts5Buffer));
  iRowid = sqlite3_value_int64(apVal[0]);
  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);

  nSpace = n + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  memcpy(a, aBlob, n);
  fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( bDlidx ){
    Fts5Data dlidx;
    Fts5DlidxLvl lvl;

    dlidx.p = a;
    dlidx.n = n;
    dlidx.nRef = 2;

    memset(&lvl, 0, sizeof(Fts5DlidxLvl));
    lvl.pData = &dlidx;
    lvl.iLeafPgno = iPgno;

    for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
      sqlite3Fts5BufferAppendPrintf(&rc, &s, 
          " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
      );
    }
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
Changes to ext/fts5/fts5_tcl.c.
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

#ifdef SQLITE_ENABLE_FTS5

#include "fts5.h"
#include <string.h>
#include <assert.h>

/*
** This variable is set to true when running corruption tests. Otherwise
** false. If it is false, extra assert() conditions in the fts5 code are
** activated - conditions that are only true if it is guaranteed that the
** fts5 database is not corrupt.
*/
int sqlite3_fts5_may_be_corrupt = 0;

/*************************************************************************
** This is a copy of the first part of the SqliteDb structure in 
** tclsqlite.c.  We need it here so that the get_sqlite_pointer routine
** can extract the sqlite3* pointer from an existing Tcl SQLite
** connection.
*/







<
<
<
<
<
<
|







18
19
20
21
22
23
24






25
26
27
28
29
30
31
32

#ifdef SQLITE_ENABLE_FTS5

#include "fts5.h"
#include <string.h>
#include <assert.h>







extern int sqlite3_fts5_may_be_corrupt;

/*************************************************************************
** This is a copy of the first part of the SqliteDb structure in 
** tclsqlite.c.  We need it here so that the get_sqlite_pointer routine
** can extract the sqlite3* pointer from an existing Tcl SQLite
** connection.
*/
Changes to ext/fts5/test/fts5aa.test.
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_test 2.2 {
  execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{{structure idx=0} {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}
do_execsql_test 2.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
#
reset_db







|







45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_test 2.2 {
  execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}
do_execsql_test 2.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
#
reset_db
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
#  if {$i==1} break
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
#exit

#-------------------------------------------------------------------------
#
reset_db







<







177
178
179
180
181
182
183

184
185
186
187
188
189
190
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}

}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
#exit

#-------------------------------------------------------------------------
#
reset_db
238
239
240
241
242
243
244

245
246
247
248
249
250
251
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
  if {[set_test_counter errors]} break
}


#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 10.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}







>







237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
  if {[set_test_counter errors]} break
}


#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 10.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
Changes to ext/fts5/test/fts5al.test.
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  finish_test
  return
}

do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x);
  SELECT * FROM ft1_config;
} {version 1}

do_execsql_test 1.2 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32);
  SELECT * FROM ft1_config;
} {pgsz 32 version 1}

do_execsql_test 1.3 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64);
  SELECT * FROM ft1_config;
} {pgsz 64 version 1}

#--------------------------------------------------------------------------
# Test the logic for parsing the rank() function definition.
#
foreach {tn defn} {
  1 "fname()"
  2 "fname(1)"







|




|




|







22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  finish_test
  return
}

do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x);
  SELECT * FROM ft1_config;
} {version 2}

do_execsql_test 1.2 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32);
  SELECT * FROM ft1_config;
} {pgsz 32 version 2}

do_execsql_test 1.3 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64);
  SELECT * FROM ft1_config;
} {pgsz 64 version 2}

#--------------------------------------------------------------------------
# Test the logic for parsing the rank() function definition.
#
foreach {tn defn} {
  1 "fname()"
  2 "fname(1)"
Changes to ext/fts5/test/fts5corrupt2.test.
12
13
14
15
16
17
18

19
20
21
22
23
24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
# This file tests that FTS5 handles corrupt databases (i.e. internal
# inconsistencies in the backing tables) correctly. In this case 
# "correctly" means without crashing.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt2


# Create a simple FTS5 table containing 100 documents. Each document 
# contains 10 terms, each of which start with the character "x".
#
expr srand(0)
db func rnddoc fts5_rnddoc
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
  INSERT INTO t1 SELECT rnddoc(10) FROM ii;
}
set mask [expr 31 << 31]


# Test 1:
#
#   For each page in the t1_data table, open a transaction and DELETE
#   the t1_data entry. Then run:
#
#     * an integrity-check, and







>













>







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# This file tests that FTS5 handles corrupt databases (i.e. internal
# inconsistencies in the backing tables) correctly. In this case 
# "correctly" means without crashing.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt2
sqlite3_fts5_may_be_corrupt 1

# Create a simple FTS5 table containing 100 documents. Each document 
# contains 10 terms, each of which start with the character "x".
#
expr srand(0)
db func rnddoc fts5_rnddoc
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
  INSERT INTO t1 SELECT rnddoc(10) FROM ii;
}
set mask [expr 31 << 31]


# Test 1:
#
#   For each page in the t1_data table, open a transaction and DELETE
#   the t1_data entry. Then run:
#
#     * an integrity-check, and
190
191
192
193
194
195
196

197
198
    execsql ROLLBACK
  }

  do_test 4.$tn.x { expr $nCorrupt>0 } 1
}



finish_test








>


192
193
194
195
196
197
198
199
200
201
    execsql ROLLBACK
  }

  do_test 4.$tn.x { expr $nCorrupt>0 } 1
}


sqlite3_fts5_may_be_corrupt 0
finish_test

Changes to ext/fts5/test/fts5dlidx.test.
57
58
59
60
61
62
63

64
65
66
67
68
69
70
      if {($i % $spc2)==0} { 
        lappend ydoc $rowid
        append doc " y" 
      }
    }
    execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) }
  }

  execsql COMMIT

  do_test $tn.1 {
    execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
  } {}
  
  do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc







>







57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
      if {($i % $spc2)==0} { 
        lappend ydoc $rowid
        append doc " y" 
      }
    }
    execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) }
  }
  breakpoint
  execsql COMMIT

  do_test $tn.1 {
    execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
  } {}
  
  do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc
78
79
80
81
82
83
84
85
86
87
88
89
90
91
  do_fb_test $tn.5.2 { 
    SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc
}


do_dlidx_test1 1.1     10 100 10000 0 1000
do_dlidx_test1 1.2     10 10  10000 0 128
do_dlidx_test1 1.3     10 10  100   0 36028797018963970
do_dlidx_test1 1.3     10 10  50    0 150000000000000000



finish_test








|
|





79
80
81
82
83
84
85
86
87
88
89
90
91
92
  do_fb_test $tn.5.2 { 
    SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc
}


do_dlidx_test1 1.1     10 100 10000 0 1000
do_dlidx_test1 1.2     10 10  10000 0 128
do_dlidx_test1 1.3     10 10  66   0 36028797018963970
do_dlidx_test1 1.4     10 10  50    0 150000000000000000



finish_test

Changes to ext/fts5/test/fts5integrity.test.
26
27
28
29
30
31
32






















33
34
35
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE yy USING fts5(x, prefix=1);
  INSERT INTO yy VALUES('term');
}
do_execsql_test 2.1 {
  INSERT INTO yy(yy) VALUES('integrity-check');
}























finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE yy USING fts5(x, prefix=1);
  INSERT INTO yy VALUES('term');
}
do_execsql_test 2.1 {
  INSERT INTO yy(yy) VALUES('integrity-check');
}

#--------------------------------------------------------------------
#
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE zz USING fts5(z);
  INSERT INTO zz(zz, rank) VALUES('pgsz', 32);
  INSERT INTO zz VALUES('b b b b b b b b b b b b b b');
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz(zz) VALUES('optimize');
}

do_execsql_test 3.1 { INSERT INTO zz(zz) VALUES('integrity-check'); }


#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r}
#exit


finish_test

Changes to ext/fts5/test/fts5rowid.test.
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35

do_catchsql_test 1.2 {
  SELECT fts5_rowid('segment')
} {1 {should be: fts5_rowid('segment', segid, height, pgno))}}

do_execsql_test 1.3 {
  SELECT fts5_rowid('segment', 1, 1, 1)
} {70866960385}

do_catchsql_test 1.4 {
  SELECT fts5_rowid('nosucharg');
} {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}} 


#-------------------------------------------------------------------------







|







21
22
23
24
25
26
27
28
29
30
31
32
33
34
35

do_catchsql_test 1.2 {
  SELECT fts5_rowid('segment')
} {1 {should be: fts5_rowid('segment', segid, height, pgno))}}

do_execsql_test 1.3 {
  SELECT fts5_rowid('segment', 1, 1, 1)
} {139586437121}

do_catchsql_test 1.4 {
  SELECT fts5_rowid('nosucharg');
} {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}} 


#-------------------------------------------------------------------------
Changes to ext/fts5/test/fts5version.test.
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(one);
  INSERT INTO t1 VALUES('a b c d');
} {}

do_execsql_test 1.2 {
  SELECT * FROM t1_config WHERE k='version'
} {version 1}

do_execsql_test 1.3 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'a';
} {1}

do_execsql_test 1.4 {
  UPDATE t1_config set v=2 WHERE k='version';
} 

do_test 1.5 {
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}}

breakpoint
do_test 1.6 {
  db close
  sqlite3 db test.db
  catchsql { INSERT INTO t1 VALUES('x y z') }
} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}}

do_test 1.7 {
  execsql { DELETE FROM t1_config WHERE k='version' }
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 0, expected 1) - run 'rebuild'}}


finish_test








|






|






|






|






|




20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(one);
  INSERT INTO t1 VALUES('a b c d');
} {}

do_execsql_test 1.2 {
  SELECT * FROM t1_config WHERE k='version'
} {version 2}

do_execsql_test 1.3 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'a';
} {1}

do_execsql_test 1.4 {
  UPDATE t1_config set v=3 WHERE k='version';
} 

do_test 1.5 {
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}}

breakpoint
do_test 1.6 {
  db close
  sqlite3 db test.db
  catchsql { INSERT INTO t1 VALUES('x y z') }
} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}}

do_test 1.7 {
  execsql { DELETE FROM t1_config WHERE k='version' }
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 0, expected 2) - run 'rebuild'}}


finish_test

Changes to ext/fts5/tool/loadfts5.tcl.
105
106
107
108
109
110
111

112
113
114
115
116
117
118
db func loadfile loadfile

db transaction {
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"

  }
  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }







>







105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
db func loadfile loadfile

db transaction {
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    # db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }
  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }
Changes to test/permutations.test.
237
238
239
240
241
242
243








244
245
246
247
248
249
250
  fts3varint.test
  fts4growth.test fts4growth2.test
}

test_suite "fts5" -prefix "" -description {
  All FTS5 tests.
} -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test]









test_suite "nofaultsim" -prefix "" -description {
  "Very" quick test suite. Runs in less than 5 minutes on a workstation. 
  This test suite is the same as the "quick" tests, except that some files
  that test malloc and IO errors are omitted.
} -files [
  test_set $allquicktests -exclude *malloc* *ioerr* *fault*







>
>
>
>
>
>
>
>







237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
  fts3varint.test
  fts4growth.test fts4growth2.test
}

test_suite "fts5" -prefix "" -description {
  All FTS5 tests.
} -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test]

test_suite "fts5-light" -prefix "" -description {
  All FTS5 tests except those in files matching *corrupt*, *fault*,
  *big* or *fts5aj*.
} -files [
  test_set \
      [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \
      -exclude *corrupt* *fault* *big* *fts5aj*
]

test_suite "nofaultsim" -prefix "" -description {
  "Very" quick test suite. Runs in less than 5 minutes on a workstation. 
  This test suite is the same as the "quick" tests, except that some files
  that test malloc and IO errors are omitted.
} -files [
  test_set $allquicktests -exclude *malloc* *ioerr* *fault*
Changes to tool/mksqlite3c.tcl.
373
374
375
376
377
378
379

380
381
382
383
384
385
386
   fts5_expr.c
   fts5_hash.c
   fts5_index.c
   fts5parse.c
   fts5_storage.c
   fts5_tokenize.c
   fts5_unicode2.c


   rtree.c
   icu.c
   fts3_icu.c
} {
  copy_file tsrc/$file
}







>







373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
   fts5_expr.c
   fts5_hash.c
   fts5_index.c
   fts5parse.c
   fts5_storage.c
   fts5_tokenize.c
   fts5_unicode2.c
   fts5_vocab.c

   rtree.c
   icu.c
   fts3_icu.c
} {
  copy_file tsrc/$file
}