SQLite

Check-in [90b82d3ef6]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix fts5_index.c to use doclist-indexes when possible. Only some cases work so far.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 90b82d3ef613b2915e0e280dc1d2e5a2b617d59c
User & Date: dan 2014-08-04 20:07:40.532
Context
2014-08-05
19:00
Use doclist-indexes with "ORDER BY rowid ASC" fts5 queries as well. (check-in: d028ba6589 user: dan tags: fts5)
2014-08-04
20:07
Fix fts5_index.c to use doclist-indexes when possible. Only some cases work so far. (check-in: 90b82d3ef6 user: dan tags: fts5)
2014-08-02
20:49
Start changing things to use doclist indexes as required. code is not activated yet. (check-in: b8864da95d user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_index.c.
262
263
264
265
266
267
268

269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
#endif


typedef struct Fts5BtreeIter Fts5BtreeIter;
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
typedef struct Fts5ChunkIter Fts5ChunkIter;
typedef struct Fts5Data Fts5Data;

typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5PendingDoclist Fts5PendingDoclist;
typedef struct Fts5PendingPoslist Fts5PendingPoslist;
typedef struct Fts5PosIter Fts5PosIter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;


/*
** One object per %_data table.
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
  char *zDataTbl;                 /* Name of %_data table */







>












<







262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281

282
283
284
285
286
287
288
#endif


typedef struct Fts5BtreeIter Fts5BtreeIter;
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
typedef struct Fts5ChunkIter Fts5ChunkIter;
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5PendingDoclist Fts5PendingDoclist;
typedef struct Fts5PendingPoslist Fts5PendingPoslist;
typedef struct Fts5PosIter Fts5PosIter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;


/*
** One object per %_data table.
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
  char *zDataTbl;                 /* Name of %_data table */
317
318
319
320
321
322
323



324
325
326
327
328
329
330

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;
  u8 *aPoslist;
  int nPoslist;
};




struct Fts5IndexIter {
  Fts5Index *pIndex;
  Fts5Structure *pStruct;
  Fts5MultiSegIter *pMulti;
  Fts5DoclistIter *pDoclist;
  Fts5Buffer poslist;             /* Buffer containing current poslist */
};







>
>
>







317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;
  u8 *aPoslist;
  int nPoslist;
};

/*
** Each iterator used by external modules is an instance of this type.
*/
struct Fts5IndexIter {
  Fts5Index *pIndex;
  Fts5Structure *pStruct;
  Fts5MultiSegIter *pMulti;
  Fts5DoclistIter *pDoclist;
  Fts5Buffer poslist;             /* Buffer containing current poslist */
};
485
486
487
488
489
490
491


492
493
494
495
496
497
498
  int iTermLeafOffset;

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */



  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
};

#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02







>
>







488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
  int iTermLeafOffset;

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
};

#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02
535
536
537
538
539
540
541




















542
543
544
545
546
547
548

  /* Output variables */
  Fts5Buffer term;
  int nEmpty;
  int iChild;
  int bDlidx;
};





















/*
** An Fts5BtreeIter object is used to iterate through all entries in the
** b-tree hierarchy belonging to a single fts5 segment. In this case the
** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the
** b-tree hierarchy consists of the following:
**







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573

  /* Output variables */
  Fts5Buffer term;
  int nEmpty;
  int iChild;
  int bDlidx;
};

/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
**   A reference to the dlidx record.
*/
struct Fts5DlidxIter {
  Fts5Data *pData;              /* Data for doclist index, if any */
  int iOff;                     /* Current offset into pDlidx */
  int bRowidValid;              /* iRowid is valid */
  int bEof;                     /* At EOF already */

  /* Output variables */
  int iLeafPgno;                /* Page number of current leaf page */
  int bZero;                    /* True if current leaf has no rowids */
  i64 iRowid;                   /* If bZero==0, first rowid on leaf */
};


/*
** An Fts5BtreeIter object is used to iterate through all entries in the
** b-tree hierarchy belonging to a single fts5 segment. In this case the
** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the
** b-tree hierarchy consists of the following:
**
572
573
574
575
576
577
578

















































579
580
581
582
583
584
585
  /* Output variables */
  Fts5Buffer term;                /* Current term */
  int iLeaf;                      /* Leaf containing terms >= current term */
  int nEmpty;                     /* Number of "empty" leaves following iLeaf */
  int bEof;                       /* Set to true at EOF */
  int bDlidx;                     /* True if there exists a dlidx */
};


















































static void fts5PutU16(u8 *aOut, u16 iVal){
  aOut[0] = (iVal>>8);
  aOut[1] = (iVal&0xFF);
}

static u16 fts5GetU16(const u8 *aIn){







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
  /* Output variables */
  Fts5Buffer term;                /* Current term */
  int iLeaf;                      /* Leaf containing terms >= current term */
  int nEmpty;                     /* Number of "empty" leaves following iLeaf */
  int bEof;                       /* Set to true at EOF */
  int bDlidx;                     /* True if there exists a dlidx */
};


/*
** Decode a segment-data rowid from the %_data table. This function is
** the opposite of macro FTS5_SEGMENT_ROWID().
*/
static void fts5DecodeRowid(
  i64 iRowid,                     /* Rowid from %_data table */
  int *piIdx,                     /* OUT: Index */
  int *piSegid,                   /* OUT: Segment id */
  int *piHeight,                  /* OUT: Height */
  int *piPgno                     /* OUT: Page number */
){
  *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
  iRowid >>= FTS5_DATA_PAGE_B;

  *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
  iRowid >>= FTS5_DATA_HEIGHT_B;

  *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
  iRowid >>= FTS5_DATA_ID_B;

  *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1));
}

static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int iIdx,iSegid,iHeight,iPgno;  /* Rowid compenents */
  fts5DecodeRowid(iKey, &iIdx, &iSegid, &iHeight, &iPgno);

  if( iSegid==0 ){
    if( iKey==FTS5_AVERAGES_ROWID ){
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) ");
    }else{
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
          "(structure idx=%d)", (int)(iKey-10)
      );
    }
  }
  else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx idx=%d segid=%d pgno=%d)",
        iIdx, iSegid, iPgno
    );
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(idx=%d segid=%d h=%d pgno=%d)",
        iIdx, iSegid, iHeight, iPgno
    );
  }
}


static void fts5PutU16(u8 *aOut, u16 iVal){
  aOut[0] = (iVal>>8);
  aOut[1] = (iVal&0xFF);
}

static u16 fts5GetU16(const u8 *aIn){
662
663
664
665
666
667
668
669








670
671
672
673
674
675
676
static Fts5Data *fts5DataReadOrBuffer(
  Fts5Index *p, 
  Fts5Buffer *pBuf, 
  i64 iRowid
){
  Fts5Data *pRet = 0;
  if( p->rc==SQLITE_OK ){
    int rc;









    /* If the blob handle is not yet open, open and seek it. Otherwise, use
    ** the blob_reopen() API to reseek the existing blob handle.  */
    if( p->pReader==0 ){
      Fts5Config *pConfig = p->pConfig;
      rc = sqlite3_blob_open(pConfig->db, 
          pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader







|
>
>
>
>
>
>
>
>







736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
static Fts5Data *fts5DataReadOrBuffer(
  Fts5Index *p, 
  Fts5Buffer *pBuf, 
  i64 iRowid
){
  Fts5Data *pRet = 0;
  if( p->rc==SQLITE_OK ){
    int rc = SQLITE_OK;

#if 0
Fts5Buffer buf = {0,0,0};
fts5DebugRowid(&rc, &buf, iRowid);
fprintf(stdout, "read: %s\n", buf.p);
fflush(stdout);
sqlite3_free(buf.p);
#endif

    /* If the blob handle is not yet open, open and seek it. Otherwise, use
    ** the blob_reopen() API to reseek the existing blob handle.  */
    if( p->pReader==0 ){
      Fts5Config *pConfig = p->pConfig;
      rc = sqlite3_blob_open(pConfig->db, 
          pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
1036
1037
1038
1039
1040
1041
1042













































































































1043
1044
1045
1046
1047
1048
1049

/*
** Free any memory allocated by the iterator object.
*/
static void fts5NodeIterFree(Fts5NodeIter *pIter){
  fts5BufferFree(&pIter->term);
}














































































































/*
** Load the next leaf page into the segment iterator.
*/
static void fts5SegIterNextPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance to next page */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240

/*
** Free any memory allocated by the iterator object.
*/
static void fts5NodeIterFree(Fts5NodeIter *pIter){
  fts5BufferFree(&pIter->term);
}

/*
** Return non-zero if EOF is reached.
*/
static int fts5DlidxIterNext(Fts5DlidxIter *pIter, int bRev){
  if( bRev ){
    i64 iVal;
    int iOff = pIter->iOff;
    int iLimit;
    u8 *a = pIter->pData->p;

    /* Currently iOff points to the first byte of a varint. This block 
    ** decrements iOff until it points to the first byte of the previous 
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }
    pIter->iOff = iOff;

    if( iOff<=0 ){
      pIter->bEof = 1;
      return 1;
    }

    getVarint(&a[iOff], (u64*)&iVal);
    if( iVal==0 ){
      pIter->bZero = 1;
    }else if( iOff==0 ){
      pIter->iRowid = iVal;
    }else{
      pIter->iRowid += iVal;
    }
    pIter->iLeafPgno--;
  }else{
    i64 iVal;
    if( pIter->iOff>=pIter->pData->n ){
      pIter->bEof = 1;
      return 1;
    }
    pIter->iOff += getVarint(&pIter->pData->p[pIter->iOff], (u64*)&iVal);
    if( iVal==0 ){
      pIter->bZero = 1;
    }else{
      pIter->bZero = 0;
      if( pIter->bRowidValid ){
        pIter->iRowid -= iVal;
      }else{
        pIter->bRowidValid = 1;
        pIter->iRowid = iVal;
      }
    }
    pIter->iLeafPgno++;
  }
  return 0;
}

static void fts5DlidxIterLast(Fts5DlidxIter *pIter){
  while( 0==fts5DlidxIterNext(pIter, 0) );
  assert( pIter->iOff==pIter->pData->n && pIter->bEof==1 );
  pIter->bEof = 0;
}

static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  return (p->rc!=SQLITE_OK || pIter->bEof);
}

static void fts5DlidxIterInit(
  Fts5Index *p,                   /* Fts5 Backend to iterate within */
  int bRev,                       /* True for ORDER BY ASC */
  int iIdx, int iSegid,           /* Segment iSegid within index iIdx */
  int iLeafPgno,                  /* Leaf page number to load dlidx for */
  Fts5DlidxIter **ppIter          /* OUT: Populated iterator */
){
  Fts5DlidxIter *pIter = *ppIter;
  Fts5Data *pDlidx;

  pDlidx = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPgno));
  if( pDlidx==0 ) return;
  if( pIter==0 ){
    *ppIter = pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter));
    if( pIter==0 ){ 
      fts5DataRelease(pDlidx);
      return;
    }
  }else{
    memset(pIter, 0, sizeof(Fts5DlidxIter));
  }

  pIter->pData = pDlidx;

  pIter->iLeafPgno = iLeafPgno;
  if( bRev==0 ){
    fts5DlidxIterNext(pIter, 0);
  }else{
    fts5DlidxIterLast(pIter);
  }
}

/*
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
*/
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  if( pIter ){
    fts5DataRelease(pIter->pData);
    sqlite3_free(pIter);
  }
}

/*
** Load the next leaf page into the segment iterator.
*/
static void fts5SegIterNextPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance to next page */
1171
1172
1173
1174
1175
1176
1177












































1178
1179
1180
1181
1182
1183
1184

    pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
    pIter->iLeafOffset = i;
  }
  pIter->iRowidOffset = iRowidOffset;
}














































/*
** Advance iterator pIter to the next entry. 
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It 
** is not considered an error if the iterator reaches EOF. If an error has 
** already occurred when this function is called, it is a no-op.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419

    pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
    pIter->iLeafOffset = i;
  }
  pIter->iRowidOffset = iRowidOffset;
}

/*
**
*/
static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
  assert( pIter->flags & FTS5_SEGITER_REVERSE );
  assert( pIter->flags & FTS5_SEGITER_ONETERM );

  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
  while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
    Fts5Data *pNew;
    pIter->iLeafPgno--;
    pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
          pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno
    ));
    if( pNew ){
      if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
        if( pIter->iTermLeafOffset<pNew->n ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = pIter->iTermLeafOffset;
        }
      }else{
        int iRowidOff, dummy;
        fts5LeafHeader(pNew, &iRowidOff, &dummy);
        if( iRowidOff ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = iRowidOff;
        }
      }

      if( pIter->pLeaf ){
        u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
        pIter->iLeafOffset += getVarint(a, (u64*)&pIter->iRowid);
        break;
      }else{
        fts5DataRelease(pNew);
      }
    }
  }

  if( pIter->pLeaf ){
    fts5SegIterReverseInitPage(p, pIter);
  }
}

/*
** Advance iterator pIter to the next entry. 
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It 
** is not considered an error if the iterator reaches EOF. If an error has 
** already occurred when this function is called, it is a no-op.
1198
1199
1200
1201
1202
1203
1204


1205
1206
1207
1208
1209
1210
1211

        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
        iOff += getVarint32(&a[iOff], nPos);
        iOff += nPos;
        getVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid += iDelta;
      }else{


        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
        while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
          Fts5Data *pNew;
          pIter->iLeafPgno--;
          pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
                pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno







>
>







1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448

        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
        iOff += getVarint32(&a[iOff], nPos);
        iOff += nPos;
        getVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid += iDelta;
      }else{
        fts5SegIterReverseNewPage(p, pIter);
#if 0
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
        while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
          Fts5Data *pNew;
          pIter->iLeafPgno--;
          pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
                pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno
1234
1235
1236
1237
1238
1239
1240

1241
1242
1243
1244
1245
1246
1247
            }
          }
        }

        if( pIter->pLeaf ){
          fts5SegIterReverseInitPage(p, pIter);
        }

      }
    }else{
      Fts5Data *pLeaf = pIter->pLeaf;
      int iOff;
      int bNewTerm = 0;
      int nKeep = 0;








>







1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
            }
          }
        }

        if( pIter->pLeaf ){
          fts5SegIterReverseInitPage(p, pIter);
        }
#endif
      }
    }else{
      Fts5Data *pLeaf = pIter->pLeaf;
      int iOff;
      int bNewTerm = 0;
      int nKeep = 0;

1375
1376
1377
1378
1379
1380
1381




































1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401

1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420

1421
1422
1423
1424
1425

1426
1427
1428
1429
1430
1431
1432

1433
1434
1435
1436
1437
1438
1439
    iOff += getVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;
  }

  fts5SegIterReverseInitPage(p, pIter);
  pIter->flags |= FTS5_SEGITER_REVERSE;
}





































/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg, index iIdx. If there is no such term in the index, the iterator
** is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
** an error has already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterSeekInit(
  Fts5Index *p,                   /* FTS5 backend */
  int iIdx,                       /* Config.aHash[] index of FTS index */
  const u8 *pTerm, int nTerm,     /* Term to seek to */
  int flags,                      /* Mask of FTS5INDEX_XXX flags */
  Fts5StructureSegment *pSeg,     /* Description of segment */
  Fts5SegIter *pIter              /* Object to populate */
){
  int iPg = 1;
  int h;
  int bGe = ((flags & FTS5INDEX_QUERY_PREFIX) && iIdx==0);


  assert( bGe==0 || (flags & FTS5INDEX_QUERY_ASC)==0 );
  assert( pTerm && nTerm );
  memset(pIter, 0, sizeof(*pIter));
  pIter->pSeg = pSeg;
  pIter->iIdx = iIdx;

  /* This block sets stack variable iPg to the leaf page number that may
  ** contain term (pTerm/nTerm), if it is present in the segment. */
  for(h=pSeg->nHeight-1; h>0; h--){
    Fts5NodeIter node;              /* For iterating through internal nodes */
    i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, h, iPg);
    Fts5Data *pNode = fts5DataRead(p, iRowid);
    if( pNode==0 ) break;

    fts5NodeIterInit(pNode->p, pNode->n, &node);
    assert( node.term.n==0 );

    iPg = node.iChild;

    for(fts5NodeIterNext(&p->rc, &node);
        node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0;
        fts5NodeIterNext(&p->rc, &node)
    ){
      iPg = node.iChild;

    }
    fts5NodeIterFree(&node);
    fts5DataRelease(pNode);
  }

  if( iPg<pSeg->pgnoFirst ){
    iPg = pSeg->pgnoFirst;

  }

  pIter->iLeafPgno = iPg - 1;
  fts5SegIterNextPage(p, pIter);

  if( pIter->pLeaf ){
    int res;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




















>



















>





>







>







1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
    iOff += getVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;
  }

  fts5SegIterReverseInitPage(p, pIter);
  pIter->flags |= FTS5_SEGITER_REVERSE;
}

/*
** Iterator pIter currently points to the first rowid of a doclist within
** index iIdx. There is a doclist-index associated with the final term on
** the current page. If the current term is the last term on the page, 
** load the doclist-index from disk and initialize an iterator at 
** (pIter->pDlidx).
*/
static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){
  int iSegid = pIter->pSeg->iSegid;
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
  int iOff = pIter->iLeafOffset;  /* Byte offset within current leaf */

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx==0 );

  /* Check if the current doclist ends on this page. If it does, return
  ** early without loading the doclist-index (as it belongs to a different
  ** term. */
  while( iOff<pLeaf->n ){
    i64 iDelta;
    int nPoslist;

    /* iOff is currently the offset of the size field of a position list. */
    iOff += getVarint32(&pLeaf->p[iOff], nPoslist);
    iOff += nPoslist;

    if( iOff<pLeaf->n ){
      iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return;
    }
  }

  fts5DlidxIterInit(p, bRev, iIdx, iSegid, pIter->iLeafPgno, &pIter->pDlidx);
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg, index iIdx. If there is no such term in the index, the iterator
** is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
** an error has already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterSeekInit(
  Fts5Index *p,                   /* FTS5 backend */
  int iIdx,                       /* Config.aHash[] index of FTS index */
  const u8 *pTerm, int nTerm,     /* Term to seek to */
  int flags,                      /* Mask of FTS5INDEX_XXX flags */
  Fts5StructureSegment *pSeg,     /* Description of segment */
  Fts5SegIter *pIter              /* Object to populate */
){
  int iPg = 1;
  int h;
  int bGe = ((flags & FTS5INDEX_QUERY_PREFIX) && iIdx==0);
  int bDlidx = 0;                 /* True if there is a doclist-index */

  assert( bGe==0 || (flags & FTS5INDEX_QUERY_ASC)==0 );
  assert( pTerm && nTerm );
  memset(pIter, 0, sizeof(*pIter));
  pIter->pSeg = pSeg;
  pIter->iIdx = iIdx;

  /* This block sets stack variable iPg to the leaf page number that may
  ** contain term (pTerm/nTerm), if it is present in the segment. */
  for(h=pSeg->nHeight-1; h>0; h--){
    Fts5NodeIter node;              /* For iterating through internal nodes */
    i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, h, iPg);
    Fts5Data *pNode = fts5DataRead(p, iRowid);
    if( pNode==0 ) break;

    fts5NodeIterInit(pNode->p, pNode->n, &node);
    assert( node.term.n==0 );

    iPg = node.iChild;
    bDlidx = node.bDlidx;
    for(fts5NodeIterNext(&p->rc, &node);
        node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0;
        fts5NodeIterNext(&p->rc, &node)
    ){
      iPg = node.iChild;
      bDlidx = node.bDlidx;
    }
    fts5NodeIterFree(&node);
    fts5DataRelease(pNode);
  }

  if( iPg<pSeg->pgnoFirst ){
    iPg = pSeg->pgnoFirst;
    bDlidx = 0;
  }

  pIter->iLeafPgno = iPg - 1;
  fts5SegIterNextPage(p, pIter);

  if( pIter->pLeaf ){
    int res;
1450
1451
1452
1453
1454
1455
1456
1457




1458

1459
1460
1461
1462
1463
1464
1465
1466
1467
1468

1469
1470
1471
1472
1473
1474
1475
      fts5DataRelease(pIter->pLeaf);
      pIter->pLeaf = 0;
    }
  }

  if( bGe==0 ){
    pIter->flags |= FTS5_SEGITER_ONETERM;
    if( pIter->pLeaf && (flags & FTS5INDEX_QUERY_ASC) ){




      fts5SegIterReverse(p, iIdx, pIter);

    }
  }
}

/*
** Zero the iterator passed as the only argument.
*/
static void fts5SegIterClear(Fts5SegIter *pIter){
  fts5BufferFree(&pIter->term);
  fts5DataRelease(pIter->pLeaf);

  sqlite3_free(pIter->aRowidOffset);
  memset(pIter, 0, sizeof(Fts5SegIter));
}

/*
** Do the comparison necessary to populate pIter->aFirst[iOut].
**







|
>
>
>
>
|
>










>







1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
      fts5DataRelease(pIter->pLeaf);
      pIter->pLeaf = 0;
    }
  }

  if( bGe==0 ){
    pIter->flags |= FTS5_SEGITER_ONETERM;
    if( pIter->pLeaf ){
      if( bDlidx ){
        fts5SegIterLoadDlidx(p, iIdx, pIter);
      }
      if( flags & FTS5INDEX_QUERY_ASC ){
        fts5SegIterReverse(p, iIdx, pIter);
      }
    }
  }
}

/*
** Zero the iterator passed as the only argument.
*/
static void fts5SegIterClear(Fts5SegIter *pIter){
  fts5BufferFree(&pIter->term);
  fts5DataRelease(pIter->pLeaf);
  fts5DlidxIterFree(pIter->pDlidx);
  sqlite3_free(pIter->aRowidOffset);
  memset(pIter, 0, sizeof(Fts5SegIter));
}

/*
** Do the comparison necessary to populate pIter->aFirst[iOut].
**
1546
1547
1548
1549
1550
1551
1552
1553
1554














































































1555
1556
1557
1558
1559
1560
1561





1562
1563




1564

1565
1566
1567
1568
1569
1570
1571
    int iEq;
    if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
      fts5SegIterNext(p, &pIter->aSeg[iEq]);
      i = pIter->nSeg + iEq;
    }
  }
}

/*














































































** Move the iterator to the next entry. 
**
** If an error occurs, an error code is left in Fts5Index.rc. It is not 
** considered an error if the iterator reaches EOF, or if it is already at 
** EOF when this function is called.
*/
static void fts5MultiIterNext(Fts5Index *p, Fts5MultiSegIter *pIter){





  if( p->rc==SQLITE_OK ){
    int iFirst = pIter->aFirst[1];




    fts5SegIterNext(p, &pIter->aSeg[iFirst]);

    fts5MultiIterAdvanced(p, pIter, iFirst, 1);
  }
}

/*
** Allocate a new Fts5MultiSegIter object.
**









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






|
>
>
>
>
>


>
>
>
>
|
>







1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
    int iEq;
    if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
      fts5SegIterNext(p, &pIter->aSeg[iEq]);
      i = pIter->nSeg + iEq;
    }
  }
}

/*
** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
** It is an error if leaf iLeafPgno contains no rowid.
*/
static void fts5SegIterGotoPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int iLeafPgno
){
  assert( iLeafPgno>pIter->iLeafPgno );
  if( p->rc==SQLITE_OK ){
    pIter->iLeafPgno = iLeafPgno-1;
    fts5SegIterNextPage(p, pIter);
    assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
  }

  if( p->rc==SQLITE_OK ){
    int iOff;
    u8 *a = pIter->pLeaf->p;
    int n = pIter->pLeaf->n;

    iOff = fts5GetU16(&a[0]);
    if( iOff<4 || iOff>=n ){
      p->rc = FTS5_CORRUPT;
    }else{
      iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid);
      pIter->iLeafOffset = iOff;
    }
  }
}

/*
** Advance the iterator passed as the second argument until it is at or 
** past rowid iFrom. Regardless of the value of iFrom, the iterator is
** always advanced at least once.
*/
static void fts5SegIterNextFrom(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  i64 iMatch                      /* Advance iterator at least this far */
){
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  int iLeafPgno = pIter->iLeafPgno;

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );


  if( bRev==0 ){
    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){
      if( pDlidx->bZero==0 ) iLeafPgno = pDlidx->iLeafPgno;
      fts5DlidxIterNext(pDlidx, 0);
    }
    assert( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
    }
  }else if( 0 ){
    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){
      fts5DlidxIterNext(pDlidx, 0);
      if( pDlidx->bZero==0 ) iLeafPgno = pDlidx->iLeafPgno;
    }
    assert( iLeafPgno<=pIter->iLeafPgno || p->rc );
    if( iLeafPgno<pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
    }
  }

  while( 1 ){
    fts5SegIterNext(p, pIter);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid<=iMatch ) break;
    if( bRev!=0 && pIter->iRowid>=iMatch ) break;
  }
}

/*
** Move the iterator to the next entry. 
**
** If an error occurs, an error code is left in Fts5Index.rc. It is not 
** considered an error if the iterator reaches EOF, or if it is already at 
** EOF when this function is called.
*/
static void fts5MultiIterNext(
  Fts5Index *p, 
  Fts5MultiSegIter *pIter,
  int bFrom,                      /* True if argument iFrom is valid */
  i64 iFrom                       /* Advance at least as far as this */
){
  if( p->rc==SQLITE_OK ){
    int iFirst = pIter->aFirst[1];
    Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
    if( bFrom && pSeg->pDlidx ){
      fts5SegIterNextFrom(p, pSeg, iFrom);
    }else{
      fts5SegIterNext(p, pSeg);
    }
    fts5MultiIterAdvanced(p, pIter, iFirst, 1);
  }
}

/*
** Allocate a new Fts5MultiSegIter object.
**
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
static void fts5MultiIterNextFrom(
  Fts5Index *p, 
  Fts5MultiSegIter *pIter, 
  i64 iMatch
){
  while( 1 ){
    i64 iRowid;
    fts5MultiIterNext(p, pIter);
    if( fts5MultiIterEof(p, pIter) ) break;
    iRowid = fts5MultiIterRowid(pIter);
    if( pIter->bRev==0 && iRowid<=iMatch ) break;
    if( pIter->bRev!=0 && iRowid>=iMatch ) break;
  }
}








|







2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
static void fts5MultiIterNextFrom(
  Fts5Index *p, 
  Fts5MultiSegIter *pIter, 
  i64 iMatch
){
  while( 1 ){
    i64 iRowid;
    fts5MultiIterNext(p, pIter, 1, iMatch);
    if( fts5MultiIterEof(p, pIter) ) break;
    iRowid = fts5MultiIterRowid(pIter);
    if( pIter->bRev==0 && iRowid<=iMatch ) break;
    if( pIter->bRev!=0 && iRowid>=iMatch ) break;
  }
}

2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
#if 0
fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl);
fflush(stdout);
#endif

  for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter)
  ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ];
    Fts5ChunkIter sPos;           /* Used to iterate through position list */
    int nTerm;
    const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm);

    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){







|







2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
#if 0
fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl);
fflush(stdout);
#endif

  for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ];
    Fts5ChunkIter sPos;           /* Used to iterate through position list */
    int nTerm;
    const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm);

    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
      pLvl->pData = 0;
    }
  }
  sqlite3_free(pIter->aLvl);
  fts5BufferFree(&pIter->term);
}

typedef struct DoclistIdxIter DoclistIdxIter;
struct DoclistIdxIter {
  Fts5Data *pDlidx;             /* Data for doclist index, if any */
  int iOff;                     /* Current offset into pDlidx */
  int bRowidValid;              /* iRowid is valid */

  int bZero;                    /* True if current leaf has no rowid */
  i64 iRowid;                   /* If bZero==0, first rowid on leaf */
};

/*
** Return non-zero if EOF is reached.
*/
static int fts5IndexDoclistIterNext(DoclistIdxIter *pIter){
  i64 iVal;
  if( pIter->iOff>=pIter->pDlidx->n ) return 1;
  pIter->iOff += getVarint(&pIter->pDlidx->p[pIter->iOff], (u64*)&iVal);
  if( iVal==0 ){
    pIter->bZero = 1;
  }else{
    pIter->bZero = 0;
    if( pIter->bRowidValid ){
      pIter->iRowid -= iVal;
    }else{
      pIter->bRowidValid = 1;
      pIter->iRowid = iVal;
    }
  }
  return 0;
}

static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  int iIdx,                       /* Index that pSeg is a part of */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5BtreeIter iter;             /* Used to iterate through b-tree hierarchy */

  /* Iterate through the b-tree hierarchy.  */
  for(fts5BtreeIterInit(p, iIdx, pSeg, &iter);
      iter.bEof==0;
      fts5BtreeIterNext(&iter)
  ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */
    int iOff;                     /* Offset of first term on leaf */
    int i;                        /* Used to iterate through empty leaves */
    DoclistIdxIter dliter;        /* For iterating through any doclist index */

    /* If the leaf in question has already been trimmed from the segment, 
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iter.iLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, iter.iLeaf);
    pLeaf = fts5DataRead(p, iRow);
    if( pLeaf==0 ) break;







<
<
<
<
<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

















<







3373
3374
3375
3376
3377
3378
3379





3380
























3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397

3398
3399
3400
3401
3402
3403
3404
      pLvl->pData = 0;
    }
  }
  sqlite3_free(pIter->aLvl);
  fts5BufferFree(&pIter->term);
}
































static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  int iIdx,                       /* Index that pSeg is a part of */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5BtreeIter iter;             /* Used to iterate through b-tree hierarchy */

  /* Iterate through the b-tree hierarchy.  */
  for(fts5BtreeIterInit(p, iIdx, pSeg, &iter);
      iter.bEof==0;
      fts5BtreeIterNext(&iter)
  ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */
    int iOff;                     /* Offset of first term on leaf */
    int i;                        /* Used to iterate through empty leaves */


    /* If the leaf in question has already been trimmed from the segment, 
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iter.iLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, iter.iLeaf);
    pLeaf = fts5DataRead(p, iRow);
    if( pLeaf==0 ) break;
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095

3096





3097
3098







3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110

3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
      if( res<0 ){
        p->rc = FTS5_CORRUPT;
      }
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;

    memset(&dliter, 0, sizeof(DoclistIdxIter));
    if( iter.bDlidx ){
      i64 iDlidxRowid = FTS5_DOCLIST_IDX_ROWID(iIdx, pSeg->iSegid, iter.iLeaf);
      dliter.pDlidx = fts5DataRead(p, iDlidxRowid);
    }

    /* Now check that the iter.nEmpty leaves following the current leaf
    ** (a) exist and (b) contain no terms. */
    for(i=1; i<=iter.nEmpty; i++){
      pLeaf = fts5DataRead(p, iRow+i);
      if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){
        p->rc = FTS5_CORRUPT;
      }
      if( pLeaf && dliter.pDlidx ){

        if( fts5IndexDoclistIterNext(&dliter) ){





          p->rc = FTS5_CORRUPT;
        }else{







          int iRowidOff = fts5GetU16(&pLeaf->p[0]);
          if( dliter.bZero ){
            if( iRowidOff!=0 ) p->rc = FTS5_CORRUPT;
          }else{
            i64 iRowid;
            getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT;
          }
        }
      }
      fts5DataRelease(pLeaf);
    }


    /* There may (or may not be) a final entry in the doclist. The entry
    ** is only present if the page following the nEmpty termless pages
    ** (a) exists and (b) contains at least one rowid that is part of
    ** the doclist.  */
    if( dliter.pDlidx ){
      if( (iter.iLeaf + iter.nEmpty)==pSeg->pgnoLast ){
        /* The next page does not exist. So the iterator should be at EOF. */
        if( fts5IndexDoclistIterNext(&dliter)==0 ) p->rc = FTS5_CORRUPT;
      }else{
        Fts5Data *pLeaf = fts5DataRead(p, iRow+i);
        if( pLeaf ){
          int iRowidOff = fts5GetU16(&pLeaf->p[0]);
          if( iRowidOff==0 ){
            if( fts5IndexDoclistIterNext(&dliter)==0 ) p->rc = FTS5_CORRUPT;
          }else{
            if( fts5IndexDoclistIterNext(&dliter) ){
              p->rc = FTS5_CORRUPT;
            }else{
              i64 iRowid;
              getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
              if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT;
            }
          }
          fts5DataRelease(pLeaf);
        }
      }
    }

    fts5DataRelease(dliter.pDlidx);
  }

  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }

  fts5BtreeIterFree(&iter);







<
<
<
<
<



|




|
>
|
>
>
>
>
>
|
|
>
>
>
>
>
>
>

|




|

<
<
|
|
>
|
|
<
<
|
<
|
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
|
<
|
|
<
<
<
<







3417
3418
3419
3420
3421
3422
3423





3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456


3457
3458
3459
3460
3461


3462

3463










3464




3465

3466
3467




3468
3469
3470
3471
3472
3473
3474
      if( res<0 ){
        p->rc = FTS5_CORRUPT;
      }
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;







    /* Now check that the iter.nEmpty leaves following the current leaf
    ** (a) exist and (b) contain no terms. */
    for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){
      pLeaf = fts5DataRead(p, iRow+i);
      if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){
        p->rc = FTS5_CORRUPT;
      }
      fts5DataRelease(pLeaf);
    }

    /* If there is a doclist-index, check that it looks right. */
    if( iter.bDlidx ){
      Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
      int nEntry = 0;
      int iSegid = pSeg->iSegid;
      int bRev = 0;

      for(fts5DlidxIterInit(p, bRev, iIdx, iSegid, iter.iLeaf, &pDlidx);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(pDlidx, bRev)
      ){
        i64 iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pDlidx->iLeafPgno);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          int iRowidOff = fts5GetU16(&pLeaf->p[0]);
          if( pDlidx->bZero ){
            if( iRowidOff!=0 ) p->rc = FTS5_CORRUPT;
          }else{
            i64 iRowid;
            getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT;
          }


          fts5DataRelease(pLeaf);
        }
        nEntry++;
      }



      /* Check that the doclist-index was the right length */

      if( p->rc==SQLITE_OK && nEntry!=iter.nEmpty && nEntry!=iter.nEmpty+1 ){










        p->rc = FTS5_CORRUPT;




      }

      fts5DlidxIterFree(pDlidx);
    }




  }

  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }

  fts5BtreeIterFree(&iter);
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179

  /* Check that the checksum of the index matches the argument checksum */
  for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
    Fts5MultiSegIter *pIter;
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, -1, 0, &pIter);
        fts5MultiIterEof(p, pIter)==0;
        fts5MultiIterNext(p, pIter)
    ){
      Fts5PosIter sPos;           /* Used to iterate through position list */
      int n;                      /* Size of term in bytes */
      i64 iRowid = fts5MultiIterRowid(pIter);
      char *z = (char*)fts5MultiIterTerm(pIter, &n);

      for(fts5PosIterInit(p, pIter, &sPos);







|







3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506

  /* Check that the checksum of the index matches the argument checksum */
  for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
    Fts5MultiSegIter *pIter;
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, -1, 0, &pIter);
        fts5MultiIterEof(p, pIter)==0;
        fts5MultiIterNext(p, pIter, 0, 0)
    ){
      Fts5PosIter sPos;           /* Used to iterate through position list */
      int n;                      /* Size of term in bytes */
      i64 iRowid = fts5MultiIterRowid(pIter);
      char *z = (char*)fts5MultiIterTerm(pIter, &n);

      for(fts5PosIterInit(p, pIter, &sPos);
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  }

  fts5StructureRelease(p);
}

/*
** Decode a segment-data rowid from the %_data table. This function is
** the opposite of macro FTS5_SEGMENT_ROWID().
*/
static void fts5DecodeRowid(
  i64 iRowid,                     /* Rowid from %_data table */
  int *piIdx,                     /* OUT: Index */
  int *piSegid,                   /* OUT: Segment id */
  int *piHeight,                  /* OUT: Height */
  int *piPgno                     /* OUT: Page number */
){
  *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
  iRowid >>= FTS5_DATA_PAGE_B;

  *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
  iRowid >>= FTS5_DATA_HEIGHT_B;

  *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
  iRowid >>= FTS5_DATA_ID_B;

  *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1));
}

/*
** Buffer (a/n) is assumed to contain a list of serialized varints. Read
** each varint and append its string representation to buffer pBuf. Return
** after either the input buffer is exhausted or a 0 value is read.
**
** The return value is the number of bytes read from the input buffer.
*/







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







3573
3574
3575
3576
3577
3578
3579























3580
3581
3582
3583
3584
3585
3586
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  }

  fts5StructureRelease(p);
}
























/*
** Buffer (a/n) is assumed to contain a list of serialized varints. Read
** each varint and append its string representation to buffer pBuf. Return
** after either the input buffer is exhausted or a 0 value is read.
**
** The return value is the number of bytes read from the input buffer.
*/
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345

3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
*/
static void fts5DecodeFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (always 2) */
  sqlite3_value **apVal           /* Function arguments */
){
  i64 iRowid;                     /* Rowid for record being decoded */
  int iIdx,iSegid,iHeight,iPgno;  /* Rowid compenents */
  const u8 *a; int n;             /* Record to decode */
  Fts5Buffer s;                   /* Build up text to return here */
  int rc = SQLITE_OK;             /* Return code */

  assert( nArg==2 );
  memset(&s, 0, sizeof(Fts5Buffer));
  iRowid = sqlite3_value_int64(apVal[0]);
  n = sqlite3_value_bytes(apVal[1]);
  a = sqlite3_value_blob(apVal[1]);
  fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno);


  if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
    int i = 0;
    i64 iPrev;
    sqlite3Fts5BufferAppendPrintf(&rc, &s, "(dlidx idx=%d segid=%d pgno=%d)",
        iIdx, iSegid, iPgno
    );
    if( n>0 ){
      i = getVarint(&a[i], (u64*)&iPrev);
      sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
    }
    while( i<n ){
      i64 iVal;
      i += getVarint(&a[i], (u64*)&iVal);
      if( iVal==0 ){
        sqlite3Fts5BufferAppendPrintf(&rc, &s, " x");
      }else{
        iPrev = iPrev - iVal;
        sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
      }
    }

  }else
  if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      sqlite3Fts5BufferAppendPrintf(&rc, &s, "{averages} ");
    }else{
      sqlite3Fts5BufferAppendPrintf(&rc, &s, 
          "{structure idx=%d}", (int)(iRowid-10)
      );
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{

    Fts5Buffer term;
    memset(&term, 0, sizeof(Fts5Buffer));
    sqlite3Fts5BufferAppendPrintf(&rc, &s, "(idx=%d segid=%d h=%d pgno=%d) ",
        iIdx, iSegid, iHeight, iPgno
    );

    if( iHeight==0 ){
      int iTermOff = 0;
      int iRowidOff = 0;
      int iOff;
      int nKeep = 0;








|











>



<
<
<


















|

<
<
<






<
<
<







3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653



3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673



3674
3675
3676
3677
3678
3679



3680
3681
3682
3683
3684
3685
3686
*/
static void fts5DecodeFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (always 2) */
  sqlite3_value **apVal           /* Function arguments */
){
  i64 iRowid;                     /* Rowid for record being decoded */
  int iIdx,iSegid,iHeight,iPgno;  /* Rowid components */
  const u8 *a; int n;             /* Record to decode */
  Fts5Buffer s;                   /* Build up text to return here */
  int rc = SQLITE_OK;             /* Return code */

  assert( nArg==2 );
  memset(&s, 0, sizeof(Fts5Buffer));
  iRowid = sqlite3_value_int64(apVal[0]);
  n = sqlite3_value_bytes(apVal[1]);
  a = sqlite3_value_blob(apVal[1]);
  fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
    int i = 0;
    i64 iPrev;



    if( n>0 ){
      i = getVarint(&a[i], (u64*)&iPrev);
      sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
    }
    while( i<n ){
      i64 iVal;
      i += getVarint(&a[i], (u64*)&iVal);
      if( iVal==0 ){
        sqlite3Fts5BufferAppendPrintf(&rc, &s, " x");
      }else{
        iPrev = iPrev - iVal;
        sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
      }
    }

  }else
  if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{



      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{

    Fts5Buffer term;
    memset(&term, 0, sizeof(Fts5Buffer));




    if( iHeight==0 ){
      int iTermOff = 0;
      int iRowidOff = 0;
      int iOff;
      int nKeep = 0;

3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
    i64 iLastRowid;
    Fts5MultiSegIter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Buffer doclist;

    memset(&doclist, 0, sizeof(doclist));
    for(fts5MultiIterNew(p, pStruct, 0, 1, pToken, nToken, -1, 0, &p1);
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext(p, p1)
    ){
      i64 iRowid = fts5MultiIterRowid(p1);
      int nTerm;
      const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm);
      assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
      if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;








|







3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
    i64 iLastRowid;
    Fts5MultiSegIter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Buffer doclist;

    memset(&doclist, 0, sizeof(doclist));
    for(fts5MultiIterNew(p, pStruct, 0, 1, pToken, nToken, -1, 0, &p1);
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext(p, p1, 0, 0)
    ){
      i64 iRowid = fts5MultiIterRowid(p1);
      int nTerm;
      const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm);
      assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
      if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;

3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
** Move to the next matching rowid. 
*/
void sqlite3Fts5IterNext(Fts5IndexIter *pIter){
  if( pIter->pDoclist ){
    fts5DoclistIterNext(pIter->pDoclist);
  }else{
    fts5BufferZero(&pIter->poslist);
    fts5MultiIterNext(pIter->pIndex, pIter->pMulti);
  }
}

/*
** Move to the next matching rowid that occurs at or after iMatch. The
** definition of "at or after" depends on whether this iterator iterates
** in ascending or descending rowid order.







|







4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
** Move to the next matching rowid. 
*/
void sqlite3Fts5IterNext(Fts5IndexIter *pIter){
  if( pIter->pDoclist ){
    fts5DoclistIterNext(pIter->pDoclist);
  }else{
    fts5BufferZero(&pIter->poslist);
    fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0);
  }
}

/*
** Move to the next matching rowid that occurs at or after iMatch. The
** definition of "at or after" depends on whether this iterator iterates
** in ascending or descending rowid order.
Changes to test/fts5aa.test.
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_execsql_test 2.2 {
  SELECT fts5_decode(id, block) FROM t1_data WHERE id==10
} {
  {{structure idx=0} {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}}
}
do_execsql_test 2.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
#







|







46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_execsql_test 2.2 {
  SELECT fts5_decode(id, block) FROM t1_data WHERE id==10
} {
  {(structure idx=0) {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}}
}
do_execsql_test 2.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
#
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
  if {[set_test_counter errors]} exit
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");







<







178
179
180
181
182
183
184

185
186
187
188
189
190
191
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}

}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
Changes to test/fts5ah.test.
51
52
53
54
55
56
57
58
59

60
61







62
63




64



65

66
67





68
69
70

proc reads {} {
  db one {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'}
}

do_test 1.4 {
  set nRead [reads]
  db eval { SELECT rowid FROM t1 WHERE t1 MATCH 'x' }
  set a [expr [reads] - $nRead]

} {}








do_test 1.5 {
  set nRead [reads]




  db eval { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w' }



  set a [expr [reads] - $nRead]

} {}







finish_test








|
|
>
|

>
>
>
>
>
>
>
|
|
>
>
>
>
|
>
>
>
|
>
|

>
>
>
>
>



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91

proc reads {} {
  db one {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'}
}

do_test 1.4 {
  set nRead [reads]
  execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'x' }
  set nReadX [expr [reads] - $nRead]
  expr $nReadX>1000
} {1}

foreach {tn q res} "
  1 { SELECT rowid FROM t1 WHERE t1 MATCH 'w + x'   }  [list $W]
  2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w'   }  [list $W]
  3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' }  [list $W]
  4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' }  [list $Y]
" {

  do_test 1.5.$tn.1 {
    set nRead [reads]
    execsql $q
    set n [expr [reads] - $nRead]
    expr {$n < ($nReadX / 10)}
  } {1}

  do_test 1.5.$tn.2 {
    set nRead [reads]
    execsql "$q ORDER BY rowid ASC"
    set n [expr [reads] - $nRead]
    expr {$n < ($nReadX / 10)}
  } {1}

  do_execsql_test 1.5.$tn.3 $q [lsort -int -decr $res]
  do_execsql_test 1.5.$tn.4 "$q ORDER BY rowid ASC" [lsort -int -incr $res]
}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

finish_test