SQLite

Check-in [d028ba6589]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Use doclist-indexes with "ORDER BY rowid ASC" fts5 queries as well.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: d028ba6589f3122b635474c2683c0f93d5bc6c7c
User & Date: dan 2014-08-05 19:00:22.438
Context
2014-08-05
19:35
Use doclist indexes for AND queries as well as phrases. (check-in: 5d38e6edc4 user: dan tags: fts5)
19:00
Use doclist-indexes with "ORDER BY rowid ASC" fts5 queries as well. (check-in: d028ba6589 user: dan tags: fts5)
2014-08-04
20:07
Fix fts5_index.c to use doclist-indexes when possible. Only some cases work so far. (check-in: 90b82d3ef6 user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_index.c.
546
547
548
549
550
551
552


553




554
555
556
557
558
559

560
561
562
563
564
565
566
567
568
569
570
571
};

/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:


**   A reference to the dlidx record.




*/
struct Fts5DlidxIter {
  Fts5Data *pData;              /* Data for doclist index, if any */
  int iOff;                     /* Current offset into pDlidx */
  int bRowidValid;              /* iRowid is valid */
  int bEof;                     /* At EOF already */


  /* Output variables */
  int iLeafPgno;                /* Page number of current leaf page */
  int bZero;                    /* True if current leaf has no rowids */
  i64 iRowid;                   /* If bZero==0, first rowid on leaf */
};


/*
** An Fts5BtreeIter object is used to iterate through all entries in the
** b-tree hierarchy belonging to a single fts5 segment. In this case the
** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the







>
>
|
>
>
>
>




<

>



<
|







546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563

564
565
566
567
568

569
570
571
572
573
574
575
576
};

/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
**   Record containing the doclist-index data.
**
** bEof:
**   Set to true once iterator has reached EOF.
**
** iOff:
**   Set to the current offset within record pData.
*/
struct Fts5DlidxIter {
  Fts5Data *pData;              /* Data for doclist index, if any */
  int iOff;                     /* Current offset into pDlidx */

  int bEof;                     /* At EOF already */
  int iFirstOff;                /* Used by reverse iterators only */

  /* Output variables */
  int iLeafPgno;                /* Page number of current leaf page */

  i64 iRowid;                   /* First rowid on leaf iLeafPgno */
};


/*
** An Fts5BtreeIter object is used to iterate through all entries in the
** b-tree hierarchy belonging to a single fts5 segment. In this case the
** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the
1120
1121
1122
1123
1124
1125
1126










1127























1128
1129


1130





1131























1132
1133




1134


1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177

1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
** Free any memory allocated by the iterator object.
*/
static void fts5NodeIterFree(Fts5NodeIter *pIter){
  fts5BufferFree(&pIter->term);
}

/*










** Return non-zero if EOF is reached.























*/
static int fts5DlidxIterNext(Fts5DlidxIter *pIter, int bRev){


  if( bRev ){





    i64 iVal;























    int iOff = pIter->iOff;
    int iLimit;




    u8 *a = pIter->pData->p;



    /* Currently iOff points to the first byte of a varint. This block 
    ** decrements iOff until it points to the first byte of the previous 
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }
    pIter->iOff = iOff;

    if( iOff<=0 ){
      pIter->bEof = 1;
      return 1;
    }

    getVarint(&a[iOff], (u64*)&iVal);
    if( iVal==0 ){
      pIter->bZero = 1;
    }else if( iOff==0 ){
      pIter->iRowid = iVal;
    }else{
      pIter->iRowid += iVal;
    }
    pIter->iLeafPgno--;
  }else{
    i64 iVal;
    if( pIter->iOff>=pIter->pData->n ){
      pIter->bEof = 1;
      return 1;
    }
    pIter->iOff += getVarint(&pIter->pData->p[pIter->iOff], (u64*)&iVal);
    if( iVal==0 ){
      pIter->bZero = 1;
    }else{
      pIter->bZero = 0;
      if( pIter->bRowidValid ){
        pIter->iRowid -= iVal;
      }else{
        pIter->bRowidValid = 1;
        pIter->iRowid = iVal;
      }
    }

    pIter->iLeafPgno++;
  }
  return 0;
}

static void fts5DlidxIterLast(Fts5DlidxIter *pIter){
  while( 0==fts5DlidxIterNext(pIter, 0) );
  assert( pIter->iOff==pIter->pData->n && pIter->bEof==1 );
  pIter->bEof = 0;
}

static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  return (p->rc!=SQLITE_OK || pIter->bEof);
}

static void fts5DlidxIterInit(
  Fts5Index *p,                   /* Fts5 Backend to iterate within */
  int bRev,                       /* True for ORDER BY ASC */
  int iIdx, int iSegid,           /* Segment iSegid within index iIdx */
  int iLeafPgno,                  /* Leaf page number to load dlidx for */







>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
>
>
|
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
>
>
>

>
>









<

<
<
<
<
<

<
<
<
<
<
|
<

<
<
<
<
<
|
<
|
<
<
<
<
<
<
<
<
<
<
>
|
|
<
<
<
<
<
<
|
|

<
|







1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217

1218





1219





1220

1221





1222

1223










1224
1225
1226






1227
1228
1229

1230
1231
1232
1233
1234
1235
1236
1237
** Free any memory allocated by the iterator object.
*/
static void fts5NodeIterFree(Fts5NodeIter *pIter){
  fts5BufferFree(&pIter->term);
}

/*
** The iterator passed as the first argument has the following fields set
** as follows. This function sets up the rest of the iterator so that it
** points to the first rowid in the doclist-index.
**
**   pData: pointer to doclist-index record, 
**   iLeafPgno: page number that this doclist-index is associated with.
*/
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  Fts5Data *pData = pIter->pData;
  int i;

  assert( pIter->pData );
  assert( pIter->iLeafPgno>0 );

  /* Count the number of leading 0x00 bytes. Then set iLeafPgno. */
  for(i=0; i<pData->n; i++){ 
    if( pData->p[i] ) break;
  }
  pIter->iLeafPgno += (i+1);
  pIter->iOff = i;

  /* Unless we are already at the end of the doclist-index, load the first
  ** rowid value.  */
  if( pIter->iOff<pData->n ){
    pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&pIter->iRowid);
  }else{
    pIter->bEof = 1;
  }
  pIter->iFirstOff = pIter->iOff;
  return pIter->bEof;
}

/*
** Advance the iterator passed as the only argument.
*/
static int fts5DlidxIterNext(Fts5DlidxIter *pIter){
  Fts5Data *pData = pIter->pData;
  int iOff;

  for(iOff=pIter->iOff; iOff<pData->n; iOff++){
    if( pData->p[iOff] ) break; 
  }

  if( iOff<pData->n ){
    i64 iVal;
    pIter->iLeafPgno += (iOff - pIter->iOff) + 1;
    iOff += getVarint(&pData->p[iOff], (u64*)&iVal);
    pIter->iRowid -= iVal;
    pIter->iOff = iOff;
  }else{
    pIter->bEof = 1;
  }

  return pIter->bEof;
}

static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  return (p->rc!=SQLITE_OK || pIter->bEof);
}

static void fts5DlidxIterLast(Fts5DlidxIter *pIter){
  if( fts5DlidxIterFirst(pIter)==0 ){
    while( 0==fts5DlidxIterNext(pIter) );
    pIter->bEof = 0;
  }
}

static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){
  int iOff = pIter->iOff;

  assert( pIter->bEof==0 );
  if( iOff<=pIter->iFirstOff ){
    pIter->bEof = 1;
  }else{
    u8 *a = pIter->pData->p;
    i64 iVal;
    int iLimit;

    /* Currently iOff points to the first byte of a varint. This block 
    ** decrements iOff until it points to the first byte of the previous 
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }







    getVarint(&a[iOff], (u64*)&iVal);





    pIter->iRowid += iVal;

    pIter->iLeafPgno--;







    while( a[iOff-1]==0x00 ){










      iOff--;
      pIter->iLeafPgno--;
    }






    pIter->iOff = iOff;
  }


  return pIter->bEof;
}

static void fts5DlidxIterInit(
  Fts5Index *p,                   /* Fts5 Backend to iterate within */
  int bRev,                       /* True for ORDER BY ASC */
  int iIdx, int iSegid,           /* Segment iSegid within index iIdx */
  int iLeafPgno,                  /* Leaf page number to load dlidx for */
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
      return;
    }
  }else{
    memset(pIter, 0, sizeof(Fts5DlidxIter));
  }

  pIter->pData = pDlidx;

  pIter->iLeafPgno = iLeafPgno;
  if( bRev==0 ){
    fts5DlidxIterNext(pIter, 0);
  }else{
    fts5DlidxIterLast(pIter);
  }
}

/*
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().







<


|







1249
1250
1251
1252
1253
1254
1255

1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
      return;
    }
  }else{
    memset(pIter, 0, sizeof(Fts5DlidxIter));
  }

  pIter->pData = pDlidx;

  pIter->iLeafPgno = iLeafPgno;
  if( bRev==0 ){
    fts5DlidxIterFirst(pIter);
  }else{
    fts5DlidxIterLast(pIter);
  }
}

/*
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
        iOff += getVarint32(&a[iOff], nPos);
        iOff += nPos;
        getVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid += iDelta;
      }else{
        fts5SegIterReverseNewPage(p, pIter);
#if 0
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
        while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
          Fts5Data *pNew;
          pIter->iLeafPgno--;
          pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
                pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno
          ));
          if( pNew ){
            if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
              if( pIter->iTermLeafOffset<pNew->n ){
                pIter->pLeaf = pNew;
                pIter->iLeafOffset = pIter->iTermLeafOffset;
              }
            }else{
              int iRowidOff, dummy;
              fts5LeafHeader(pNew, &iRowidOff, &dummy);
              if( iRowidOff ){
                pIter->pLeaf = pNew;
                pIter->iLeafOffset = iRowidOff;
              }
            }

            if( pIter->pLeaf ){
              u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
              pIter->iLeafOffset += getVarint(a, (u64*)&pIter->iRowid);
              break;
            }else{
              fts5DataRelease(pNew);
            }
          }
        }

        if( pIter->pLeaf ){
          fts5SegIterReverseInitPage(p, pIter);
        }
#endif
      }
    }else{
      Fts5Data *pLeaf = pIter->pLeaf;
      int iOff;
      int bNewTerm = 0;
      int nKeep = 0;








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







1473
1474
1475
1476
1477
1478
1479






































1480
1481
1482
1483
1484
1485
1486
        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
        iOff += getVarint32(&a[iOff], nPos);
        iOff += nPos;
        getVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid += iDelta;
      }else{
        fts5SegIterReverseNewPage(p, pIter);






































      }
    }else{
      Fts5Data *pLeaf = pIter->pLeaf;
      int iOff;
      int bNewTerm = 0;
      int nKeep = 0;

1551
1552
1553
1554
1555
1556
1557





1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594

1595
1596
1597
1598
1599
1600
1601
  int iOff = pIter->iLeafOffset;  /* Byte offset within current leaf */
  Fts5Data *pLast = 0;
  int pgnoLast = 0;

  /* Move to the page that contains the last rowid in this doclist. */
  pLeaf = pIter->pLeaf;






  while( iOff<pLeaf->n ){
    int nPos;
    i64 iDelta;

    /* Position list size in bytes */
    iOff += getVarint32(&pLeaf->p[iOff], nPos);
    iOff += nPos;
    if( iOff>=pLeaf->n ) break;

    /* Rowid delta. Or, if 0x00, the end of doclist marker. */
    nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
    if( iDelta==0 ) break;
    iOff += nPos;
  }

  if( iOff>=pLeaf->n ){
    Fts5StructureSegment *pSeg = pIter->pSeg;
    i64 iAbs = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pIter->iLeafPgno);
    i64 iLast = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pSeg->pgnoLast);

    /* The last rowid in the doclist may not be on the current page. Search
    ** forward to find the page containing the last rowid.  */
    for(iAbs++; p->rc==SQLITE_OK && iAbs<=iLast; iAbs++){
      Fts5Data *pNew = fts5DataRead(p, iAbs);
      if( pNew ){
        int iRowid, iTerm;
        fts5LeafHeader(pNew, &iRowid, &iTerm);
        if( iRowid ){
          Fts5Data *pTmp = pLast;
          pLast = pNew;
          pNew = pTmp;
          pgnoLast = iAbs & (((i64)1 << FTS5_DATA_PAGE_B) - 1);
        }
        if( iTerm ){
          iAbs = iLast;
        }
        fts5DataRelease(pNew);

      }
    }
  }

  /* If pLast is NULL at this point, then the last rowid for this doclist
  ** lies on the page currently indicated by the iterator. In this case 
  ** iLastOff is set to the value that pIter->iLeafOffset will take when







>
>
>
>
>
|
|
|

|
|
|
|

|
|
|
|
|

|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>







1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
  int iOff = pIter->iLeafOffset;  /* Byte offset within current leaf */
  Fts5Data *pLast = 0;
  int pgnoLast = 0;

  /* Move to the page that contains the last rowid in this doclist. */
  pLeaf = pIter->pLeaf;

  if( pIter->pDlidx ){
    int iSegid = pIter->pSeg->iSegid;
    pgnoLast = pIter->pDlidx->iLeafPgno;
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast));
  }else{
    while( iOff<pLeaf->n ){
      int nPos;
      i64 iDelta;

      /* Position list size in bytes */
      iOff += getVarint32(&pLeaf->p[iOff], nPos);
      iOff += nPos;
      if( iOff>=pLeaf->n ) break;

      /* Rowid delta. Or, if 0x00, the end of doclist marker. */
      nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) break;
      iOff += nPos;
    }

    if( iOff>=pLeaf->n ){
      Fts5StructureSegment *pSeg = pIter->pSeg;
      i64 iAbs = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pIter->iLeafPgno);
      i64 iLast = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pSeg->pgnoLast);

      /* The last rowid in the doclist may not be on the current page. Search
       ** forward to find the page containing the last rowid.  */
      for(iAbs++; p->rc==SQLITE_OK && iAbs<=iLast; iAbs++){
        Fts5Data *pNew = fts5DataRead(p, iAbs);
        if( pNew ){
          int iRowid, iTerm;
          fts5LeafHeader(pNew, &iRowid, &iTerm);
          if( iRowid ){
            Fts5Data *pTmp = pLast;
            pLast = pNew;
            pNew = pTmp;
            pgnoLast = iAbs & (((i64)1 << FTS5_DATA_PAGE_B) - 1);
          }
          if( iTerm ){
            iAbs = iLast;
          }
          fts5DataRelease(pNew);
        }
      }
    }
  }

  /* If pLast is NULL at this point, then the last rowid for this doclist
  ** lies on the page currently indicated by the iterator. In this case 
  ** iLastOff is set to the value that pIter->iLeafOffset will take when
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
    pIter->iLeafPgno = pgnoLast;
    fts5LeafHeader(pLast, &iOff, &dummy);
    iOff += getVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;
  }

  fts5SegIterReverseInitPage(p, pIter);
  pIter->flags |= FTS5_SEGITER_REVERSE;
}

/*
** Iterator pIter currently points to the first rowid of a doclist within
** index iIdx. There is a doclist-index associated with the final term on
** the current page. If the current term is the last term on the page, 
** load the doclist-index from disk and initialize an iterator at 







<







1618
1619
1620
1621
1622
1623
1624

1625
1626
1627
1628
1629
1630
1631
    pIter->iLeafPgno = pgnoLast;
    fts5LeafHeader(pLast, &iOff, &dummy);
    iOff += getVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;
  }

  fts5SegIterReverseInitPage(p, pIter);

}

/*
** Iterator pIter currently points to the first rowid of a doclist within
** index iIdx. There is a doclist-index associated with the final term on
** the current page. If the current term is the last term on the page, 
** load the doclist-index from disk and initialize an iterator at 
1729
1730
1731
1732
1733
1734
1735



1736
1737
1738
1739
1740
1741
1742
      pIter->pLeaf = 0;
    }
  }

  if( bGe==0 ){
    pIter->flags |= FTS5_SEGITER_ONETERM;
    if( pIter->pLeaf ){



      if( bDlidx ){
        fts5SegIterLoadDlidx(p, iIdx, pIter);
      }
      if( flags & FTS5INDEX_QUERY_ASC ){
        fts5SegIterReverse(p, iIdx, pIter);
      }
    }







>
>
>







1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
      pIter->pLeaf = 0;
    }
  }

  if( bGe==0 ){
    pIter->flags |= FTS5_SEGITER_ONETERM;
    if( pIter->pLeaf ){
      if( flags & FTS5INDEX_QUERY_ASC ){
        pIter->flags |= FTS5_SEGITER_REVERSE;
      }
      if( bDlidx ){
        fts5SegIterLoadDlidx(p, iIdx, pIter);
      }
      if( flags & FTS5INDEX_QUERY_ASC ){
        fts5SegIterReverse(p, iIdx, pIter);
      }
    }
1875
1876
1877
1878
1879
1880
1881

1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895

1896
1897

1898
1899

1900
1901
1902

1903

1904

1905
1906
1907
1908
1909
1910
1911
1912

1913
1914
1915
1916
1917
1918
1919
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  i64 iMatch                      /* Advance iterator at least this far */
){
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  int iLeafPgno = pIter->iLeafPgno;


  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );


  if( bRev==0 ){
    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){
      if( pDlidx->bZero==0 ) iLeafPgno = pDlidx->iLeafPgno;
      fts5DlidxIterNext(pDlidx, 0);
    }
    assert( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);

    }
  }else if( 0 ){

    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){
      fts5DlidxIterNext(pDlidx, 0);

      if( pDlidx->bZero==0 ) iLeafPgno = pDlidx->iLeafPgno;
    }
    assert( iLeafPgno<=pIter->iLeafPgno || p->rc );

    if( iLeafPgno<pIter->iLeafPgno ){

      fts5SegIterGotoPage(p, pIter, iLeafPgno);

    }
  }

  while( 1 ){
    fts5SegIterNext(p, pIter);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid<=iMatch ) break;
    if( bRev!=0 && pIter->iRowid>=iMatch ) break;

  }
}

/*
** Move the iterator to the next entry. 
**
** If an error occurs, an error code is left in Fts5Index.rc. It is not 







>





<


|
|




>

|
>

|
>
|
|
|
>

>
|
>




|



>







1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896

1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  i64 iMatch                      /* Advance iterator at least this far */
){
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  int iLeafPgno = pIter->iLeafPgno;
  int bMove = 1;

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );


  if( bRev==0 ){
    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){
      iLeafPgno = pDlidx->iLeafPgno;
      fts5DlidxIterNext(pDlidx);
    }
    assert( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{
    assert( iMatch>pIter->iRowid );
    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){
      fts5DlidxIterPrev(pDlidx);
    }
    iLeafPgno = pDlidx->iLeafPgno;

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );

    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }

  while( 1 ){
    if( bMove ) fts5SegIterNext(p, pIter);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid<=iMatch ) break;
    if( bRev!=0 && pIter->iRowid>=iMatch ) break;
    bMove = 1;
  }
}

/*
** Move the iterator to the next entry. 
**
** If an error occurs, an error code is left in Fts5Index.rc. It is not 
3373
3374
3375
3376
3377
3378
3379


















3380

























3381
3382
3383
3384
3385
3386
3387
      pLvl->pData = 0;
    }
  }
  sqlite3_free(pIter->aLvl);
  fts5BufferFree(&pIter->term);
}














































static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  int iIdx,                       /* Index that pSeg is a part of */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5BtreeIter iter;             /* Used to iterate through b-tree hierarchy */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
      pLvl->pData = 0;
    }
  }
  sqlite3_free(pIter->aLvl);
  fts5BufferFree(&pIter->term);
}

/*
** This function is purely an internal test. It does not contribute to 
** FTS functionality, or even the integrity-check, in any way.
**
** Instead, it tests that the same set of pgno/rowid combinations are 
** visited regardless of whether the doclist-index identified by parameters
** iIdx/iSegid/iLeaf is iterated in forwards or reverse order.
*/
#ifdef SQLITE_DEBUG
static void fts5DlidxIterTestReverse(
  Fts5Index *p, 
  int iIdx,                       /* Index to load doclist-index from */
  int iSegid,                     /* Segment id to load from */
  int iLeaf                       /* Load doclist-index for this leaf */
){
  Fts5DlidxIter *pDlidx = 0;
  i64 cksum1 = 13;
  i64 cksum2 = 13;

  for(fts5DlidxIterInit(p, 0, iIdx, iSegid, iLeaf, &pDlidx);
      fts5DlidxIterEof(p, pDlidx)==0;
      fts5DlidxIterNext(pDlidx)
  ){
    cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 ));
    cksum1 = (cksum1 ^ pDlidx->iRowid);
  }
  fts5DlidxIterFree(pDlidx);
  pDlidx = 0;

  for(fts5DlidxIterInit(p, 1, iIdx, iSegid, iLeaf, &pDlidx);
      fts5DlidxIterEof(p, pDlidx)==0;
      fts5DlidxIterPrev(pDlidx)
  ){
    cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 ));
    cksum2 = (cksum2 ^ pDlidx->iRowid);
  }
  fts5DlidxIterFree(pDlidx);
  pDlidx = 0;

  if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; 
}
#else
# define fts5DlidxIterTestReverse(w,x,y,z)
#endif

static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  int iIdx,                       /* Index that pSeg is a part of */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5BtreeIter iter;             /* Used to iterate through b-tree hierarchy */
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440

3441
3442
3443
3444
3445



3446
3447
3448
3449



3450
3451




3452
3453

3454
3455

3456








3457
3458
3459
3460
3461
3462
3463
3464
3465
3466

3467
3468
3469
3470
3471
3472
3473
      if( res<0 ){
        p->rc = FTS5_CORRUPT;
      }
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;


    /* Now check that the iter.nEmpty leaves following the current leaf
    ** (a) exist and (b) contain no terms. */
    for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){
      pLeaf = fts5DataRead(p, iRow+i);
      if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){
        p->rc = FTS5_CORRUPT;
      }
      fts5DataRelease(pLeaf);
    }

    /* If there is a doclist-index, check that it looks right. */
    if( iter.bDlidx ){
      Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
      int nEntry = 0;
      int iSegid = pSeg->iSegid;
      int bRev = 0;


      for(fts5DlidxIterInit(p, bRev, iIdx, iSegid, iter.iLeaf, &pDlidx);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(pDlidx, bRev)
      ){



        i64 iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pDlidx->iLeafPgno);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          int iRowidOff = fts5GetU16(&pLeaf->p[0]);



          if( pDlidx->bZero ){
            if( iRowidOff!=0 ) p->rc = FTS5_CORRUPT;




          }else{
            i64 iRowid;

            getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT;

          }








          fts5DataRelease(pLeaf);
        }
        nEntry++;
      }

      /* Check that the doclist-index was the right length */
      if( p->rc==SQLITE_OK && nEntry!=iter.nEmpty && nEntry!=iter.nEmpty+1 ){
        p->rc = FTS5_CORRUPT;
      }
      fts5DlidxIterFree(pDlidx);

    }
  }

  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }








<













|

|
>

|

|

>
>
>
|
|
|
|
>
>
>
|
|
>
>
>
>
|
|
>
|
|
>
|
>
>
>
>
>
>
>
>


<


<
<
<
<

>







3476
3477
3478
3479
3480
3481
3482

3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537

3538
3539




3540
3541
3542
3543
3544
3545
3546
3547
3548
      if( res<0 ){
        p->rc = FTS5_CORRUPT;
      }
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;


    /* Now check that the iter.nEmpty leaves following the current leaf
    ** (a) exist and (b) contain no terms. */
    for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){
      pLeaf = fts5DataRead(p, iRow+i);
      if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){
        p->rc = FTS5_CORRUPT;
      }
      fts5DataRelease(pLeaf);
    }

    /* If there is a doclist-index, check that it looks right. */
    if( iter.bDlidx ){
      Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
      int iPrevLeaf = iter.iLeaf;
      int iSegid = pSeg->iSegid;
      int iPg;
      i64 iKey;

      for(fts5DlidxIterInit(p, 0, iIdx, iSegid, iter.iLeaf, &pDlidx);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<pDlidx->iLeafPgno; iPg++){
          iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = pDlidx->iLeafPgno;

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pDlidx->iLeafPgno);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5GetU16(&pLeaf->p[0]);
          getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
          if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT;
          fts5DataRelease(pLeaf);
        }

      }

      for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){
        iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, iPg);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
          fts5DataRelease(pLeaf);
        }

      }





      fts5DlidxIterFree(pDlidx);
      fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf);
    }
  }

  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }

Changes to test/fts5aa.test.
144
145
146
147
148
149
150

151
152
153
154
155
156
157
do_execsql_test 6.2 {
  INSERT INTO t1(t1) VALUES('integrity-check') 
}

#-------------------------------------------------------------------------
#
reset_db

do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y,z);
  INSERT INTO t1(t1) VALUES('pgsz=32');
}

proc doc {} {
  set v [list aaa aab abc abcde b c d dd ddd dddd ddddd]







>







144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
do_execsql_test 6.2 {
  INSERT INTO t1(t1) VALUES('integrity-check') 
}

#-------------------------------------------------------------------------
#
reset_db
expr srand(0)
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y,z);
  INSERT INTO t1(t1) VALUES('pgsz=32');
}

proc doc {} {
  set v [list aaa aab abc abcde b c d dd ddd dddd ddddd]
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
      puts "[lrange $lvl 0 1] $seg"
    }
  }
}

for {set i 1} {$i <= 10} {incr i} {
  do_test 7.$i {
    for {set j 0} {$j < 100} {incr j} {
      set x [doc]
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }







|







170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
      puts "[lrange $lvl 0 1] $seg"
    }
  }
}

for {set i 1} {$i <= 10} {incr i} {
  do_test 7.$i {
    for {set j 0} {$j < 10} {incr j} {
      set x [doc]
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
Changes to test/fts5ac.test.
360
361
362
363
364
365
366




367
368
369
370
371
372
373
  1 {a b} {[N $x -- {a}] && [N $x -- {b}]}
} {
  do_execsql_test 5.$tn {SELECT fts5_expr_tcl($expr, 'N $x')} [list $tclexpr]
}

#-------------------------------------------------------------------------
#




foreach {bAsc sql} {
  0 {SELECT rowid FROM xx WHERE xx MATCH $expr}
  1 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid ASC}
} {
  foreach {tn expr} {
    0.1 x
    1 { NEAR(r c) }







>
>
>
>







360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
  1 {a b} {[N $x -- {a}] && [N $x -- {b}]}
} {
  do_execsql_test 5.$tn {SELECT fts5_expr_tcl($expr, 'N $x')} [list $tclexpr]
}

#-------------------------------------------------------------------------
#
do_execsql_test 6.integrity {
  INSERT INTO xx(xx) VALUES('integrity-check');
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM xx_data} {puts $r}
foreach {bAsc sql} {
  0 {SELECT rowid FROM xx WHERE xx MATCH $expr}
  1 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid ASC}
} {
  foreach {tn expr} {
    0.1 x
    1 { NEAR(r c) }
Changes to test/fts5ah.test.
48
49
50
51
52
53
54






55
56
57
58
59
60
61








62
63
64
65
66
67
68
69
70
71


72

73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
do_execsql_test 1.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

proc reads {} {
  db one {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'}
}







do_test 1.4 {
  set nRead [reads]
  execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'x' }
  set nReadX [expr [reads] - $nRead]
  expr $nReadX>1000
} {1}









foreach {tn q res} "
  1 { SELECT rowid FROM t1 WHERE t1 MATCH 'w + x'   }  [list $W]
  2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w'   }  [list $W]
  3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' }  [list $W]
  4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' }  [list $Y]
" {

  do_test 1.5.$tn.1 {
    set nRead [reads]


    execsql $q

    set n [expr [reads] - $nRead]
    expr {$n < ($nReadX / 10)}
  } {1}

  do_test 1.5.$tn.2 {
    set nRead [reads]
    execsql "$q ORDER BY rowid ASC"
    set n [expr [reads] - $nRead]
    expr {$n < ($nReadX / 10)}
  } {1}

  do_execsql_test 1.5.$tn.3 $q [lsort -int -decr $res]
  do_execsql_test 1.5.$tn.4 "$q ORDER BY rowid ASC" [lsort -int -incr $res]
}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

finish_test








>
>
>
>
>
>







>
>
>
>
>
>
>
>








|
|
>
>
|
>
|



<
<
<
<
<
<
<
|
|






48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93







94
95
96
97
98
99
100
101
do_execsql_test 1.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

proc reads {} {
  db one {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'}
}

proc execsql_reads {sql} {
  set nRead [reads]
  execsql $sql
  expr [reads] - $nRead
}

do_test 1.4 {
  set nRead [reads]
  execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'x' }
  set nReadX [expr [reads] - $nRead]
  expr $nReadX>1000
} {1}

do_test 1.5 {
  set fwd [execsql_reads {SELECT rowid FROM t1 WHERE t1 MATCH 'x' }]
  set bwd [execsql_reads {
    SELECT rowid FROM t1 WHERE t1 MATCH 'x' ORDER BY 1 ASC 
  }]
  expr {$bwd < $fwd + 10}
} {1}

foreach {tn q res} "
  1 { SELECT rowid FROM t1 WHERE t1 MATCH 'w + x'   }  [list $W]
  2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w'   }  [list $W]
  3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' }  [list $W]
  4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' }  [list $Y]
" {

  do_test 1.6.$tn.1 {
    set n [execsql_reads $q]
    expr {$n < ($nReadX / 10)}
  } {1}

  do_test 1.6.$tn.2 {
    set n [execsql_reads "$q ORDER BY rowid ASC"]
    expr {$n < ($nReadX / 10)}
  } {1}








  do_execsql_test 1.6.$tn.3 $q [lsort -int -decr $res]
  do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid ASC" [lsort -int -incr $res]
}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

finish_test