SQLite

Check-in [f704bc059e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a problem with fts5 doclist-indexes that occured if the first rowid of the first non-term page of a doclist is zero.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: f704bc059e06b01f1d68fa7dad89e33eace6c389
User & Date: dan 2015-01-27 20:41:00.681
Context
2015-01-29
20:59
Fix some problems with transactions that both read and write an fts5 table. (check-in: 0e225b1535 user: dan tags: fts5)
2015-01-27
20:41
Fix a problem with fts5 doclist-indexes that occured if the first rowid of the first non-term page of a doclist is zero. (check-in: f704bc059e user: dan tags: fts5)
2015-01-24
19:57
Have fts5 store rowids in ascending order. Query speed is virtually the same regardless of rowid order, and ascending order makes some insert optimizations easier. (check-in: 5206ca6005 user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_index.c.
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
**     incremental merge operations.
**
*/

#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4000  /* Add dlidx if this many empty pages */

/*
** Details:
**
** The %_data table managed by this module,
**
**     CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);







|







40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
**     incremental merge operations.
**
*/

#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/*
** Details:
**
** The %_data table managed by this module,
**
**     CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
188
189
190
191
192
193
194
195

196

197

198

199
200
201
202
203
204
205
206
**
**     * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there 
**       is an associated index-by-rowid record.
**     * the number of zero-term leaves as a varint.
**
** 5. Segment doclist indexes:
**
**   A list of varints - the first docid on each page (starting with the

**   first termless page) of the doclist. First element in the list is a

**   literal docid. Each docid thereafter is a (negative) delta. If there

**   are no docids at all on a page, a 0x00 byte takes the place of the

**   delta value.
*/

/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID(iIdx) (10 + (iIdx))     /* For structure records */







|
>
|
>
|
>
|
>
|







188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
**
**     * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there 
**       is an associated index-by-rowid record.
**     * the number of zero-term leaves as a varint.
**
** 5. Segment doclist indexes:
**
**   A list of varints. If the first termless page contains at least one
**   docid, the list begins with that docid as a varint followed by the
**   value 1 (0x01). Or, if the first termless page contains no docids,
**   a varint containing the last docid stored on the term page followed
**   by a 0 (0x00) value.
**
**   For each subsequent page in the doclist, either a 0x00 byte if the
**   page contains no terms, or a delta-encoded docid (always +ve) 
**   representing the first docid on the page otherwise.
*/

/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID(iIdx) (10 + (iIdx))     /* For structure records */
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
  Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
  i64 iPrevRowid;                 /* Previous docid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */
  Fts5Buffer dlidx;               /* Doclist index */
  i64 iDlidxPrev;                 /* Previous rowid appended to dlidx */
  int bDlidxPrevValid;            /* True if iDlidxPrev is valid */
};

/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/docid pair in the merged data.







|







373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
  Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
  i64 iPrevRowid;                 /* Previous docid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */
  Fts5Buffer cdlidx;               /* Doclist index */
  i64 iDlidxPrev;                 /* Previous rowid appended to dlidx */
  int bDlidxPrevValid;            /* True if iDlidxPrev is valid */
};

/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/docid pair in the merged data.
1331
1332
1333
1334
1335
1336
1337



1338
1339
1340
1341

1342
1343
1344
1345






1346
1347
1348

1349
1350
1351
1352
1353
1354
1355

1356

1357
1358
1359



1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
/*
** The iterator passed as the first argument has the following fields set
** as follows. This function sets up the rest of the iterator so that it
** points to the first rowid in the doclist-index.
**
**   pData: pointer to doclist-index record, 
**   iLeafPgno: page number that this doclist-index is associated with.



*/
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  Fts5Data *pData = pIter->pData;
  int i;


  assert( pIter->pData );
  assert( pIter->iLeafPgno>0 );







  /* Count the number of leading 0x00 bytes. Then set iLeafPgno. */
  for(i=0; i<pData->n; i++){ 
    if( pData->p[i] ) break;

  }
  pIter->iLeafPgno += (i+1);
  pIter->iOff = i;

  /* Unless we are already at the end of the doclist-index, load the first
  ** rowid value.  */
  if( pIter->iOff<pData->n ){

    pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&pIter->iRowid);

  }else{
    pIter->bEof = 1;
  }



  pIter->iFirstOff = pIter->iOff;
  return pIter->bEof;
}

/*
** Advance the iterator passed as the only argument.
*/
static int fts5DlidxIterNext(Fts5DlidxIter *pIter){
  Fts5Data *pData = pIter->pData;
  int iOff;

  for(iOff=pIter->iOff; iOff<pData->n; iOff++){
    if( pData->p[iOff] ) break; 
  }

  if( iOff<pData->n ){
    i64 iVal;
    pIter->iLeafPgno += (iOff - pIter->iOff) + 1;
    iOff += getVarint(&pData->p[iOff], (u64*)&iVal);
    pIter->iRowid -= iVal;
    pIter->iOff = iOff;
  }else{
    pIter->bEof = 1;
  }

  return pIter->bEof;
}







>
>
>




>




>
>
>
>
>
>
|
|
|
>
|
<
<

|
|
|
>
|
>
|
|
|
>
>
>



















|







1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364


1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
/*
** The iterator passed as the first argument has the following fields set
** as follows. This function sets up the rest of the iterator so that it
** points to the first rowid in the doclist-index.
**
**   pData: pointer to doclist-index record, 
**   iLeafPgno: page number that this doclist-index is associated with.
**
** When this function is called pIter->iLeafPgno is the page number the
** doclist is associated with (the one featuring the term).
*/
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  Fts5Data *pData = pIter->pData;
  int i;
  int bPresent;

  assert( pIter->pData );
  assert( pIter->iLeafPgno>0 );

  /* Read the first rowid value. And the "present" flag that follows it. */
  pIter->iOff += getVarint(&pData->p[0], (u64*)&pIter->iRowid);
  bPresent = pData->p[pIter->iOff++];
  if( bPresent ){
    i = 0;
  }else{
    /* Count the number of leading 0x00 bytes. */
    for(i=1; pIter->iOff<pData->n; i++){ 
      if( pData->p[pIter->iOff] ) break;
      pIter->iOff++;
    }



    /* Unless we are already at the end of the doclist-index, load the first
    ** rowid value.  */
    if( pIter->iOff<pData->n ){
      i64 iVal;
      pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&iVal);
      pIter->iRowid += iVal;
    }else{
      pIter->bEof = 1;
    }
  }
  pIter->iLeafPgno += (i+1);

  pIter->iFirstOff = pIter->iOff;
  return pIter->bEof;
}

/*
** Advance the iterator passed as the only argument.
*/
static int fts5DlidxIterNext(Fts5DlidxIter *pIter){
  Fts5Data *pData = pIter->pData;
  int iOff;

  for(iOff=pIter->iOff; iOff<pData->n; iOff++){
    if( pData->p[iOff] ) break; 
  }

  if( iOff<pData->n ){
    i64 iVal;
    pIter->iLeafPgno += (iOff - pIter->iOff) + 1;
    iOff += getVarint(&pData->p[iOff], (u64*)&iVal);
    pIter->iRowid += iVal;
    pIter->iOff = iOff;
  }else{
    pIter->bEof = 1;
  }

  return pIter->bEof;
}
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439

1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464













1465
1466
1467
1468
1469
1470
1471
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }

    getVarint(&a[iOff], (u64*)&iVal);
    pIter->iRowid += iVal;
    pIter->iLeafPgno--;

    while( iOff>pIter->iFirstOff 
        && a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0 
    ){
      iOff--;
      pIter->iLeafPgno--;
    }
    pIter->iOff = iOff;
  }

  return pIter->bEof;
}

static void fts5DlidxIterInit(
  Fts5Index *p,                   /* Fts5 Backend to iterate within */
  int bRev,                       /* True for ORDER BY ASC */
  int iIdx, int iSegid,           /* Segment iSegid within index iIdx */
  int iLeafPgno,                  /* Leaf page number to load dlidx for */

  Fts5DlidxIter **ppIter          /* OUT: Populated iterator */
){
  Fts5DlidxIter *pIter = *ppIter;
  Fts5Data *pDlidx;

  pDlidx = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPgno));
  if( pDlidx==0 ) return;
  if( pIter==0 ){
    *ppIter = pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter));
    if( pIter==0 ){ 
      fts5DataRelease(pDlidx);
      return;
    }
  }else{
    memset(pIter, 0, sizeof(Fts5DlidxIter));
  }

  pIter->pData = pDlidx;
  pIter->iLeafPgno = iLeafPgno;
  if( bRev==0 ){
    fts5DlidxIterFirst(pIter);
  }else{
    fts5DlidxIterLast(pIter);
  }
}














/*
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
*/
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  if( pIter ){
    fts5DataRelease(pIter->pData);







|














|


<
|
>



<

<
<


















>
>
>
>
>
>
>
>
>
>
>
>
>







1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455

1456
1457
1458
1459
1460

1461


1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }

    getVarint(&a[iOff], (u64*)&iVal);
    pIter->iRowid -= iVal;
    pIter->iLeafPgno--;

    while( iOff>pIter->iFirstOff 
        && a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0 
    ){
      iOff--;
      pIter->iLeafPgno--;
    }
    pIter->iOff = iOff;
  }

  return pIter->bEof;
}

static void fts5DlidxIterInitFromData(
  Fts5Index *p,                   /* Fts5 Backend to iterate within */
  int bRev,                       /* True for ORDER BY ASC */

  int iLeafPgno,                  /* Leaf page number dlidx is for */
  Fts5Data *pDlidx,               /* Leaf index data */
  Fts5DlidxIter **ppIter          /* OUT: Populated iterator */
){
  Fts5DlidxIter *pIter = *ppIter;




  if( pIter==0 ){
    *ppIter = pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter));
    if( pIter==0 ){ 
      fts5DataRelease(pDlidx);
      return;
    }
  }else{
    memset(pIter, 0, sizeof(Fts5DlidxIter));
  }

  pIter->pData = pDlidx;
  pIter->iLeafPgno = iLeafPgno;
  if( bRev==0 ){
    fts5DlidxIterFirst(pIter);
  }else{
    fts5DlidxIterLast(pIter);
  }
}

static void fts5DlidxIterInit(
  Fts5Index *p,                   /* Fts5 Backend to iterate within */
  int bRev,                       /* True for ORDER BY ASC */
  int iIdx, int iSegid,           /* Segment iSegid within index iIdx */
  int iLeafPgno,                  /* Leaf page number to load dlidx for */
  Fts5DlidxIter **ppIter          /* OUT: Populated iterator */
){
  Fts5Data *pDlidx;
  pDlidx = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPgno));
  if( pDlidx==0 ) return;
  fts5DlidxIterInitFromData(p, bRev, iLeafPgno, pDlidx, ppIter);
}

/*
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
*/
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  if( pIter ){
    fts5DataRelease(pIter->pData);
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
  int bMove = 1;

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );

  if( bRev==0 ){
    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){
      iLeafPgno = pDlidx->iLeafPgno;
      fts5DlidxIterNext(pDlidx);
    }
    assert( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{
    assert( iMatch>pIter->iRowid );
    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){
      fts5DlidxIterPrev(pDlidx);
    }
    iLeafPgno = pDlidx->iLeafPgno;

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );

    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }

  while( 1 ){
    if( bMove ) fts5SegIterNext(p, pIter);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid<=iMatch ) break;
    if( bRev!=0 && pIter->iRowid>=iMatch ) break;
    bMove = 1;
  }
}


/*
** Free the iterator object passed as the second argument.







|









|
|
















|
|







2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
  int bMove = 1;

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );

  if( bRev==0 ){
    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){
      iLeafPgno = pDlidx->iLeafPgno;
      fts5DlidxIterNext(pDlidx);
    }
    assert( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{
    assert( iMatch<pIter->iRowid );
    while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){
      fts5DlidxIterPrev(pDlidx);
    }
    iLeafPgno = pDlidx->iLeafPgno;

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );

    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }

  while( 1 ){
    if( bMove ) fts5SegIterNext(p, pIter);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid>=iMatch ) break;
    if( bRev!=0 && pIter->iRowid<=iMatch ) break;
    bMove = 1;
  }
}


/*
** Free the iterator object passed as the second argument.
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
    Fts5PageWriter *pPg;
    pPg = &pWriter->aWriter[1];
    if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
      i64 iKey = FTS5_DOCLIST_IDX_ROWID(
          pWriter->iIdx, pWriter->iSegid, 
          pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
      );
      assert( pWriter->dlidx.n>0 );
      fts5DataWrite(p, iKey, pWriter->dlidx.p, pWriter->dlidx.n);
      bFlag = 1;
    }
    fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag);
    fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty);
    pWriter->nEmpty = 0;
  }

  /* Whether or not it was written to disk, zero the doclist index at this
  ** point */
  sqlite3Fts5BufferZero(&pWriter->dlidx);
  pWriter->bDlidxPrevValid = 0;
}

static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *aNew;
    Fts5PageWriter *pNew;







|
|









|







2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
    Fts5PageWriter *pPg;
    pPg = &pWriter->aWriter[1];
    if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
      i64 iKey = FTS5_DOCLIST_IDX_ROWID(
          pWriter->iIdx, pWriter->iSegid, 
          pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
      );
      assert( pWriter->cdlidx.n>0 );
      fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n);
      bFlag = 1;
    }
    fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag);
    fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty);
    pWriter->nEmpty = 0;
  }

  /* Whether or not it was written to disk, zero the doclist index at this
  ** point */
  sqlite3Fts5BufferZero(&pWriter->cdlidx);
  pWriter->bDlidxPrevValid = 0;
}

static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *aNew;
    Fts5PageWriter *pNew;
2639
2640
2641
2642
2643
2644
2645


2646




2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663

2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
static void fts5WriteBtreeNoTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter          /* Writer object */
){
  if( pWriter->bFirstRowidInPage ){
    /* No rowids on this page. Append an 0x00 byte to the current 
    ** doclist-index */


    sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, 0);




  }
  pWriter->nEmpty++;
}

/*
** Rowid iRowid has just been appended to the current leaf page. As it is
** the first on its page, append an entry to the current doclist-index.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  i64 iRowid
){
  i64 iVal;
  if( pWriter->bDlidxPrevValid ){
    iVal = pWriter->iDlidxPrev - iRowid;
  }else{

    iVal = iRowid;
  }
  sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, iVal);
  pWriter->bDlidxPrevValid = 1;
  pWriter->iDlidxPrev = iRowid;
}

static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->aWriter[0];







>
>
|
>
>
>
>















|

>
|

|







2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
static void fts5WriteBtreeNoTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter          /* Writer object */
){
  if( pWriter->bFirstRowidInPage ){
    /* No rowids on this page. Append an 0x00 byte to the current 
    ** doclist-index */
    if( pWriter->bDlidxPrevValid==0 ){
      i64 iRowid = pWriter->iPrevRowid;
      sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid);
      pWriter->bDlidxPrevValid = 1;
      pWriter->iDlidxPrev = iRowid;
    }
    sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, 0);
  }
  pWriter->nEmpty++;
}

/*
** Rowid iRowid has just been appended to the current leaf page. As it is
** the first on its page, append an entry to the current doclist-index.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  i64 iRowid
){
  i64 iVal;
  if( pWriter->bDlidxPrevValid ){
    iVal = iRowid - pWriter->iDlidxPrev;
  }else{
    sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid);
    iVal = 1;
  }
  sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iVal);
  pWriter->bDlidxPrevValid = 1;
  pWriter->iDlidxPrev = iRowid;
}

static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->aWriter[0];
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
    assert( pPg || p->rc!=SQLITE_OK );
    if( pPg ){
      fts5BufferFree(&pPg->term);
      fts5BufferFree(&pPg->buf);
    }
  }
  sqlite3_free(pWriter->aWriter);
  sqlite3Fts5BufferFree(&pWriter->dlidx);
}

static void fts5WriteInit(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  int iIdx, int iSegid
){







|







2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
    assert( pPg || p->rc!=SQLITE_OK );
    if( pPg ){
      fts5BufferFree(&pPg->term);
      fts5BufferFree(&pPg->buf);
    }
  }
  sqlite3_free(pWriter->aWriter);
  sqlite3Fts5BufferFree(&pWriter->cdlidx);
}

static void fts5WriteInit(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  int iIdx, int iSegid
){
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
        if( pLeaf ){
          if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
          fts5DataRelease(pLeaf);
        }
      }

      fts5DlidxIterFree(pDlidx);
      fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf);
    }
  }

  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }








|







3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
        if( pLeaf ){
          if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
          fts5DataRelease(pLeaf);
        }
      }

      fts5DlidxIterFree(pDlidx);
      // fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf);
    }
  }

  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }

4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653
4654
4655



4656




4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  memcpy(a, aBlob, n);
  fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
    int i = 0;
    i64 iPrev;
    if( n>0 ){
      i = getVarint(&a[i], (u64*)&iPrev);
      sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
    }
    while( i<n ){
      i64 iVal;
      i += getVarint(&a[i], (u64*)&iVal);
      if( iVal==0 ){
        sqlite3Fts5BufferAppendPrintf(&rc, &s, " x");
      }else{
        iPrev = iPrev - iVal;
        sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
      }



    }





  }else
  if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{








|
|
<
<
<
|
<
|
<
<
<
|
|
<
|
>
>
>
|
>
>
>
>
|
<
|







4669
4670
4671
4672
4673
4674
4675
4676
4677



4678

4679



4680
4681

4682
4683
4684
4685
4686
4687
4688
4689
4690
4691

4692
4693
4694
4695
4696
4697
4698
4699
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  memcpy(a, aBlob, n);
  fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
    Fts5Data dlidx;
    Fts5DlidxIter iter;





    dlidx.p = a;



    dlidx.n = n;
    dlidx.nRef = 2;


    memset(&iter, 0, sizeof(Fts5DlidxIter));
    iter.pData = &dlidx;
    iter.iLeafPgno = iPgno;

    for(fts5DlidxIterFirst(&iter); iter.bEof==0; fts5DlidxIterNext(&iter)){
      sqlite3Fts5BufferAppendPrintf(&rc, &s, 
          " %d(%lld)", iter.iLeafPgno, iter.iRowid
      );
    }

  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{

Changes to ext/fts5/test/fts5aa.test.
181
182
183
184
185
186
187
188
189
190

191
192
193
194
195
196
197
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
  if {$i==2} break
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}


#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);







|


>







181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
#  if {$i==1} break
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
#exit

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
Changes to ext/fts5/test/fts5ac.test.
299
300
301
302
303
304
305

306
307
308
309
310
311
312
313
314
315
316
317
318
319
  for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
    lappend res [string map {{ } .} [$cmd xInst $i]]
  }
  set res
}

sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist


#-------------------------------------------------------------------------
# Test phrase queries.
#
foreach {tn phrase} {
  8 "c"

  1 "o"
  2 "b q"
  3 "e a e"
  4 "m d g q q b k b w f q q p p"
  5 "l o o l v v k"
  6 "a"
  7 "b"







>





<
<







299
300
301
302
303
304
305
306
307
308
309
310
311


312
313
314
315
316
317
318
  for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
    lappend res [string map {{ } .} [$cmd xInst $i]]
  }
  set res
}

sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist


#-------------------------------------------------------------------------
# Test phrase queries.
#
foreach {tn phrase} {


  1 "o"
  2 "b q"
  3 "e a e"
  4 "m d g q q b k b w f q q p p"
  5 "l o o l v v k"
  6 "a"
  7 "b"
Changes to ext/fts5/test/fts5ah.test.
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  }
  set v {w w w w w w w w w w w w w w w w w w w w}
  execsql { INSERT INTO t1 VALUES($v) }
} {}

do_execsql_test 1.1.1 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w'
} [lsort -integer -decr $W]

do_execsql_test 1.1.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'x* AND w*'
} [lsort -integer -decr $W]

do_execsql_test 1.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x'
} [lsort -integer -decr $Y]

do_execsql_test 1.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

proc reads {} {
  db one {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'}







|



|



|







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  }
  set v {w w w w w w w w w w w w w w w w w w w w}
  execsql { INSERT INTO t1 VALUES($v) }
} {}

do_execsql_test 1.1.1 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w'
} [lsort -integer -incr $W]

do_execsql_test 1.1.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'x* AND w*'
} [lsort -integer -incr $W]

do_execsql_test 1.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x'
} [lsort -integer -incr $Y]

do_execsql_test 1.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

proc reads {} {
  db one {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'}
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
  } {1}

  do_test 1.6.$tn.2 {
    set n [execsql_reads "$q ORDER BY rowid ASC"]
    expr {$n < ($nReadX / 10)}
  } {1}

  do_execsql_test 1.6.$tn.3 $q [lsort -int -decr $res]
  do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid ASC" [lsort -int -incr $res]
}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

finish_test








|
|






94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
  } {1}

  do_test 1.6.$tn.2 {
    set n [execsql_reads "$q ORDER BY rowid ASC"]
    expr {$n < ($nReadX / 10)}
  } {1}

  do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res]
  do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid DESC" [lsort -int -decr $res]
}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

finish_test