SQLite

Check-in [6a6f7bc40d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix some problems with fts5 detail=none tables. Some still remain.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5-offsets
Files: files | file ages | folders
SHA1: 6a6f7bc40d6b4c8a4a254a9098c9d2f31fde69ca
User & Date: dan 2015-12-31 17:36:58.906
Context
2015-12-31
18:39
Fix the fts5 integrity-check code so that it works with detail=none tables. (check-in: 3a1df69e58 user: dan tags: fts5-offsets)
17:36
Fix some problems with fts5 detail=none tables. Some still remain. (check-in: 6a6f7bc40d user: dan tags: fts5-offsets)
2015-12-30
19:58
Updates to fts5 to support detail=none mode. As of this commit, many cases are still broken. (check-in: ac8f4cf0ce user: dan tags: fts5-offsets)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_index.c.
1626
1627
1628
1629
1630
1631
1632

1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647




1648
1649

1650
1651
1652
1653

1654
1655
1656
1657
1658
1659
1660
** This function advances the iterator so that it points to the last 
** relevant rowid on the page and, if necessary, initializes the 
** aRowidOffset[] and iRowidOffset variables. At this point the iterator
** is in its regular state - Fts5SegIter.iLeafOffset points to the first
** byte of the position list content associated with said rowid.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){

  int n = pIter->pLeaf->szLeaf;
  int i = pIter->iLeafOffset;
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;

  if( n>pIter->iEndofDoclist ){
    n = pIter->iEndofDoclist;
  }

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  while( 1 ){
    i64 iDelta = 0;
    int nPos;
    int bDummy;





    i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
    i += nPos;

    if( i>=n ) break;
    i += fts5GetVarint(&a[i], (u64*)&iDelta);
    pIter->iRowid += iDelta;


    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
      int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
      if( aNew==0 ){
        p->rc = SQLITE_NOMEM;
        break;
      }







>















>
>
>
>
|
|
>




>







1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
** This function advances the iterator so that it points to the last 
** relevant rowid on the page and, if necessary, initializes the 
** aRowidOffset[] and iRowidOffset variables. At this point the iterator
** is in its regular state - Fts5SegIter.iLeafOffset points to the first
** byte of the position list content associated with said rowid.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  int eDetail = p->pConfig->eDetail;
  int n = pIter->pLeaf->szLeaf;
  int i = pIter->iLeafOffset;
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;

  if( n>pIter->iEndofDoclist ){
    n = pIter->iEndofDoclist;
  }

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  while( 1 ){
    i64 iDelta = 0;
    int nPos;
    int bDummy;

    if( eDetail==FTS5_DETAIL_NONE ){
      /* todo */

    }else{
      i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
      i += nPos;
    }
    if( i>=n ) break;
    i += fts5GetVarint(&a[i], (u64*)&iDelta);
    pIter->iRowid += iDelta;

    /* If necessary, grow the pIter->aRowidOffset[] array. */
    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
      int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
      if( aNew==0 ){
        p->rc = SQLITE_NOMEM;
        break;
      }
1750
1751
1752
1753
1754
1755
1756

1757
1758

1759
1760
1761
1762
1763
1764
1765
        int iOff;
        int nPos;
        int bDummy;
        i64 iDelta;

        pIter->iRowidOffset--;
        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];

        iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
        iOff += nPos;

        fts5GetVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid -= iDelta;
        fts5SegIterLoadNPos(p, pIter);
      }else{
        fts5SegIterReverseNewPage(p, pIter);
      }
    }else{







>
|
|
>







1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
        int iOff;
        int nPos;
        int bDummy;
        i64 iDelta;

        pIter->iRowidOffset--;
        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
        if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
          iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
          iOff += nPos;
        }
        fts5GetVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid -= iDelta;
        fts5SegIterLoadNPos(p, pIter);
      }else{
        fts5SegIterReverseNewPage(p, pIter);
      }
    }else{
1889
1890
1891
1892
1893
1894
1895

1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909

1910

1911
1912
1913
1914
1915
1916
1917

/*
** Iterator pIter currently points to the first rowid in a doclist. This
** function sets the iterator up so that iterates in reverse order through
** the doclist.
*/
static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){

  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  Fts5Data *pLast = 0;
  int pgnoLast = 0;

  if( pDlidx ){
    int iSegid = pIter->pSeg->iSegid;
    pgnoLast = fts5DlidxIterPgno(pDlidx);
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
  }else{
    Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */

    /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
    ** position-list content for the current rowid. Back it up so that it
    ** points to the start of the position-list size field. */

    pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);


    /* If this condition is true then the largest rowid for the current
    ** term may not be stored on the current page. So search forward to
    ** see where said rowid really is.  */
    if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
      int pgno;
      Fts5StructureSegment *pSeg = pIter->pSeg;







>














>
|
>







1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929

/*
** Iterator pIter currently points to the first rowid in a doclist. This
** function sets the iterator up so that iterates in reverse order through
** the doclist.
*/
static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
  int eDetail = p->pConfig->eDetail;
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  Fts5Data *pLast = 0;
  int pgnoLast = 0;

  if( pDlidx ){
    int iSegid = pIter->pSeg->iSegid;
    pgnoLast = fts5DlidxIterPgno(pDlidx);
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
  }else{
    Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */

    /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
    ** position-list content for the current rowid. Back it up so that it
    ** points to the start of the position-list size field. */
    if( eDetail!=FTS5_DETAIL_NONE ){
      pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
    }

    /* If this condition is true then the largest rowid for the current
    ** term may not be stored on the current page. So search forward to
    ** see where said rowid really is.  */
    if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
      int pgno;
      Fts5StructureSegment *pSeg = pIter->pSeg;
2891
2892
2893
2894
2895
2896
2897



2898
2899
2900
2901
2902
2903
2904
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = 0;




  if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
    pgnoSave = pgno+1;
  }

  while( 1 ){
    xChunk(p, pCtx, pChunk, nChunk);
    nRem -= nChunk;







>
>
>







2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = 0;

  /* This function does notmwork with detail=none databases. */
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
    pgnoSave = pgno+1;
  }

  while( 1 ){
    xChunk(p, pCtx, pChunk, nChunk);
    nRem -= nChunk;
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329

/*
** Append a rowid and position-list size field to the writers output. 
*/
static void fts5WriteAppendRowid(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  i64 iRowid,
  int nPos
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->writer;

    if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }







|
<







3329
3330
3331
3332
3333
3334
3335
3336

3337
3338
3339
3340
3341
3342
3343

/*
** Append a rowid and position-list size field to the writers output. 
*/
static void fts5WriteAppendRowid(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  i64 iRowid

){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->writer;

    if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
    }else{
      assert( p->rc || iRowid>pWriter->iPrevRowid );
      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
    }
    pWriter->iPrevRowid = iRowid;
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;

    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);
  }
}

static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  const u8 *aData, 







<
<







3356
3357
3358
3359
3360
3361
3362


3363
3364
3365
3366
3367
3368
3369
    }else{
      assert( p->rc || iRowid>pWriter->iPrevRowid );
      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
    }
    pWriter->iPrevRowid = iRowid;
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;


  }
}

static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  const u8 *aData, 
3539
3540
3541
3542
3543
3544
3545

3546
3547
3548
3549
3550
3551
3552
  Fts5IndexIter *pIter = 0;       /* Iterator to read input data */
  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  int nInput;                     /* Number of input segments */
  Fts5SegWriter writer;           /* Writer object */
  Fts5StructureSegment *pSeg;     /* Output segment */
  Fts5Buffer term;
  int bOldest;                    /* True if the output segment is the oldest */


  assert( iLvl<pStruct->nLevel );
  assert( pLvl->nMerge<=pLvl->nSeg );

  memset(&writer, 0, sizeof(Fts5SegWriter));
  memset(&term, 0, sizeof(Fts5Buffer));
  if( pLvl->nMerge ){







>







3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
  Fts5IndexIter *pIter = 0;       /* Iterator to read input data */
  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  int nInput;                     /* Number of input segments */
  Fts5SegWriter writer;           /* Writer object */
  Fts5StructureSegment *pSeg;     /* Output segment */
  Fts5Buffer term;
  int bOldest;                    /* True if the output segment is the oldest */
  int eDetail = p->pConfig->eDetail;

  assert( iLvl<pStruct->nLevel );
  assert( pLvl->nMerge<=pLvl->nSeg );

  memset(&writer, 0, sizeof(Fts5SegWriter));
  memset(&term, 0, sizeof(Fts5Buffer));
  if( pLvl->nMerge ){
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617








3618


3619

3620
3621
3622
3623
3624
3625
3626
      /* This is a new term. Append a term to the output segment. */
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    nPos = pSegIter->nPos*2 + pSegIter->bDel;
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);









    /* Append the position-list data to the output */


    fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);

  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){







<
|

>
>
>
>
>
>
>
>
|
>
>
|
>







3621
3622
3623
3624
3625
3626
3627

3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
      /* This is a new term. Append a term to the output segment. */
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */

    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

    if( eDetail==FTS5_DETAIL_NONE ){
      if( pSegIter->bDel ){
        fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        if( pSegIter->nPos>0 ){
          fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        }
      }
    }else{
      /* Append the position-list data to the output */
      nPos = pSegIter->nPos*2 + pSegIter->bDel;
      fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
      fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
    }
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392

  Fts5Buffer out;
  memset(&out, 0, sizeof(out));
  sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
  if( p->rc ) return;

  fts5NextRowid(p1, &i1, &iRowid1);
  fts5NextRowid(p1, &i2, &iRowid2);
  while( i1>=0 || i2>=0 ){
    if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
      fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
      iOut = iRowid1;
      fts5NextRowid(p1, &i1, &iRowid1);
    }else{
      fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);







|







4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415

  Fts5Buffer out;
  memset(&out, 0, sizeof(out));
  sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
  if( p->rc ) return;

  fts5NextRowid(p1, &i1, &iRowid1);
  fts5NextRowid(p2, &i2, &iRowid2);
  while( i1>=0 || i2>=0 ){
    if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
      fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
      iOut = iRowid1;
      fts5NextRowid(p1, &i1, &iRowid1);
    }else{
      fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
Changes to ext/fts5/test/fts5simple2.test.
61
62
63
64
65
66
67
68
69
70
71
72
73





































































74
75
  CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none);
  BEGIN;
    INSERT INTO t1 VALUES('a1 b1 c1');
    INSERT INTO t1 VALUES('a2 b2 c2');
    INSERT INTO t1 VALUES('a3 b3 c3');
  COMMIT;
}
breakpoint
do_execsql_test 4.1 {
  SELECT rowid FROM t1('b*');
} {1 2 3}







































































finish_test








<





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


61
62
63
64
65
66
67

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
  CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none);
  BEGIN;
    INSERT INTO t1 VALUES('a1 b1 c1');
    INSERT INTO t1 VALUES('a2 b2 c2');
    INSERT INTO t1 VALUES('a3 b3 c3');
  COMMIT;
}

do_execsql_test 4.1 {
  SELECT rowid FROM t1('b*');
} {1 2 3}


#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none);
  BEGIN;
  INSERT INTO t1 VALUES('a1 b1 c1');
  INSERT INTO t1 VALUES('a2 b2 c2');
  INSERT INTO t1 VALUES('a1 b1 c1');
  COMMIT;
}
do_execsql_test 5.1 { SELECT rowid FROM t1('b*') } {1 2 3}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, detail=full);
  BEGIN;
  INSERT INTO t1 VALUES('a1 b1 c1');
  INSERT INTO t1 VALUES('a1 b1 c1');
  INSERT INTO t1 VALUES('a1 b1 c1');
  COMMIT;
}

do_execsql_test 6.1 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC } {3 2 1}
do_execsql_test 6.2 { SELECT rowid FROM t1('b1') ORDER BY rowid DESC } {3 2 1}
do_execsql_test 6.3 { SELECT rowid FROM t1('c1') ORDER BY rowid DESC } {3 2 1}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none);
  BEGIN;
  INSERT INTO t1 VALUES('a1 b1');
  INSERT INTO t1 VALUES('a1 b2');
  COMMIT;
}
do_execsql_test 7.0.4 { SELECT rowid FROM t1('b*') ORDER BY rowid DESC } {2 1}
do_execsql_test 7.0.5 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC } {2 1}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none);
  INSERT INTO t1 VALUES('a1 b1 c1');
  INSERT INTO t1 VALUES('a2 b2 c2');
  INSERT INTO t1 VALUES('a1 b1 c1');
}
do_execsql_test 7.0.1 { SELECT rowid FROM t1('b*') } {1 2 3}
do_execsql_test 7.0.2 { SELECT rowid FROM t1('a1') } {1 3}
do_execsql_test 7.0.3 { SELECT rowid FROM t1('c2') } {2}

do_execsql_test 7.0.4 { SELECT rowid FROM t1('b*') ORDER BY rowid DESC } {3 2 1}
do_execsql_test 7.0.5 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC } {3 1}
do_execsql_test 7.0.7 { SELECT rowid FROM t1('c2') ORDER BY rowid DESC } {2}

do_execsql_test 7.1.0 { INSERT INTO t1(t1) VALUES('optimize') }

do_execsql_test 7.1.1 { SELECT rowid FROM t1('b*') } {1 2 3}
do_execsql_test 7.1.2 { SELECT rowid FROM t1('a1') } {1 3}
do_execsql_test 7.1.3 { SELECT rowid FROM t1('c2') } {2}

do_execsql_test 7.2.1 { SELECT rowid FROM t1('b*') ORDER BY rowid DESC} {3 2 1}
do_execsql_test 7.2.2 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC} {3 1}
do_execsql_test 7.2.3 { SELECT rowid FROM t1('c2') ORDER BY rowid DESC} {2}

finish_test