Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix some problems with fts5 detail=none tables. Some still remain. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5-offsets |
Files: | files | file ages | folders |
SHA1: |
6a6f7bc40d6b4c8a4a254a9098c9d2f3 |
User & Date: | dan 2015-12-31 17:36:58.906 |
Context
2015-12-31
| ||
18:39 | Fix the fts5 integrity-check code so that it works with detail=none tables. (check-in: 3a1df69e58 user: dan tags: fts5-offsets) | |
17:36 | Fix some problems with fts5 detail=none tables. Some still remain. (check-in: 6a6f7bc40d user: dan tags: fts5-offsets) | |
2015-12-30
| ||
19:58 | Updates to fts5 to support detail=none mode. As of this commit, many cases are still broken. (check-in: ac8f4cf0ce user: dan tags: fts5-offsets) | |
Changes
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 | ** This function advances the iterator so that it points to the last ** relevant rowid on the page and, if necessary, initializes the ** aRowidOffset[] and iRowidOffset variables. At this point the iterator ** is in its regular state - Fts5SegIter.iLeafOffset points to the first ** byte of the position list content associated with said rowid. */ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ int n = pIter->pLeaf->szLeaf; int i = pIter->iLeafOffset; u8 *a = pIter->pLeaf->p; int iRowidOffset = 0; if( n>pIter->iEndofDoclist ){ n = pIter->iEndofDoclist; } ASSERT_SZLEAF_OK(pIter->pLeaf); while( 1 ){ i64 iDelta = 0; int nPos; int bDummy; | > > > > > | | > > | 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 | ** This function advances the iterator so that it points to the last ** relevant rowid on the page and, if necessary, initializes the ** aRowidOffset[] and iRowidOffset variables. At this point the iterator ** is in its regular state - Fts5SegIter.iLeafOffset points to the first ** byte of the position list content associated with said rowid. */ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ int eDetail = p->pConfig->eDetail; int n = pIter->pLeaf->szLeaf; int i = pIter->iLeafOffset; u8 *a = pIter->pLeaf->p; int iRowidOffset = 0; if( n>pIter->iEndofDoclist ){ n = pIter->iEndofDoclist; } ASSERT_SZLEAF_OK(pIter->pLeaf); while( 1 ){ i64 iDelta = 0; int nPos; int bDummy; if( eDetail==FTS5_DETAIL_NONE ){ /* todo */ }else{ i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); i += nPos; } if( i>=n ) break; i += fts5GetVarint(&a[i], (u64*)&iDelta); pIter->iRowid += iDelta; /* If necessary, grow the pIter->aRowidOffset[] array. 
*/ if( iRowidOffset>=pIter->nRowidOffset ){ int nNew = pIter->nRowidOffset + 8; int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int)); if( aNew==0 ){ p->rc = SQLITE_NOMEM; break; } |
︙ | ︙ | |||
1750 1751 1752 1753 1754 1755 1756 | int iOff; int nPos; int bDummy; i64 iDelta; pIter->iRowidOffset--; pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; | > | | > | 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 | int iOff; int nPos; int bDummy; i64 iDelta; pIter->iRowidOffset--; pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){ iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); iOff += nPos; } fts5GetVarint(&a[iOff], (u64*)&iDelta); pIter->iRowid -= iDelta; fts5SegIterLoadNPos(p, pIter); }else{ fts5SegIterReverseNewPage(p, pIter); } }else{ |
︙ | ︙ | |||
1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 | /* ** Iterator pIter currently points to the first rowid in a doclist. This ** function sets the iterator up so that iterates in reverse order through ** the doclist. */ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ Fts5DlidxIter *pDlidx = pIter->pDlidx; Fts5Data *pLast = 0; int pgnoLast = 0; if( pDlidx ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = fts5DlidxIterPgno(pDlidx); pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); }else{ Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ /* Currently, Fts5SegIter.iLeafOffset points to the first byte of ** position-list content for the current rowid. Back it up so that it ** points to the start of the position-list size field. */ | > > | > | 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 | /* ** Iterator pIter currently points to the first rowid in a doclist. This ** function sets the iterator up so that iterates in reverse order through ** the doclist. */ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ int eDetail = p->pConfig->eDetail; Fts5DlidxIter *pDlidx = pIter->pDlidx; Fts5Data *pLast = 0; int pgnoLast = 0; if( pDlidx ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = fts5DlidxIterPgno(pDlidx); pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); }else{ Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ /* Currently, Fts5SegIter.iLeafOffset points to the first byte of ** position-list content for the current rowid. Back it up so that it ** points to the start of the position-list size field. */ if( eDetail!=FTS5_DETAIL_NONE ){ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); } /* If this condition is true then the largest rowid for the current ** term may not be stored on the current page. 
So search forward to ** see where said rowid really is. */ if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ int pgno; Fts5StructureSegment *pSeg = pIter->pSeg; |
︙ | ︙ | |||
2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 | int nRem = pSeg->nPos; /* Number of bytes still to come */ Fts5Data *pData = 0; u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); int pgno = pSeg->iLeafPgno; int pgnoSave = 0; if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ pgnoSave = pgno+1; } while( 1 ){ xChunk(p, pCtx, pChunk, nChunk); nRem -= nChunk; | > > > | 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 | int nRem = pSeg->nPos; /* Number of bytes still to come */ Fts5Data *pData = 0; u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); int pgno = pSeg->iLeafPgno; int pgnoSave = 0; /* This function does not work with detail=none databases. */ assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ pgnoSave = pgno+1; } while( 1 ){ xChunk(p, pCtx, pChunk, nChunk); nRem -= nChunk; |
︙ | ︙ | |||
3314 3315 3316 3317 3318 3319 3320 | /* ** Append a rowid and position-list size field to the writers output. */ static void fts5WriteAppendRowid( Fts5Index *p, Fts5SegWriter *pWriter, | | < | 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 | /* ** Append a rowid and position-list size field to the writers output. */ static void fts5WriteAppendRowid( Fts5Index *p, Fts5SegWriter *pWriter, i64 iRowid ){ if( p->rc==SQLITE_OK ){ Fts5PageWriter *pPage = &pWriter->writer; if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); } |
︙ | ︙ | |||
3342 3343 3344 3345 3346 3347 3348 | }else{ assert( p->rc || iRowid>pWriter->iPrevRowid ); fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); } pWriter->iPrevRowid = iRowid; pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; | < < | 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 | }else{ assert( p->rc || iRowid>pWriter->iPrevRowid ); fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); } pWriter->iPrevRowid = iRowid; pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; } } static void fts5WriteAppendPoslistData( Fts5Index *p, Fts5SegWriter *pWriter, const u8 *aData, |
︙ | ︙ | |||
3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 | Fts5IndexIter *pIter = 0; /* Iterator to read input data */ int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; int bOldest; /* True if the output segment is the oldest */ assert( iLvl<pStruct->nLevel ); assert( pLvl->nMerge<=pLvl->nSeg ); memset(&writer, 0, sizeof(Fts5SegWriter)); memset(&term, 0, sizeof(Fts5Buffer)); if( pLvl->nMerge ){ | > | 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 | Fts5IndexIter *pIter = 0; /* Iterator to read input data */ int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; int bOldest; /* True if the output segment is the oldest */ int eDetail = p->pConfig->eDetail; assert( iLvl<pStruct->nLevel ); assert( pLvl->nMerge<=pLvl->nSeg ); memset(&writer, 0, sizeof(Fts5SegWriter)); memset(&term, 0, sizeof(Fts5Buffer)); if( pLvl->nMerge ){ |
︙ | ︙ | |||
3608 3609 3610 3611 3612 3613 3614 | /* This is a new term. Append a term to the output segment. */ fts5WriteAppendTerm(p, &writer, nTerm, pTerm); fts5BufferSet(&p->rc, &term, nTerm, pTerm); } /* Append the rowid to the output */ /* WRITEPOSLISTSIZE */ | < | > > > > > > > > | > > | > | 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 | /* This is a new term. Append a term to the output segment. */ fts5WriteAppendTerm(p, &writer, nTerm, pTerm); fts5BufferSet(&p->rc, &term, nTerm, pTerm); } /* Append the rowid to the output */ /* WRITEPOSLISTSIZE */ fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); if( eDetail==FTS5_DETAIL_NONE ){ if( pSegIter->bDel ){ fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); if( pSegIter->nPos>0 ){ fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); } } }else{ /* Append the position-list data to the output */ nPos = pSegIter->nPos*2 + pSegIter->bDel; fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos); fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); } } /* Flush the last leaf page to disk. Set the output segment b-tree height ** and last leaf page number at the same time. */ fts5WriteFinish(p, &writer, &pSeg->pgnoLast); if( fts5MultiIterEof(p, pIter) ){ |
︙ | ︙ | |||
4378 4379 4380 4381 4382 4383 4384 | Fts5Buffer out; memset(&out, 0, sizeof(out)); sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n); if( p->rc ) return; fts5NextRowid(p1, &i1, &iRowid1); | | | 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 | Fts5Buffer out; memset(&out, 0, sizeof(out)); sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n); if( p->rc ) return; fts5NextRowid(p1, &i1, &iRowid1); fts5NextRowid(p2, &i2, &iRowid2); while( i1>=0 || i2>=0 ){ if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){ fts5BufferSafeAppendVarint(&out, iRowid1 - iOut); iOut = iRowid1; fts5NextRowid(p1, &i1, &iRowid1); }else{ fts5BufferSafeAppendVarint(&out, iRowid2 - iOut); |
︙ | ︙ |
Changes to ext/fts5/test/fts5simple2.test.
︙ | ︙ | |||
61 62 63 64 65 66 67 | CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); BEGIN; INSERT INTO t1 VALUES('a1 b1 c1'); INSERT INTO t1 VALUES('a2 b2 c2'); INSERT INTO t1 VALUES('a3 b3 c3'); COMMIT; } | < > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 | CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); BEGIN; INSERT INTO t1 VALUES('a1 b1 c1'); INSERT INTO t1 VALUES('a2 b2 c2'); INSERT INTO t1 VALUES('a3 b3 c3'); COMMIT; } do_execsql_test 4.1 { SELECT rowid FROM t1('b*'); } {1 2 3} #------------------------------------------------------------------------- # reset_db do_execsql_test 5.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); BEGIN; INSERT INTO t1 VALUES('a1 b1 c1'); INSERT INTO t1 VALUES('a2 b2 c2'); INSERT INTO t1 VALUES('a1 b1 c1'); COMMIT; } do_execsql_test 5.1 { SELECT rowid FROM t1('b*') } {1 2 3} #------------------------------------------------------------------------- # reset_db do_execsql_test 6.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, detail=full); BEGIN; INSERT INTO t1 VALUES('a1 b1 c1'); INSERT INTO t1 VALUES('a1 b1 c1'); INSERT INTO t1 VALUES('a1 b1 c1'); COMMIT; } do_execsql_test 6.1 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC } {3 2 1} do_execsql_test 6.2 { SELECT rowid FROM t1('b1') ORDER BY rowid DESC } {3 2 1} do_execsql_test 6.3 { SELECT rowid FROM t1('c1') ORDER BY rowid DESC } {3 2 1} #------------------------------------------------------------------------- # reset_db do_execsql_test 7.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); BEGIN; INSERT INTO t1 VALUES('a1 b1'); INSERT INTO t1 VALUES('a1 b2'); COMMIT; } do_execsql_test 7.0.4 { 
SELECT rowid FROM t1('b*') ORDER BY rowid DESC } {2 1} do_execsql_test 7.0.5 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC } {2 1} #------------------------------------------------------------------------- # reset_db do_execsql_test 7.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); INSERT INTO t1 VALUES('a1 b1 c1'); INSERT INTO t1 VALUES('a2 b2 c2'); INSERT INTO t1 VALUES('a1 b1 c1'); } do_execsql_test 7.0.1 { SELECT rowid FROM t1('b*') } {1 2 3} do_execsql_test 7.0.2 { SELECT rowid FROM t1('a1') } {1 3} do_execsql_test 7.0.3 { SELECT rowid FROM t1('c2') } {2} do_execsql_test 7.0.4 { SELECT rowid FROM t1('b*') ORDER BY rowid DESC } {3 2 1} do_execsql_test 7.0.5 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC } {3 1} do_execsql_test 7.0.7 { SELECT rowid FROM t1('c2') ORDER BY rowid DESC } {2} do_execsql_test 7.1.0 { INSERT INTO t1(t1) VALUES('optimize') } do_execsql_test 7.1.1 { SELECT rowid FROM t1('b*') } {1 2 3} do_execsql_test 7.1.2 { SELECT rowid FROM t1('a1') } {1 3} do_execsql_test 7.1.3 { SELECT rowid FROM t1('c2') } {2} do_execsql_test 7.2.1 { SELECT rowid FROM t1('b*') ORDER BY rowid DESC} {3 2 1} do_execsql_test 7.2.2 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC} {3 1} do_execsql_test 7.2.3 { SELECT rowid FROM t1('c2') ORDER BY rowid DESC} {2} finish_test |