Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | Change the array of 16-bit offsets at the end of each page to an array of varints. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5-incompatible |
Files: | files | file ages | folders |
SHA1: | fab245bea4f283714c17bca22428d5eb |
User & Date: | dan 2015-09-10 05:40:17.756 |
Context
2015-09-10
| ||
10:01 | Fix an fts5 problem that could occur if a term and the first associated rowid are on different leaf pages. (check-in: ffe2796ac9 user: dan tags: fts5-incompatible) | |
05:40 | Change the array of 16-bit offsets at the end of each page to an array of varints. (check-in: fab245bea4 user: dan tags: fts5-incompatible) | |
2015-09-09
| ||
08:15 | Fix a bug in preprocessor macros within fts5_main.c. (check-in: 0eb2b9521f user: dan tags: fts5-incompatible) | |
Changes
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
374 375 376 377 378 379 380 381 382 383 384 385 386 387 | }; /* ** An object of type Fts5SegWriter is used to write to segments. */ struct Fts5PageWriter { int pgno; /* Page number for this page */ Fts5Buffer buf; /* Buffer containing leaf data */ Fts5Buffer pgidx; /* Buffer containing page-index */ Fts5Buffer term; /* Buffer containing previous term on page */ }; struct Fts5DlidxWriter { int pgno; /* Page number for this page */ int bPrevValid; /* True if iPrev is valid */ | > | 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 | }; /* ** An object of type Fts5SegWriter is used to write to segments. */ struct Fts5PageWriter { int pgno; /* Page number for this page */ int iPrevPgidx; /* Previous value written into pgidx */ Fts5Buffer buf; /* Buffer containing leaf data */ Fts5Buffer pgidx; /* Buffer containing page-index */ Fts5Buffer term; /* Buffer containing previous term on page */ }; struct Fts5DlidxWriter { int pgno; /* Page number for this page */ int bPrevValid; /* True if iPrev is valid */ |
︙ | ︙ | |||
488 489 490 491 492 493 494 | int iLeafOffset; /* Byte offset within current leaf */ /* The page and offset from which the current term was read. The offset ** is the offset of the first rowid in the current doclist. */ int iTermLeafPgno; int iTermLeafOffset; | | | 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 | int iLeafOffset; /* Byte offset within current leaf */ /* The page and offset from which the current term was read. The offset ** is the offset of the first rowid in the current doclist. */ int iTermLeafPgno; int iTermLeafOffset; int iPgidxOff; /* Next offset in pgidx */ int iEndofDoclist; /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ int iRowidOffset; /* Current entry in aRowidOffset[] */ int nRowidOffset; /* Allocated size of aRowidOffset[] array */ int *aRowidOffset; /* Array of offset to rowid fields */ |
︙ | ︙ | |||
528 529 530 531 532 533 534 | */ #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) | < < | 529 530 531 532 533 534 535 536 537 538 539 540 541 542 | */ #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) /* ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. ** There is no way to tell if this is populated or not. */ struct Fts5IndexIter { Fts5Index *pIndex; /* Index that owns this iterator */ |
︙ | ︙ | |||
644 645 646 647 648 649 650 651 652 653 654 655 656 657 | ){ int nCmp = MIN(nLeft, nRight); int res = memcmp(pLeft, pRight, nCmp); return (res==0 ? (nLeft - nRight) : res); } #endif /* ** Close the read-only blob handle, if it is open. */ static void fts5CloseReader(Fts5Index *p){ if( p->pReader ){ sqlite3_blob *pReader = p->pReader; | > > > > > | 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 | ){ int nCmp = MIN(nLeft, nRight); int res = memcmp(pLeft, pRight, nCmp); return (res==0 ? (nLeft - nRight) : res); } #endif static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ int ret; fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret); return ret; } /* ** Close the read-only blob handle, if it is open. */ static void fts5CloseReader(Fts5Index *p){ if( p->pReader ){ sqlite3_blob *pReader = p->pReader; |
︙ | ︙ | |||
1519 1520 1521 1522 1523 1524 1525 | }else{ const u8 *a = &pIter->pLeaf->p[iOff]; pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel); } } } | < < < < < < < < < < < < < < | 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 | }else{ const u8 *a = &pIter->pLeaf->p[iOff]; pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel); } } } static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ int iOff = pIter->iLeafOffset; ASSERT_SZLEAF_OK(pIter->pLeaf); if( iOff>=pIter->pLeaf->szLeaf ){ fts5SegIterNextPage(p, pIter); |
︙ | ︙ | |||
1579 1580 1581 1582 1583 1584 1585 | pIter->term.n = nKeep; fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); iOff += nNew; pIter->iTermLeafOffset = iOff; pIter->iTermLeafPgno = pIter->iLeafPgno; pIter->iLeafOffset = iOff; | > > > > > > > | | 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 | pIter->term.n = nKeep; fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); iOff += nNew; pIter->iTermLeafOffset = iOff; pIter->iTermLeafPgno = pIter->iLeafPgno; pIter->iLeafOffset = iOff; if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ pIter->iEndofDoclist = pIter->pLeaf->nn+1; }else{ int nExtra; pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra); pIter->iEndofDoclist += nExtra; } fts5SegIterLoadRowid(p, pIter); } /* ** Initialize the iterator object pIter to iterate through the entries in ** segment pSeg. The iterator is left pointing to the first entry when ** this function returns. |
︙ | ︙ | |||
1617 1618 1619 1620 1621 1622 1623 | fts5SegIterNextPage(p, pIter); } if( p->rc==SQLITE_OK ){ pIter->iLeafOffset = 4; assert_nc( pIter->pLeaf->nn>4 ); assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); | | | 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 | fts5SegIterNextPage(p, pIter); } if( p->rc==SQLITE_OK ){ pIter->iLeafOffset = 4; assert_nc( pIter->pLeaf->nn>4 ); assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; fts5SegIterLoadTerm(p, pIter, 0); fts5SegIterLoadNPos(p, pIter); } } /* ** This function is only ever called on iterators created by calls to |
︙ | ︙ | |||
1785 1786 1787 1788 1789 1790 1791 | iOff = pIter->iLeafOffset + pIter->nPos; if( iOff<n ){ /* The next entry is on the current page. */ assert_nc( iOff<=pIter->iEndofDoclist ); if( iOff>=pIter->iEndofDoclist ){ bNewTerm = 1; | < < < < < | 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 | iOff = pIter->iLeafOffset + pIter->nPos; if( iOff<n ){ /* The next entry is on the current page. */ assert_nc( iOff<=pIter->iEndofDoclist ); if( iOff>=pIter->iEndofDoclist ){ bNewTerm = 1; if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ iOff += fts5GetVarint32(&a[iOff], nKeep); } }else{ u64 iDelta; iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); pIter->iRowid += iDelta; |
︙ | ︙ | |||
1831 1832 1833 1834 1835 1836 1837 | fts5SegIterNextPage(p, pIter); pLeaf = pIter->pLeaf; if( pLeaf==0 ) break; ASSERT_SZLEAF_OK(pLeaf); if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){ iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; | | | > > > > | > > > > > > < > | 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 | fts5SegIterNextPage(p, pIter); pLeaf = pIter->pLeaf; if( pLeaf==0 ) break; ASSERT_SZLEAF_OK(pLeaf); if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){ iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; if( pLeaf->nn>pLeaf->szLeaf ){ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist ); } } else if( pLeaf->nn>pLeaf->szLeaf ){ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( &pLeaf->p[pLeaf->szLeaf], iOff ); pIter->iLeafOffset = iOff; pIter->iEndofDoclist = iOff; bNewTerm = 1; } if( iOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; return; } } } /* Check if the iterator is now at EOF. If so, return early. */ if( pIter->pLeaf ){ if( bNewTerm ){ if( pIter->flags & FTS5_SEGITER_ONETERM ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; }else{ int nExtra; fts5SegIterLoadTerm(p, pIter, nKeep); fts5SegIterLoadNPos(p, pIter); if( pbNewTerm ) *pbNewTerm = 1; } }else{ fts5SegIterLoadNPos(p, pIter); } |
︙ | ︙ | |||
1937 1938 1939 1940 1941 1942 1943 | iOff = fts5LeafFirstRowidOff(pLast); iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; if( fts5LeafIsTermless(pLast) ){ pIter->iEndofDoclist = pLast->nn+1; }else{ | | | 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 | iOff = fts5LeafFirstRowidOff(pLast); iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; if( fts5LeafIsTermless(pLast) ){ pIter->iEndofDoclist = pLast->nn+1; }else{ pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); } } fts5SegIterReverseInitPage(p, pIter); } |
︙ | ︙ | |||
2006 2007 2008 2009 2010 2011 2012 | Fts5Index *p, /* Leave any error code here */ int bGe, /* True for a >= search */ Fts5SegIter *pIter, /* Iterator to seek */ const u8 *pTerm, int nTerm /* Term to search for */ ){ int iOff; const u8 *a = pIter->pLeaf->p; | > | | > > < > | | < < | < < < < > > < < | | | > > > | | < > > > > > > > > | | 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 | Fts5Index *p, /* Leave any error code here */ int bGe, /* True for a >= search */ Fts5SegIter *pIter, /* Iterator to seek */ const u8 *pTerm, int nTerm /* Term to search for */ ){ int iOff; const u8 *a = pIter->pLeaf->p; int szLeaf = pIter->pLeaf->szLeaf; int n = pIter->pLeaf->nn; int nMatch = 0; int nKeep = 0; int nNew = 0; int iTerm = 0; int iTermOff; int iPgidx; /* Current offset in pgidx */ int bEndOfPage = 0; assert( p->rc==SQLITE_OK ); iPgidx = szLeaf; iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff); iOff = iTermOff; while( 1 ){ /* Figure out how many new bytes are in this term */ fts5IndexGetVarint32(a, iOff, nNew); if( nKeep<nMatch ){ goto search_failed; } assert( nKeep>=nMatch ); if( nKeep==nMatch ){ int nCmp; int i; nCmp = MIN(nNew, nTerm-nMatch); for(i=0; i<nCmp; i++){ if( a[iOff+i]!=pTerm[nMatch+i] ) break; } nMatch += i; if( nTerm==nMatch ){ if( i==nNew ){ goto search_success; }else{ goto search_failed; } }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){ goto search_failed; } } if( iPgidx>=n ){ bEndOfPage = 1; break; } iPgidx += fts5GetVarint32(&a[iPgidx], 
nKeep); iTermOff += nKeep; iOff = iTermOff; /* Read the nKeep field of the next term. */ fts5IndexGetVarint32(a, iOff, nKeep); } search_failed: if( bGe==0 ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; return; }else if( bEndOfPage ){ do { iTerm = 0; fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ) return; a = pIter->pLeaf->p; if( fts5LeafIsTermless(pIter->pLeaf)==0 ){ fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff); if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ nKeep = 0; iOff += fts5GetVarint32(&a[iOff], nNew); break; } } }while( 1 ); } search_success: pIter->iLeafOffset = iOff + nNew; pIter->iTermLeafOffset = pIter->iLeafOffset; pIter->iTermLeafPgno = pIter->iLeafPgno; fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); if( iPgidx>=n ){ pIter->iEndofDoclist = pIter->pLeaf->nn+1; }else{ int nExtra; iPgidx += fts5GetVarint32(&a[iPgidx], nExtra); pIter->iEndofDoclist = iTermOff + nExtra; } pIter->iPgidxOff = iPgidx; fts5SegIterLoadRowid(p, pIter); fts5SegIterLoadNPos(p, pIter); } /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg. If there is no such term in the index, the iterator is set to EOF. |
︙ | ︙ | |||
3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 | iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); /* Initialize the next page. */ fts5BufferZero(&pPage->buf); fts5BufferZero(&pPage->pgidx); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); pPage->pgno++; /* Increase the leaves written counter */ pWriter->nLeafWritten++; /* The new leaf holds no terms or rowids */ pWriter->bFirstTermInPage = 1; | > | 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 | iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); /* Initialize the next page. */ fts5BufferZero(&pPage->buf); fts5BufferZero(&pPage->pgidx); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); pPage->iPrevPgidx = 0; pPage->pgno++; /* Increase the leaves written counter */ pWriter->nLeafWritten++; /* The new leaf holds no terms or rowids */ pWriter->bFirstTermInPage = 1; |
︙ | ︙ | |||
3200 3201 3202 3203 3204 3205 3206 | Fts5Buffer *pPgidx = &pWriter->writer.pgidx; if( p->rc ) return; assert( pPage->buf.n>=4 ); assert( pPage->buf.n>4 || pWriter->bFirstTermInPage ); /* If the current leaf page is full, flush it to disk. */ | | > > > > > > | 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 | Fts5Buffer *pPgidx = &pWriter->writer.pgidx; if( p->rc ) return; assert( pPage->buf.n>=4 ); assert( pPage->buf.n>4 || pWriter->bFirstTermInPage ); /* If the current leaf page is full, flush it to disk. */ if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ if( pPage->buf.n>4 ){ fts5WriteFlushLeaf(p, pWriter); } fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING); } /* TODO1: Updating pgidx here. */ pPgidx->n += sqlite3Fts5PutVarint( &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx ); pPage->iPrevPgidx = pPage->buf.n; #if 0 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); pPgidx->n += 2; #endif if( pWriter->bFirstTermInPage ){ nPrefix = 0; if( pPage->pgno!=1 ){ /* This is the first term on a leaf that is not the leftmost leaf in ** the segment b-tree. In this case it is necessary to add a term to ** the b-tree hierarchy that is (a) larger than the largest term |
︙ | ︙ | |||
3404 3405 3406 3407 3408 3409 3410 | /* Bind the current output segment id to the index-writer. This is an ** optimization over binding the same value over and over as rows are ** inserted into %_idx by the current writer. */ sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); } } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 | /* Bind the current output segment id to the index-writer. This is an ** optimization over binding the same value over and over as rows are ** inserted into %_idx by the current writer. */ sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); } } /* ** Iterator pIter was used to iterate through the input segments of on an ** incremental merge operation. This function is called if the incremental ** merge step has finished but the input has not been completely exhausted. */ static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){ int i; |
︙ | ︙ | |||
3491 3492 3493 3494 3495 3496 3497 | fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]); if( p->rc==SQLITE_OK ){ /* Set the szLeaf field */ fts5PutU16(&buf.p[2], buf.n); } /* Set up the new page-index array */ | > > | > > > > > > > | 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 | fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]); if( p->rc==SQLITE_OK ){ /* Set the szLeaf field */ fts5PutU16(&buf.p[2], buf.n); } /* Set up the new page-index array */ fts5BufferAppendVarint(&p->rc, &buf, 4); if( pSeg->iLeafPgno==pSeg->iTermLeafPgno && pSeg->iEndofDoclist<pData->szLeaf ){ int nDiff = pData->szLeaf - pSeg->iEndofDoclist; fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4); fts5BufferAppendBlob(&p->rc, &buf, pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff] ); } fts5DataRelease(pData); pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid); fts5DataWrite(p, iLeafRowid, buf.p, buf.n); } } |
︙ | ︙ | |||
4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 | fts5DataRelease(pLeaf); if( p->rc ) break; } } static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2; int ii; Fts5Buffer buf1 = {0,0,0}; Fts5Buffer buf2 = {0,0,0}; | > > > | | > > > > > > | | 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 | fts5DataRelease(pLeaf); if( p->rc ) break; } } static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2; int iTermOff = 0; int ii; Fts5Buffer buf1 = {0,0,0}; Fts5Buffer buf2 = {0,0,0}; ii = pLeaf->szLeaf; while( ii<pLeaf->nn && p->rc==SQLITE_OK ){ int res; int iOff; int nIncr; ii += fts5GetVarint32(&pLeaf->p[ii], nIncr); iTermOff += nIncr; iOff = iTermOff; if( iOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else if( iTermOff==nIncr ){ int nByte; iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); if( (iOff+nByte)>pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); } |
︙ | ︙ | |||
5422 5423 5424 5425 5426 5427 5428 | }else if( iSegid==0 ){ if( iRowid==FTS5_AVERAGES_ROWID ){ /* todo */ }else{ fts5DecodeStructure(&rc, &s, a, n); } }else{ | | > > > < < < | < | | > > > > | > | > | > > > > > > | | > | 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 | }else if( iSegid==0 ){ if( iRowid==FTS5_AVERAGES_ROWID ){ /* todo */ }else{ fts5DecodeStructure(&rc, &s, a, n); } }else{ Fts5Buffer term; /* Current term read from page */ int szLeaf; /* Offset of pgidx in a[] */ int iPgidxOff; int iPgidxPrev = 0; /* Previous value read from pgidx */ int iTermOff = 0; int iRowidOff = 0; int iOff; int nDoclist; memset(&term, 0, sizeof(Fts5Buffer)); if( n<4 ){ sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt"); goto decode_out; }else{ iRowidOff = fts5GetU16(&a[0]); iPgidxOff = szLeaf = fts5GetU16(&a[2]); if( iPgidxOff<n ){ fts5GetVarint32(&a[iPgidxOff], iTermOff); } } /* Decode the position list tail at the start of the page */ if( iRowidOff!=0 ){ iOff = iRowidOff; }else if( iTermOff!=0 ){ iOff = iTermOff; }else{ iOff = szLeaf; } fts5DecodePoslist(&rc, &s, &a[4], iOff-4); /* Decode any more doclist data that appears on the page before the ** first term. */ nDoclist = (iTermOff ? 
iTermOff : szLeaf) - iOff; fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist); while( iPgidxOff<n ){ int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */ int nByte; /* Bytes of data */ int iEnd; iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte); iPgidxPrev += nByte; iOff = iPgidxPrev; if( iPgidxOff<n ){ fts5GetVarint32(&a[iPgidxOff], nByte); iEnd = iPgidxPrev + nByte; }else{ iEnd = szLeaf; } if( bFirst==0 ){ iOff += fts5GetVarint32(&a[iOff], nByte); term.n = nByte; } iOff += fts5GetVarint32(&a[iOff], nByte); fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); iOff += nByte; sqlite3Fts5BufferAppendPrintf( &rc, &s, " term=%.*s", term.n, (const char*)term.p ); iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff); } fts5BufferFree(&term); |
︙ | ︙ |
Changes to ext/fts5/test/fts5simple.test.
︙ | ︙ | |||
62 63 64 65 66 67 68 | SELECT * FROM t1 WHERE t1 MATCH 'o*'; } {one} do_execsql_test 3.1 { INSERT INTO t1(t1) VALUES('integrity-check'); } {} | < < | 62 63 64 65 66 67 68 69 70 71 72 73 74 75 | SELECT * FROM t1 WHERE t1 MATCH 'o*'; } {one} do_execsql_test 3.1 { INSERT INTO t1(t1) VALUES('integrity-check'); } {} #------------------------------------------------------------------------- reset_db do_execsql_test 4.1 { CREATE VIRTUAL TABLE t11 USING fts5(content); INSERT INTO t11(t11, rank) VALUES('pgsz', 32); INSERT INTO t11 VALUES('another'); INSERT INTO t11 VALUES('string'); |
︙ | ︙ | |||
110 111 112 113 114 115 116 117 118 119 | } {1 2 3 4 5 6 7 8} do_execsql_test 5.3 { SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC } {8 7 6 5 4 3 2 1} #db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x } finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | } {1 2 3 4 5 6 7 8} do_execsql_test 5.3 { SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC } {8 7 6 5 4 3 2 1} #db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x } #------------------------------------------------------------------------- reset_db do_execsql_test 5.1 { CREATE VIRTUAL TABLE tt USING fts5(content); INSERT INTO tt(tt, rank) VALUES('pgsz', 32); INSERT INTO tt VALUES('aa'); } do_execsql_test 5.2 { SELECT rowid FROM tt WHERE tt MATCH 'a*'; } {1} do_execsql_test 5.3 { DELETE FROM tt; BEGIN; INSERT INTO tt VALUES('aa'); INSERT INTO tt VALUES('ab'); COMMIT; } {} do_execsql_test 5.4 { SELECT rowid FROM tt WHERE tt MATCH 'a*'; } {1 2} } do_execsql_test 5.5 { DELETE FROM tt; BEGIN; INSERT INTO tt VALUES('aa'); INSERT INTO tt VALUES('ab'); INSERT INTO tt VALUES('aa'); INSERT INTO tt VALUES('ab'); INSERT INTO tt VALUES('aa'); INSERT INTO tt VALUES('ab'); INSERT INTO tt VALUES('aa'); INSERT INTO tt VALUES('ab'); COMMIT; SELECT rowid FROM tt WHERE tt MATCH 'a*'; } {1 2 3 4 5 6 7 8} do_execsql_test 5.6 { INSERT INTO tt(tt) VALUES('integrity-check'); } reset_db do_execsql_test 5.7 { CREATE VIRTUAL TABLE tt USING fts5(content); INSERT INTO tt(tt, rank) VALUES('pgsz', 32); INSERT INTO tt VALUES('aa ab ac ad ae af'); } do_execsql_test 5.8 { SELECT rowid FROM tt WHERE tt MATCH 'a*'; } {1} finish_test |
Changes to ext/fts5/tool/loadfts5.tcl.
︙ | ︙ | |||
14 15 16 17 18 19 20 21 22 23 24 25 26 27 | foreach f [glob -nocomplain -dir $dir *] { if {$::O(limit) && $::nRow>=$::O(limit)} break if {[file isdir $f]} { load_hierachy $f } else { db eval { INSERT INTO t1 VALUES($f, loadfile($f)) } incr ::nRow if {($::nRow % $::nRowPerDot)==0} { puts -nonewline . if {($::nRow % (65*$::nRowPerDot))==0} { puts "" } flush stdout } | > > > > > > | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | foreach f [glob -nocomplain -dir $dir *] { if {$::O(limit) && $::nRow>=$::O(limit)} break if {[file isdir $f]} { load_hierachy $f } else { db eval { INSERT INTO t1 VALUES($f, loadfile($f)) } incr ::nRow if {$::O(trans) && ($::nRow % $::O(trans))==0} { db eval { COMMIT } db eval { INSERT INTO t1(t1) VALUES('integrity-check') } db eval { BEGIN } } if {($::nRow % $::nRowPerDot)==0} { puts -nonewline . if {($::nRow % (65*$::nRowPerDot))==0} { puts "" } flush stdout } |
︙ | ︙ | |||
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | puts stderr " -fts5 (use fts5)" puts stderr " -porter (use porter tokenizer)" puts stderr " -delete (delete the database file before starting)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" puts stderr " -crisismerge N (set the crisismerge parameter to N)" puts stderr " -prefix PREFIX (comma separated prefix= argument)" exit 1 } set O(vtab) fts5 set O(tok) "" set O(limit) 0 set O(delete) 0 set O(automerge) -1 set O(crisismerge) -1 set O(prefix) "" if {[llength $argv]<2} usage set nOpt [expr {[llength $argv]-2}] for {set i 0} {$i < $nOpt} {incr i} { set arg [lindex $argv $i] switch -- [lindex $argv $i] { -fts4 { | > > | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | puts stderr " -fts5 (use fts5)" puts stderr " -porter (use porter tokenizer)" puts stderr " -delete (delete the database file before starting)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" puts stderr " -crisismerge N (set the crisismerge parameter to N)" puts stderr " -prefix PREFIX (comma separated prefix= argument)" puts stderr " -trans N (commit after N inserts - 0 == never)" exit 1 } set O(vtab) fts5 set O(tok) "" set O(limit) 0 set O(delete) 0 set O(automerge) -1 set O(crisismerge) -1 set O(prefix) "" set O(trans) 0 if {[llength $argv]<2} usage set nOpt [expr {[llength $argv]-2}] for {set i 0} {$i < $nOpt} {incr i} { set arg [lindex $argv $i] switch -- [lindex $argv $i] { -fts4 { |
︙ | ︙ | |||
73 74 75 76 77 78 79 80 81 82 83 84 85 86 | set O(delete) 1 } -limit { if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] } -automerge { if { [incr i]>=$nOpt } usage set O(automerge) [lindex $argv $i] } -crisismerge { | > > > > > | 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 | set O(delete) 1 } -limit { if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] } -trans { if { [incr i]>=$nOpt } usage set O(trans) [lindex $argv $i] } -automerge { if { [incr i]>=$nOpt } usage set O(automerge) [lindex $argv $i] } -crisismerge { |
︙ | ︙ | |||
102 103 104 105 106 107 108 | set dbfile [lindex $argv end-1] if {$O(delete)} { file delete -force $dbfile } sqlite3 db $dbfile catch { load_static_extension db fts5 } db func loadfile loadfile db eval "PRAGMA page_size=4096" | | < > | 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | set dbfile [lindex $argv end-1] if {$O(delete)} { file delete -force $dbfile } sqlite3 db $dbfile catch { load_static_extension db fts5 } db func loadfile loadfile db eval "PRAGMA page_size=4096" db eval BEGIN set pref "" if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" } catch { db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)" db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);" } if {$O(automerge)>=0} { if {$O(vtab) == "fts5"} { db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) } } else { db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) } } } if {$O(crisismerge)>=0} { if {$O(vtab) == "fts5"} { db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))} } else { } } load_hierachy [lindex $argv end] db eval COMMIT |