Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix an fts5 integrity-check problem that affects offsets=0 tables with prefix indexes. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5-offsets |
Files: | files | file ages | folders |
SHA1: |
609a0bc7f34e6dae74ce756aff920f3d |
User & Date: | dan 2015-12-21 18:45:09.329 |
Context
2015-12-22
| ||
18:54 | Updates so that fts5 API functions xInst, xPhraseFirst and xPhraseNext work with the offsets=0 option. (check-in: 69bffc1632 user: dan tags: fts5-offsets) | |
2015-12-21
| ||
18:45 | Fix an fts5 integrity-check problem that affects offsets=0 tables with prefix indexes. (check-in: 609a0bc7f3 user: dan tags: fts5-offsets) | |
2015-12-18
| ||
19:07 | Fix a problem with prefix queries on fts5 offsets=0 tables. (check-in: ad0987d83c user: dan tags: fts5-offsets) | |
Changes
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
293 294 295 296 297 298 299 | /* Character set tests (like isspace(), isalpha() etc.) */ int sqlite3Fts5IsBareword(char t); /* Bucket of terms object used by the integrity-check in offsets=0 mode. */ typedef struct Fts5Termset Fts5Termset; int sqlite3Fts5TermsetNew(Fts5Termset**); | | | 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 | /* Character set tests (like isspace(), isalpha() etc.) */ int sqlite3Fts5IsBareword(char t); /* Bucket of terms object used by the integrity-check in offsets=0 mode. */ typedef struct Fts5Termset Fts5Termset; int sqlite3Fts5TermsetNew(Fts5Termset**); int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent); void sqlite3Fts5TermsetFree(Fts5Termset*); /* ** End of interface to code in fts5_buffer.c. **************************************************************************/ /************************************************************************** |
︙ | ︙ |
Changes to ext/fts5/fts5_buffer.c.
︙ | ︙ | |||
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 | /************************************************************************* */ typedef struct Fts5TermsetEntry Fts5TermsetEntry; struct Fts5TermsetEntry { char *pTerm; int nTerm; Fts5TermsetEntry *pNext; }; struct Fts5Termset { Fts5TermsetEntry *apHash[512]; }; int sqlite3Fts5TermsetNew(Fts5Termset **pp){ int rc = SQLITE_OK; *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset)); return rc; } int sqlite3Fts5TermsetAdd( Fts5Termset *p, const char *pTerm, int nTerm, int *pbPresent ){ int rc = SQLITE_OK; | > > > > | | | | > | | | | < | > > | > | | | | | | | | | > | | | > | 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 | /************************************************************************* */ typedef struct Fts5TermsetEntry Fts5TermsetEntry; struct Fts5TermsetEntry { char *pTerm; int nTerm; int iIdx; /* Index (main or aPrefix[] entry) */ Fts5TermsetEntry *pNext; }; struct Fts5Termset { Fts5TermsetEntry *apHash[512]; }; int sqlite3Fts5TermsetNew(Fts5Termset **pp){ int rc = SQLITE_OK; *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset)); return rc; } int sqlite3Fts5TermsetAdd( Fts5Termset *p, int iIdx, const char *pTerm, int nTerm, int *pbPresent ){ int rc = SQLITE_OK; *pbPresent = 0; if( p ){ int i; int hash; Fts5TermsetEntry *pEntry; /* Calculate a hash value for this term */ hash = 104 + iIdx; for(i=0; i<nTerm; i++){ hash += (hash << 3) + (int)pTerm[i]; } hash = hash % ArraySize(p->apHash); for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ if( pEntry->iIdx==iIdx && pEntry->nTerm==nTerm && memcmp(pEntry->pTerm, pTerm, nTerm)==0 ){ *pbPresent = 1; break; } } if( pEntry==0 ){ pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); if( pEntry ){ pEntry->pTerm = (char*)&pEntry[1]; pEntry->nTerm = nTerm; pEntry->iIdx = iIdx; memcpy(pEntry->pTerm, pTerm, nTerm); pEntry->pNext = p->apHash[hash]; p->apHash[hash] = pEntry; } } } return rc; } void sqlite3Fts5TermsetFree(Fts5Termset *p){ |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
4369 4370 4371 4372 4373 4374 4375 | }else{ iNew = iPos2; sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2); if( iPos1==iPos2 ){ sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1,&iPos1); } } | > | > | 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 | }else{ iNew = iPos2; sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2); if( iPos1==iPos2 ){ sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1,&iPos1); } } if( iNew!=writer.iPrev || tmp.n==0 ){ p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); } } /* WRITEPOSLISTSIZE */ fts5BufferSafeAppendVarint(&out, tmp.n * 2); fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n); fts5DoclistIterNext(&i1); fts5DoclistIterNext(&i2); |
︙ | ︙ | |||
4604 4605 4606 4607 4608 4609 4610 | } /* ** Argument p points to a buffer containing utf-8 text that is n bytes in ** size. Return the number of bytes in the nChar character prefix of the ** buffer, or 0 if there are less than nChar characters in total. */ | | > > > > | 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 | } /* ** Argument p points to a buffer containing utf-8 text that is n bytes in ** size. Return the number of bytes in the nChar character prefix of the ** buffer, or 0 if there are less than nChar characters in total. */ static int sqlite3Fts5IndexCharlenToBytelen( const char *p, int nByte, int nChar ){ int n = 0; int i; for(i=0; i<nChar; i++){ if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */ if( (unsigned char)p[n++]>=0xc0 ){ while( (p[n] & 0xc0)==0x80 ) n++; } |
︙ | ︙ | |||
4661 4662 4663 4664 4665 4666 4667 | /* Add the entry to the main terms index. */ rc = sqlite3Fts5HashWrite( p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken ); for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){ | > | | 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 | /* Add the entry to the main terms index. */ rc = sqlite3Fts5HashWrite( p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken ); for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){ const int nChar = pConfig->aPrefix[i]; int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); if( nByte ){ rc = sqlite3Fts5HashWrite(p->pHash, p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken, nByte ); } } |
︙ | ︙ | |||
4979 4980 4981 4982 4983 4984 4985 | ** Below this point is the implementation of the integrity-check ** functionality. */ /* ** Return a simple checksum value based on the arguments. */ | | | 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 | ** Below this point is the implementation of the integrity-check ** functionality. */ /* ** Return a simple checksum value based on the arguments. */ u64 sqlite3Fts5IndexEntryCksum( i64 iRowid, int iCol, int iPos, int iIdx, const char *pTerm, int nTerm ){ |
︙ | ︙ | |||
5067 5068 5069 5070 5071 5072 5073 | Fts5PoslistReader sReader; for(sqlite3Fts5PoslistReaderInit(pPos, nPos, &sReader); sReader.bEof==0; sqlite3Fts5PoslistReaderNext(&sReader) ){ int iCol = FTS5_POS2COLUMN(sReader.iPos); int iOff = FTS5_POS2OFFSET(sReader.iPos); | | | 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 | Fts5PoslistReader sReader; for(sqlite3Fts5PoslistReaderInit(pPos, nPos, &sReader); sReader.bEof==0; sqlite3Fts5PoslistReaderNext(&sReader) ){ int iCol = FTS5_POS2COLUMN(sReader.iPos); int iOff = FTS5_POS2OFFSET(sReader.iPos); cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); } rc = sqlite3Fts5IterNext(pIdxIter); } } sqlite3Fts5IterClose(pIdxIter); *pCksum = cksum; |
︙ | ︙ | |||
5366 5367 5368 5369 5370 5371 5372 | #endif } /* ** Run internal checks to ensure that the FTS index (a) is internally ** consistent and (b) contains entries for which the XOR of the checksums | | | 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 | #endif } /* ** Run internal checks to ensure that the FTS index (a) is internally ** consistent and (b) contains entries for which the XOR of the checksums ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum. ** ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the ** checksum does not match. Return SQLITE_OK if all checks pass without ** error, or some other SQLite error code if another error (e.g. OOM) ** occurs. */ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ |
︙ | ︙ | |||
5430 5431 5432 5433 5434 5435 5436 | fts5TestTerm(p, &term, z, n, cksum2, &cksum3); poslist.n = 0; fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst] , 0, &poslist); while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ int iCol = FTS5_POS2COLUMN(iPos); int iTokOff = FTS5_POS2OFFSET(iPos); | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 | fts5TestTerm(p, &term, z, n, cksum2, &cksum3); poslist.n = 0; fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst] , 0, &poslist); while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ int iCol = FTS5_POS2COLUMN(iPos); int iTokOff = FTS5_POS2OFFSET(iPos); cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); } } fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); fts5MultiIterFree(p, pIter); if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; fts5StructureRelease(pStruct); #ifdef SQLITE_DEBUG fts5BufferFree(&term); #endif fts5BufferFree(&poslist); return fts5IndexReturn(p); } /************************************************************************* ************************************************************************** ** Below this point is the implementation of the fts5_decode() scalar ** function only. */ /* |
︙ | ︙ |
Changes to ext/fts5/fts5_storage.c.
︙ | ︙ | |||
824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 | i64 iRowid; int iCol; int szCol; u64 cksum; Fts5Termset *pTermset; Fts5Config *pConfig; }; /* ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( void *pContext, /* Pointer to Fts5IntegrityCtx object */ int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ int rc = SQLITE_OK; | > > > > > > > | > | | | | | | | | | > > > > > > | | | | > > > | 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 | i64 iRowid; int iCol; int szCol; u64 cksum; Fts5Termset *pTermset; Fts5Config *pConfig; }; /* ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( void *pContext, /* Pointer to Fts5IntegrityCtx object */ int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; Fts5Termset *pTermset = pCtx->pTermset; int bPresent; int ii; int rc = SQLITE_OK; int iPos; int iCol; if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } iPos = pTermset ? pCtx->iCol : pCtx->szCol-1; iCol = pTermset ? 0 : pCtx->iCol; rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent); if( rc==SQLITE_OK && bPresent==0 ){ pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( pCtx->iRowid, iCol, iPos, 0, pToken, nToken ); } for(ii=0; rc==SQLITE_OK && ii<pCtx->pConfig->nPrefix; ii++){ const int nChar = pCtx->pConfig->aPrefix[ii]; int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); if( nByte ){ rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent); if( bPresent==0 ){ pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte ); } } } return rc; } /* ** Check that the contents of the FTS index match that of the %_content ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return ** some other SQLite error code if an error occurs while attempting to |
︙ | ︙ |
Changes to ext/fts5/test/fts5ad.test.
︙ | ︙ | |||
69 70 71 72 73 74 75 76 77 78 79 80 81 82 | } 5 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); BEGIN; } } { do_test $T.1 { execsql { DROP TABLE IF EXISTS t1 } execsql $create } {} | > > > > > > > > > > > > > > > > | 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | } 5 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); BEGIN; } 6 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } 7 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0, prefix="1,2,3,4,5"); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } 8 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0, prefix="1,2,3,4,5"); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); BEGIN; } } { do_test $T.1 { execsql { DROP TABLE IF EXISTS t1 } execsql $create } {} |
︙ | ︙ |
Changes to ext/fts5/test/fts5auto.test.
︙ | ︙ | |||
365 366 367 368 369 370 371 | 4 {c1 : x} 5 {c2 : x} 6 {c3 : x} 7 {c1 : y} 8 {c2 : y} 9 {c3 : y} 10 {c1 : z} 11 {c2 : z} 12 {c3 : z} } { | < | 365 366 367 368 369 370 371 372 373 374 375 376 377 378 | 4 {c1 : x} 5 {c2 : x} 6 {c3 : x} 7 {c1 : y} 8 {c2 : y} 9 {c3 : y} 10 {c1 : z} 11 {c2 : z} 12 {c3 : z} } { do_auto_test 4.$tn yy {c1 c2 c3} $expr } finish_test |
Changes to ext/fts5/test/fts5offsets.test.
︙ | ︙ | |||
17 18 19 20 21 22 23 | # If SQLITE_ENABLE_FTS5 is not defined, omit this file. ifcapable !fts5 { finish_test return } | < | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | # If SQLITE_ENABLE_FTS5 is not defined, omit this file. ifcapable !fts5 { finish_test return } #-------------------------------------------------------------------------- # Simple tests. # do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, c, offsets=0); INSERT INTO t1 VALUES('h d g', 'j b b g b', 'i e i d h g g'); -- 1 INSERT INTO t1 VALUES('h j d', 'j h d a h', 'f d d g g f b'); -- 2 |
︙ | ︙ | |||
60 61 62 63 64 65 66 67 68 69 | do_catchsql_test 1.3.1 { SELECT rowid FROM t1('h + d'); } {1 {fts5: phrase queries are not supported (offsets=0)}} do_catchsql_test 1.3.2 { SELECT rowid FROM t1('NEAR(h d)'); } {1 {fts5: NEAR queries are not supported (offsets=0)}} finish_test | > > > > > > > > > > > > > > > | 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | do_catchsql_test 1.3.1 { SELECT rowid FROM t1('h + d'); } {1 {fts5: phrase queries are not supported (offsets=0)}} do_catchsql_test 1.3.2 { SELECT rowid FROM t1('NEAR(h d)'); } {1 {fts5: NEAR queries are not supported (offsets=0)}} #------------------------------------------------------------------------- # integrity-check with both offsets= and prefix= options. # do_execsql_test 2.0 { CREATE VIRTUAL TABLE t2 USING fts5(a, offsets=0, prefix="1"); INSERT INTO t2(a) VALUES('aa ab'); } #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r} breakpoint do_execsql_test 2.1 { INSERT INTO t2(t2) VALUES('integrity-check'); } finish_test |
Changes to ext/fts5/test/fts5simple.test.
︙ | ︙ | |||
14 15 16 17 18 19 20 | set testprefix fts5simple # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } | | | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | set testprefix fts5simple # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } #------------------------------------------------------------------------- # set doc "x x [string repeat {y } 50]z z" do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); BEGIN; |
︙ | ︙ | |||
346 347 348 349 350 351 352 353 354 355 | INSERT INTO x1 SELECT rnddoc(5) FROM ii; } do_execsql_test 4.1 { SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads' } {0 {} 4} finish_test | > > > > > > > > > > | 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 | INSERT INTO x1 SELECT rnddoc(5) FROM ii; } do_execsql_test 4.1 { SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads' } {0 {} 4} #------------------------------------------------------------------------- reset_db do_execsql_test 15.0 { CREATE VIRTUAL TABLE x2 USING fts5(x, prefix=1); INSERT INTO x2 VALUES('ab'); } do_execsql_test 15.1 { INSERT INTO x2(x2) VALUES('integrity-check'); } finish_test |