Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Begin changes to allow synonym support by adding multiple terms to a query (an alternative to adding multiple terms to the FTS index). |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5-incompatible |
Files: | files | file ages | folders |
SHA1: |
ad7feaed4cd6b1d6e6376bb82d1f5664 |
User & Date: | dan 2015-08-31 20:06:06.235 |
Context
2015-09-01
| ||
18:08 | Add tests for fts5 synonyms implemented by adding extra terms to queries. And fixes for the same. (check-in: dbcb73802b user: dan tags: fts5-incompatible) | |
2015-08-31
| ||
20:06 | Begin changes to allow synonym support by adding multiple terms to a query (an alternative to adding multiple terms to the FTS index). (check-in: ad7feaed4c user: dan tags: fts5-incompatible) | |
2015-08-29
| ||
18:46 | Add a test for an fts5 tokenizer that supports synonyms by adding multiple entries to the fts index. (check-in: 98d07d16ca user: dan tags: fts5-incompatible) | |
Changes
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
232 233 234 235 236 237 238 | /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */ int iCol; /* If (iCol>=0), this column only */ const u8 *a; /* Position list to iterate through */ int n; /* Size of buffer at a[] in bytes */ int i; /* Current offset in a[] */ /* Output variables */ | | | 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 | /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */ int iCol; /* If (iCol>=0), this column only */ const u8 *a; /* Position list to iterate through */ int n; /* Size of buffer at a[] in bytes */ int i; /* Current offset in a[] */ /* Output variables */ u8 bEof; /* Set to true at EOF */ i64 iPos; /* (iCol<<32) + iPos */ }; int sqlite3Fts5PoslistReaderInit( int iCol, /* If (iCol>=0), this column only */ const u8 *a, int n, /* Poslist buffer to iterate through */ Fts5PoslistReader *pIter /* Iterator object to initialize */ ); |
︙ | ︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
69 70 71 72 73 74 75 76 77 78 79 80 81 82 | ** An instance of the following structure represents a single search term ** or term prefix. */ struct Fts5ExprTerm { int bPrefix; /* True for a prefix term */ char *zTerm; /* nul-terminated term */ Fts5IndexIter *pIter; /* Iterator for this term */ }; /* ** A phrase. One or more terms that must appear in a contiguous sequence ** within a document for it to match. */ struct Fts5ExprPhrase { | > | 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | ** An instance of the following structure represents a single search term ** or term prefix. */ struct Fts5ExprTerm { int bPrefix; /* True for a prefix term */ char *zTerm; /* nul-terminated term */ Fts5IndexIter *pIter; /* Iterator for this term */ Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ }; /* ** A phrase. One or more terms that must appear in a contiguous sequence ** within a document for it to match. */ struct Fts5ExprPhrase { |
︙ | ︙ | |||
177 178 179 180 181 182 183 184 185 186 187 188 189 190 | } pToken->n = (z2 - z); break; } default: { const char *z2; tok = FTS5_STRING; for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); pToken->n = (z2 - z); if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND; break; | > > > > | 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | } pToken->n = (z2 - z); break; } default: { const char *z2; if( sqlite3Fts5IsBareword(z[0])==0 ){ sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z); return FTS5_EOF; } tok = FTS5_STRING; for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); pToken->n = (z2 - z); if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND; break; |
︙ | ︙ | |||
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 | static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){ int i; for(i=0; i<pColset->nCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } /* ** All individual term iterators in pPhrase are guaranteed to be valid and ** pointing to the same rowid when this function is called. This function ** checks if the current rowid really is a match, and if so populates ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch ** is set to true if this is really a match, or false otherwise. ** ** SQLITE_OK is returned if an error occurs, or an SQLite error code ** otherwise. It is not considered an error code if the current rowid is ** not a match. */ static int fts5ExprPhraseIsMatch( | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 | static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){ int i; for(i=0; i<pColset->nCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } /* ** Argument pTerm must be a synonym iterator. Return the current rowid ** that it points to. */ static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc){ i64 iRet; int bRetValid = 0; Fts5ExprTerm *p; assert( pTerm->pSynonym ); assert( bDesc==0 || bDesc==1 ); for(p=pTerm; p; p=p->pSynonym){ if( 0==sqlite3Fts5IterEof(p->pIter) ){ i64 iRowid = sqlite3Fts5IterRowid(p->pIter); if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){ iRet = iRowid; bRetValid = 1; } } } assert( bRetValid ); return iRet; } /* ** Argument pTerm must be a synonym iterator. */ static int fts5ExprSynonymPoslist( Fts5ExprTerm *pTerm, i64 iRowid, u8 **pa, int *pn ){ Fts5PoslistWriter writer = {0}; Fts5PoslistReader aStatic[4]; Fts5PoslistReader *aIter = aStatic; int nIter = 0; Fts5ExprTerm *p; assert( pTerm->pSynonym ); for(p=pTerm; p; p=p->pSynonym){ Fts5IndexIter *pIter = p->pIter; if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){ i64 dummy; int rc = sqlite3Fts5IterPoslist(pIter, (const u8**)pa, pn, &dummy); return rc; } } assert( 0 ); return SQLITE_ERROR; } /* ** All individual term iterators in pPhrase are guaranteed to be valid and ** pointing to the same rowid when this function is called. This function ** checks if the current rowid really is a match, and if so populates ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch ** is set to true if this is really a match, or false otherwise. ** ** SQLITE_OK is returned if an error occurs, or an SQLite error code ** otherwise. It is not considered an error code if the current rowid is ** not a match. */ static int fts5ExprPhraseIsMatch( Fts5ExprNode *pNode, /* Node pPhrase belongs to */ Fts5ExprColset *pColset, /* Restrict matches to these columns */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */ ){ Fts5PoslistWriter writer = {0}; Fts5PoslistReader aStatic[4]; Fts5PoslistReader *aIter = aStatic; |
︙ | ︙ | |||
387 388 389 390 391 392 393 394 395 396 | int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( !aIter ) return SQLITE_NOMEM; } /* Initialize a term iterator for each term in the phrase */ for(i=0; i<pPhrase->nTerm; i++){ i64 dummy; int n; const u8 *a; | > > > > | > | 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 | int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( !aIter ) return SQLITE_NOMEM; } /* Initialize a term iterator for each term in the phrase */ for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; i64 dummy; int n; const u8 *a; if( pTerm->pSynonym ){ rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, (u8**)&a, &n); }else{ rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy); } if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){ goto ismatch_out; } } while( 1 ){ int bMatch; |
︙ | ︙ | |||
594 595 596 597 598 599 600 | */ static int fts5ExprNearAdvanceFirst( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ int bFromValid, i64 iFrom ){ | | > > > > > > > > > > > > > > > > > > > > > > > > | | | | | | | > > | 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 | */ static int fts5ExprNearAdvanceFirst( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ int bFromValid, i64 iFrom ){ Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; int rc; if( pTerm->pSynonym ){ int bEof = 1; Fts5ExprTerm *p; /* Find the firstest rowid any synonym points to. */ i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc); /* Advance each iterator that currently points to iRowid */ for(p=pTerm; p; p=p->pSynonym){ if( sqlite3Fts5IterEof(p->pIter)==0 ){ bEof = 0; if( sqlite3Fts5IterRowid(p->pIter)==iRowid ){ rc = sqlite3Fts5IterNext(p->pIter); if( rc!=SQLITE_OK ) break; } } } /* Set the EOF flag if either all synonym iterators are at EOF or an ** error has occurred. */ pNode->bEof = (rc || bEof); }else{ Fts5IndexIter *pIter = pTerm->pIter; assert( Fts5NodeIsString(pNode) ); if( bFromValid ){ rc = sqlite3Fts5IterNextFrom(pIter, iFrom); }else{ rc = sqlite3Fts5IterNext(pIter); } pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); } return rc; } /* ** Advance iterator pIter until it points to a value equal to or laster ** than the initial value of *piLast. If this means the iterator points ** to a value laster than *piLast, update *piLast to the new lastest value. |
︙ | ︙ | |||
715 716 717 718 719 720 721 | /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( pPhrase->nTerm>1 || pNear->pColset ){ int bMatch = 0; | | | 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 | /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( pPhrase->nTerm>1 || pNear->pColset ){ int bMatch = 0; rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ rc = sqlite3Fts5IterPoslistBuffer( pPhrase->aTerm[0].pIter, &pPhrase->poslist ); } } |
︙ | ︙ | |||
751 752 753 754 755 756 757 758 759 760 761 762 763 764 | Fts5ExprColset *pColset = pNear->pColset; const u8 *pPos; int nPos; int rc; assert( pNode->eType==FTS5_TERM ); assert( pNear->nPhrase==1 && pPhrase->nTerm==1 ); rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); /* If the term may match any column, then this must be a match. ** Return immediately in this case. Otherwise, try to find the ** part of the poslist that corresponds to the required column. ** If it can be found, return. If it cannot, the next iteration | > | 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 | Fts5ExprColset *pColset = pNear->pColset; const u8 *pPos; int nPos; int rc; assert( pNode->eType==FTS5_TERM ); assert( pNear->nPhrase==1 && pPhrase->nTerm==1 ); assert( pPhrase->aTerm[0].pSynonym==0 ); rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); /* If the term may match any column, then this must be a match. ** Return immediately in this case. Otherwise, try to find the ** part of the poslist that corresponds to the required column. ** If it can be found, return. If it cannot, the next iteration |
︙ | ︙ | |||
797 798 799 800 801 802 803 804 | ){ Fts5ExprNearset *pNear = pNode->pNear; Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; int rc = SQLITE_OK; i64 iLast; /* Lastest rowid any iterator points to */ int i, j; /* Phrase and token index, respectively */ int bMatch; /* True if all terms are at the same rowid */ | > > | > > > > > > | > > > > > > > > > > > > > > > > > > > > > | | > | | | > < > | | < < | | > > > > | | | | | | | | | | | | > > > > > | | 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 | ){ Fts5ExprNearset *pNear = pNode->pNear; Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; int rc = SQLITE_OK; i64 iLast; /* Lastest rowid any iterator points to */ int i, j; /* Phrase and token index, respectively */ int bMatch; /* True if all terms are at the same rowid */ const int bDesc = pExpr->bDesc; /* Check that this node should not be FTS5_TERM */ assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 || pNear->apPhrase[0]->aTerm[0].pSynonym ); /* Initialize iLast, the "lastest" rowid any iterator points to. If the ** iterator skips through rowids in the default ascending order, this means ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it ** means the minimum rowid. */ if( pLeft->aTerm[0].pSynonym ){ iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc); }else{ iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter); } do { bMatch = 1; for(i=0; i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; for(j=0; j<pPhrase->nTerm; j++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; if( pTerm->pSynonym ){ Fts5ExprTerm *p; int bEof = 1; i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc); if( iRowid==iLast ) continue; for(p=pTerm; p; p=p->pSynonym){ Fts5IndexIter *pIter = p->pIter; int dummy; if( 0==sqlite3Fts5IterEof(pIter) && 0==fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &dummy)==0 ){ bEof = 0; } } if( bEof || rc ){ pNode->bEof = 1; return rc; } }else{ Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; i64 iRowid = sqlite3Fts5IterRowid(pIter); if( iRowid==iLast ) continue; bMatch = 0; if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ return rc; } } } } }while( bMatch==0 ); pNode->iRowid = iLast; pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode)); return rc; } /* ** Initialize all term iterators in the pNear object. If any term is found ** to match no documents at all, return immediately without initializing any ** further iterators. */ static int fts5ExprNearInitAll( Fts5Expr *pExpr, Fts5ExprNode *pNode ){ Fts5ExprNearset *pNear = pNode->pNear; int i, j; int rc = SQLITE_OK; for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; for(j=0; j<pPhrase->nTerm; j++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; Fts5ExprTerm *p; int bEof = 1; for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){ if( p->pIter ){ sqlite3Fts5IterClose(p->pIter); p->pIter = 0; } rc = sqlite3Fts5IndexQuery( pExpr->pIndex, p->zTerm, strlen(p->zTerm), (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), &p->pIter ); assert( rc==SQLITE_OK || p->pIter==0 ); if( p->pIter && 0==sqlite3Fts5IterEof(pTerm->pIter) ){ bEof = 0; } } if( bEof ){ pNode->bEof = 1; return rc; } } } return rc; } |
︙ | ︙ | |||
1262 1263 1264 1265 1266 1267 1268 1269 1270 | /* ** Free the phrase object passed as the only argument. */ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ if( pPhrase ){ int i; for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); | > > | > > > | > | 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 | /* ** Free the phrase object passed as the only argument. */ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ if( pPhrase ){ int i; for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pSyn; Fts5ExprTerm *pNext; Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); sqlite3Fts5IterClose(pTerm->pIter); for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ pNext = pSyn->pSynonym; sqlite3Fts5IterClose(pSyn->pIter); sqlite3_free(pSyn); } } if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); } } |
︙ | ︙ | |||
1344 1345 1346 1347 1348 1349 1350 | int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ int rc = SQLITE_OK; const int SZALLOC = 8; TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; | < > | | > > > > > > > > > > | | | | | | | | | | | | | | | | 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 | int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ int rc = SQLITE_OK; const int SZALLOC = 8; TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; assert( pPhrase==0 || pPhrase->nTerm>0 ); if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){ Fts5ExprTerm *pSyn; int nByte = sizeof(Fts5ExprTerm) + nToken+1; pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); if( pSyn==0 ) return SQLITE_NOMEM; memset(pSyn, 0, nByte); pSyn->zTerm = (char*)&pSyn[1]; memcpy(pSyn->zTerm, pToken, nToken); pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; }else{ Fts5ExprTerm *pTerm; if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ Fts5ExprPhrase *pNew; int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew ); if( pNew==0 ) return SQLITE_NOMEM; if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); pCtx->pPhrase = pPhrase = pNew; pNew->nTerm = nNew - SZALLOC; } pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); } return rc; } /* ** Free the phrase object passed as the only argument. */ |
︙ | ︙ | |||
1632 1633 1634 1635 1636 1637 1638 | pRet->eType = eType; pRet->pNear = pNear; if( eType==FTS5_STRING ){ int iPhrase; for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; } | | > > > > > > > > > | > > > > | | | | | | | > > | 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 | pRet->eType = eType; pRet->pNear = pNear; if( eType==FTS5_STRING ){ int iPhrase; for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; } if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 && pNear->apPhrase[0]->aTerm[0].pSynonym==0 ){ pRet->eType = FTS5_TERM; } }else{ fts5ExprAddChildren(pRet, pLeft); fts5ExprAddChildren(pRet, pRight); } } } if( pRet==0 ){ assert( pParse->rc!=SQLITE_OK ); sqlite3Fts5ParseNodeFree(pLeft); sqlite3Fts5ParseNodeFree(pRight); sqlite3Fts5ParseNearsetFree(pNear); } return pRet; } static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ int nByte = 0; Fts5ExprTerm *p; char *zQuoted; /* Determine the maximum amount of space required. */ for(p=pTerm; p; p=p->pSynonym){ nByte += strlen(pTerm->zTerm) * 2 + 3 + 2; } zQuoted = sqlite3_malloc(nByte); if( zQuoted ){ int i = 0; for(p=pTerm; p; p=p->pSynonym){ char *zIn = p->zTerm; zQuoted[i++] = '"'; while( *zIn ){ if( *zIn=='"' ) zQuoted[i++] = '"'; zQuoted[i++] = *zIn++; } zQuoted[i++] = '"'; if( p->pSynonym ) zQuoted[i++] = '|'; } if( pTerm->bPrefix ){ zQuoted[i++] = ' '; zQuoted[i++] = '*'; } zQuoted[i++] = '\0'; } return zQuoted; |
︙ | ︙ |
Changes to ext/fts5/test/fts5aa.test.
︙ | ︙ | |||
339 340 341 342 343 344 345 | } {} do_execsql_test 13.5 { SELECT rowid FROM t1 WHERE t1 MATCH 'o'; } {1} do_execsql_test 13.6 { | | | 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 | } {} do_execsql_test 13.5 { SELECT rowid FROM t1 WHERE t1 MATCH 'o'; } {1} do_execsql_test 13.6 { SELECT rowid FROM t1 WHERE t1 MATCH '""'; } {} #------------------------------------------------------------------------- # reset_db do_execsql_test 14.1 { CREATE VIRTUAL TABLE t1 USING fts5(x, y); |
︙ | ︙ |
Changes to ext/fts5/test/fts5ea.test.
︙ | ︙ | |||
83 84 85 86 87 88 89 90 91 92 93 | #------------------------------------------------------------------------- # Experiment with a tokenizer that considers " to be a token character. # do_execsql_test 4.0 { SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"'); } {{"a" AND """"}} finish_test | > > > > > > | 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 | #------------------------------------------------------------------------- # Experiment with a tokenizer that considers " to be a token character. # do_execsql_test 4.0 { SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"'); } {{"a" AND """"}} #------------------------------------------------------------------------- # Experiment with a tokenizer that considers " to be a token character. # do_catchsql_test 5.0 { SELECT fts5_expr('abc | def'); } {1 {fts5: syntax error near "|"}} finish_test |
Changes to ext/fts5/test/fts5eb.test.
︙ | ︙ | |||
26 27 28 29 30 31 32 | proc do_syntax_test {tn expr res} { set ::se_expr $expr do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res] } foreach {tn expr res} { | | | | | | | | | | | | | | 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | proc do_syntax_test {tn expr res} { set ::se_expr $expr do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res] } foreach {tn expr res} { 1 {abc} {"abc"} 2 {abc ""} {"abc"} 3 {""} {} 4 {abc OR ""} {"abc"} 5 {abc NOT ""} {"abc"} 6 {abc AND ""} {"abc"} 7 {"" OR abc} {"abc"} 8 {"" NOT abc} {"abc"} 9 {"" AND abc} {"abc"} 10 {abc + "" + def} {"abc" + "def"} 11 {abc "" def} {"abc" AND "def"} 12 {r+e OR w} {"r" + "e" OR "w"} } { do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] } do_catchsql_test 2.1 { SELECT fts5_expr() } {1 {wrong number of arguments to function fts5_expr}} |
︙ | ︙ |
Changes to ext/fts5/test/fts5synonym.test.
︙ | ︙ | |||
17 18 19 20 21 22 23 24 25 26 27 28 29 30 | # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } proc gobble_whitespace {textvar} { upvar $textvar t regexp {([ ]*)(.*)} $t -> space t return [string length $space] } | > > > > > > > > > > > > > > > > > > | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } foreach S { {zero 0} {one 1} {two 2} {three 3 iii} {four 4} {five 5} {six 6} {seven 7} {eight 8} {nine 9} } { foreach s $S { set o [list] foreach x $S {if {$x!=$s} {lappend o $x}} set ::syn($s) $o } } proc gobble_whitespace {textvar} { upvar $textvar t regexp {([ ]*)(.*)} $t -> space t return [string length $space] } |
︙ | ︙ | |||
71 72 73 74 75 76 77 | SELECT x, rowid FROM ft WHERE ft MATCH 'pqr'; } {1 {abc def ghi} {jkl mno pqr} 2} #------------------------------------------------------------------------- # Test a tokenizer that supports synonyms by adding extra entries to the # FTS index. # | < < < < < < < < < < < < < < < < < < | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | SELECT x, rowid FROM ft WHERE ft MATCH 'pqr'; } {1 {abc def ghi} {jkl mno pqr} 2} #------------------------------------------------------------------------- # Test a tokenizer that supports synonyms by adding extra entries to the # FTS index. # proc tcl_tokenize {tflags text} { foreach {w iStart iEnd} [do_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd if {$tflags=="document" && [info exists ::syn($w)]} { foreach s $::syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd |
︙ | ︙ | |||
189 190 191 192 193 194 195 196 197 198 | SELECT rowid FROM ft WHERE ft MATCH 'one two three'; SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three'; } {1 1} do_execsql_test 3.2.2 { SELECT rowid FROM ft WHERE ft MATCH 'one two two three'; SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three'; } {1} finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > | 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 | SELECT rowid FROM ft WHERE ft MATCH 'one two three'; SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three'; } {1 1} do_execsql_test 3.2.2 { SELECT rowid FROM ft WHERE ft MATCH 'one two two three'; SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three'; } {1} #------------------------------------------------------------------------- # Check that expressions with synonyms can be parsed. # reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create proc tcl_tokenize {tflags text} { foreach {w iStart iEnd} [do_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd if {$tflags=="query" && [info exists ::syn($w)]} { foreach s $::syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd } } } } foreach {tn expr res} { 1 {abc} {"abc"} 2 {one} {"one"|"1"} 3 {3} {"3"|"iii"|"three"} 4 {3*} {"3"|"iii"|"three" *} } { do_execsql_test 4.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res] } finish_test |