Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Have NEAR queries use incremental merging. Fix issues surrounding the deferred token optimization. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts3-prefix-search |
Files: | files | file ages | folders |
SHA1: |
9d10a6846b12a9cc8fd4fdc3affd931a |
User & Date: | dan 2011-06-07 18:35:45.780 |
Context
2011-06-08
| ||
18:39 | Fix various issues to do with deferred tokens, NEAR expressions and matchinfo(). (check-in: 3972a787df user: dan tags: fts3-prefix-search) | |
2011-06-07
| ||
18:35 | Have NEAR queries use incremental merging. Fix issues surrounding the deferred token optimization. (check-in: 9d10a6846b user: dan tags: fts3-prefix-search) | |
2011-06-06
| ||
18:14 | Merge the latest trunk changes into the fts3-prefix-search branch. (check-in: 567dd84359 user: drh tags: fts3-prefix-search) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
1065 1066 1067 1068 1069 1070 1071 | } memset(p, 0, nByte); p->db = db; p->nColumn = nCol; p->nPendingData = 0; p->azColumn = (char **)&p[1]; p->pTokenizer = pTokenizer; | < | 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 | } memset(p, 0, nByte); p->db = db; p->nColumn = nCol; p->nPendingData = 0; p->azColumn = (char **)&p[1]; p->pTokenizer = pTokenizer; p->nMaxPendingData = FTS3_MAX_PENDING_DATA; p->bHasDocsize = (isFts4 && bNoDocsize==0); p->bHasStat = isFts4; p->bDescIdx = bDescIdx; TESTONLY( p->inTransaction = -1 ); TESTONLY( p->mxSavepoint = -1 ); |
︙ | ︙ | |||
1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 | } /* Figure out the page-size for the database. This is required in order to ** estimate the cost of loading large doclists from the database (see ** function sqlite3Fts3SegReaderCost() for details). */ fts3DatabasePageSize(&rc, p); /* Declare the table schema to SQLite. */ fts3DeclareVtab(&rc, p); fts3_init_out: sqlite3_free(zPrefix); sqlite3_free(aFree); | > | 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 | } /* Figure out the page-size for the database. This is required in order to ** estimate the cost of loading large doclists from the database (see ** function sqlite3Fts3SegReaderCost() for details). */ fts3DatabasePageSize(&rc, p); p->nNodeSize = p->nPgsz-35; /* Declare the table schema to SQLite. */ fts3DeclareVtab(&rc, p); fts3_init_out: sqlite3_free(zPrefix); sqlite3_free(aFree); |
︙ | ︙ | |||
1858 1859 1860 1861 1862 1863 1864 | } *p++ = 0x00; *pp = p; return 1; } /* | | > > > > > > > > > > > > < < < < < < | | | | | | | | | | | | | | | | | | | < | 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 | } *p++ = 0x00; *pp = p; return 1; } /* ** Merge two position-lists as required by the NEAR operator. The argument ** position lists correspond to the left and right phrases of an expression ** like: ** ** "phrase 1" NEAR "phrase number 2" ** ** Position list *pp1 corresponds to the left-hand side of the NEAR ** expression and *pp2 to the right. As usual, the indexes in the position ** lists are the offsets of the last token in each phrase (tokens "1" and "2" ** in the example above). ** ** The output position list - written to *pp - is a copy of *pp2 with those ** entries that are not sufficiently NEAR entries in *pp1 removed. */ static int fts3PoslistNearMerge( char **pp, /* Output buffer */ char *aTmp, /* Temporary buffer space */ int nRight, /* Maximum difference in token positions */ int nLeft, /* Maximum difference in token positions */ char **pp1, /* IN/OUT: Left input list */ char **pp2 /* IN/OUT: Right input list */ ){ char *p1 = *pp1; char *p2 = *pp2; char *pTmp1 = aTmp; char *pTmp2; char *aTmp2; int res = 1; fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2); aTmp2 = pTmp2 = pTmp1; *pp1 = p1; *pp2 = p2; fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1); if( pTmp1!=aTmp && pTmp2!=aTmp2 ){ fts3PoslistMerge(pp, &aTmp, &aTmp2); }else if( pTmp1!=aTmp ){ fts3PoslistCopy(pp, &aTmp); }else if( pTmp2!=aTmp2 ){ fts3PoslistCopy(pp, &aTmp2); }else{ res = 0; } return res; } /* ** A pointer to an instance of this structure is used as the context ** argument to sqlite3Fts3SegReaderIterate() */ typedef struct TermSelect TermSelect; |
︙ | ︙ | |||
3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 | ** and merged incrementally. Otherwise, it has to be merged into an in-memory ** doclist and then traversed. */ static void fts3EvalAllocateReaders( Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pnToken, /* OUT: Total number of tokens in phrase. */ int *pRc ){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_PHRASE ){ int i; int nToken = pExpr->pPhrase->nToken; *pnToken += nToken; for(i=0; i<nToken; i++){ Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; int rc = sqlite3Fts3TermSegReaderCursor(pCsr, pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr ); if( rc!=SQLITE_OK ){ *pRc = rc; return; } } }else{ | > > | | | 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 | ** and merged incrementally. Otherwise, it has to be merged into an in-memory ** doclist and then traversed. */ static void fts3EvalAllocateReaders( Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pnToken, /* OUT: Total number of tokens in phrase. */ int *pnOr, /* OUT: Total number of OR nodes in expr. */ int *pRc ){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_PHRASE ){ int i; int nToken = pExpr->pPhrase->nToken; *pnToken += nToken; for(i=0; i<nToken; i++){ Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; int rc = sqlite3Fts3TermSegReaderCursor(pCsr, pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr ); if( rc!=SQLITE_OK ){ *pRc = rc; return; } } }else{ *pnOr += (pExpr->eType==FTSQUERY_OR); fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc); fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); } } } static int fts3EvalPhraseLoad( Fts3Cursor *pCsr, Fts3Phrase *p |
︙ | ︙ | |||
3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 | ){ int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; if( p->bIncr ){ assert( p->nToken==1 ); rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, &pDL->iDocid, &pDL->pList, &pDL->nList ); if( rc==SQLITE_OK && !pDL->pList ){ *pbEof = 1; } }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ | > | 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 | ){ int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; if( p->bIncr ){ assert( p->nToken==1 ); assert( pDL->pNextDocid==0 ); rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, &pDL->iDocid, &pDL->pList, &pDL->nList ); if( rc==SQLITE_OK && !pDL->pList ){ *pbEof = 1; } }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ |
︙ | ︙ | |||
3598 3599 3600 3601 3602 3603 3604 | int nToken = pExpr->pPhrase->nToken; for(i=0; i<nToken; i++){ if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break; } pExpr->bDeferred = (i==nToken); *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase); }else{ | < < < | 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 | int nToken = pExpr->pPhrase->nToken; for(i=0; i<nToken; i++){ if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break; } pExpr->bDeferred = (i==nToken); *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase); }else{ fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } } |
︙ | ︙ | |||
3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 | } } } typedef struct Fts3TokenAndCost Fts3TokenAndCost; struct Fts3TokenAndCost { Fts3PhraseToken *pToken; int nOvfl; int iCol; }; static void fts3EvalTokenCosts( Fts3Cursor *pCsr, Fts3Expr *pExpr, Fts3TokenAndCost **ppTC, int *pRc ){ if( *pRc==SQLITE_OK && pExpr ){ if( pExpr->eType==FTSQUERY_PHRASE ){ Fts3Phrase *pPhrase = pExpr->pPhrase; int i; for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){ Fts3TokenAndCost *pTC = (*ppTC)++; pTC->pToken = &pPhrase->aToken[i]; pTC->iCol = pPhrase->iColumn; *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); } | > > > > | > > > > > | | | 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 | } } } typedef struct Fts3TokenAndCost Fts3TokenAndCost; struct Fts3TokenAndCost { Fts3PhraseToken *pToken; Fts3Expr *pRoot; int nOvfl; int iCol; }; static void fts3EvalTokenCosts( Fts3Cursor *pCsr, Fts3Expr *pRoot, Fts3Expr *pExpr, Fts3TokenAndCost **ppTC, Fts3Expr ***ppOr, int *pRc ){ if( *pRc==SQLITE_OK && pExpr ){ if( pExpr->eType==FTSQUERY_PHRASE ){ Fts3Phrase *pPhrase = pExpr->pPhrase; int i; for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){ Fts3TokenAndCost *pTC = (*ppTC)++; pTC->pRoot = pRoot; pTC->pToken = &pPhrase->aToken[i]; pTC->iCol = pPhrase->iColumn; *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); } }else if( pExpr->eType!=FTSQUERY_NOT ){ if( pExpr->eType==FTSQUERY_OR ){ pRoot = pExpr; **ppOr = pExpr; (*ppOr)++; } fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc); fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc); } } } static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ if( pCsr->nRowAvg==0 ){ /* The average document size, which is required to calculate the cost |
︙ | ︙ | |||
3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 | rc = sqlite3_reset(pStmt); if( rc!=SQLITE_OK ) return rc; } *pnPage = pCsr->nRowAvg; return SQLITE_OK; } int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc = SQLITE_OK; int nToken = 0; /* Allocate a MultiSegReader for each token in the expression. */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | > | > > > > > < > < | > > > > > > > > | 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 | rc = sqlite3_reset(pStmt); if( rc!=SQLITE_OK ) return rc; } *pnPage = pCsr->nRowAvg; return SQLITE_OK; } static int fts3EvalSelectDeferred( Fts3Cursor *pCsr, Fts3Expr *pRoot, Fts3TokenAndCost *aTC, int nTC ){ int nDocSize = 0; int nDocEst = 0; int rc = SQLITE_OK; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int ii; int nOvfl = 0; int nTerm = 0; for(ii=0; ii<nTC; ii++){ if( aTC[ii].pRoot==pRoot ){ nOvfl += aTC[ii].nOvfl; nTerm++; } } if( nOvfl==0 || nTerm<2 ) return SQLITE_OK; rc = fts3EvalAverageDocsize(pCsr, &nDocSize); for(ii=0; ii<nTerm && rc==SQLITE_OK; ii++){ int jj; Fts3TokenAndCost *pTC = 0; for(jj=0; jj<nTC; jj++){ if( aTC[jj].pToken && aTC[jj].pRoot==pRoot && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) ){ pTC = &aTC[jj]; } } assert( pTC ); /* At this point pTC points to the cheapest remaining token. */ if( ii==0 ){ if( pTC->nOvfl ){ nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10; }else{ /* TODO: Fix this so that the doclist need not be read twice. */ Fts3PhraseToken *pToken = pTC->pToken; int nList = 0; char *pList = 0; rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList); if( rc==SQLITE_OK ){ nDocEst = fts3DoclistCountDocids(1, pList, nList); } sqlite3_free(pList); if( rc==SQLITE_OK ){ rc = sqlite3Fts3TermSegReaderCursor(pCsr, pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr ); } } }else{ if( pTC->nOvfl>=(nDocEst*nDocSize) ){ Fts3PhraseToken *pToken = pTC->pToken; rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); fts3SegReaderCursorFree(pToken->pSegcsr); pToken->pSegcsr = 0; } nDocEst = 1 + (nDocEst/4); } pTC->pToken = 0; } return rc; } int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc = SQLITE_OK; int nToken = 0; int nOr = 0; /* Allocate a MultiSegReader for each token in the expression. */ fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &nOr, &rc); /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO: ** This call will eventually also be responsible for determining which ** tokens are 'deferred' until the document text is loaded into memory. ** ** Each token in each phrase is dealt with using one of the following ** three strategies: ** ** 1. Entire doclist loaded into memory as part of the ** fts3EvalStartReaders() call. ** ** 2. Doclist loaded into memory incrementally, as part of each ** sqlite3Fts3EvalNext() call. ** ** 3. Token doclist is never loaded. Instead, documents are loaded into ** memory and scanned for the token as part of the sqlite3Fts3EvalNext() ** call. This is known as a "deferred" token. */ /* If bOptOk is true, check if there are any tokens that should be deferred. */ if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){ Fts3TokenAndCost *aTC; Fts3Expr **apOr; aTC = (Fts3TokenAndCost *)sqlite3_malloc( sizeof(Fts3TokenAndCost) * nToken + sizeof(Fts3Expr *) * nOr ); apOr = (Fts3Expr **)&aTC[nToken]; if( !aTC ){ rc = SQLITE_NOMEM; }else{ int ii; int nDocSize; Fts3TokenAndCost *pTC = aTC; Fts3Expr **ppOr = apOr; fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc); nToken = pTC-aTC; nOr = ppOr-apOr; rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken); for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){ rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken); } #if 0 for(ii=0; rc==SQLITE_OK && ii<nToken; ii++){ int jj; pTC = 0; for(jj=0; jj<nToken; jj++){ if( aTC[jj].pToken && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) ){ pTC = &aTC[jj]; } } assert( pTC ); /* At this point pTC points to the cheapest remaining token. */ if( ii==0 ){ if( pTC->nOvfl ){ nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10; }else{ /* TODO: Fix this so that the doclist need not be read twice. */ |
︙ | ︙ | |||
3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 | fts3SegReaderCursorFree(pToken->pSegcsr); pToken->pSegcsr = 0; } nDocEst = 1 + (nDocEst/4); } pTC->pToken = 0; } sqlite3_free(aTC); } } fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc); | > > < < < < > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 | fts3SegReaderCursorFree(pToken->pSegcsr); pToken->pSegcsr = 0; } nDocEst = 1 + (nDocEst/4); } pTC->pToken = 0; } #endif sqlite3_free(aTC); } } fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc); return rc; } static void fts3EvalFreeDeferredDoclist(Fts3Phrase *pPhrase){ if( pPhrase->doclist.bFreeList ){ sqlite3_free(pPhrase->doclist.pList); pPhrase->doclist.pList = 0; pPhrase->doclist.nList = 0; pPhrase->doclist.bFreeList = 0; } } static int fts3EvalNearTrim2( int nNear, char *aTmp, /* Temporary space to use */ char **paPoslist, /* IN/OUT: Position list */ int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ ){ int nParam1 = nNear + pPhrase->nToken; int nParam2 = nNear + *pnToken; char *p2; char *pOut; int res; p2 = pOut = pPhrase->doclist.pList; res = fts3PoslistNearMerge( &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 ); pPhrase->doclist.nList = pOut - pPhrase->doclist.pList; *paPoslist = pPhrase->doclist.pList; *pnToken = pPhrase->nToken; return res; } static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ int res = 1; /* The following block runs if pExpr is the root of a NEAR query. ** For example, the query: ** ** "w" NEAR "x" NEAR "y" NEAR "z" ** ** which is represented in tree form as: ** ** | ** +--NEAR--+ <-- root of NEAR query ** | | ** +--NEAR--+ "z" ** | | ** +--NEAR--+ "y" ** | | ** "w" "x" ** ** The right-hand child of a NEAR node is always a phrase. The ** left-hand child may be either a phrase or a NEAR node. There are ** no exceptions to this. */ if( *pRc==SQLITE_OK && pExpr->eType==FTSQUERY_NEAR && pExpr->bEof==0 && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) ){ Fts3Expr *p; int nTmp = 0; /* Bytes of temp space */ char *aTmp; /* Temp space for PoslistNearMerge() */ /* Allocate temporary working space. */ for(p=pExpr; p->pLeft; p=p->pLeft){ nTmp += p->pRight->pPhrase->doclist.nList; } nTmp += p->pPhrase->doclist.nList; aTmp = sqlite3_malloc(nTmp*2); if( !aTmp ){ *pRc = SQLITE_NOMEM; res = 0; }else{ char *aPoslist = p->pPhrase->doclist.pList; int nToken = p->pPhrase->nToken; for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ Fts3Phrase *pPhrase = p->pRight->pPhrase; int nNear = p->nNear; res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase); } aPoslist = pExpr->pRight->pPhrase->doclist.pList; nToken = pExpr->pRight->pPhrase->nToken; for(p=pExpr->pLeft; p && res; p=p->pLeft){ int nNear = p->pParent->nNear; Fts3Phrase *pPhrase = ( p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase ); res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase); } } sqlite3_free(aTmp); } return res; } /* ** This macro is used by the fts3EvalNext() function. The two arguments are ** 64-bit docid values. If the current query is "ORDER BY docid ASC", then ** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then ** it returns (i2 - i1). This allows the same code to be used for merging ** doclists in ascending or descending order. |
︙ | ︙ | |||
3897 3898 3899 3900 3901 3902 3903 | pExpr->bStart = 1; switch( pExpr->eType ){ case FTSQUERY_NEAR: case FTSQUERY_AND: { Fts3Expr *pLeft = pExpr->pLeft; Fts3Expr *pRight = pExpr->pRight; | < | 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 | pExpr->bStart = 1; switch( pExpr->eType ){ case FTSQUERY_NEAR: case FTSQUERY_AND: { Fts3Expr *pLeft = pExpr->pLeft; Fts3Expr *pRight = pExpr->pRight; assert( !pLeft->bDeferred || !pRight->bDeferred ); if( pLeft->bDeferred ){ fts3EvalNext(pCsr, pRight, pRc); pExpr->iDocid = pRight->iDocid; pExpr->bEof = pRight->bEof; }else if( pRight->bDeferred ){ fts3EvalNext(pCsr, pLeft, pRc); |
︙ | ︙ | |||
3920 3921 3922 3923 3924 3925 3926 | if( iDiff==0 ) break; if( iDiff<0 ){ fts3EvalNext(pCsr, pLeft, pRc); }else{ fts3EvalNext(pCsr, pRight, pRc); } } | | | 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 | if( iDiff==0 ) break; if( iDiff<0 ){ fts3EvalNext(pCsr, pLeft, pRc); }else{ fts3EvalNext(pCsr, pRight, pRc); } } pExpr->iDocid = pLeft->iDocid; pExpr->bEof = (pLeft->bEof || pRight->bEof); } break; } case FTSQUERY_OR: { |
︙ | ︙ | |||
3983 3984 3985 3986 3987 3988 3989 | } default: { Fts3Phrase *pPhrase = pExpr->pPhrase; fts3EvalFreeDeferredDoclist(pPhrase); *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); pExpr->iDocid = pPhrase->doclist.iDocid; | < < < < < < | > > > > > > > > > > > > > > > > > > > > > > > > > | < | | < > > > | | | | | > > > | 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 | } default: { Fts3Phrase *pPhrase = pExpr->pPhrase; fts3EvalFreeDeferredDoclist(pPhrase); *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); pExpr->iDocid = pPhrase->doclist.iDocid; break; } } } } static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){ int bHit = 1; if( *pRc==SQLITE_OK ){ switch( pExpr->eType ){ case FTSQUERY_NEAR: case FTSQUERY_AND: bHit = ( fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) && fts3EvalNearTest(pExpr, pRc) ); /* If this is a NEAR node and the NEAR expression does not match ** any rows, zero the doclist for all phrases involved in the NEAR. ** This is because the snippet(), offsets() and matchinfo() functions ** are not supposed to recognize any instances of phrases that are ** part of unmatched NEAR queries. For example if this expression: ** ** ... MATCH 'a OR (b NEAR c)' ** ** is matched against a row containing: ** ** 'a b d e' ** ** then any snippet() should ony highlight the "a" term, not the "b" ** (as "b" is part of a non-matching NEAR clause). */ if( pExpr->eType==FTSQUERY_NEAR && bHit==0 ){ Fts3Expr *p; for(p=pExpr; p->pPhrase==0; p=p->pLeft){ p->pRight->pPhrase->doclist.pList = 0; } p->pPhrase->doclist.pList = 0; } break; case FTSQUERY_OR: { int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc); int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc); bHit = bHit1 || bHit2; break; } case FTSQUERY_NOT: bHit = ( fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) ); break; default: { if( pCsr->pDeferred ){ Fts3Phrase *pPhrase = pExpr->pPhrase; fts3EvalFreeDeferredDoclist(pPhrase); *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); bHit = (pPhrase->doclist.pList!=0); pExpr->iDocid = pCsr->iPrevId; }else{ bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId); } break; } } } return bHit; } |
︙ | ︙ | |||
4048 4049 4050 4051 4052 4053 4054 | ** ** Or, if no error occurs and it seems the current row does match the FTS ** query, return 0. */ static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){ int rc = *pRc; int bMiss = 0; | | > | | < | > | 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 | ** ** Or, if no error occurs and it seems the current row does match the FTS ** query, return 0. */ static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){ int rc = *pRc; int bMiss = 0; if( rc==SQLITE_OK ){ if( pCsr->pDeferred ){ rc = fts3CursorSeek(0, pCsr); if( rc==SQLITE_OK ){ rc = sqlite3Fts3CacheDeferredDoclists(pCsr); } } bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc)); sqlite3Fts3FreeDeferredDoclists(pCsr); *pRc = rc; } return (rc==SQLITE_OK && bMiss); } |
︙ | ︙ | |||
4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 | if( pExpr->bStart && !pExpr->bEof ){ pExpr->bStart = 0; while( rc==SQLITE_OK && (pExpr->bStart==0 || pExpr->iDocid!=iDocid) ){ fts3EvalNext(pCsr, pExpr, &rc); assert( !pExpr->bEof ); } } } *pnList = pPhrase->doclist.nAll; *ppList = pPhrase->doclist.aAll; return rc; } | > > > > > > > | 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 | if( pExpr->bStart && !pExpr->bEof ){ pExpr->bStart = 0; while( rc==SQLITE_OK && (pExpr->bStart==0 || pExpr->iDocid!=iDocid) ){ fts3EvalNext(pCsr, pExpr, &rc); assert( !pExpr->bEof ); } } } if( rc==SQLITE_OK && pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){ } *pnList = pPhrase->doclist.nAll; *ppList = pPhrase->doclist.aAll; return rc; } |
︙ | ︙ |
Changes to test/fts3defer2.test.
︙ | ︙ | |||
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | INSERT INTO t1(t1) VALUES('optimize'); } do_execsql_test 1.1.4 { SELECT count(*) FROM t1_segments WHERE length(block)>10000; UPDATE t1_segments SET block = zeroblob(length(block)) WHERE length(block)>10000; } {2} do_execsql_test 1.2.1 { SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } {{a b c d e f a x y}} do_execsql_test 1.2.2 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } [list \ {a b c d [e] [f] [a] x y} \ {0 1 8 1 0 0 10 1 0 2 12 1} \ | > > > > > | | | 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | INSERT INTO t1(t1) VALUES('optimize'); } do_execsql_test 1.1.4 { SELECT count(*) FROM t1_segments WHERE length(block)>10000; UPDATE t1_segments SET block = zeroblob(length(block)) WHERE length(block)>10000; } {2} do_execsql_test 1.2.0 { SELECT content FROM t1 WHERE t1 MATCH 'f (e a)'; } {{a b c d e f a x y}} do_execsql_test 1.2.1 { SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } {{a b c d e f a x y}} breakpoint do_execsql_test 1.2.2 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } [list \ {a b c d [e] [f] [a] x y} \ {0 1 8 1 0 0 10 1 0 2 12 1} \ [list 3 1 1 1 1 1 1 1 1 8 8 8 5001 9] ] do_execsql_test 1.2.3 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/3 a)'; } [list \ {[a] b c d [e] [f] [a] x y} \ {0 2 0 1 0 1 8 1 0 0 10 1 0 2 12 1} \ [list 3 1 1 1 1 1 1 1 2 8 8 8 5001 9] ] do_execsql_test 1.3.1 { DROP TABLE t1 } #----------------------------------------------------------------------------- # Test cases fts3defer2-2.* focus specifically on the matchinfo function. # |
︙ | ︙ | |||
95 96 97 98 99 100 101 102 | do_execsql_test 2.2.$tn.1 { SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b'; } [list \ [list 2 1 1 54 54 1 3 3 54 372 8] \ [list 2 1 1 54 54 1 3 3 54 372 7] \ ] set sqlite_fts3_enable_parentheses 1 | > > > > > > | | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | do_execsql_test 2.2.$tn.1 { SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b'; } [list \ [list 2 1 1 54 54 1 3 3 54 372 8] \ [list 2 1 1 54 54 1 3 3 54 372 7] \ ] do_execsql_test 2.2.$tn.2 { SELECT mit(matchinfo(t2, 'x')) FROM t2 WHERE t2 MATCH 'g z'; } [list \ [list 1 2 2 1 54 54] \ ] set sqlite_fts3_enable_parentheses 1 do_execsql_test 2.2.$tn.3 { SELECT mit(matchinfo(t2, 'x')) FROM t2 WHERE t2 MATCH 'g OR (g z)'; } [list \ [list 1 2 2 1 2 2 1 54 54] \ [list 1 2 2 1 2 2 0 54 54] \ ] set sqlite_fts3_enable_parentheses 0 } |
︙ | ︙ |
Changes to test/fts3matchinfo.test.
︙ | ︙ | |||
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3); INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees'); SELECT mtchinfo FROM t3; } {{Beside the lake, beneath the trees}} do_execsql_test 3.2 { CREATE VIRTUAL TABLE xx USING FTS4; SELECT * FROM xx WHERE xx MATCH 'abc'; SELECT * FROM xx WHERE xx MATCH 'a b c'; } #-------------------------------------------------------------------------- # Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function. # | > > > > | 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3); INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees'); SELECT mtchinfo FROM t3; } {{Beside the lake, beneath the trees}} do_execsql_test 3.2 { CREATE VIRTUAL TABLE xx USING FTS4; } do_execsql_test 3.3 { SELECT * FROM xx WHERE xx MATCH 'abc'; } do_execsql_test 3.4 { SELECT * FROM xx WHERE xx MATCH 'a b c'; } #-------------------------------------------------------------------------- # Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function. # |
︙ | ︙ |