Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Avoid loading doclists for infrequent terms that are part of phrases twice. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
8f939723f742329cedba8930f71dff42 |
User & Date: | dan 2011-06-17 17:37:31.284 |
Context
2011-06-17
| ||
18:52 | Fix a header dependency in nmake Makefile. (check-in: 54492212af user: shaneh tags: trunk) | |
17:37 | Avoid loading doclists for infrequent terms that are part of phrases twice. (check-in: 8f939723f7 user: dan tags: trunk) | |
16:04 | Add a missing declaration to fts3Int.h. (check-in: 3bfd4466f5 user: dan tags: trunk) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 | pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr ); if( rc!=SQLITE_OK ){ *pRc = rc; return; } } }else{ *pnOr += (pExpr->eType==FTSQUERY_OR); fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc); fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); } } } static int fts3EvalPhraseLoad( Fts3Cursor *pCsr, Fts3Phrase *p ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int iToken; int rc = SQLITE_OK; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < < < | | < < < < < < < < < < | < < < < < < > | < < < < < < < | | 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 | pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr ); if( rc!=SQLITE_OK ){ *pRc = rc; return; } } assert( pExpr->pPhrase->iDoclistToken==0 ); pExpr->pPhrase->iDoclistToken = -1; }else{ *pnOr += (pExpr->eType==FTSQUERY_OR); fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc); fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); } } } static void fts3EvalPhraseMergeToken( Fts3Table *pTab, Fts3Phrase *p, int iToken, char *pList, int nList ){ assert( iToken!=p->iDoclistToken ); if( pList==0 ){ sqlite3_free(p->doclist.aAll); p->doclist.aAll = 0; p->doclist.nAll = 0; } else if( p->iDoclistToken<0 ){ p->doclist.aAll = pList; p->doclist.nAll = nList; } else if( p->doclist.aAll==0 ){ sqlite3_free(pList); } else { char *pLeft; char *pRight; int nLeft; int nRight; int nDiff; if( p->iDoclistToken<iToken ){ pLeft = p->doclist.aAll; nLeft = p->doclist.nAll; pRight = pList; nRight = nList; nDiff = iToken - p->iDoclistToken; }else{ pRight = p->doclist.aAll; nRight = p->doclist.nAll; pLeft = pList; nLeft = nList; nDiff = p->iDoclistToken - iToken; } fts3DoclistPhraseMerge(pTab->bDescIdx, nDiff, pLeft, nLeft, pRight,&nRight); sqlite3_free(pLeft); p->doclist.aAll = pRight; p->doclist.nAll = nRight; } if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken; } static int fts3EvalPhraseLoad( Fts3Cursor *pCsr, Fts3Phrase *p ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int iToken; int rc = SQLITE_OK; for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){ Fts3PhraseToken *pToken = &p->aToken[iToken]; assert( pToken->pDeferred==0 || pToken->pSegcsr==0 ); if( pToken->pSegcsr ){ int nThis = 0; char *pThis = 0; rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis); if( rc==SQLITE_OK ){ fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis); } } assert( pToken->pSegcsr==0 ); } return rc; } static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ int iToken; int rc = SQLITE_OK; int nMaxUndeferred = pPhrase->iDoclistToken; char *aPoslist = 0; int nPoslist = 0; int iPrev = -1; assert( pPhrase->doclist.bFreeList==0 ); for(iToken=0; rc==SQLITE_OK && iToken<pPhrase->nToken; iToken++){ |
︙ | ︙ | |||
3287 3288 3289 3290 3291 3292 3293 | sqlite3_free(aPoslist); pPhrase->doclist.pList = 0; pPhrase->doclist.nList = 0; return SQLITE_OK; } } iPrev = iToken; | < < | 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 | sqlite3_free(aPoslist); pPhrase->doclist.pList = 0; pPhrase->doclist.nList = 0; return SQLITE_OK; } } iPrev = iToken; } } if( iPrev>=0 ){ if( nMaxUndeferred<0 ){ pPhrase->doclist.pList = aPoslist; pPhrase->doclist.nList = nPoslist; |
︙ | ︙ | |||
3347 3348 3349 3350 3351 3352 3353 | ** used with fts3EvalPhraseNext() to iterate through the matching docids. */ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ int rc; Fts3PhraseToken *pFirst = &p->aToken[0]; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | < | > > | > | 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 | ** used with fts3EvalPhraseNext() to iterate through the matching docids. */ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ int rc; Fts3PhraseToken *pFirst = &p->aToken[0]; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; if( pCsr->bDesc==pTab->bDescIdx && bOptOk==1 && p->nToken==1 && pFirst->pSegcsr && pFirst->pSegcsr->bLookup ){ /* Use the incremental approach. */ int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); rc = sqlite3Fts3MsrIncrStart( pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n); p->bIncr = 1; |
︙ | ︙ | |||
3520 3521 3522 3523 3524 3525 3526 | fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } } | < > > | | > > | 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 | fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } } typedef struct Fts3TokenAndCost Fts3TokenAndCost; struct Fts3TokenAndCost { Fts3Phrase *pPhrase; /* The phrase the token belongs to */ int iToken; /* Position of token in phrase */ Fts3PhraseToken *pToken; /* The token itself */ Fts3Expr *pRoot; int nOvfl; int iCol; /* The column the token must match */ }; static void fts3EvalTokenCosts( Fts3Cursor *pCsr, Fts3Expr *pRoot, Fts3Expr *pExpr, Fts3TokenAndCost **ppTC, Fts3Expr ***ppOr, int *pRc ){ if( *pRc==SQLITE_OK && pExpr ){ if( pExpr->eType==FTSQUERY_PHRASE ){ Fts3Phrase *pPhrase = pExpr->pPhrase; int i; for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){ Fts3TokenAndCost *pTC = (*ppTC)++; pTC->pPhrase = pPhrase; pTC->iToken = i; pTC->pRoot = pRoot; pTC->pToken = &pPhrase->aToken[i]; pTC->iCol = pPhrase->iColumn; *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); } }else if( pExpr->eType!=FTSQUERY_NOT ){ if( pExpr->eType==FTSQUERY_OR ){ |
︙ | ︙ | |||
3655 3656 3657 3658 3659 3660 3661 | assert( pTC ); /* At this point pTC points to the cheapest remaining token. */ if( ii==0 ){ if( pTC->nOvfl ){ nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10; }else{ | < | < | < < < < > > | 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 | assert( pTC ); /* At this point pTC points to the cheapest remaining token. */ if( ii==0 ){ if( pTC->nOvfl ){ nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10; }else{ Fts3PhraseToken *pToken = pTC->pToken; int nList = 0; char *pList = 0; rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList); assert( rc==SQLITE_OK || pList==0 ); if( rc==SQLITE_OK ){ nDocEst = fts3DoclistCountDocids(1, pList, nList); fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList); } } }else{ if( pTC->nOvfl>=(nDocEst*nDocSize) ){ Fts3PhraseToken *pToken = pTC->pToken; rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); fts3SegReaderCursorFree(pToken->pSegcsr); |
︙ | ︙ |
Changes to ext/fts3/fts3Int.h.
︙ | ︙ | |||
302 303 304 305 306 307 308 | char *z; /* Text of the token */ int n; /* Number of bytes in buffer z */ int isPrefix; /* True if token ends with a "*" character */ /* Variables above this point are populated when the expression is ** parsed (by code in fts3_expr.c). Below this point the variables are ** used when evaluating the expression. */ | < > | 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 | char *z; /* Text of the token */ int n; /* Number of bytes in buffer z */ int isPrefix; /* True if token ends with a "*" character */ /* Variables above this point are populated when the expression is ** parsed (by code in fts3_expr.c). Below this point the variables are ** used when evaluating the expression. */ Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */ }; struct Fts3Phrase { /* Cache of doclist for this phrase. */ Fts3Doclist doclist; int bIncr; /* True if doclist is loaded incrementally */ int iDoclistToken; /* Variables below this point are populated by fts3_expr.c when parsing ** a MATCH expression. Everything above is part of the evaluation phase. */ int nToken; /* Number of tokens in the phrase */ int iColumn; /* Index of column this phrase must match */ Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ |
︙ | ︙ |