Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Allow multi-token phrases to load doclists from the database incrementally. This allows queries that feature such phrases to benefit from the "docid<?" optimization. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts4-docid-range-constraints |
Files: | files | file ages | folders |
SHA1: |
ea543f081d93ed1bf66c21ce2108ec94 |
User & Date: | dan 2013-10-01 20:02:32.683 |
Context
2013-10-01
| ||
20:10 | Merge trunk changes with this branch. (check-in: 65d9c6fafb user: dan tags: fts4-docid-range-constraints) | |
20:02 | Allow multi-token phrases to load doclists from the database incrementally. This allows queries that feature such phrases to benefit from the "docid<?" optimization. (check-in: ea543f081d user: dan tags: fts4-docid-range-constraints) | |
2013-09-30
| ||
18:16 | Merge trunk changes with this branch. (check-in: e294a9c7c5 user: dan tags: fts4-docid-range-constraints) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 | sqlite3_free(aPoslist); } } return SQLITE_OK; } /* ** This function is called for each Fts3Phrase in a full-text query ** expression to initialize the mechanism for returning rows. Once this ** function has been called successfully on an Fts3Phrase, it may be ** used with fts3EvalPhraseNext() to iterate through the matching docids. ** ** If parameter bOptOk is true, then the phrase may (or may not) use the ** incremental loading strategy. Otherwise, the entire doclist is loaded into ** memory within this call. ** ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. */ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ | > > > > > > < < > > > > > > > > | < | > | > | > > > | | > > | < < | < > | 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 | sqlite3_free(aPoslist); } } return SQLITE_OK; } /* ** Maximum number of tokens a phrase may have to be considered for the ** incremental doclists strategy. */ #define MAX_INCR_PHRASE_TOKENS 4 /* ** This function is called for each Fts3Phrase in a full-text query ** expression to initialize the mechanism for returning rows. Once this ** function has been called successfully on an Fts3Phrase, it may be ** used with fts3EvalPhraseNext() to iterate through the matching docids. ** ** If parameter bOptOk is true, then the phrase may (or may not) use the ** incremental loading strategy. Otherwise, the entire doclist is loaded into ** memory within this call. ** ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. */ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc = SQLITE_OK; /* Error code */ int i; /* Determine if doclists may be loaded from disk incrementally. This is ** possible if the bOptOk argument is true, the FTS doclists will be ** scanned in forward order, and the phrase consists of ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" ** tokens or prefix tokens that cannot use a prefix-index. */ int bIncrOk = (bOptOk && pCsr->bDesc==pTab->bDescIdx && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 ); for(i=0; bIncrOk==1 && i<p->nToken; i++){ Fts3PhraseToken *pToken = &p->aToken[i]; if( pToken->bFirst || !pToken->pSegcsr || !pToken->pSegcsr->bLookup ){ bIncrOk = 0; } } if( bIncrOk ){ /* Use the incremental approach. */ int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); for(i=0; rc==SQLITE_OK && i<p->nToken; i++){ Fts3PhraseToken *pTok = &p->aToken[i]; rc = sqlite3Fts3MsrIncrStart(pTab, pTok->pSegcsr, iCol, pTok->z, pTok->n); } }else{ /* Load the full doclist for the phrase into memory. */ rc = fts3EvalPhraseLoad(pCsr, p); } p->bIncr = bIncrOk; assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr ); return rc; } /* ** This function is used to iterate backwards (from the end to start) |
︙ | ︙ | |||
4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 | p += sqlite3Fts3GetVarint(p, &iVar); *piDocid += ((bDescIdx ? -1 : 1) * iVar); } } *ppIter = p; } /* ** Attempt to move the phrase iterator to point to the next matching docid. ** If an error occurs, return an SQLite error code. Otherwise, return ** SQLITE_OK. ** ** If there is no "next" entry and no error occurs, then *pbEof is set to ** 1 before returning. Otherwise, if no error occurs and the iterator is ** successfully advanced, *pbEof is set to 0. */ static int fts3EvalPhraseNext( Fts3Cursor *pCsr, /* FTS Cursor handle */ Fts3Phrase *p, /* Phrase object to advance to next docid */ u8 *pbEof /* OUT: Set to 1 if EOF */ ){ int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; if( p->bIncr ){ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < < < < < | < | 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 | p += sqlite3Fts3GetVarint(p, &iVar); *piDocid += ((bDescIdx ? -1 : 1) * iVar); } } *ppIter = p; } /* ** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). */ typedef struct TokenDoclist TokenDoclist; struct TokenDoclist { sqlite3_int64 iDocid; char *pList; int nList; }; /* ** Token pToken is an incrementally loaded token that is part of a ** multi-token phrase. Advance it to the next matching document in the ** database and populate output variable *p with the details of the new ** entry. Or, if the iterator has reached EOF, set *pbEof to true. ** ** If an error occurs, return an SQLite error code. Otherwise, return ** SQLITE_OK. */ static int incrPhraseTokenNext( Fts3Table *pTab, /* Virtual table handle */ Fts3PhraseToken *pToken, /* Advance the iterator for this token */ TokenDoclist *p, /* OUT: Docid and doclist for new entry */ int *pbEof /* OUT: True if iterator is at EOF */ ){ int rc; assert( pToken->pDeferred==0 ); rc = sqlite3Fts3MsrIncrNext( pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList ); if( p->pList==0 ) *pbEof = 1; return rc; } /* ** The phrase iterator passed as the second argument uses the incremental ** doclist strategy. Advance it to the next matching documnent in the ** database. If an error occurs, return an SQLite error code. Otherwise, ** return SQLITE_OK. ** ** If there is no "next" entry and no error occurs, then *pbEof is set to ** 1 before returning. Otherwise, if no error occurs and the iterator is ** successfully advanced, *pbEof is set to 0. */ static int fts3EvalIncrPhraseNext( Fts3Cursor *pCsr, /* FTS Cursor handle */ Fts3Phrase *p, /* Phrase object to advance to next docid */ u8 *pbEof /* OUT: Set to 1 if EOF */ ){ int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int bEof = 0; assert( p->bIncr==1 ); assert( pDL->pNextDocid==0 ); if( p->nToken==1 ){ rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, &pDL->iDocid, &pDL->pList, &pDL->nList ); if( pDL->pList==0 ) bEof = 1; }else{ int bDescDoclist = pCsr->bDesc; struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS]; assert( p->nToken<=MAX_INCR_PHRASE_TOKENS ); while( bEof==0 ){ sqlite3_int64 iMax; /* Largest docid for all iterators */ int i; /* Used to iterate through tokens */ /* Advance the iterator for each token in the phrase once. */ for(i=0; rc==SQLITE_OK && i<p->nToken; i++){ rc = incrPhraseTokenNext(pTab, &p->aToken[i], &a[i], &bEof); if( i==0 || DOCID_CMP(iMax, a[i].iDocid)<0 ){ iMax = a[i].iDocid; } } /* Keep advancing iterators until they all point to the same document */ if( bEof==0 && rc==SQLITE_OK ){ for(i=0; i<p->nToken; i++){ while( DOCID_CMP(a[i].iDocid, iMax)<0 && rc==SQLITE_OK && bEof==0 ){ rc = incrPhraseTokenNext(pTab, &p->aToken[i], &a[i], &bEof); if( DOCID_CMP(a[i].iDocid, iMax)>0 ){ iMax = a[i].iDocid; i = 0; } } } } /* Check if the current entries really are a phrase match */ if( bEof==0 ){ int nByte = a[p->nToken-1].nList; char *aDoclist = sqlite3_malloc(nByte+1); if( !aDoclist ) return SQLITE_NOMEM; memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); int nList; for(i=0; i<(p->nToken-1); i++){ char *pLeft = a[i].pList; char *pRight = aDoclist; char *pOut = aDoclist; int nDist = p->nToken-1-i; int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pLeft, &pRight); if( res==0 ) break; nList = (pOut - aDoclist); } if( i==(p->nToken-1) ){ pDL->iDocid = a[0].iDocid; pDL->pList = aDoclist; pDL->nList = nList; pDL->bFreeList = 1; break; } sqlite3_free(aDoclist); } } } *pbEof = bEof; return rc; } /* ** Attempt to move the phrase iterator to point to the next matching docid. ** If an error occurs, return an SQLite error code. Otherwise, return ** SQLITE_OK. ** ** If there is no "next" entry and no error occurs, then *pbEof is set to ** 1 before returning. Otherwise, if no error occurs and the iterator is ** successfully advanced, *pbEof is set to 0. */ static int fts3EvalPhraseNext( Fts3Cursor *pCsr, /* FTS Cursor handle */ Fts3Phrase *p, /* Phrase object to advance to next docid */ u8 *pbEof /* OUT: Set to 1 if EOF */ ){ int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; if( p->bIncr ){ rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof ); pDL->pList = pDL->pNextDocid; }else{ char *pIter; /* Used to iterate through aAll */ |
︙ | ︙ | |||
4241 4242 4243 4244 4245 4246 4247 | ** ** If an error occurs within this function, *pRc is set to an SQLite error ** code before returning. */ static void fts3EvalStartReaders( Fts3Cursor *pCsr, /* FTS Cursor handle */ Fts3Expr *pExpr, /* Expression to initialize phrases in */ | < | | | | 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 | ** ** If an error occurs within this function, *pRc is set to an SQLite error ** code before returning. */ static void fts3EvalStartReaders( Fts3Cursor *pCsr, /* FTS Cursor handle */ Fts3Expr *pExpr, /* Expression to initialize phrases in */ int *pRc /* IN/OUT: Error code */ ){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_PHRASE ){ int i; int nToken = pExpr->pPhrase->nToken; for(i=0; i<nToken; i++){ if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break; } pExpr->bDeferred = (i==nToken); *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase); }else{ fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc); fts3EvalStartReaders(pCsr, pExpr->pRight, pRc); pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } } /* ** An array of the following structures is assembled as part of the process |
︙ | ︙ | |||
4577 4578 4579 4580 4581 4582 4583 | } sqlite3_free(aTC); } } #endif | | | 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 | } sqlite3_free(aTC); } } #endif fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc); return rc; } /* ** Invalidate the current position list for phrase pPhrase. */ static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){ |
︙ | ︙ | |||
5093 5094 5095 5096 5097 5098 5099 | ){ if( pExpr && *pRc==SQLITE_OK ){ Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase ){ fts3EvalInvalidatePoslist(pPhrase); if( pPhrase->bIncr ){ | > | | | > < | 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 | ){ if( pExpr && *pRc==SQLITE_OK ){ Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase ){ fts3EvalInvalidatePoslist(pPhrase); if( pPhrase->bIncr ){ int i; for(i=0; i<pPhrase->nToken; i++){ assert( pPhrase->aToken[i].pSegcsr ); sqlite3Fts3MsrIncrRestart(pPhrase->aToken[i].pSegcsr); } *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); } pPhrase->doclist.pNextDocid = 0; pPhrase->doclist.iDocid = 0; } pExpr->iDocid = 0; pExpr->bEof = 0; pExpr->bStart = 0; |
︙ | ︙ |