Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Propagate prefix flag through implementation of doclist query code. Also implement correct prefix-handling for traversal of interior nodes of segment tree. A given prefix can span multiple children of an interior node, and from there the branches need to be followed in parallel. (CVS 3889) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
cae844a01a1d87ffb00bba8b4e7b62a9 |
User & Date: | shess 2007-04-30 22:09:36.000 |
Context
2007-05-01
| ||
16:59 | The pager takes the sector size to be the larger of the sector size reported by sqlite3OsSectorSize() and the page size. (CVS 3890) (check-in: e5e6af55cc user: drh tags: trunk) | |
2007-04-30
| ||
22:09 | Propagate prefix flag through implementation of doclist query code. Also implement correct prefix-handling for traversal of interior nodes of segment tree. A given prefix can span multiple children of an interior node, and from there the branches need to be followed in parallel. (CVS 3889) (check-in: cae844a01a user: shess tags: trunk) | |
21:39 | Fix a potential segfault following a malloc() failure during a call to sqlite3_prepare() where the nBytes parameter is positive but less than the length of the input SQL string. (CVS 3888) (check-in: 27bf3fc3cf user: drh tags: trunk) | |
Changes
Changes to ext/fts2/fts2.c.
︙ | ︙ | |||
1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 | */ typedef struct QueryTerm { short int nPhrase; /* How many following terms are part of the same phrase */ short int iPhrase; /* This is the i-th term of a phrase. */ short int iColumn; /* Column of the index that must match this term */ signed char isOr; /* this term is preceded by "OR" */ signed char isNot; /* this term is preceded by "-" */ char *pTerm; /* text of the term. '\000' terminated. malloced */ int nTerm; /* Number of bytes in pTerm[] */ } QueryTerm; /* A query string is parsed into a Query structure. * | > | 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 | */ typedef struct QueryTerm { short int nPhrase; /* How many following terms are part of the same phrase */ short int iPhrase; /* This is the i-th term of a phrase. */ short int iColumn; /* Column of the index that must match this term */ signed char isOr; /* this term is preceded by "OR" */ signed char isNot; /* this term is preceded by "-" */ signed char isPrefix; /* this term is followed by "*" */ char *pTerm; /* text of the term. '\000' terminated. malloced */ int nTerm; /* Number of bytes in pTerm[] */ } QueryTerm; /* A query string is parsed into a Query structure. * |
︙ | ︙ | |||
3228 3229 3230 3231 3232 3233 3234 | /* TODO(shess) If we pushed LeafReader to the top of the file, or to ** another file, term_select() could be pushed above ** docListOfTerm(). */ static int termSelect(fulltext_vtab *v, int iColumn, | | | 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 | /* TODO(shess) If we pushed LeafReader to the top of the file, or to ** another file, term_select() could be pushed above ** docListOfTerm(). */ static int termSelect(fulltext_vtab *v, int iColumn, const char *pTerm, int nTerm, int isPrefix, DocListType iType, DataBuffer *out); /* Return a DocList corresponding to the query term *pTerm. If *pTerm ** is the first term of a phrase query, go ahead and evaluate the phrase ** query and return the doclist for the entire phrase query. ** ** The resulting DL_DOCIDS doclist is stored in pResult, which is |
︙ | ︙ | |||
3254 3255 3256 3257 3258 3259 3260 | /* No phrase search if no position info. */ assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS ); /* This code should never be called with buffered updates. */ assert( v->nPendingData<0 ); dataBufferInit(&left, 0); | | | | 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 | /* No phrase search if no position info. */ assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS ); /* This code should never be called with buffered updates. */ assert( v->nPendingData<0 ); dataBufferInit(&left, 0); rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix, 0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &left); if( rc ) return rc; for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ dataBufferInit(&right, 0); rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pQTerm[i].isPrefix, DL_POSITIONS, &right); if( rc ){ dataBufferDestroy(&left); return rc; } dataBufferInit(&new, 0); docListPhraseMerge(left.pData, left.nData, right.pData, right.nData, i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new); |
︙ | ︙ | |||
3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 | t = &q->pTerms[q->nTerms - 1]; CLEAR(t); t->pTerm = malloc(nTerm+1); memcpy(t->pTerm, pTerm, nTerm); t->pTerm[nTerm] = 0; t->nTerm = nTerm; t->isOr = q->nextIsOr; q->nextIsOr = 0; t->iColumn = q->nextColumn; q->nextColumn = q->dfltColumn; } /* ** Check to see if the string zToken[0...nToken-1] matches any | > | 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 | t = &q->pTerms[q->nTerms - 1]; CLEAR(t); t->pTerm = malloc(nTerm+1); memcpy(t->pTerm, pTerm, nTerm); t->pTerm[nTerm] = 0; t->nTerm = nTerm; t->isOr = q->nextIsOr; t->isPrefix = 0; q->nextIsOr = 0; t->iColumn = q->nextColumn; q->nextColumn = q->dfltColumn; } /* ** Check to see if the string zToken[0...nToken-1] matches any |
︙ | ︙ | |||
4178 4179 4180 4181 4182 4183 4184 | pReader->pData += n+nSuffix; pReader->nData -= n+nSuffix; } pReader->iBlockid++; } /* Compare the current term to pTerm[nTerm], returning strcmp-style | | | > | 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 | pReader->pData += n+nSuffix; pReader->nData -= n+nSuffix; } pReader->iBlockid++; } /* Compare the current term to pTerm[nTerm], returning strcmp-style ** results. If isPrefix, equality means equal through nTerm bytes. */ static int interiorReaderTermCmp(InteriorReader *pReader, const char *pTerm, int nTerm, int isPrefix){ const char *pReaderTerm = interiorReaderTerm(pReader); int nReaderTerm = interiorReaderTermBytes(pReader); int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm; if( n==0 ){ if( nReaderTerm>0 ) return -1; if( nTerm>0 ) return 1; return 0; } c = memcmp(pReaderTerm, pTerm, n); if( c!=0 ) return c; if( isPrefix && n==nTerm ) return 0; return nReaderTerm - nTerm; } /****************************************************************/ /* LeafWriter is used to collect terms and associated doclist data ** into leaf blocks in %_segments (see top of file for format info). ** Expected usage is: |
︙ | ︙ | |||
5146 5147 5148 5149 5150 5151 5152 | /* Call loadSegmentLeavesInt() with the leaf nodes from iStartLeaf to ** iEndLeaf (inclusive) as input, and merge the resulting doclist into ** out. */ static int loadSegmentLeaves(fulltext_vtab *v, sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf, | | | > > | | | | | | | | | > > > > > > > > > > > > | | > > > > | | > > | | > > | | | | > > | | | > | > > > > | > > > > > | | > > > > > > > > | | > | > | 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 | /* Call loadSegmentLeavesInt() with the leaf nodes from iStartLeaf to ** iEndLeaf (inclusive) as input, and merge the resulting doclist into ** out. */ static int loadSegmentLeaves(fulltext_vtab *v, sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf, const char *pTerm, int nTerm, DataBuffer *out){ int rc; LeavesReader reader; assert( iStartLeaf<=iEndLeaf ); rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader); if( rc!=SQLITE_OK ) return rc; rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, out); leavesReaderReset(&reader); leavesReaderDestroy(&reader); return rc; } /* Taking pData/nData as an interior node, find the sequence of child ** nodes which could include pTerm/nTerm/isPrefix. Note that the ** interior node terms logically come between the blocks, so there is ** one more blockid than there are terms (that block contains terms >= ** the last interior-node term). */ static void getChildrenContaining(const char *pData, int nData, const char *pTerm, int nTerm, int isPrefix, sqlite_int64 *piStartChild, sqlite_int64 *piEndChild){ InteriorReader reader; assert( nData>1 ); assert( *pData!='\0' ); interiorReaderInit(pData, nData, &reader); /* Scan for the first child which could contain pTerm/nTerm. */ while( !interiorReaderAtEnd(&reader) ){ if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break; interiorReaderStep(&reader); } *piStartChild = interiorReaderCurrentBlockid(&reader); /* Keep scanning to find a term greater than our term, using prefix ** comparison if indicated. If isPrefix is false, this will be the ** same blockid as the starting block. */ while( !interiorReaderAtEnd(&reader) ){ if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break; interiorReaderStep(&reader); } *piEndChild = interiorReaderCurrentBlockid(&reader); interiorReaderDestroy(&reader); /* Children must ascend, and if !prefix, both must be the same. */ assert( *piEndChild>=*piStartChild ); assert( isPrefix || *piStartChild==*piEndChild ); } /* Read block at iBlockid and pass it with other params to ** getChildrenContaining(). */ static int loadAndGetChildrenContaining( fulltext_vtab *v, sqlite_int64 iBlockid, const char *pTerm, int nTerm, int isPrefix, sqlite_int64 *piStartChild, sqlite_int64 *piEndChild ){ sqlite3_stmt *s = NULL; int rc; assert( iBlockid!=0 ); assert( pTerm!=NULL ); assert( nTerm!=0 ); /* TODO(shess) Why not allow this? */ assert( piStartChild!=NULL ); assert( piEndChild!=NULL ); rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int64(s, 1, iBlockid); if( rc!=SQLITE_OK ) return rc; rc = sql_step_statement(v, BLOCK_SELECT_STMT, &s); if( rc==SQLITE_DONE ) return SQLITE_ERROR; if( rc!=SQLITE_ROW ) return rc; getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0), pTerm, nTerm, isPrefix, piStartChild, piEndChild); /* We expect only one row. We must execute another sqlite3_step() * to complete the iteration; otherwise the table will remain * locked. */ rc = sqlite3_step(s); if( rc==SQLITE_ROW ) return SQLITE_ERROR; if( rc!=SQLITE_DONE ) return rc; return SQLITE_OK; } /* Traverse the tree represented by pData[nData] looking for ** pTerm[nTerm], placing its doclist into *out. This is internal to ** loadSegment() to make error-handling cleaner. */ static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData, sqlite_int64 iLeavesEnd, const char *pTerm, int nTerm, int isPrefix, DataBuffer *out){ /* Special case where root is a leaf. */ if( *pData=='\0' ){ assert( !isPrefix ); /* TODO(shess) Add prefix support. */ return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, out); }else{ int rc; sqlite_int64 iStartChild, iEndChild; /* Process pData as an interior node, then loop down the tree ** until we find the set of leaf nodes to scan for the term. */ getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix, &iStartChild, &iEndChild); while( iStartChild>iLeavesEnd ){ sqlite_int64 iNextStart, iNextEnd; rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix, &iNextStart, &iNextEnd); if( rc!=SQLITE_OK ) return rc; /* If we've branched, follow the end branch, too. */ if( iStartChild!=iEndChild ){ sqlite_int64 iDummy; rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix, &iDummy, &iNextEnd); if( rc!=SQLITE_OK ) return rc; } assert( iNextStart<=iNextEnd ); iStartChild = iNextStart; iEndChild = iNextEnd; } assert( iStartChild<=iLeavesEnd ); assert( iEndChild<=iLeavesEnd ); assert( !isPrefix ); /* TODO(shess) Add prefix support. */ return loadSegmentLeaves(v, iStartChild, iEndChild, pTerm, nTerm, out); } } /* Call loadSegmentInt() to collect the doclist for pTerm/nTerm, then ** merge its doclist over *out (any duplicate doclists read from the ** segment rooted at pData will overwrite those in *out). */ /* NOTE(shess) Previous code passed out down to sub-routines for use ** in docListMerge(). This version deoptimizes things slightly, but ** prefix searches require a different merge function entirely. */ static int loadSegment(fulltext_vtab *v, const char *pData, int nData, sqlite_int64 iLeavesEnd, const char *pTerm, int nTerm, int isPrefix, DataBuffer *out){ DataBuffer result; int rc; assert( nData>1 ); /* This code should never be called with buffered updates. */ assert( v->nPendingData<0 ); dataBufferInit(&result, 0); rc = loadSegmentInt(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, &result); if( rc==SQLITE_OK && result.nData>0 ){ if( out->nData==0 ){ DataBuffer tmp = *out; *out = result; result = tmp; }else{ DataBuffer merged; |
︙ | ︙ | |||
5294 5295 5296 5297 5298 5299 5300 | return rc; } /* Scan the database and merge together the posting lists for the term ** into *out. */ static int termSelect(fulltext_vtab *v, int iColumn, | | | > | 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 | return rc; } /* Scan the database and merge together the posting lists for the term ** into *out. */ static int termSelect(fulltext_vtab *v, int iColumn, const char *pTerm, int nTerm, int isPrefix, DocListType iType, DataBuffer *out){ DataBuffer doclist; sqlite3_stmt *s; int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); if( rc!=SQLITE_OK ) return rc; /* This code should never be called with buffered updates. */ assert( v->nPendingData<0 ); dataBufferInit(&doclist, 0); /* Traverse the segments from oldest to newest so that newer doclist ** elements for given docids overwrite older elements. */ while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){ const char *pData = sqlite3_column_blob(s, 0); const int nData = sqlite3_column_bytes(s, 0); const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, &doclist); if( rc!=SQLITE_OK ) goto err; } if( rc==SQLITE_DONE ){ if( doclist.nData!=0 ){ /* TODO(shess) The old term_select_all() code applied the column ** restrict as we merged segments, leading to smaller buffers. ** This is probably worthwhile to bring back, once the new storage |
︙ | ︙ |