Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Change prefix search from O(N*M) to O(NlogM). Backports (4599) from fts3. (CVS 5455) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
3f614453d2d7c753a5963b027fe8618b |
User & Date: | shess 2008-07-22 23:08:40.000 |
Context
2008-07-22
| ||
23:32 | fts2 functions for testing scripts. Backports (5340) from fts3. (CVS 5456) (check-in: 4e47394be9 user: shess tags: trunk) | |
23:08 | Change prefix search from O(N*M) to O(NlogM). Backports (4599) from fts3. (CVS 5455) (check-in: 3f614453d2 user: shess tags: trunk) | |
22:57 | Changes fts2 to use only sqlite3_malloc() and not system malloc. Backports (4554) and (4555) from fts3. (CVS 5454) (check-in: ecf2dec66c user: shess tags: trunk) | |
Changes
Changes to ext/fts2/fts2.c.
︙ | ︙ | |||
451 452 453 454 455 456 457 458 459 460 461 462 463 464 | /* DataBuffer is used to collect data into a buffer in piecemeal ** fashion. It implements the usual distinction between amount of ** data currently stored (nData) and buffer capacity (nCapacity). ** ** dataBufferInit - create a buffer with given initial capacity. ** dataBufferReset - forget buffer's data, retaining capacity. ** dataBufferDestroy - free buffer's data. ** dataBufferExpand - expand capacity without adding data. ** dataBufferAppend - append data. ** dataBufferAppend2 - append two pieces of data at once. ** dataBufferReplace - replace buffer's data. */ typedef struct DataBuffer { char *pData; /* Pointer to malloc'ed buffer. */ | > | 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 | /* DataBuffer is used to collect data into a buffer in piecemeal ** fashion. It implements the usual distinction between amount of ** data currently stored (nData) and buffer capacity (nCapacity). ** ** dataBufferInit - create a buffer with given initial capacity. ** dataBufferReset - forget buffer's data, retaining capacity. ** dataBufferDestroy - free buffer's data. ** dataBufferSwap - swap contents of two buffers. ** dataBufferExpand - expand capacity without adding data. ** dataBufferAppend - append data. ** dataBufferAppend2 - append two pieces of data at once. ** dataBufferReplace - replace buffer's data. */ typedef struct DataBuffer { char *pData; /* Pointer to malloc'ed buffer. */ |
︙ | ︙ | |||
474 475 476 477 478 479 480 481 482 483 484 485 486 487 | } static void dataBufferReset(DataBuffer *pBuffer){ pBuffer->nData = 0; } static void dataBufferDestroy(DataBuffer *pBuffer){ if( pBuffer->pData!=NULL ) sqlite3_free(pBuffer->pData); SCRAMBLE(pBuffer); } static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){ assert( nAddCapacity>0 ); /* TODO(shess) Consider expanding more aggressively. Note that the ** underlying malloc implementation may take care of such things for ** us already. */ | > > > > > | 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 | } static void dataBufferReset(DataBuffer *pBuffer){ pBuffer->nData = 0; } static void dataBufferDestroy(DataBuffer *pBuffer){ if( pBuffer->pData!=NULL ) sqlite3_free(pBuffer->pData); SCRAMBLE(pBuffer); } static void dataBufferSwap(DataBuffer *pBuffer1, DataBuffer *pBuffer2){ DataBuffer tmp = *pBuffer1; *pBuffer1 = *pBuffer2; *pBuffer2 = tmp; } static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){ assert( nAddCapacity>0 ); /* TODO(shess) Consider expanding more aggressively. Note that the ** underlying malloc implementation may take care of such things for ** us already. */ |
︙ | ︙ | |||
5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 | err: for(i=0; i<MERGE_COUNT; i++){ leavesReaderDestroy(&lrs[i]); } leafWriterDestroy(&writer); return rc; } /* Scan pReader for pTerm/nTerm, and merge the term's doclist over ** *out (any doclists with duplicate docids overwrite those in *out). ** Internal function for loadSegmentLeaf(). */ static int loadSegmentLeavesInt(fulltext_vtab *v, LeavesReader *pReader, const char *pTerm, int nTerm, int isPrefix, DataBuffer *out){ assert( nTerm>0 ); | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < | > < > | | > | > > | > > > | > | > > > > > > | > > > > > > > > | > | > > | > > > > > > > | > > > > | > > > | > > > > > > > > > > > > > | > > > > > > > > > > > | > > > | > > > | | | | 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 | err: for(i=0; i<MERGE_COUNT; i++){ leavesReaderDestroy(&lrs[i]); } leafWriterDestroy(&writer); return rc; } /* Accumulate the union of *acc and *pData into *acc. */ static void docListAccumulateUnion(DataBuffer *acc, const char *pData, int nData) { DataBuffer tmp = *acc; dataBufferInit(acc, tmp.nData+nData); docListUnion(tmp.pData, tmp.nData, pData, nData, acc); dataBufferDestroy(&tmp); } /* TODO(shess) It might be interesting to explore different merge ** strategies, here. For instance, since this is a sorted merge, we ** could easily merge many doclists in parallel. With some ** comprehension of the storage format, we could merge all of the ** doclists within a leaf node directly from the leaf node's storage. ** It may be worthwhile to merge smaller doclists before larger ** doclists, since they can be traversed more quickly - but the ** results may have less overlap, making them more expensive in a ** different way. */ /* Scan pReader for pTerm/nTerm, and merge the term's doclist over ** *out (any doclists with duplicate docids overwrite those in *out). ** Internal function for loadSegmentLeaf(). */ static int loadSegmentLeavesInt(fulltext_vtab *v, LeavesReader *pReader, const char *pTerm, int nTerm, int isPrefix, DataBuffer *out){ /* doclist data is accumulated into pBuffers similar to how one does ** increment in binary arithmetic. If index 0 is empty, the data is ** stored there. If there is data there, it is merged and the ** results carried into position 1, with further merge-and-carry ** until an empty position is found. */ DataBuffer *pBuffers = NULL; int nBuffers = 0, nMaxBuffers = 0, rc; assert( nTerm>0 ); for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader); rc=leavesReaderStep(v, pReader)){ /* TODO(shess) Really want leavesReaderTermCmp(), but that name is ** already taken to compare the terms of two LeavesReaders. Think ** on a better name. [Meanwhile, break encapsulation rather than ** use a confusing name.] */ int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix); if( c>0 ) break; /* Past any possible matches. */ if( c==0 ){ const char *pData = leavesReaderData(pReader); int iBuffer, nData = leavesReaderDataBytes(pReader); /* Find the first empty buffer. */ for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ if( 0==pBuffers[iBuffer].nData ) break; } /* Out of buffers, add an empty one. */ if( iBuffer==nBuffers ){ if( nBuffers==nMaxBuffers ){ DataBuffer *p; nMaxBuffers += 20; /* Manual realloc so we can handle NULL appropriately. */ p = sqlite3_malloc(nMaxBuffers*sizeof(*pBuffers)); if( p==NULL ){ rc = SQLITE_NOMEM; break; } if( nBuffers>0 ){ assert(pBuffers!=NULL); memcpy(p, pBuffers, nBuffers*sizeof(*pBuffers)); sqlite3_free(pBuffers); } pBuffers = p; } dataBufferInit(&(pBuffers[nBuffers]), 0); nBuffers++; } /* At this point, must have an empty at iBuffer. */ assert(iBuffer<nBuffers && pBuffers[iBuffer].nData==0); /* If empty was first buffer, no need for merge logic. */ if( iBuffer==0 ){ dataBufferReplace(&(pBuffers[0]), pData, nData); }else{ /* pAcc is the empty buffer the merged data will end up in. */ DataBuffer *pAcc = &(pBuffers[iBuffer]); DataBuffer *p = &(pBuffers[0]); /* Handle position 0 specially to avoid need to prime pAcc ** with pData/nData. */ dataBufferSwap(p, pAcc); docListAccumulateUnion(pAcc, pData, nData); /* Accumulate remaining doclists into pAcc. */ for(++p; p<pAcc; ++p){ docListAccumulateUnion(pAcc, p->pData, p->nData); /* dataBufferReset() could allow a large doclist to blow up ** our memory requirements. */ if( p->nCapacity<1024 ){ dataBufferReset(p); }else{ dataBufferDestroy(p); dataBufferInit(p, 0); } } } } } /* Union all the doclists together into *out. */ /* TODO(shess) What if *out is big? Sigh. */ if( rc==SQLITE_OK && nBuffers>0 ){ int iBuffer; for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ if( pBuffers[iBuffer].nData>0 ){ if( out->nData==0 ){ dataBufferSwap(out, &(pBuffers[iBuffer])); }else{ docListAccumulateUnion(out, pBuffers[iBuffer].pData, pBuffers[iBuffer].nData); } } } } while( nBuffers-- ){ dataBufferDestroy(&(pBuffers[nBuffers])); } if( pBuffers!=NULL ) sqlite3_free(pBuffers); return rc; } /* Call loadSegmentLeavesInt() with pData/nData as input. */ static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData, const char *pTerm, int nTerm, int isPrefix, DataBuffer *out){ LeavesReader reader; |
︙ | ︙ |