Overview
Comment:     Optimize copying data from fts5 in-memory hash tables to top level segments.
SHA1:        8e3ca6323a2beab5f04250e24ae15b15
User & Date: dan 2015-02-26 20:49:09.566
Context
2015-02-27
  07:23  Fix suffix and prefix compression of terms in top-level fts5 segments. And a crash that could follow an OOM condition. (check-in: bb104b3646 user: dan tags: fts5)
2015-02-26
  20:49  Optimize copying data from fts5 in-memory hash tables to top level segments. (check-in: 8e3ca6323a user: dan tags: fts5)
  14:54  Fix an fts5 bug in large incremental merges. (check-in: 208e3cb6b6 user: dan tags: fts5)
Changes
Changes to ext/fts5/fts5_hash.c.
︙

  pHash->nEntry = 0;
  sqlite3_free(ap);
  *ppSorted = pList;
  return SQLITE_OK;
}

/*
** Query the hash table for a doclist associated with term pTerm/nTerm.
*/
int sqlite3Fts5HashQuery(
  Fts5Hash *pHash,                /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const char **ppDoclist,         /* OUT: Pointer to doclist for pTerm */
︙

  Fts5Hash *p,                    /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
){
  fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
}

void sqlite3Fts5HashScanNext(Fts5Hash *p){
  Fts5HashEntry *pScan = p->pScan;
  if( pScan ) p->pScan = pScan->pScanNext;
}

int sqlite3Fts5HashScanEof(Fts5Hash *p){
  return (p->pScan==0);
}

void sqlite3Fts5HashScanEntry(
︙
Changes to ext/fts5/fts5_index.c.
︙

**
** doclist format:
**
**   varint:  first rowid
**   poslist: first poslist
**   zero-or-more {
**     varint:  rowid delta (always > 0)
**     poslist: next poslist
**   }
**   0x00 byte
**
** poslist format:
**
**   varint: size of poslist in bytes. not including this field.
**   collist: collist for column 0
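As an illustration of the doclist format documented above, here is a small self-contained sketch (not part of this check-in) that serializes a two-rowid doclist following those rules: a first rowid, a size-prefixed poslist per entry, rowid deltas for subsequent entries, and a terminating 0x00 byte. The putVarint() helper is a simplified stand-in for sqlite3PutVarint(), and the poslist payloads are dummy bytes, both assumptions for illustration only.

#include <stdio.h>
#include <string.h>

typedef unsigned char u8;
typedef unsigned long long u64;

/* Simplified SQLite-style varint encoder: big-endian groups of 7 bits,
** high bit set on every byte except the last. Handles values below 2^56,
** which is plenty for this illustration. */
static int putVarint(u8 *p, u64 v){
  u8 buf[10];
  int n = 0;
  int i;
  do{
    buf[n++] = (u8)((v & 0x7f) | 0x80);
    v >>= 7;
  }while( v );
  for(i=0; i<n; i++) p[i] = buf[n-1-i];
  p[n-1] &= 0x7f;                 /* clear continuation bit on final byte */
  return n;
}

/* Append one doclist entry: a rowid (or rowid delta) followed by a
** size-prefixed poslist. */
static int putEntry(u8 *p, u64 iVal, const u8 *aPos, int nPos){
  int n = putVarint(p, iVal);             /* varint: rowid or rowid delta */
  n += putVarint(&p[n], (u64)nPos);       /* varint: poslist size in bytes */
  memcpy(&p[n], aPos, nPos);              /* poslist (collist) payload */
  return n + nPos;
}

int main(void){
  u8 aDoclist[64];
  int i, n = 0;
  static const u8 aPos1[] = { 0x02, 0x05 };   /* dummy poslist payloads */
  static const u8 aPos2[] = { 0x03 };

  n += putEntry(&aDoclist[n], 1000, aPos1, sizeof(aPos1)); /* first rowid */
  n += putEntry(&aDoclist[n], 37, aPos2, sizeof(aPos2));   /* delta -> rowid 1037 */
  aDoclist[n++] = 0x00;                                    /* doclist terminator */

  for(i=0; i<n; i++) printf("%02x ", aDoclist[i]);
  printf("\n");
  return 0;
}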
︙

** term, write it now.
*/
static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
  if( pWriter->nEmpty ){
    int bFlag = 0;
    Fts5PageWriter *pPg;
    pPg = &pWriter->aWriter[1];
    if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE && pWriter->cdlidx.n ){
      i64 iKey = FTS5_DOCLIST_IDX_ROWID(
          pWriter->iIdx, pWriter->iSegid,
          pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
      );
      assert( pWriter->cdlidx.n>0 );
      fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n);
      bFlag = 1;
︙

  Fts5Index *p,
  Fts5SegWriter *pWriter,         /* Writer object */
  int *pnHeight,                  /* OUT: Height of the b-tree */
  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
){
  int i;
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pLeaf = &pWriter->aWriter[0];
    if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){
      *pnLeaf = 0;
      *pnHeight = 0;
    }else{
      if( pLeaf->buf.n>4 ){
        fts5WriteFlushLeaf(p, pWriter);
      }
      *pnLeaf = pLeaf->pgno-1;
      if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
        fts5WriteBtreeGrow(p, pWriter);
      }
      if( pWriter->nWriter>1 ){
        fts5WriteBtreeNEmpty(p, pWriter);
      }
      *pnHeight = pWriter->nWriter;
︙

typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;
  Fts5SegWriter writer;
};

/*
** Buffer aBuf[] contains a list of varints, all small enough to fit
** in a 32-bit integer. Return the size of the largest prefix of this
** list nMax bytes or less in size.
*/
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
  int ret = 0;
  while( 1 ){
    u32 dummy;
    int i = fts5GetVarint32(&aBuf[ret], dummy);
    if( (ret + i) > nMax ) break;
    ret += i;
  }
  return ret;
}

/*
** Flush the contents of in-memory hash table iHash to a new level-0
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
  Fts5Hash *pHash = p->apHash[iHash];
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p, iHash);
  iSegid = fts5AllocateSegid(p, pStruct);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;

    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    int nHeight;                  /* Height of new segment b-tree */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iHash, iSegid);

    /* Pre-allocate the buffer used to assemble leaf pages to the target
    ** page size.  */
    assert( pgsz>0 );
    pBuf = &writer.aWriter[0].buf;
    fts5BufferGrow(&p->rc, pBuf, pgsz + 20);

    /* Begin scanning through hash table entries. */
    if( p->rc==SQLITE_OK ){
      memset(pBuf->p, 0, 4);
      pBuf->n = 4;
      sqlite3Fts5HashScanInit(pHash, 0, 0);
    }

    while( 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;
      int nTerm;
      const u8 *pDoclist;
      int nDoclist;

      sqlite3Fts5HashScanEntry(pHash, &zTerm, (const char**)&pDoclist, &nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term fits on the current leaf. If not, flush it
      ** to disk.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
        fts5WriteFlushLeaf(p, &writer);
        pBuf = &writer.aWriter[0].buf;
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
        }
      }

      /* Write the term to the leaf. And push it up into the b-tree hierarchy */
      if( writer.bFirstTermInPage==0 ){
        pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], 0);
      }else{
        fts5PutU16(&pBuf->p[2], pBuf->n);
        writer.bFirstTermInPage = 0;
        if( writer.aWriter[0].pgno!=1 ){
          fts5WriteBtreeTerm(p, &writer, nTerm, (const u8*)zTerm);
          pBuf = &writer.aWriter[0].buf;
        }
      }
      pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nTerm);
      fts5BufferAppendBlob(&p->rc, pBuf, nTerm, (const u8*)zTerm);

      if( pgsz>=(pBuf->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferAppendBlob(&p->rc, pBuf, nDoclist, pDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
        int iOff = 0;
        int bFirstDocid = 0;

        /* The entire doclist will not fit on this leaf. The following
        ** loop iterates through the poslists that make up the current
        ** doclist.  */
        while( iOff<nDoclist ){
          u32 nPos;
          int nCopy;
          iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta);
          nCopy = fts5GetVarint32(&pDoclist[iOff], nPos);
          nCopy += nPos;
          iRowid += iDelta;

          if( bFirstDocid ){
            fts5PutU16(&pBuf->p[0], pBuf->n);   /* first docid on page */
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid);
            bFirstDocid = 0;
          }else{
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( (pBuf->n + nCopy) <= pgsz ){
            /* The entire poslist will fit on the current leaf. So copy
            ** it in one go. */
            fts5BufferAppendBlob(&p->rc, pBuf, nCopy, &pDoclist[iOff]);
          }else{
            /* The entire poslist will not fit on this leaf. So it needs
            ** to be broken into sections. The only qualification being
            ** that each varint must be stored contiguously.  */
            const u8 *pPoslist = &pDoclist[iOff];
            int iPos = 0;
            while( 1 ){
              int nSpace = pgsz - pBuf->n;
              int n;
              if( (nCopy - iPos)<=nSpace ){
                n = nCopy - iPos;
              }else{
                n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
              }
              fts5BufferAppendBlob(&p->rc, pBuf, n, &pPoslist[iPos]);
              iPos += n;
              if( iPos>=nCopy ) break;
              fts5WriteFlushLeaf(p, &writer);
              pBuf = &writer.aWriter[0].buf;
            }
            bFirstDocid = 1;
          }
          assert( pBuf->n<=pgsz );
          iOff += nCopy;
        }
      }

      pBuf->p[pBuf->n++] = '\0';
      assert( pBuf->n<=pBuf->nSpace );
      sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureRelease() call below.  */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
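The page-spanning case above relies on fts5PoslistPrefix() to split a poslist only on varint boundaries. The standalone sketch below (not part of this check-in) isolates that idea, with a local varintLen() helper standing in for fts5GetVarint32(); as in the real code, a prefix is only requested when the available space is smaller than the remaining poslist.

#include <stdio.h>

typedef unsigned char u8;

/* Length in bytes of the SQLite-style varint starting at p (at most 9). */
static int varintLen(const u8 *p){
  int i;
  for(i=0; i<8 && (p[i] & 0x80); i++){}
  return i+1;
}

/* Largest prefix of the varint list in aBuf[] that is nMax bytes or less,
** measured on varint boundaries. Mirrors fts5PoslistPrefix() above. */
static int poslistPrefix(const u8 *aBuf, int nMax){
  int ret = 0;
  while( 1 ){
    int i = varintLen(&aBuf[ret]);
    if( (ret + i) > nMax ) break;
    ret += i;
  }
  return ret;
}

int main(void){
  /* Three varints of 2, 1 and 2 bytes respectively. */
  static const u8 aPoslist[] = { 0x87, 0x68, 0x25, 0x81, 0x00 };

  /* With 4 bytes of space left on the leaf, the split lands after the
  ** second varint (3 bytes copied), never in the middle of the third. */
  printf("copy %d bytes, flush leaf, continue\n", poslistPrefix(aPoslist, 4));
  return 0;
}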
︙