SQLite

Check-in [8e3ca6323a]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Optimize copying data from fts5 in-memory hash tables to top level segments.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 8e3ca6323a2beab5f04250e24ae15b159d2aa0ac
User & Date: dan 2015-02-26 20:49:09.566
Context
2015-02-27
07:23
Fix suffix and prefix compression of terms in top-level fts5 segments. And a crash that could follow an OOM condition. (check-in: bb104b3646 user: dan tags: fts5)
2015-02-26
20:49
Optimize copying data from fts5 in-memory hash tables to top level segments. (check-in: 8e3ca6323a user: dan tags: fts5)
14:54
Fix an fts5 bug in large incremental merges. (check-in: 208e3cb6b6 user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_hash.c.
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451

  pHash->nEntry = 0;
  sqlite3_free(ap);
  *ppSorted = pList;
  return SQLITE_OK;
}

int sqlite3Fts5HashIterate(
  Fts5Hash *pHash,
  void *pCtx,
  int (*xTerm)(void*, const char*, int),
  int (*xEntry)(void*, i64, const u8*, int),
  int (*xTermDone)(void*)
){
  Fts5HashEntry *pList;
  int rc;

  rc = fts5HashEntrySort(pHash, 0, 0, &pList);
  if( rc==SQLITE_OK ){
    memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot);
    while( pList ){
      Fts5HashEntry *pNext = pList->pScanNext;
      if( rc==SQLITE_OK ){
        const int nKey = strlen(pList->zKey);
        i64 iRowid = 0;
        u8 *pPtr = (u8*)pList;
        int iOff = sizeof(Fts5HashEntry) + nKey + 1;

        /* Fill in the final poslist size field */
        fts5HashAddPoslistSize(pList);
        
        /* Issue the new-term callback */
        rc = xTerm(pCtx, pList->zKey, nKey);

        /* Issue the xEntry callbacks */
        while( rc==SQLITE_OK && iOff<pList->nData ){
          i64 iDelta;             /* Rowid delta value */
          int nPoslist;           /* Size of position list in bytes */
          int nVarint;
          iOff += getVarint(&pPtr[iOff], (u64*)&iDelta);
          iRowid += iDelta;
          nVarint = fts5GetVarint32(&pPtr[iOff], nPoslist);
          rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist+nVarint);
          iOff += nVarint+nPoslist;
        }

        /* Issue the term-done callback */
        if( rc==SQLITE_OK ) rc = xTermDone(pCtx);
      }
      sqlite3_free(pList);
      pList = pNext;
    }
  }
  return rc;
}

/*
** Query the hash table for a doclist associated with term pTerm/nTerm.
*/
int sqlite3Fts5HashQuery(
  Fts5Hash *pHash,                /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const char **ppDoclist,         /* OUT: Pointer to doclist for pTerm */







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







389
390
391
392
393
394
395

















































396
397
398
399
400
401
402

  pHash->nEntry = 0;
  sqlite3_free(ap);
  *ppSorted = pList;
  return SQLITE_OK;
}


















































/*
** Query the hash table for a doclist associated with term pTerm/nTerm.
*/
int sqlite3Fts5HashQuery(
  Fts5Hash *pHash,                /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const char **ppDoclist,         /* OUT: Pointer to doclist for pTerm */
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
  Fts5Hash *p,                    /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
){
  fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
}

void sqlite3Fts5HashScanNext(Fts5Hash *p){
  if( p->pScan ){
    p->pScan = p->pScan->pScanNext;
  }
}

int sqlite3Fts5HashScanEof(Fts5Hash *p){
  return (p->pScan==0);
}

void sqlite3Fts5HashScanEntry(







|
|
<







425
426
427
428
429
430
431
432
433

434
435
436
437
438
439
440
  Fts5Hash *p,                    /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
){
  fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
}

void sqlite3Fts5HashScanNext(Fts5Hash *p){
  Fts5HashEntry *pScan = p->pScan;
  if( pScan ) p->pScan = pScan->pScanNext;

}

int sqlite3Fts5HashScanEof(Fts5Hash *p){
  return (p->pScan==0);
}

void sqlite3Fts5HashScanEntry(
Changes to ext/fts5/fts5_index.c.
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
**
**     doclist format:
**
**         varint:  first rowid
**         poslist: first poslist
**         zero-or-more {
**           varint:  rowid delta (always > 0)
**           poslist: first poslist
**         }
**         0x00 byte
**
**     poslist format:
**
**         varint: size of poslist in bytes. not including this field.
**         collist: collist for column 0







|







109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
**
**     doclist format:
**
**         varint:  first rowid
**         poslist: first poslist
**         zero-or-more {
**           varint:  rowid delta (always > 0)
**           poslist: next poslist
**         }
**         0x00 byte
**
**     poslist format:
**
**         varint: size of poslist in bytes. not including this field.
**         collist: collist for column 0
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
** term, write it now.
*/
static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
  if( pWriter->nEmpty ){
    int bFlag = 0;
    Fts5PageWriter *pPg;
    pPg = &pWriter->aWriter[1];
    if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
      i64 iKey = FTS5_DOCLIST_IDX_ROWID(
          pWriter->iIdx, pWriter->iSegid, 
          pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
      );
      assert( pWriter->cdlidx.n>0 );
      fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n);
      bFlag = 1;







|







2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
** term, write it now.
*/
static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
  if( pWriter->nEmpty ){
    int bFlag = 0;
    Fts5PageWriter *pPg;
    pPg = &pWriter->aWriter[1];
    if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE && pWriter->cdlidx.n ){
      i64 iKey = FTS5_DOCLIST_IDX_ROWID(
          pWriter->iIdx, pWriter->iSegid, 
          pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
      );
      assert( pWriter->cdlidx.n>0 );
      fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n);
      bFlag = 1;
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011

3012


3013
3014
3015
3016
3017
3018
3019
  Fts5Index *p, 
  Fts5SegWriter *pWriter,         /* Writer object */
  int *pnHeight,                  /* OUT: Height of the b-tree */
  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
){
  int i;
  if( p->rc==SQLITE_OK ){
    *pnLeaf = pWriter->aWriter[0].pgno;
    if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){
      *pnLeaf = 0;
      *pnHeight = 0;
    }else{

      fts5WriteFlushLeaf(p, pWriter);


      if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
        fts5WriteBtreeGrow(p, pWriter);
      }
      if( pWriter->nWriter>1 ){
        fts5WriteBtreeNEmpty(p, pWriter);
      }
      *pnHeight = pWriter->nWriter;







|
|



>
|
>
>







3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
  Fts5Index *p, 
  Fts5SegWriter *pWriter,         /* Writer object */
  int *pnHeight,                  /* OUT: Height of the b-tree */
  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
){
  int i;
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pLeaf = &pWriter->aWriter[0];
    if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){
      *pnLeaf = 0;
      *pnHeight = 0;
    }else{
      if( pLeaf->buf.n>4 ){
        fts5WriteFlushLeaf(p, pWriter);
      }
      *pnLeaf = pLeaf->pgno-1;
      if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
        fts5WriteBtreeGrow(p, pWriter);
      }
      if( pWriter->nWriter>1 ){
        fts5WriteBtreeNEmpty(p, pWriter);
      }
      *pnHeight = pWriter->nWriter;
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398

3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431

3432
3433
3434
3435
3436
3437
3438
3439
3440
3441


3442
3443




















3444


3445


3446









3447













3448








3449










3450








3451










3452



















3453





3454
3455
3456
3457
3458
3459
3460
3461

typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;
  Fts5SegWriter writer; 
};

static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){
  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  int rc = SQLITE_OK;
  fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm);
  return rc;
}

static int fts5FlushTermDone(void *pCtx){
  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  int rc = SQLITE_OK;
  /* Write the doclist terminator */
  fts5WriteAppendZerobyte(p->pIdx, &p->writer);
  return rc;
}


static int fts5FlushNewEntry(
  void *pCtx, 
  i64 iRowid, 
  const u8 *aPoslist, 
  int nPoslist
){
  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  Fts5Index *pIdx = p->pIdx;

#ifdef SQLITE_DEBUG
  /* The poslist-size varint should already be at the start of the 
  ** aPoslist/nPoslist buffer. This assert verifies that. */
  int n, i;
  i = fts5GetVarint32(aPoslist, n);
  assert( nPoslist==(n+i) );
#endif

  /* Append the rowid itself */
  fts5WriteAppendRowid(pIdx, &p->writer, iRowid);

  /* And the poslist data */
  fts5WriteAppendPoslistData(pIdx, &p->writer, aPoslist, nPoslist);
  return pIdx->rc;
}

/*
** Flush the contents of in-memory hash table iHash to a new level-0 
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){

  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p, iHash);
  iSegid = fts5AllocateSegid(p, pStruct);

  if( iSegid ){


    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    int nHeight;                  /* Height of new segment b-tree */




















    int rc;


    Fts5FlushCtx ctx;












    fts5WriteInit(p, &ctx.writer, iHash, iSegid);













    ctx.pIdx = p;



















    rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, 








        fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone










    );



















    if( p->rc==SQLITE_OK ) p->rc = rc;





    fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureRelease() call below.  */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);







<
<
<
<
<
<
|
<
<
<
|
<
|
<
|
>
|
<
<
<
|
|
<
<
|
<
<
<
<
|
<
|
|
<
<
|
<
<
|










>










>
>


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
|
>
>

>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
|







3380
3381
3382
3383
3384
3385
3386






3387



3388

3389

3390
3391
3392



3393
3394


3395




3396

3397
3398


3399


3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549

typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;
  Fts5SegWriter writer; 
};







/*



** Buffer aBuf[] contains a list of varints, all small enough to fit

** in a 32-bit integer. Return the size of the largest prefix of this 

** list nMax bytes or less in size.
*/
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){



  int ret = 0;
  while( 1 ){


    u32 dummy;




    int i = fts5GetVarint32(&aBuf[ret], dummy);

    if( (ret + i) > nMax ) break;
    ret += i;


  }


  return ret;
}

/*
** Flush the contents of in-memory hash table iHash to a new level-0 
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
  Fts5Hash *pHash = p->apHash[iHash];
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p, iHash);
  iSegid = fts5AllocateSegid(p, pStruct);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;

    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    int nHeight;                  /* Height of new segment b-tree */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iHash, iSegid);

    /* Pre-allocate the buffer used to assemble leaf pages to the target
    ** page size.  */
    assert( pgsz>0 );
    pBuf = &writer.aWriter[0].buf;
    fts5BufferGrow(&p->rc, pBuf, pgsz + 20);

    /* Begin scanning through hash table entries. */
    if( p->rc==SQLITE_OK ){
      memset(pBuf->p, 0, 4);
      pBuf->n = 4;
      sqlite3Fts5HashScanInit(pHash, 0, 0);
    }

    while( 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;
      int nTerm;
      const u8 *pDoclist;
      int nDoclist;

      sqlite3Fts5HashScanEntry(pHash, &zTerm,(const char**)&pDoclist,&nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term fits on the current leaf. If not, flush it
      ** to disk.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
        fts5WriteFlushLeaf(p, &writer);
        pBuf = &writer.aWriter[0].buf;
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
        }
      }

      /* Write the term to the leaf. And push it up into the b-tree hierarchy */
      if( writer.bFirstTermInPage==0 ){
        pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], 0);
      }else{
        fts5PutU16(&pBuf->p[2], pBuf->n);
        writer.bFirstTermInPage = 0;
        if( writer.aWriter[0].pgno!=1 ){
          fts5WriteBtreeTerm(p, &writer, nTerm, (const u8*)zTerm);
          pBuf = &writer.aWriter[0].buf;
        }
      }
      pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nTerm);
      fts5BufferAppendBlob(&p->rc, pBuf, nTerm, (const u8*)zTerm);

      if( pgsz>=(pBuf->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferAppendBlob(&p->rc, pBuf, nDoclist, pDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
        int iOff = 0;
        int bFirstDocid = 0;

        /* The entire doclist will not fit on this leaf. The following 
        ** loop iterates through the poslists that make up the current 
        ** doclist.  */
        while( iOff<nDoclist ){
          u32 nPos;
          int nCopy;
          iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta);
          nCopy = fts5GetVarint32(&pDoclist[iOff], nPos);
          nCopy += nPos;
          iRowid += iDelta;
          
          if( bFirstDocid ){
            fts5PutU16(&pBuf->p[0], pBuf->n);   /* first docid on page */
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid);
            bFirstDocid = 0;
          }else{
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( (pBuf->n + nCopy) <= pgsz ){
            /* The entire poslist will fit on the current leaf. So copy
            ** it in one go. */
            fts5BufferAppendBlob(&p->rc, pBuf, nCopy, &pDoclist[iOff]);
          }else{
            /* The entire poslist will not fit on this leaf. So it needs
            ** to be broken into sections. The only qualification being
            ** that each varint must be stored contiguously.  */
            const u8 *pPoslist = &pDoclist[iOff];
            int iPos = 0;
            while( 1 ){
              int nSpace = pgsz - pBuf->n;
              int n;
              if( (nCopy - iPos)<=nSpace ){
                n = nCopy - iPos;
              }else{
                n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
              }
              fts5BufferAppendBlob(&p->rc, pBuf, n, &pPoslist[iPos]);
              iPos += n;
              if( iPos>=nCopy ) break;
              fts5WriteFlushLeaf(p, &writer);
              pBuf = &writer.aWriter[0].buf;
            }
            bFirstDocid = 1;
          }
          assert( pBuf->n<=pgsz );
          iOff += nCopy;
        }
      }

      pBuf->p[pBuf->n++] = '\0';
      assert( pBuf->n<=pBuf->nSpace );
      sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureRelease() call below.  */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);