/ Check-in [bb104b36]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix suffix and prefix compression of terms in top-level fts5 segments. And a crash that could follow an OOM condition.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1:bb104b3646c6f07ed002be7360b08433ee7980d4
User & Date: dan 2015-02-27 07:23:26
Context
2015-02-27
09:41
Further minor optimizations to flushing fts5 data to disk. check-in: a07dcca9 user: dan tags: fts5
07:23
Fix suffix and prefix compression of terms in top-level fts5 segments. And a crash that could follow an OOM condition. check-in: bb104b36 user: dan tags: fts5
2015-02-26
20:49
Optimize copying data from fts5 in-memory hash tables to top level segments. check-in: 8e3ca632 user: dan tags: fts5
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
int sqlite3Fts5HashQuery(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const char **ppDoclist,         /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
);

void sqlite3Fts5HashScanInit(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
);
void sqlite3Fts5HashScanNext(Fts5Hash*);
int sqlite3Fts5HashScanEof(Fts5Hash*);
void sqlite3Fts5HashScanEntry(Fts5Hash *,
  const char **pzTerm,            /* OUT: term (nul-terminated) */







|







396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
int sqlite3Fts5HashQuery(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const char **ppDoclist,         /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
);

int sqlite3Fts5HashScanInit(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
);
void sqlite3Fts5HashScanNext(Fts5Hash*);
int sqlite3Fts5HashScanEof(Fts5Hash*);
void sqlite3Fts5HashScanEntry(Fts5Hash *,
  const char **pzTerm,            /* OUT: term (nul-terminated) */

Changes to ext/fts5/fts5_hash.c.

417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
    *ppDoclist = 0;
    *pnDoclist = 0;
  }

  return SQLITE_OK;
}

void sqlite3Fts5HashScanInit(
  Fts5Hash *p,                    /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
){
  fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
}

void sqlite3Fts5HashScanNext(Fts5Hash *p){
  Fts5HashEntry *pScan = p->pScan;
  if( pScan ) p->pScan = pScan->pScanNext;
}








|



|







417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
    *ppDoclist = 0;
    *pnDoclist = 0;
  }

  return SQLITE_OK;
}

int sqlite3Fts5HashScanInit(
  Fts5Hash *p,                    /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
){
  return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
}

void sqlite3Fts5HashScanNext(Fts5Hash *p){
  Fts5HashEntry *pScan = p->pScan;
  if( pScan ) p->pScan = pScan->pScanNext;
}

Changes to ext/fts5/fts5_index.c.

2075
2076
2077
2078
2079
2080
2081

2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
....
3420
3421
3422
3423
3424
3425
3426

3427
3428
3429
3430
3431
3432
3433
....
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448

3449
3450
3451
3452
3453
3454
3455
....
3458
3459
3460
3461
3462
3463
3464

3465

3466
3467
3468
3469

3470
3471

3472

3473
3474
3475


3476
3477
3478
3479
3480
3481
3482
....
3532
3533
3534
3535
3536
3537
3538

3539
3540
3541
3542
3543
3544
3545
  Fts5Hash *pHash = p->apHash[iIdx];
  const char *pList = 0;
  int nList = 0;
  const u8 *z = 0;
  int n = 0;

  assert( pHash );


  if( pTerm==0 || (iIdx==0 && (flags & FTS5INDEX_QUERY_PREFIX)) ){
    sqlite3Fts5HashScanInit(pHash, (const char*)pTerm, nTerm);
    sqlite3Fts5HashScanEntry(pHash, (const char**)&z, &pList, &nList);
    n = (z ? strlen((const char*)z) : 0);
  }else{
    pIter->flags |= FTS5_SEGITER_ONETERM;
    sqlite3Fts5HashQuery(pHash, (const char*)pTerm, nTerm, &pList, &nList);
    z = pTerm;
    n = nTerm;
................................................................................

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;

    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    int nHeight;                  /* Height of new segment b-tree */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */


    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iHash, iSegid);

    /* Pre-allocate the buffer used to assemble leaf pages to the target
    ** page size.  */
    assert( pgsz>0 );
................................................................................
    pBuf = &writer.aWriter[0].buf;
    fts5BufferGrow(&p->rc, pBuf, pgsz + 20);

    /* Begin scanning through hash table entries. */
    if( p->rc==SQLITE_OK ){
      memset(pBuf->p, 0, 4);
      pBuf->n = 4;
      sqlite3Fts5HashScanInit(pHash, 0, 0);
    }

    while( 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;
      int nTerm;
      const u8 *pDoclist;
      int nDoclist;


      sqlite3Fts5HashScanEntry(pHash, &zTerm,(const char**)&pDoclist,&nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term fits on the current leaf. If not, flush it
      ** to disk.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
................................................................................
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
        }
      }

      /* Write the term to the leaf. And push it up into the b-tree hierarchy */
      if( writer.bFirstTermInPage==0 ){

        pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], 0);

      }else{
        fts5PutU16(&pBuf->p[2], pBuf->n);
        writer.bFirstTermInPage = 0;
        if( writer.aWriter[0].pgno!=1 ){

          fts5WriteBtreeTerm(p, &writer, nTerm, (const u8*)zTerm);
          pBuf = &writer.aWriter[0].buf;

        }

      }
      pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nTerm);
      fts5BufferAppendBlob(&p->rc, pBuf, nTerm, (const u8*)zTerm);



      if( pgsz>=(pBuf->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferAppendBlob(&p->rc, pBuf, nDoclist, pDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
................................................................................
          assert( pBuf->n<=pgsz );
          iOff += nCopy;
        }
      }

      pBuf->p[pBuf->n++] = '\0';
      assert( pBuf->n<=pBuf->nSpace );

      sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureRelease() call below.  */







>


|







 







>







 







|


|




>







 







>
|
>




>
|

>

>

|
|
>
>







 







>







2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
....
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
....
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
....
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
....
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
  Fts5Hash *pHash = p->apHash[iIdx];
  const char *pList = 0;
  int nList = 0;
  const u8 *z = 0;
  int n = 0;

  assert( pHash );
  assert( p->rc==SQLITE_OK );

  if( pTerm==0 || (iIdx==0 && (flags & FTS5INDEX_QUERY_PREFIX)) ){
    p->rc = sqlite3Fts5HashScanInit(pHash, (const char*)pTerm, nTerm);
    sqlite3Fts5HashScanEntry(pHash, (const char**)&z, &pList, &nList);
    n = (z ? strlen((const char*)z) : 0);
  }else{
    pIter->flags |= FTS5_SEGITER_ONETERM;
    sqlite3Fts5HashQuery(pHash, (const char*)pTerm, nTerm, &pList, &nList);
    z = pTerm;
    n = nTerm;
................................................................................

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;

    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    int nHeight;                  /* Height of new segment b-tree */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
    const char *zPrev = 0;

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iHash, iSegid);

    /* Pre-allocate the buffer used to assemble leaf pages to the target
    ** page size.  */
    assert( pgsz>0 );
................................................................................
    pBuf = &writer.aWriter[0].buf;
    fts5BufferGrow(&p->rc, pBuf, pgsz + 20);

    /* Begin scanning through hash table entries. */
    if( p->rc==SQLITE_OK ){
      memset(pBuf->p, 0, 4);
      pBuf->n = 4;
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }

    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;
      int nTerm;
      const u8 *pDoclist;
      int nDoclist;
      int nSuffix;                /* Size of term suffix */

      sqlite3Fts5HashScanEntry(pHash, &zTerm,(const char**)&pDoclist,&nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term fits on the current leaf. If not, flush it
      ** to disk.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
................................................................................
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
        }
      }

      /* Write the term to the leaf. And push it up into the b-tree hierarchy */
      if( writer.bFirstTermInPage==0 ){
        int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, zTerm);
        pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre);
        nSuffix = nTerm - nPre;
      }else{
        fts5PutU16(&pBuf->p[2], pBuf->n);
        writer.bFirstTermInPage = 0;
        if( writer.aWriter[0].pgno!=1 ){
          int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, zTerm);
          fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm);
          pBuf = &writer.aWriter[0].buf;
          assert( nPre<nTerm );
        }
        nSuffix = nTerm;
      }
      pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix);
      fts5BufferAppendBlob(&p->rc, pBuf, 
          nSuffix, (const u8*)&zTerm[nTerm-nSuffix]
      );

      if( pgsz>=(pBuf->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferAppendBlob(&p->rc, pBuf, nDoclist, pDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
................................................................................
          assert( pBuf->n<=pgsz );
          iOff += nCopy;
        }
      }

      pBuf->p[pBuf->n++] = '\0';
      assert( pBuf->n<=pBuf->nSpace );
      zPrev = zTerm;
      sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureRelease() call below.  */