SQLite

Check-in [9b58c59eb4]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the way ^ tokens work in FTS so that the filtering is done as part of reading the FTS index instead of waiting until an entire doclist has been retrieved and then filtering it.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts4-content
Files: files | file ages | folders
SHA1: 9b58c59eb4efaa38ce50a3ce1b52f9ba578c71d6
User & Date: dan 2011-10-19 11:57:13.985
Context
2011-10-19
15:52
Have FTS3 ignore ^ prefixes. The ^ syntax is only supported on FTS4 tables. (Closed-Leaf check-in: df36ac9481 user: dan tags: fts4-content)
11:57
Change the way ^ tokens work in FTS so that the filtering is done as part of reading the FTS index instead of waiting until an entire doclist has been retrieved and then filtering it. (check-in: 9b58c59eb4 user: dan tags: fts4-content)
10:18
Add tests for FTS ^ searches and matchinfo(). (check-in: 92618c1463 user: dan tags: fts4-content)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354

2355
2356
2357
2358
2359
2360
2361
2362

2363
2364


2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
    }
  }

  *pnRight = p - aOut;
}

/*
** When this function is called, pList points to a doclist containing position
** data, length *pnList bytes. This removes all entries from the doclist that
** do not correspond to the first token in a column and overwrites pList
** with the result. *pnList is set to the length of the new doclist before

** returning.
**
** If bDescDoclist is true, then both the input and output are in descending
** order. Otherwise, ascending.
*/
static void fts3DoclistFirstFilter(
  int bDescDoclist,               /* True if pList is a descending doclist */
  char *pList,                    /* Buffer containing doclist */

  int *pnList                     /* IN/OUT: Size of doclist */
){


  char *p = pList;
  char *pOut = pList;
  char *pEnd = &pList[*pnList];

  sqlite3_int64 iDoc;
  sqlite3_int64 iPrev;
  int bFirstOut = 0;

  fts3GetDeltaVarint3(&p, pEnd, 0, &iDoc);
  while( p ){
    int bWritten = 0;
    if( *p!=0x01 ){
      if( *p==0x02 ){
        fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); 
        *pOut++ = 0x02;
        bWritten = 1;
      }
      fts3ColumnlistCopy(0, &p);
    }

    while( *p==0x01 ){
      sqlite3_int64 iCol;
      p++;
      p += sqlite3Fts3GetVarint(p, &iCol);
      if( *p==0x02 ){
        if( bWritten==0 ){
          fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); 
          bWritten = 1;
        }
        *pOut++ = 0x01;
        pOut += sqlite3Fts3PutVarint(pOut, iCol);
        *pOut++ = 0x02;
      }
      fts3ColumnlistCopy(0, &p);
    }
    if( bWritten ){
      *pOut++ = 0x00;
    }

    assert( *p==0x00 );
    p++;
    fts3GetDeltaVarint3(&p, pEnd, bDescDoclist, &iDoc);
  }

  *pnList = (pOut - pList);
}


/*
** Merge all doclists in the TermSelect.aaOutput[] array into a single
** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
** other doclists (except the aaOutput[0] one) and return SQLITE_OK.







|
|
|
|
>
|
<
<
<

|
|
|
>
|

>
>

<
|

<
<
<
<
<
<
<
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

<
<
<
<
|
<







2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356



2357
2358
2359
2360
2361
2362
2363
2364
2365
2366

2367
2368







2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396




2397

2398
2399
2400
2401
2402
2403
2404
    }
  }

  *pnRight = p - aOut;
}

/*
** Argument pList points to a position list nList bytes in size. This
** function checks to see if the position list contains any entries for
** a token in position 0 (of any column). If so, it writes argument iDelta
** to the output buffer pOut, followed by a position list consisting only
** of the entries from pList at position 0, and terminated by an 0x00 byte.
** The value returned is the number of bytes written to pOut (if any).



*/
int sqlite3Fts3FirstFilter(
  sqlite3_int64 iDelta,           /* Varint that may be written to pOut */
  char *pList,                    /* Position list (no 0x00 term) */
  int nList,                      /* Size of pList in bytes */
  char *pOut                      /* Write output here */
){
  int nOut = 0;
  int bWritten = 0;               /* True once iDelta has been written */
  char *p = pList;

  char *pEnd = &pList[nList];








  if( *p!=0x01 ){
    if( *p==0x02 ){
      nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
      pOut[nOut++] = 0x02;
      bWritten = 1;
    }
    fts3ColumnlistCopy(0, &p);
  }

  while( p<pEnd && *p==0x01 ){
    sqlite3_int64 iCol;
    p++;
    p += sqlite3Fts3GetVarint(p, &iCol);
    if( *p==0x02 ){
      if( bWritten==0 ){
        nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
        bWritten = 1;
      }
      pOut[nOut++] = 0x01;
      nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol);
      pOut[nOut++] = 0x02;
    }
    fts3ColumnlistCopy(0, &p);
  }
  if( bWritten ){
    pOut[nOut++] = 0x00;
  }





  return nOut;

}


/*
** Merge all doclists in the TermSelect.aaOutput[] array into a single
** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
2761
2762
2763
2764
2765
2766
2767

2768
2769
2770
2771
2772
2773
2774
  Fts3SegFilter filter;           /* Segment term filter configuration */

  pSegcsr = pTok->pSegcsr;
  memset(&tsc, 0, sizeof(TermSelect));

  filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
        | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)

        | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
  filter.iCol = iColumn;
  filter.zTerm = pTok->z;
  filter.nTerm = pTok->n;

  rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter);
  while( SQLITE_OK==rc







>







2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
  Fts3SegFilter filter;           /* Segment term filter configuration */

  pSegcsr = pTok->pSegcsr;
  memset(&tsc, 0, sizeof(TermSelect));

  filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
        | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
        | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0)
        | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
  filter.iCol = iColumn;
  filter.zTerm = pTok->z;
  filter.nTerm = pTok->n;

  rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter);
  while( SQLITE_OK==rc
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
  Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
  int iToken,                     /* Token pList/nList corresponds to */
  char *pList,                    /* Pointer to doclist */
  int nList                       /* Number of bytes in pList */
){
  assert( iToken!=p->iDoclistToken );

  if( p->aToken[iToken].bFirst ){
    fts3DoclistFirstFilter(pTab->bDescIdx, pList, &nList);
  }

  if( pList==0 ){
    sqlite3_free(p->doclist.aAll);
    p->doclist.aAll = 0;
    p->doclist.nAll = 0;
  }

  else if( p->iDoclistToken<0 ){







<
<
<
<







3565
3566
3567
3568
3569
3570
3571




3572
3573
3574
3575
3576
3577
3578
  Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
  int iToken,                     /* Token pList/nList corresponds to */
  char *pList,                    /* Pointer to doclist */
  int nList                       /* Number of bytes in pList */
){
  assert( iToken!=p->iDoclistToken );





  if( pList==0 ){
    sqlite3_free(p->doclist.aAll);
    p->doclist.aAll = 0;
    p->doclist.nAll = 0;
  }

  else if( p->iDoclistToken<0 ){
Changes to ext/fts3/fts3Int.h.
425
426
427
428
429
430
431

432
433
434
435
436
437
438

/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS   0x00000001
#define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
#define FTS3_SEGMENT_PREFIX        0x00000008
#define FTS3_SEGMENT_SCAN          0x00000010


/* Type passed as 4th argument to SegmentReaderIterate() */
struct Fts3SegFilter {
  const char *zTerm;
  int nTerm;
  int iCol;
  int flags;







>







425
426
427
428
429
430
431
432
433
434
435
436
437
438
439

/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS   0x00000001
#define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
#define FTS3_SEGMENT_PREFIX        0x00000008
#define FTS3_SEGMENT_SCAN          0x00000010
#define FTS3_SEGMENT_FIRST         0x00000020

/* Type passed as 4th argument to SegmentReaderIterate() */
struct Fts3SegFilter {
  const char *zTerm;
  int nTerm;
  int iCol;
  int flags;
464
465
466
467
468
469
470
471
472

473
474
475
476
477
478
479
/* fts3.c */
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
int sqlite3Fts3GetVarint32(const char *, int *);
int sqlite3Fts3VarintLen(sqlite3_uint64);
void sqlite3Fts3Dequote(char *);
void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);

int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);


/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, 
    sqlite3_tokenizer **, char **
);







<

>







465
466
467
468
469
470
471

472
473
474
475
476
477
478
479
480
/* fts3.c */
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
int sqlite3Fts3GetVarint32(const char *, int *);
int sqlite3Fts3VarintLen(sqlite3_uint64);
void sqlite3Fts3Dequote(char *);
void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);

int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);

/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, 
    sqlite3_tokenizer **, char **
);
Changes to ext/fts3/fts3_write.c.
2505
2506
2507
2508
2509
2510
2511

2512
2513
2514
2515
2516
2517
2518
  int rc = SQLITE_OK;

  int isIgnoreEmpty =  (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY);
  int isRequirePos =   (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS);
  int isColFilter =    (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER);
  int isPrefix =       (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX);
  int isScan =         (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN);


  Fts3SegReader **apSegment = pCsr->apSegment;
  int nSegment = pCsr->nSegment;
  Fts3SegFilter *pFilter = pCsr->pFilter;
  int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = (
    p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp
  );







>







2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
  int rc = SQLITE_OK;

  int isIgnoreEmpty =  (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY);
  int isRequirePos =   (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS);
  int isColFilter =    (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER);
  int isPrefix =       (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX);
  int isScan =         (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN);
  int isFirst =        (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST);

  Fts3SegReader **apSegment = pCsr->apSegment;
  int nSegment = pCsr->nSegment;
  Fts3SegFilter *pFilter = pCsr->pFilter;
  int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = (
    p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp
  );
2564
2565
2566
2567
2568
2569
2570

2571
2572
2573
2574
2575
2576
2577
    ){
      nMerge++;
    }

    assert( isIgnoreEmpty || (isRequirePos && !isColFilter) );
    if( nMerge==1 
     && !isIgnoreEmpty 

     && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0)
    ){
      pCsr->nDoclist = apSegment[0]->nDoclist;
      if( fts3SegReaderIsPending(apSegment[0]) ){
        rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist);
        pCsr->aDoclist = pCsr->aBuffer;
      }else{







>







2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
    ){
      nMerge++;
    }

    assert( isIgnoreEmpty || (isRequirePos && !isColFilter) );
    if( nMerge==1 
     && !isIgnoreEmpty 
     && !isFirst 
     && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0)
    ){
      pCsr->nDoclist = apSegment[0]->nDoclist;
      if( fts3SegReaderIsPending(apSegment[0]) ){
        rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist);
        pCsr->aDoclist = pCsr->aBuffer;
      }else{
2629
2630
2631
2632
2633
2634
2635











2636
2637
2638
2639
2640
2641

2642
2643
2644
2645
2646
2647
2648
            pCsr->nBuffer = (nDoclist+nByte)*2;
            aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer);
            if( !aNew ){
              return SQLITE_NOMEM;
            }
            pCsr->aBuffer = aNew;
          }











          nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta);
          iPrev = iDocid;
          if( isRequirePos ){
            memcpy(&pCsr->aBuffer[nDoclist], pList, nList);
            nDoclist += nList;
            pCsr->aBuffer[nDoclist++] = '\0';

          }
        }

        fts3SegReaderSort(apSegment, nMerge, j, xCmp);
      }
      if( nDoclist>0 ){
        pCsr->aDoclist = pCsr->aBuffer;







>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
>







2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
            pCsr->nBuffer = (nDoclist+nByte)*2;
            aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer);
            if( !aNew ){
              return SQLITE_NOMEM;
            }
            pCsr->aBuffer = aNew;
          }

          if( isFirst ){
            char *a = &pCsr->aBuffer[nDoclist];
            int nWrite;
           
            nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a);
            if( nWrite ){
              iPrev = iDocid;
              nDoclist += nWrite;
            }
          }else{
            nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta);
            iPrev = iDocid;
            if( isRequirePos ){
              memcpy(&pCsr->aBuffer[nDoclist], pList, nList);
              nDoclist += nList;
              pCsr->aBuffer[nDoclist++] = '\0';
            }
          }
        }

        fts3SegReaderSort(apSegment, nMerge, j, xCmp);
      }
      if( nDoclist>0 ){
        pCsr->aDoclist = pCsr->aBuffer;