SQLite

Check-in [2c4bbd90e2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Changes to improve the selection of deferred tokens within phrases.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts3-changes
Files: files | file ages | folders
SHA1: 2c4bbd90e2fca593c186bf412b608aff8c9f9061
User & Date: dan 2011-06-27 11:15:53.752
Context
2011-06-27
19:12
Remove an unnecessary assignment from vdbeapi.c. (check-in: 25e5b7686a user: dan tags: fts3-changes)
11:15
Changes to improve the selection of deferred tokens within phrases. (check-in: 2c4bbd90e2 user: dan tags: fts3-changes)
2011-06-23
17:09
Fix some of the code issues (missing comments etc.) in the new FTS code. (check-in: 8230d83120 user: dan tags: fts3-changes)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
** in buffer aList[], size nList bytes.
**
** If the isPoslist argument is true, then it is assumed that the doclist
** contains a position-list following each docid. Otherwise, it is assumed
** that the doclist is simply a list of docids stored as delta encoded 
** varints.
*/
static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){
  int nDoc = 0;                   /* Return value */
  if( aList ){
    char *aEnd = &aList[nList];   /* Pointer to one byte after EOF */
    char *p = aList;              /* Cursor */
    if( !isPoslist ){
      /* The number of docids in the list is the same as the number of 
      ** varints. In FTS3 a varint consists of a single byte with the 0x80 
      ** bit cleared and zero or more bytes with the 0x80 bit set. So to
      ** count the varints in the buffer, just count the number of bytes
      ** with the 0x80 bit clear.  */
      while( p<aEnd ) nDoc += (((*p++)&0x80)==0);
    }else{
      while( p<aEnd ){
        nDoc++;
        while( (*p++)&0x80 );     /* Skip docid varint */
        fts3PoslistCopy(0, &p);   /* Skip over position list */
      }
    }
  }

  return nDoc;
}

/*







|




<
<
<
<
<
<
<
<
|
|
|
|
<







2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560








2561
2562
2563
2564

2565
2566
2567
2568
2569
2570
2571
** in buffer aList[], size nList bytes.
**
** If the isPoslist argument is true, then it is assumed that the doclist
** contains a position-list following each docid. Otherwise, it is assumed
** that the doclist is simply a list of docids stored as delta encoded 
** varints.
*/
static int fts3DoclistCountDocids(char *aList, int nList){
  int nDoc = 0;                   /* Return value */
  if( aList ){
    char *aEnd = &aList[nList];   /* Pointer to one byte after EOF */
    char *p = aList;              /* Cursor */








    while( p<aEnd ){
      nDoc++;
      while( (*p++)&0x80 );     /* Skip docid varint */
      fts3PoslistCopy(0, &p);   /* Skip over position list */

    }
  }

  return nDoc;
}

/*
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919







3920



3921
3922
3923
3924
3925
3926

3927
3928

3929
3930
3931
3932
3933
3934
3935
3936
3937


3938
3939
3940
3941
3942
3943
3944

    /* Determine if token pTC should be deferred. If not, update nDocEst. 
    **
    ** TODO: If there are performance regressions involving deferred tokens,
    ** this (the logic that selects the tokens to be deferred) is probably
    ** the bit that needs to change.
    */
    if( ii==0 ){
      if( pTC->nOvfl ){
        nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;







      }else{



        Fts3PhraseToken *pToken = pTC->pToken;
        int nList = 0;
        char *pList = 0;
        rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
        assert( rc==SQLITE_OK || pList==0 );
        if( rc==SQLITE_OK ){

          nDocEst = fts3DoclistCountDocids(1, pList, nList);
          fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);

        }
      }
    }else{
      if( pTC->nOvfl>=(nDocEst*nDocSize) ){
        Fts3PhraseToken *pToken = pTC->pToken;
        rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
        fts3SegReaderCursorFree(pToken->pSegcsr);
        pToken->pSegcsr = 0;
      }


      nDocEst = 1 + (nDocEst/4);
    }
    pTC->pToken = 0;
  }

  return rc;
}







|
|
|
>
>
>
>
>
>
>
|
>
>
>
|
|
|
|
|
|
>
|
|
>
|
<
|
<
|
<
<
<
<
>
>







3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932

3933

3934




3935
3936
3937
3938
3939
3940
3941
3942
3943

    /* Determine if token pTC should be deferred. If not, update nDocEst. 
    **
    ** TODO: If there are performance regressions involving deferred tokens,
    ** this (the logic that selects the tokens to be deferred) is probably
    ** the bit that needs to change.
    */

    if( ii && pTC->nOvfl>=(nDocEst*nDocSize) ){
      /* The number of overflow pages to load for this (and therefore all
      ** subsequent) tokens is greater than the estimated number of pages 
      ** that will be loaded if all subsequent tokens are deferred.
      */
      Fts3PhraseToken *pToken = pTC->pToken;
      rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
      fts3SegReaderCursorFree(pToken->pSegcsr);
      pToken->pSegcsr = 0;
    }else if( ii==0 || pTC->pPhrase->nToken>1 ){
      /* Either this is the cheapest token in the entire query, or it is
      ** part of a multi-token phrase. Either way, the entire doclist will
      ** (eventually) be loaded into memory. It may as well be now. */
      Fts3PhraseToken *pToken = pTC->pToken;
      int nList = 0;
      char *pList = 0;
      rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
      assert( rc==SQLITE_OK || pList==0 );
      if( rc==SQLITE_OK ){
        fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
        nDocEst = fts3DoclistCountDocids(
            pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
        );
      }

    }else {

      /* This token will not be deferred. And it will not be loaded into




      ** memory at this point either. So assume that it filters out 75% of
      ** the currently estimated number of documents. */
      nDocEst = 1 + (nDocEst/4);
    }
    pTC->pToken = 0;
  }

  return rc;
}