/ Check-in [1e599409]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix problems with doclist-indexes involving very large rowids.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 1e5994097e4c740c5173ea9718c3935728fdb86f
User & Date: dan 2015-04-22 20:14:46
Context
2015-04-22
20:58
Add extra OOM tests for fts5. check-in: 2dd59b57 user: dan tags: fts5
20:14
Fix problems with doclist-indexes involving very large rowids. check-in: 1e599409 user: dan tags: fts5
09:40
Update this branch with latest trunk changes. check-in: 9797482d user: dan tags: fts5
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_index.c.

2989
2990
2991
2992
2993
2994
2995
2996
2997

2998
2999
3000
3001
3002
3003
3004
....
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
....
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
....
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
....
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
....
3663
3664
3665
3666
3667
3668
3669
3670

3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
....
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
  fts5BufferZero(&pPage->buf);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms */
  pWriter->bFirstTermInPage = 1;

}

/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
................................................................................

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  /* If the current leaf page is full, flush it to disk. */
  if( pPage->buf.n>=p->pConfig->pgsz ){
    fts5WriteFlushLeaf(p, pWriter);
    pWriter->bFirstRowidInPage = 1;
  }
}

/*
** Append a docid and position-list size field to the writers output. 
*/
static void fts5WriteAppendRowid(
................................................................................
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;

    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);

    if( pPage->buf.n>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
      pWriter->bFirstRowidInPage = 1;
    }
  }
}

static void fts5WriteAppendPoslistInt(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
................................................................................
  int iVal
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->aWriter[0];
    fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal);
    if( pPage->buf.n>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
      pWriter->bFirstRowidInPage = 1;
    }
  }
}

static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
................................................................................
      i64 dummy;
      nCopy += getVarint(&a[nCopy], (u64*)&dummy);
    }
    fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
    a += nCopy;
    n -= nCopy;
    fts5WriteFlushLeaf(p, pWriter);
    pWriter->bFirstRowidInPage = 1;
  }
  if( n>0 ){
    fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
  }
}

static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
................................................................................
      if( pgsz>=(pBuf->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
        int iOff = 0;
        int bFirstDocid = 0;


        /* The entire doclist will not fit on this leaf. The following 
        ** loop iterates through the poslists that make up the current 
        ** doclist.  */
        while( iOff<nDoclist ){
          int nPos;
          int nCopy;
          int bDummy;
          iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta);
          nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
          nCopy += nPos;
          iRowid += iDelta;
          
          if( bFirstDocid ){
            fts5PutU16(&pBuf->p[0], pBuf->n);   /* first docid on page */
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid);
            bFirstDocid = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
          }else{
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( (pBuf->n + nCopy) <= pgsz ){
................................................................................
            while( 1 ){
              int nSpace = pgsz - pBuf->n;
              int n = 0;
              if( (nCopy - iPos)<=nSpace ){
                n = nCopy - iPos;
              }else{
                n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
                assert( n>=nSpace );
              }
              assert( n>0 );
              fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
              iPos += n;
              if( pBuf->n>=pgsz ){
                fts5WriteFlushLeaf(p, &writer);
                pBuf = &writer.aWriter[0].buf;
              }
              if( iPos>=nCopy ) break;
            }
            bFirstDocid = 1;
          }
          iOff += nCopy;
        }
      }

      pBuf->p[pBuf->n++] = '\0';
      assert( pBuf->n<=pBuf->nSpace );







|

>







 







<







 







<







 







<







 







<







 







|
>













|


|







 







<










<







2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
....
3064
3065
3066
3067
3068
3069
3070

3071
3072
3073
3074
3075
3076
3077
....
3102
3103
3104
3105
3106
3107
3108

3109
3110
3111
3112
3113
3114
3115
....
3116
3117
3118
3119
3120
3121
3122

3123
3124
3125
3126
3127
3128
3129
....
3142
3143
3144
3145
3146
3147
3148

3149
3150
3151
3152
3153
3154
3155
....
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
....
3702
3703
3704
3705
3706
3707
3708

3709
3710
3711
3712
3713
3714
3715
3716
3717
3718

3719
3720
3721
3722
3723
3724
3725
  fts5BufferZero(&pPage->buf);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;
  pWriter->bFirstRowidInPage = 1;
}

/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
................................................................................

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  /* If the current leaf page is full, flush it to disk. */
  if( pPage->buf.n>=p->pConfig->pgsz ){
    fts5WriteFlushLeaf(p, pWriter);

  }
}

/*
** Append a docid and position-list size field to the writers output. 
*/
static void fts5WriteAppendRowid(
................................................................................
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;

    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);

    if( pPage->buf.n>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);

    }
  }
}

static void fts5WriteAppendPoslistInt(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
................................................................................
  int iVal
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->aWriter[0];
    fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal);
    if( pPage->buf.n>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);

    }
  }
}

static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
................................................................................
      i64 dummy;
      nCopy += getVarint(&a[nCopy], (u64*)&dummy);
    }
    fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
    a += nCopy;
    n -= nCopy;
    fts5WriteFlushLeaf(p, pWriter);

  }
  if( n>0 ){
    fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
  }
}

static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
................................................................................
      if( pgsz>=(pBuf->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
        int iOff = 0;

        writer.bFirstRowidInPage = 0;

        /* The entire doclist will not fit on this leaf. The following 
        ** loop iterates through the poslists that make up the current 
        ** doclist.  */
        while( iOff<nDoclist ){
          int nPos;
          int nCopy;
          int bDummy;
          iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta);
          nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
          nCopy += nPos;
          iRowid += iDelta;
          
          if( writer.bFirstRowidInPage ){
            fts5PutU16(&pBuf->p[0], pBuf->n);   /* first docid on page */
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid);
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
          }else{
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( (pBuf->n + nCopy) <= pgsz ){
................................................................................
            while( 1 ){
              int nSpace = pgsz - pBuf->n;
              int n = 0;
              if( (nCopy - iPos)<=nSpace ){
                n = nCopy - iPos;
              }else{
                n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);

              }
              assert( n>0 );
              fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
              iPos += n;
              if( pBuf->n>=pgsz ){
                fts5WriteFlushLeaf(p, &writer);
                pBuf = &writer.aWriter[0].buf;
              }
              if( iPos>=nCopy ) break;
            }

          }
          iOff += nCopy;
        }
      }

      pBuf->p[pBuf->n++] = '\0';
      assert( pBuf->n<=pBuf->nSpace );

Changes to ext/fts5/test/fts5dlidx.test.

27
28
29
30
31
32
33
34
35
36

37
38

39


40


41
42
43


44
45
46
47
48

49
50
51
52
53
54
55
56
57
58
59


60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76






77
78
79
80
}

do_execsql_test 1.0 { 
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

foreach {tn spc1 spc2 mul} {
  1 10  100 1000
  2 1   1   128

} {
  set xdoc [list]

  set ydoc [list]


  


  execsql { DELETE FROM t1 }

  do_test 1.$tn.1 {


  
    execsql BEGIN
    for {set i 0} {$i < 10000} {incr i} {
      set rowid [expr $i * $mul]
      set doc "a b c a b c a b c a b c a b c"

      if {($i % $spc1)==0} { 
        lappend xdoc $rowid
        append doc " x" 
        if {($i % $spc2)==0} { 
          lappend ydoc $rowid
          append doc " y" 
        }
      }
      execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) }
    }
    execsql COMMIT


    execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
  } {}
  
  do_execsql_test 1.$tn.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
  
  do_fb_test 1.$tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc
  do_fb_test 1.$tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc
  
  do_fb_test 1.$tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc
  do_fb_test 1.$tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc
  
  do_fb_test 1.$tn.5.1 { 
    SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc
  do_fb_test 1.$tn.5.2 { 
    SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc

}








finish_test








|
|
<
>
|
<
>
|
>
>
|
>
>
|

<
>
>
|
|
|
|
<
>
|
|
|
|
|
|
|
|
|
|
|
>
>



<
<
|
|

|
|

|

|

|
|
>
>
>
>
>
>




27
28
29
30
31
32
33
34
35

36
37

38
39
40
41
42
43
44
45
46

47
48
49
50
51
52

53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69


70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
}

do_execsql_test 1.0 { 
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

# This test populates the FTS5 table containing $nEntry entries. Rows are 
# numbered from 0 to ($nEntry-1). The rowid for row $i is:

#
#   ($iFirst + $i*$nStep)

#
# Each document is of the form "a b c a b c a b c...". If the row number ($i)
# is an integer multiple of $spc1, then an "x" token is appended to the
# document. If it is *also* a multiple of $spc2, a "y" token is also appended.
#
proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} {

  do_execsql_test $tn.0 { DELETE FROM t1 }


  set xdoc [list]
  set ydoc [list]

  execsql BEGIN
  for {set i 0} {$i < $nEntry} {incr i} {
    set rowid [expr $i * $nStep]

    set doc [string trim [string repeat "a b c " 100]]
    if {($i % $spc1)==0} {
      lappend xdoc $rowid
      append doc " x" 
      if {($i % $spc2)==0} { 
        lappend ydoc $rowid
        append doc " y" 
      }
    }
    execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) }
  }
  execsql COMMIT

  do_test $tn.1 {
    execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
  } {}
  


  do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc
  do_fb_test $tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc
  
  do_fb_test $tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc
  do_fb_test $tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc
  
  do_fb_test $tn.5.1 { 
    SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc
  do_fb_test $tn.5.2 { 
    SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc
}


do_dlidx_test1 1.1     10 100 10000 0 1000
do_dlidx_test1 1.2     10 10  10000 0 128
do_dlidx_test1 1.3     10 10  100   0 36028797018963970
do_dlidx_test1 1.3     10 10  50    0 150000000000000000



finish_test