SQLite

Check-in [7b7da1eb43]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Speed up seek operations on fts5 b-tree structures.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 7b7da1eb435d321fc4283f6aa2161fa1e16f2cf3
User & Date: dan 2015-07-06 20:27:19.997
Context
2015-07-07
08:29
Further optimizations for fts5 b-tree seeks. (check-in: f37899686c user: dan tags: trunk)
2015-07-06
20:27
Speed up seek operations on fts5 b-tree structures. (check-in: 7b7da1eb43 user: dan tags: trunk)
2015-07-05
22:15
Do not allow recursive CTEs that use aggregate queries in the recursive part. (check-in: 6d2999afbc user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_index.c.
1575
1576
1577
1578
1579
1580
1581

















1582
1583
1584
1585
1586
1587
1588
      p->rc = FTS5_CORRUPT;
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}


















/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the 
** "nSuffix" field of a term. Function parameter nKeep contains the value
** of the "nPrefix" field (if there was one - it is passed 0 if this is
** the first term in the segment).
**







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
      p->rc = FTS5_CORRUPT;
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}

static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;

  if( iOff>=pIter->pLeaf->n ){
    fts5SegIterNextPage(p, pIter);
    if( pIter->pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    iOff = 4;
    a = pIter->pLeaf->p;
  }
  iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iOff;
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the 
** "nSuffix" field of a term. Function parameter nKeep contains the value
** of the "nPrefix" field (if there was one - it is passed 0 if this is
** the first term in the segment).
**
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626

  iOff += fts5GetVarint32(&a[iOff], nNew);
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  if( iOff>=pIter->pLeaf->n ){
    fts5SegIterNextPage(p, pIter);
    if( pIter->pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    iOff = 4;
    a = pIter->pLeaf->p;
  }
  iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iOff;
}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when 
** this function returns.
**







<
<
|
<
<
|
<
|
<
<
<







1619
1620
1621
1622
1623
1624
1625


1626


1627

1628



1629
1630
1631
1632
1633
1634
1635

  iOff += fts5GetVarint32(&a[iOff], nNew);
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;


  pIter->iLeafOffset = iOff;




  fts5SegIterLoadRowid(p, pIter);



}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when 
** this function returns.
**
2119
2120
2121
2122
2123
2124
2125



























































































































2126
2127
2128
2129
2130
2131
2132
    *pbDlidx = 0;
    pPtr += nNew;
  }

  fts5AssertNodeSeekOk(pNode, pTerm, nTerm, iPg, *pbDlidx);
  return iPg;
}




























































































































/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
** an error has already occurred when this function is called, it is a no-op.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
    *pbDlidx = 0;
    pPtr += nNew;
  }

  fts5AssertNodeSeekOk(pNode, pTerm, nTerm, iPg, *pbDlidx);
  return iPg;
}

/*
** The iterator object passed as the second argument currently contains
** no valid values except for the Fts5SegIter.pLeaf member variable. This
** function searches the leaf page for a term matching (pTerm/nTerm).
**
*/
static void fts5LeafSeek(
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  int iOff;
  const u8 *a = pIter->pLeaf->p;
  int n = pIter->pLeaf->n;

  int nMatch = 0;
  int nKeep = 0;
  int nNew = 0;

  assert( p->rc==SQLITE_OK );
  assert( pIter->pLeaf );

  iOff = fts5GetU16(&a[2]);
  if( iOff<4 || iOff>=n ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  while( 1 ){
    int i;
    int nCmp;
    i64 rowid;

    /* Figure out how many new bytes are in this term */

    nNew = a[iOff++];
    if( nNew & 0x80 ){ 
      iOff--;
      iOff += fts5GetVarint32(&a[iOff], nNew);
    }

    if( nKeep<nMatch ){
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){
      nCmp = MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( nTerm==nMatch ){
        if( i==nNew ){
          goto search_success;
        }else{
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        goto search_failed;
      }
    }
    iOff += nNew;

    /* Skip past the doclist. If the end of the page is reached, bail out. */
    iOff += fts5GetVarint(&a[iOff], &rowid);
    while( iOff<n ){
      int nPos;

      iOff += fts5GetVarint32(&a[iOff], nPos);
      iOff += (nPos / 2);

      /* Skip past docid delta */
      iOff += fts5GetVarint(&a[iOff], &rowid);
      if( rowid==0 ) break;
    };
    if( iOff>=n ) goto search_failed;

    /* Read the nKeep field of the next term. */
    nKeep = a[iOff++];
    if( nKeep & 0x80 ){
      iOff--;
      iOff += fts5GetVarint32(&a[iOff], nKeep);
    }
  }

 search_failed:
  if( bGe==0 ){
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( iOff>=n ){
    do {
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      iOff = fts5GetU16(&a[2]);
      if( iOff ){
        if( iOff<4 || iOff>=n ){
          p->rc = FTS5_CORRUPT;
        }else{
          nKeep = 0;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

 search_success:
  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);

  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
** an error has already occurred when this function is called, it is a no-op.
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
    iPg = pSeg->pgnoFirst;
    bDlidx = 0;
  }

  pIter->iLeafPgno = iPg - 1;
  fts5SegIterNextPage(p, pIter);

  if( (pLeaf = pIter->pLeaf) ){
    int res;
    pIter->iLeafOffset = fts5GetU16(&pLeaf->p[2]);
    if( pIter->iLeafOffset<4 || pIter->iLeafOffset>=pLeaf->n ){
      p->rc = FTS5_CORRUPT;
    }else{
      fts5SegIterLoadTerm(p, pIter, 0);
      fts5SegIterLoadNPos(p, pIter);
      do {
        res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm);
        if( res>=0 ) break;
        fts5SegIterNext(p, pIter, 0);
      }while( pIter->pLeaf && p->rc==SQLITE_OK );

      if( bGe==0 && res ){
        /* Set iterator to point to EOF */
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
      }
    }
  }

  if( p->rc==SQLITE_OK && bGe==0 ){
    pIter->flags |= FTS5_SEGITER_ONETERM;
    if( pIter->pLeaf ){
      if( flags & FTS5INDEX_QUERY_DESC ){
        pIter->flags |= FTS5_SEGITER_REVERSE;







|
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<







2296
2297
2298
2299
2300
2301
2302
2303












2304






2305
2306
2307
2308
2309
2310
2311
    iPg = pSeg->pgnoFirst;
    bDlidx = 0;
  }

  pIter->iLeafPgno = iPg - 1;
  fts5SegIterNextPage(p, pIter);

  if( pIter->pLeaf ){












    fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);






  }

  if( p->rc==SQLITE_OK && bGe==0 ){
    pIter->flags |= FTS5_SEGITER_ONETERM;
    if( pIter->pLeaf ){
      if( flags & FTS5INDEX_QUERY_DESC ){
        pIter->flags |= FTS5_SEGITER_REVERSE;
Changes to ext/fts5/test/fts5aa.test.
46
47
48
49
50
51
52


53








54
55
56
57
58
59
60
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_test 2.2 {
  execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}


do_execsql_test 2.3 {








  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 3.0 {







>
>
|
>
>
>
>
>
>
>
>







46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_test 2.2 {
  execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}

foreach w {a b c d e f} {
  do_execsql_test 2.3.$w.asc {
    SELECT rowid FROM t1 WHERE t1 MATCH $w;
  } {1}
  do_execsql_test 2.3.$w.desc {
    SELECT rowid FROM t1 WHERE t1 MATCH $w ORDER BY rowid DESC;
  } {1}
}

do_execsql_test 2.4 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 3.0 {
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
#exit

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);







<
<







196
197
198
199
200
201
202


203
204
205
206
207
208
209
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
}



#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
Changes to ext/fts5/test/fts5ac.test.
200
201
202
203
204
205
206

207
208
209
210
211
212
213

  do_test $tn2.1.1 {
    foreach {id x y} $data {
      execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) }
    }
    execsql { INSERT INTO xx(xx) VALUES('integrity-check') }
  } {}


  #-------------------------------------------------------------------------
  # Test phrase queries.
  #
  foreach {tn phrase} {
    1 "o"
    2 "b q"







>







200
201
202
203
204
205
206
207
208
209
210
211
212
213
214

  do_test $tn2.1.1 {
    foreach {id x y} $data {
      execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) }
    }
    execsql { INSERT INTO xx(xx) VALUES('integrity-check') }
  } {}


  #-------------------------------------------------------------------------
  # Test phrase queries.
  #
  foreach {tn phrase} {
    1 "o"
    2 "b q"
Changes to ext/fts5/test/fts5ad.test.
200
201
202
203
204
205
206
207

208
209
210
211
212
213
214
215
216
217
          break
        }
      }
      if {$bMatch} { lappend ret $rowid }
    }
    return $ret
  }
  

  foreach {bAsc sql} {
    0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
    1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
  } {
    foreach {tn prefix} {
      1  {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} 
      6  {f*} 7 {fg*} 8 {fgh*} 9 {fghi*} 10 {fghij*}
      11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*}
      16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*}
      21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*}







|
>

|
|







200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
          break
        }
      }
      if {$bMatch} { lappend ret $rowid }
    }
    return $ret
  }

  
  foreach {bAsc sql} {
    1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
    0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
  } {
    foreach {tn prefix} {
      1  {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} 
      6  {f*} 7 {fg*} 8 {fgh*} 9 {fghi*} 10 {fghij*}
      11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*}
      16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*}
      21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*}
Changes to ext/fts5/test/fts5alter.test.
77
78
79
80
81
82
83

















84
85
86
} {-56 -22 -11}

do_execsql_test 2.3 {
  ROLLBACK;
  SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
} {-56 -22}



















finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
} {-56 -22 -11}

do_execsql_test 2.3 {
  ROLLBACK;
  SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
} {-56 -22}

#-------------------------------------------------------------------------

do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE abc USING fts5(a);
  INSERT INTO abc(rowid, a) VALUES(1, 'a');
  BEGIN;
    INSERT INTO abc(rowid, a) VALUES(2, 'a');
}
breakpoint
do_execsql_test 3.2 {
    SELECT rowid FROM abc WHERE abc MATCH 'a';
} {1 2}

do_execsql_test 3.3 {
  COMMIT;
  SELECT rowid FROM abc WHERE abc MATCH 'a';
} {1 2}

finish_test