SQLite

Check-in [4ea015ab98]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix an fts5 problem in extracting columns from position lists containing large varints.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 4ea015ab983300d420ef104cca550b22a6395866
User & Date: dan 2015-06-03 11:23:30.476
Context
2015-06-05
19:05
Make use of range constraints on the rowid field of an fts5 table in full-text queries. (check-in: 32cbc0ed36 user: dan tags: fts5)
2015-06-03
11:23
Fix an fts5 problem in extracting columns from position lists containing large varints. (check-in: 4ea015ab98 user: dan tags: fts5)
2015-06-02
19:38
Change the fts5 multi-column syntax to use parenthesis instead of square brackets. (check-in: ab85a6fc4f user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_expr.c.
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680

681
682
683
684
685
686
687
688
  const u8 *p = *pa;
  const u8 *pEnd = &p[n];         /* One byte past end of position list */
  u8 prev = 0;

  while( iCol!=iCurrent ){
    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
    ** not part of a varint */
    while( !(prev & 0x80) && *p!=0x01 ){
      prev = *p++;
      if( p==pEnd ) return 0;
    }
    *pa = p++;
    p += fts5GetVarint32(p, iCurrent);
  }

  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  ** not part of a varint */

  while( p<pEnd && !(prev & 0x80) && *p!=0x01 ){
    prev = *p++;
  }
  return p - (*pa);
}

static int fts5ExprExtractColset (
  Fts5ExprColset *pColset,        /* Colset to filter on */







|









>
|







664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
  const u8 *p = *pa;
  const u8 *pEnd = &p[n];         /* One byte past end of position list */
  u8 prev = 0;

  while( iCol!=iCurrent ){
    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
    ** not part of a varint */
    while( (prev & 0x80) || *p!=0x01 ){
      prev = *p++;
      if( p==pEnd ) return 0;
    }
    *pa = p++;
    p += fts5GetVarint32(p, iCurrent);
  }

  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  ** not part of a varint */
  assert( (prev & 0x80)==0 );
  while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
    prev = *p++;
  }
  return p - (*pa);
}

static int fts5ExprExtractColset (
  Fts5ExprColset *pColset,        /* Colset to filter on */
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
static int fts5ExprNearTest(
  int *pRc,
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){
  Fts5ExprNearset *pNear = pNode->pNear;
  int rc = *pRc;

  if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){
    /* If this "NEAR" object is actually a single phrase that consists 
    ** of a single term only, then grab pointers into the poslist
    ** managed by the fts5_index.c iterator object. This is much faster 
    ** than synthesizing a new poslist the way we have to for more
    ** complicated phrase or NEAR expressions.  */
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
    Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
    Fts5ExprColset *pColset = pNear->pColset;
    const u8 *pPos;
    int nPos;

    if( rc!=SQLITE_OK ) return 0;
    rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);

    /* If the term may match any column, then this must be a match. 
    ** Return immediately in this case. Otherwise, try to find the
    ** part of the poslist that corresponds to the required column.
    ** If it can be found, return. If it cannot, the next iteration
    ** of the loop will test the next rowid in the database for this
    ** term.  */
    if( pColset==0 ){
      assert( pPhrase->poslist.nSpace==0 );
      pPhrase->poslist.p = (u8*)pPos;
      pPhrase->poslist.n = nPos;
    }else if( pColset->nCol==1 ){
      assert( pPhrase->poslist.nSpace==0 );
      pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]);
      pPhrase->poslist.p = (u8*)pPos;
    }else if( rc==SQLITE_OK ){
      rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist);
    }

    *pRc = rc;
    return (pPhrase->poslist.n>0);
  }else{
    int i;

    /* Check that each phrase in the nearset matches the current row.
    ** Populate the pPhrase->poslist buffers at the same time. If any
    ** phrase is not a match, break out of the loop early.  */
    for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      if( pPhrase->nTerm>1 || pNear->pColset ){
        int bMatch = 0;
        rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
        if( bMatch==0 ) break;
      }else{
        rc = sqlite3Fts5IterPoslistBuffer(
            pPhrase->aTerm[0].pIter, &pPhrase->poslist
        );
      }
    }

    *pRc = rc;
    if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
      return 1;
    }
  }

  return 0;
}

static int fts5ExprTokenTest(
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
<







707
708
709
710
711
712
713





































714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734

735
736
737
738
739
740
741
static int fts5ExprNearTest(
  int *pRc,
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){
  Fts5ExprNearset *pNear = pNode->pNear;
  int rc = *pRc;





































  int i;

  /* Check that each phrase in the nearset matches the current row.
  ** Populate the pPhrase->poslist buffers at the same time. If any
  ** phrase is not a match, break out of the loop early.  */
  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    if( pPhrase->nTerm>1 || pNear->pColset ){
      int bMatch = 0;
      rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
      if( bMatch==0 ) break;
    }else{
      rc = sqlite3Fts5IterPoslistBuffer(
          pPhrase->aTerm[0].pIter, &pPhrase->poslist
      );
    }
  }

  *pRc = rc;
  if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
    return 1;

  }

  return 0;
}

static int fts5ExprTokenTest(
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
  }else{
    if( iLhs>iRhs ) return -1;
    return (iLhs < iRhs);
  }
}

static void fts5ExprSetEof(Fts5ExprNode *pNode){
  if( pNode ){
    int i;
    pNode->bEof = 1;
    for(i=0; i<pNode->nChild; i++){
      fts5ExprSetEof(pNode->apChild[i]);
    }
  }
}

static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){
  if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
    Fts5ExprNearset *pNear = pNode->pNear;
    int i;







<
|
|
|
|
<







898
899
900
901
902
903
904

905
906
907
908

909
910
911
912
913
914
915
  }else{
    if( iLhs>iRhs ) return -1;
    return (iLhs < iRhs);
  }
}

static void fts5ExprSetEof(Fts5ExprNode *pNode){

  int i;
  pNode->bEof = 1;
  for(i=0; i<pNode->nChild; i++){
    fts5ExprSetEof(pNode->apChild[i]);

  }
}

static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){
  if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
    Fts5ExprNearset *pNear = pNode->pNear;
    int i;
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569

1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588

1589
1590

1591
1592
1593
1594
1595
1596
1597
1598
1599

Fts5ExprColset *sqlite3Fts5ParseColset(
  Fts5Parse *pParse,              /* Store SQLITE_NOMEM here if required */
  Fts5ExprColset *pColset,        /* Existing colset object */
  Fts5Token *p
){
  Fts5ExprColset *pRet = 0;

  if( pParse->rc==SQLITE_OK ){
    int iCol;
    char *z = 0;
    int rc = fts5ParseStringFromToken(p, &z);

    if( rc==SQLITE_OK ){
      Fts5Config *pConfig = pParse->pConfig;
      sqlite3Fts5Dequote(z);
      for(iCol=0; iCol<pConfig->nCol; iCol++){
        if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ){
          break;
        }
      }
      if( iCol==pConfig->nCol ){
        sqlite3Fts5ParseError(pParse, "no such column: %s", z);
      }
      sqlite3_free(z);
    }else{
      pParse->rc = rc;
    }

    if( pParse->rc==SQLITE_OK ){
      pRet = fts5ParseColset(pParse, pColset, iCol);
    }

  }


  if( pParse->rc!=SQLITE_OK ){
    assert( pRet==0 );
    sqlite3_free(pColset);
  }

  return pRet;
}

void sqlite3Fts5ParseSetColset(







<
<
|
|
|
>
|
|
|
|
|
<
|
<
|
|
<
<

<
<
<
<


>


>
|
<







1519
1520
1521
1522
1523
1524
1525


1526
1527
1528
1529
1530
1531
1532
1533
1534

1535

1536
1537


1538




1539
1540
1541
1542
1543
1544
1545

1546
1547
1548
1549
1550
1551
1552

Fts5ExprColset *sqlite3Fts5ParseColset(
  Fts5Parse *pParse,              /* Store SQLITE_NOMEM here if required */
  Fts5ExprColset *pColset,        /* Existing colset object */
  Fts5Token *p
){
  Fts5ExprColset *pRet = 0;


  int iCol;
  char *z;                        /* Dequoted copy of token p */

  z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n);
  if( pParse->rc==SQLITE_OK ){
    Fts5Config *pConfig = pParse->pConfig;
    sqlite3Fts5Dequote(z);
    for(iCol=0; iCol<pConfig->nCol; iCol++){
      if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break;

    }

    if( iCol==pConfig->nCol ){
      sqlite3Fts5ParseError(pParse, "no such column: %s", z);


    }else{




      pRet = fts5ParseColset(pParse, pColset, iCol);
    }
    sqlite3_free(z);
  }

  if( pRet==0 ){
    assert( pParse->rc!=SQLITE_OK );

    sqlite3_free(pColset);
  }

  return pRet;
}

void sqlite3Fts5ParseSetColset(
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
        zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
      }

      if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
      if( zRet==0 ) return 0;
    }

    if( zRet==0 ) return 0;

  }else{
    char const *zOp = 0;
    int i;
    switch( pExpr->eType ){
      case FTS5_AND: zOp = "AND"; break;
      case FTS5_NOT: zOp = "NOT"; break;
      default: 







<
<







1720
1721
1722
1723
1724
1725
1726


1727
1728
1729
1730
1731
1732
1733
        zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
      }

      if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
      if( zRet==0 ) return 0;
    }



  }else{
    char const *zOp = 0;
    int i;
    switch( pExpr->eType ){
      case FTS5_AND: zOp = "AND"; break;
      case FTS5_NOT: zOp = "NOT"; break;
      default: 
Changes to ext/fts5/test/fts5auto.test.
228
229
230
231
232
233
234
235






236
237
238
239
240
241
242
243




244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f);
} {}

fts5_aux_test_functions db

proc matchdata {expr {order ASC}} {






  set tclexpr [db one {
    SELECT fts5_expr_tcl(
      $expr, 'nearset $cols -pc ::pc', 'a','b','c','d','e','f'
    )
  }]
  set res [list]

  db eval "SELECT rowid, * FROM tt ORDER BY rowid $order" {




    set cols [list $a $b $c $d $e $f]
    set ::pc 0
    set rowdata [eval $tclexpr]
    if {$rowdata != ""} { lappend res $rowid $rowdata }
  }

  set res
}

proc do_auto_test {tn expr} { 
  foreach order {asc desc} {
    set res [matchdata $expr $order]
    set testname "3.$tn.[string range $order 0 0].rows=[expr [llength $res]/2]"

    set ::autotest_expr $expr
    do_execsql_test $testname [subst -novar {
      SELECT rowid, fts5_test_poslist(tt) FROM tt 
      WHERE tt MATCH $::autotest_expr ORDER BY rowid [set order]
    }] $res
  }


}

#-------------------------------------------------------------------------







|
>
>
>
>
>
>
|

|

|


|
>
>
>
>
|


|





|

|
|



|
|







228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f);
} {}

fts5_aux_test_functions db

proc matchdata {expr tbl collist {order ASC}} {

  set cols ""
  foreach e $collist {
    append cols ", '$e'"
  }

  set tclexpr [db one [subst -novar {
    SELECT fts5_expr_tcl(
      $expr, 'nearset $cols -pc ::pc' [set cols]
    )
  }]]
  set res [list]

  db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x {
    set cols [list]
    foreach col $x(*) {
      if {$col != "rowid"} { lappend cols $x($col) }
    }
    # set cols [list $a $b $c $d $e $f]
    set ::pc 0
    set rowdata [eval $tclexpr]
    if {$rowdata != ""} { lappend res $x(rowid) $rowdata }
  }

  set res
}

proc do_auto_test {tn tbl cols expr} { 
  foreach order {asc desc} {
    set res [matchdata $expr $tbl $cols $order]
    set testname "$tn.[string range $order 0 0].rows=[expr [llength $res]/2]"

    set ::autotest_expr $expr
    do_execsql_test $testname [subst -novar {
      SELECT rowid, fts5_test_poslist([set tbl]) FROM [set tbl] 
      WHERE [set tbl] MATCH $::autotest_expr ORDER BY rowid [set order]
    }] $res
  }


}

#-------------------------------------------------------------------------
306
307
308
309
310
311
312

313
314
315
316
317
318
319
320
321
322
323
324
325
326







































327
328
329
    A.3 { {a b f} : x }
    A.4 { {f a b} : x }
    A.5 { {f a b} : x y }
    A.6 { {f a b} : x + y }
    A.7 { {c a b} : x + c }
    A.8 { {c d} : "l m" }
    A.9 { {c e} : "l m" }


    B.1 { a NOT b }
    B.2 { a NOT a:b }
    B.3 { a OR (b AND c) }
    B.4 { a OR (b AND {a b c}:c) }
    B.5 { a OR "b c" }
    B.6 { a OR b OR c }

    C.1 { a OR (b AND "b c") }
    C.2 { a OR (b AND "z c") }
  } {
    do_auto_test 3.$fold.$tn $expr
  }
}








































finish_test








>











|


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
    A.3 { {a b f} : x }
    A.4 { {f a b} : x }
    A.5 { {f a b} : x y }
    A.6 { {f a b} : x + y }
    A.7 { {c a b} : x + c }
    A.8 { {c d} : "l m" }
    A.9 { {c e} : "l m" }
    A.10 { {a b c a b c a b c f f e} : "l m" }

    B.1 { a NOT b }
    B.2 { a NOT a:b }
    B.3 { a OR (b AND c) }
    B.4 { a OR (b AND {a b c}:c) }
    B.5 { a OR "b c" }
    B.6 { a OR b OR c }

    C.1 { a OR (b AND "b c") }
    C.2 { a OR (b AND "z c") }
  } {
    do_auto_test 3.$fold.$tn tt {a b c d e f} $expr
  }
}

proc replace_elems {list args} {
  set ret $list
  foreach {idx elem} $args {
    set ret [lreplace $ret $idx $idx $elem]
  }
  set ret
}

#-------------------------------------------------------------------------
#
set bigdoc [string trim [string repeat "a " 1000]]
do_test 4.0 {
  set a [replace_elems $bigdoc  50 x  950 x]
  set b [replace_elems $bigdoc  20 y   21 x  887 x 888 y]
  set c [replace_elems $bigdoc   1 z  444 z  789 z]
  execsql {
    CREATE VIRTUAL TABLE yy USING fts5(c1, c2, c3);
    INSERT INTO yy(rowid, c1, c2, c3) VALUES(-56789, $a, $b, $c);
    INSERT INTO yy(rowid, c1, c2, c3) VALUES(250, $a, $b, $c);
  }
} {}

foreach {tn expr} {
  1 x    
  2 y    
  3 z

  4 {c1 : x} 5 {c2 : x} 6 {c3 : x}
  7 {c1 : y} 8 {c2 : y} 9 {c3 : y}
  10 {c1 : z} 11 {c2 : z} 12 {c3 : z}


} {
breakpoint
  do_auto_test 4.$tn yy {c1 c2 c3} $expr
}



finish_test

Changes to ext/fts5/test/fts5fault4.test.
366
367
368
369
370
371
372




























373
374
375
  faultsim_restore_and_reopen
  db eval { SELECT * FROM vv }
} -body {
  db eval { SELECT * FROM vv }
} -test {
  faultsim_test_result {0 {a 1 1 b 1 1}} 
}





























finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
  faultsim_restore_and_reopen
  db eval { SELECT * FROM vv }
} -body {
  db eval { SELECT * FROM vv }
} -test {
  faultsim_test_result {0 {a 1 1 b 1 1}} 
}

#-------------------------------------------------------------------------
# OOM in multi-column token query.
#
reset_db
do_execsql_test 13.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, y, z);
  INSERT INTO ft(ft, rank) VALUES('pgsz', 32);
  INSERT INTO ft VALUES(
      'x x x x x x x x x x x x x x x x',
      'y y y y y y y y y y y y y y y y',
      'z z z z z z z z x x x x x x x x'
  );
  INSERT INTO ft SELECT * FROM ft;
  INSERT INTO ft SELECT * FROM ft;
  INSERT INTO ft SELECT * FROM ft;
  INSERT INTO ft SELECT * FROM ft;
}
faultsim_save_and_close
do_faultsim_test 13.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
  db eval { SELECT * FROM ft }
} -body {
  db eval { SELECT rowid FROM ft WHERE ft MATCH '{x z}: x' }
} -test {
  faultsim_test_result {0 {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16}}
}


finish_test