SQLite

Check-in [45c051e786]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improvements to the way fts3 reads the full-text index.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts3-refactor
Files: files | file ages | folders
SHA1: 45c051e78651d8204c17cecdda2bde705698881f
User & Date: dan 2009-11-17 12:52:10.000
Context
2009-11-18
15:35
Add some missing comments and fix some other issues in fts3 code. (check-in: 2fe579e778 user: dan tags: fts3-refactor)
2009-11-17
12:52
Improvements to the way fts3 reads the full-text index. (check-in: 45c051e786 user: dan tags: fts3-refactor)
2009-11-16
16:36
Add a few extra coverage test cases for fts3. (check-in: f29c8fcade user: dan tags: fts3-refactor)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
*/
/*
** Read a single block from the %_segments table.
*/
static int fts3ReadBlock(
  Fts3Table *p,
  sqlite3_int64 iBlock,
  char **pzBlock,
  int *pnBlock
){
  sqlite3_stmt *pStmt;
  int rc = sqlite3Fts3SqlStmt(p, FTS3_SQL_GET_BLOCK, &pStmt);
  if( rc!=SQLITE_OK ) return rc;
  sqlite3_reset(pStmt);








|







901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
*/
/*
** Read a single block from the %_segments table.
*/
static int fts3ReadBlock(
  Fts3Table *p,
  sqlite3_int64 iBlock,
  char const **pzBlock,
  int *pnBlock
){
  sqlite3_stmt *pStmt;
  int rc = sqlite3Fts3SqlStmt(p, FTS3_SQL_GET_BLOCK, &pStmt);
  if( rc!=SQLITE_OK ) return rc;
  sqlite3_reset(pStmt);

924
925
926
927
928
929
930
931
932
933
934
935
936
937





938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958

959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994

995
996
997







998


999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024

1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
  if( !*pzBlock ){
    return SQLITE_NOMEM;
  }
  return SQLITE_OK;
}

/*
** The buffer pointed to by argument zNode (size nNode bytes) contains a
** b-tree segment interior node. This function inspects the sub-tree headed
** by the node to determine the range of leaf-nodes (if any) that may 
** contain a term that matches the contents of buffer zTerm (size nTerm 
** bytes). If the isPrefix parameter is true, then the range of leaves
** returned are those that may contain any term for which zTerm/nTerm is
** a prefix.





**
** If successful, SQLITE_OK is returned. The blockid of the first leaf in the
** selected range is written to piStart before returning. The blockid of the
** final leaf in the selected range is written to *piEnd.
*/ 
static int fts3SelectLeaves(
  Fts3Table *p,                   /* Virtual table handle */
  const char *zTerm,              /* Term to select leaves for */
  int nTerm,                      /* Size of term zTerm in bytes */
  int isPrefix,                   /* True for a prefix search */
  const char *zNode,              /* Buffer containing segment interior node */
  int nNode,                      /* Size of buffer at zNode */
  sqlite3_int64 *piStart,         /* First selected leaf */
  sqlite3_int64 *piEnd            /* Second selected leaf */
){
  int rc = SQLITE_OK;             /* Return code */
  const char *zCsr = zNode;       /* Cursor to iterate through node */
  const char *zEnd = &zCsr[nNode];/* End of interior node buffer */
  char *zBuffer = 0;              /* Buffer to load terms into */
  int nAlloc = 0;                 /* Size of allocated buffer */


  int iHeight;                    /* Height of this node in tree */
  sqlite3_int64 iChild;
  sqlite3_int64 iStart = 0;
  sqlite3_int64 iEnd;

  zCsr += sqlite3Fts3GetVarint32(zCsr, &iHeight);
  zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);

  while( zCsr<zEnd ){
    int nSuffix;                  /* Size of term suffix */
    int nPrefix = 0;              /* Size of term prefix */
    int nBuffer;                  /* Total term size */
    int nMin;                     /* Minimum of nBuffer and nTerm */
    int cmp;                      /* Result of comparing term and buffer */

    /* Load the next term on the node into zBuffer */
    if( zBuffer ){
      zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix);
    }
    zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix);
    if( nPrefix+nSuffix>nAlloc ){
      char *zNew;
      nAlloc = (nPrefix+nSuffix) * 2;
      zNew = (char *)sqlite3_realloc(zBuffer, nAlloc);
      if( !zNew ){
        sqlite3_free(zBuffer);
        return SQLITE_NOMEM;
      }
      zBuffer = zNew;
    }
    memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
    nBuffer = nPrefix + nSuffix;
    zCsr += nSuffix;

    /* Compare the term we are searching for with the term just loaded from
    ** the interior node. If variable cmp is greater than or equal to zero, 

    ** then all terms on the sub-tree headed by node iChild are smaller than 
    ** zTerm. No need to search iChild.
    **







    ** If variable cmp is less than zero, then the sub-tree headed by 


    */
    nMin = (nBuffer>nTerm ? nTerm : nBuffer);
    cmp = memcmp(zTerm, zBuffer, nMin);
    if( isPrefix && cmp==0 && iStart==0 ){
      iStart = iChild;
    }
    if( cmp<0 ) break;
    iChild++;
  };
  iEnd = iChild;
  if( iStart==0 ) iStart = iChild;
  sqlite3_free(zBuffer);

  if( iHeight==1 ){
    if( piEnd ) *piEnd = iEnd;
    if( piStart ) *piStart = iStart;
  }else{
    char *zBlock;
    int nBlock;
    if( piEnd ){
      rc = fts3ReadBlock(p, iEnd, &zBlock, &nBlock);
      if( rc==SQLITE_OK ){
        rc = fts3SelectLeaves(p,zTerm,nTerm,isPrefix,zBlock,nBlock,0,piEnd);
      }
    }
    if( piStart && rc==SQLITE_OK ){

      rc = fts3ReadBlock(p, iStart, &zBlock, &nBlock);
      if( rc==SQLITE_OK ){
        rc = fts3SelectLeaves(p,zTerm,nTerm,isPrefix,zBlock,nBlock,piStart,0);
      }
    }
  }

  return rc;
}

static void fts3PutDeltaVarint(
  char **pp, 
  sqlite3_int64 *piPrev, 
  sqlite3_int64 iVal







|
|
|
|
|
|
|
>
>
>
>
>

|
<
<

|



<


|
<







>
|
|
<
|

|
|
|
|
|
|
|
|
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
|
|
>
>
>
>
>
>
>
|
>
>

<
<
<
<
<
<
<
<
<
<
<
<
|
|
<
<
<
|
<
<
<
<
|
|
<
>
|
|
|
|
<
<
|







924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944


945
946
947
948
949

950
951
952

953
954
955
956
957
958
959
960
961
962

963
964
965
966
967
968
969
970
971
972

973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009












1010
1011



1012




1013
1014

1015
1016
1017
1018
1019


1020
1021
1022
1023
1024
1025
1026
1027
  if( !*pzBlock ){
    return SQLITE_NOMEM;
  }
  return SQLITE_OK;
}

/*
** The buffer pointed to by argument zNode (size nNode bytes) contains the
** root node of a b-tree segment. The segment is guaranteed to be at least
** one level high (i.e. the root node is not also a leaf). If successful,
** this function locates the leaf node of the segment that may contain the 
** term specified by arguments zTerm and nTerm and writes its block number 
** to *piLeaf.
**
** It is possible that the returned leaf node does not contain the specified
** term. However, if the segment does contain said term, it is stored on
** the identified leaf node. Because this function only inspects interior
** segment nodes (and never loads leaf nodes into memory), it is not possible
** to be sure.
**
** If an error occurs, an error code other than SQLITE_OK is returned.


*/ 
static int fts3SelectLeaf(
  Fts3Table *p,                   /* Virtual table handle */
  const char *zTerm,              /* Term to select leaves for */
  int nTerm,                      /* Size of term zTerm in bytes */

  const char *zNode,              /* Buffer containing segment interior node */
  int nNode,                      /* Size of buffer at zNode */
  sqlite3_int64 *piLeaf           /* Selected leaf node */

){
  int rc = SQLITE_OK;             /* Return code */
  const char *zCsr = zNode;       /* Cursor to iterate through node */
  const char *zEnd = &zCsr[nNode];/* End of interior node buffer */
  char *zBuffer = 0;              /* Buffer to load terms into */
  int nAlloc = 0;                 /* Size of allocated buffer */

  while( 1 ){
    int iHeight;                  /* Height of this node in tree */
    sqlite3_int64 iChild;         /* Block id of child node to descend to */

    int nBlock;                   /* Size of child node in bytes */

    zCsr += sqlite3Fts3GetVarint32(zCsr, &iHeight);
    zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
  
    while( zCsr<zEnd ){
      int nSuffix;                /* Size of term suffix */
      int nPrefix = 0;            /* Size of term prefix */
      int nBuffer;                /* Total term size */
      int nMin;                   /* Minimum of nBuffer and nTerm */

  
      /* Load the next term on the node into zBuffer */
      if( zBuffer ){
        zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix);
      }
      zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix);
      if( nPrefix+nSuffix>nAlloc ){
        char *zNew;
        nAlloc = (nPrefix+nSuffix) * 2;
        zNew = (char *)sqlite3_realloc(zBuffer, nAlloc);
        if( !zNew ){
          sqlite3_free(zBuffer);
          return SQLITE_NOMEM;
        }
        zBuffer = zNew;
      }
      memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
      nBuffer = nPrefix + nSuffix;
      zCsr += nSuffix;
  
      /* Compare the term we are searching for with the term just loaded from
      ** the interior node. If the specified term is greater than or equal
      ** to the term from the interior node, then all terms on the sub-tree 
      ** headed by node iChild are smaller than zTerm. No need to search 
      ** iChild.
      **
      ** If the interior node term is larger than the specified term, then
      ** the tree headed by iChild may contain the specified term.
      */
      nMin = (nBuffer>nTerm ? nTerm : nBuffer);
      if( memcmp(zTerm, zBuffer, nMin)<0 ) break;
      iChild++;
    };

    /* If (iHeight==1), the children of this interior node are leaves. The
    ** specified term may be present on leaf node iChild.
    */












    if( iHeight==1 ){
      *piLeaf = iChild;



      break;




    }


    /* Descend to interior node iChild. */
    rc = fts3ReadBlock(p, iChild, &zCsr, &nBlock);
    if( rc!=SQLITE_OK ) break;
    zEnd = &zCsr[nBlock];
  }


  sqlite3_free(zBuffer);
  return rc;
}

static void fts3PutDeltaVarint(
  char **pp, 
  sqlite3_int64 *piPrev, 
  sqlite3_int64 iVal
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
      if( mergetype==MERGE_POS_NEAR ){
        ppPos = &p;
        aTmp = sqlite3_malloc(2*(n1+n2));
        if( !aTmp ){
          return SQLITE_NOMEM;
        }
      }
      (mergetype==MERGE_NEAR ? 0 : &p);

      while( p1 && p2 ){
        if( i1==i2 ){
          char *pSave = p;
          sqlite3_int64 iPrevSave = iPrev;
          fts3PutDeltaVarint(&p, &iPrev, i1);








<







1394
1395
1396
1397
1398
1399
1400

1401
1402
1403
1404
1405
1406
1407
      if( mergetype==MERGE_POS_NEAR ){
        ppPos = &p;
        aTmp = sqlite3_malloc(2*(n1+n2));
        if( !aTmp ){
          return SQLITE_NOMEM;
        }
      }


      while( p1 && p2 ){
        if( i1==i2 ){
          char *pSave = p;
          sqlite3_int64 iPrevSave = iPrev;
          fts3PutDeltaVarint(&p, &iPrev, i1);

1440
1441
1442
1443
1444
1445
1446




1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470

1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
      assert(!"Invalid mergetype value passed to fts3DoclistMerge()");
  }

  *pnBuffer = (p-aBuffer);
  return SQLITE_OK;
}





typedef struct TermSelect TermSelect;
struct TermSelect {
  char const *zTerm;
  int nTerm;
  int isPrefix;
  int isReqPos;
  char *aOutput;                  /* Malloc'd output buffer */
  int nOutput;                    /* Size of output in bytes */
};

static int fts3TermSelectCb(
  Fts3Table *p,
  void *pContext,
  char *zTerm,
  int nTerm,
  char *aDoclist,
  int nDoclist
){
  TermSelect *pTS = (TermSelect *)pContext;

  if( (pTS->nTerm==nTerm || (pTS->isPrefix && pTS->nTerm<nTerm))
   && 0==memcmp(zTerm, pTS->zTerm, pTS->nTerm) 
  ){
    int nNew = pTS->nOutput + nDoclist;

    char *aNew = sqlite3_malloc(nNew);
    if( !aNew ){
      return SQLITE_NOMEM;
    }

    if( pTS->nOutput==0 ){
      /* If this is the first term selected, copy the doclist to the output
      ** buffer using memcpy(). TODO: Add a way to transfer control of the
      ** aDoclist buffer from the caller so as to avoid the memcpy().
      */
      memcpy(aNew, aDoclist, nDoclist);
    }else{
      /* The output buffer is not empty. Merge doclist aDoclist with the
      ** existing output. This can only happen with prefix-searches (as
      ** searches for exact terms return exactly one doclist).
      */
      int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);
      assert( pTS->isPrefix );
      fts3DoclistMerge(mergetype, 0, 0,
          aNew, &nNew, pTS->aOutput, pTS->nOutput, aDoclist, nDoclist
      );
    }

    sqlite3_free(pTS->aOutput);
    pTS->aOutput = aNew;
    pTS->nOutput = nNew;
  }

  return SQLITE_OK;
}

/*
** This function retreives the doclist for the specified term (or term
** prefix) from the database. 







>
>
>
>


<
<
<






|
|






<
<
<
<
|
>
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
<
|
|
|
|

|
|
|
<







1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440



1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454




1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473

1474
1475
1476
1477
1478
1479
1480
1481

1482
1483
1484
1485
1486
1487
1488
      assert(!"Invalid mergetype value passed to fts3DoclistMerge()");
  }

  *pnBuffer = (p-aBuffer);
  return SQLITE_OK;
}

/* 
** A pointer to an instance of this structure is used as the context 
** argument to sqlite3Fts3SegReaderIterate()
*/
typedef struct TermSelect TermSelect;
struct TermSelect {



  int isReqPos;
  char *aOutput;                  /* Malloc'd output buffer */
  int nOutput;                    /* Size of output in bytes */
};

static int fts3TermSelectCb(
  Fts3Table *p,                   /* Virtual table object */
  void *pContext,                 /* Pointer to TermSelect structure */
  char *zTerm,
  int nTerm,
  char *aDoclist,
  int nDoclist
){
  TermSelect *pTS = (TermSelect *)pContext;




  int nNew = pTS->nOutput + nDoclist;

  char *aNew = sqlite3_malloc(nNew);
  if( !aNew ){
    return SQLITE_NOMEM;
  }

  if( pTS->nOutput==0 ){
    /* If this is the first term selected, copy the doclist to the output
    ** buffer using memcpy(). TODO: Add a way to transfer control of the
    ** aDoclist buffer from the caller so as to avoid the memcpy().
    */
    memcpy(aNew, aDoclist, nDoclist);
  }else{
    /* The output buffer is not empty. Merge doclist aDoclist with the
    ** existing output. This can only happen with prefix-searches (as
    ** searches for exact terms return exactly one doclist).
    */
    int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);

    fts3DoclistMerge(mergetype, 0, 0,
        aNew, &nNew, pTS->aOutput, pTS->nOutput, aDoclist, nDoclist
    );
  }

  sqlite3_free(pTS->aOutput);
  pTS->aOutput = aNew;
  pTS->nOutput = nNew;


  return SQLITE_OK;
}

/*
** This function retreives the doclist for the specified term (or term
** prefix) from the database. 
1518
1519
1520
1521
1522
1523
1524

1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
  int isPrefix,                   /* True for a prefix search */
  int isReqPos,                   /* True to include position lists in output */
  int *pnOut,                     /* OUT: Size of buffer at *ppOut */
  char **ppOut                    /* OUT: Malloced result buffer */
){
  int i;
  TermSelect tsc;

  Fts3SegReader **apSegment = 0;  /* Array of segments to read data from */
  int nSegment = 0;               /* Size of apSegment array */
  int nAlloc = 0;                 /* Allocated size of segment array */
  int rc;                         /* Return code */
  sqlite3_stmt *pStmt;            /* SQL statement to scan %_segdir table */
  int iAge = 0;                   /* Used to assign ages to segments */
  int flags;

  /* Loop through the entire %_segdir table. For each segment, create a
  ** Fts3SegReader to iterate through the subset of the segment leaves
  ** that may contain a term that matches zTerm/nTerm. For non-prefix
  ** searches, this is always a single leaf. For prefix searches, this
  ** may be a contiguous block of leaves.
  **







>






<







1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515

1516
1517
1518
1519
1520
1521
1522
  int isPrefix,                   /* True for a prefix search */
  int isReqPos,                   /* True to include position lists in output */
  int *pnOut,                     /* OUT: Size of buffer at *ppOut */
  char **ppOut                    /* OUT: Malloced result buffer */
){
  int i;
  TermSelect tsc;
  Fts3SegFilter filter;           /* Segment term filter configuration */
  Fts3SegReader **apSegment = 0;  /* Array of segments to read data from */
  int nSegment = 0;               /* Size of apSegment array */
  int nAlloc = 0;                 /* Allocated size of segment array */
  int rc;                         /* Return code */
  sqlite3_stmt *pStmt;            /* SQL statement to scan %_segdir table */
  int iAge = 0;                   /* Used to assign ages to segments */


  /* Loop through the entire %_segdir table. For each segment, create a
  ** Fts3SegReader to iterate through the subset of the segment leaves
  ** that may contain a term that matches zTerm/nTerm. For non-prefix
  ** searches, this is always a single leaf. For prefix searches, this
  ** may be a contiguous block of leaves.
  **
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
    if( sqlite3_column_int64(pStmt, 1)==0 ){
      /* The entire segment is stored on the root node (which must be a
      ** leaf). Do not bother inspecting any data in this case, just
      ** create a Fts3SegReader to scan the single leaf. 
      */
      rc = sqlite3Fts3SegReaderNew(p, iAge, 0, 0, 0, zRoot, nRoot, &pNew);
    }else{
      sqlite3_int64 i1, i2;
      rc = fts3SelectLeaves(p, zTerm, nTerm, isPrefix, zRoot, nRoot, &i1, &i2);
      if( rc==SQLITE_OK ){
        assert( i1 && i2 );
        rc = sqlite3Fts3SegReaderNew(p, iAge, i1, i2, 0, 0, 0, &pNew);
      }
    }
    iAge++;

    /* If a new Fts3SegReader was allocated, add it to the apSegment array. */
    assert( (rc==SQLITE_OK)==(pNew!=0) );







|
|

|







1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
    if( sqlite3_column_int64(pStmt, 1)==0 ){
      /* The entire segment is stored on the root node (which must be a
      ** leaf). Do not bother inspecting any data in this case, just
      ** create a Fts3SegReader to scan the single leaf. 
      */
      rc = sqlite3Fts3SegReaderNew(p, iAge, 0, 0, 0, zRoot, nRoot, &pNew);
    }else{
      sqlite3_int64 i1;
      rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &i1);
      if( rc==SQLITE_OK ){
        sqlite3_int64 i2 = sqlite3_column_int64(pStmt, 3);
        rc = sqlite3Fts3SegReaderNew(p, iAge, i1, i2, 0, 0, 0, &pNew);
      }
    }
    iAge++;

    /* If a new Fts3SegReader was allocated, add it to the apSegment array. */
    assert( (rc==SQLITE_OK)==(pNew!=0) );
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593

1594
1595



1596
1597
1598
1599
1600
1601
1602
1603
1604
  }
  if( rc!=SQLITE_DONE ){
    assert( rc!=SQLITE_OK );
    goto finished;
  }

  memset(&tsc, 0, sizeof(TermSelect));
  tsc.zTerm = zTerm;
  tsc.nTerm = nTerm;
  tsc.isPrefix = isPrefix;
  tsc.isReqPos = isReqPos;

  flags = FTS3_SEGMENT_IGNORE_EMPTY 

        | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0)
        | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);



  rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, flags,
      iColumn, fts3TermSelectCb, (void *)&tsc
  );

  if( rc==SQLITE_OK ){
    *ppOut = tsc.aOutput;
    *pnOut = tsc.nOutput;
  }else{
    sqlite3_free(tsc.aOutput);







<
<
<


|
>


>
>
>
|
|







1565
1566
1567
1568
1569
1570
1571



1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
  }
  if( rc!=SQLITE_DONE ){
    assert( rc!=SQLITE_OK );
    goto finished;
  }

  memset(&tsc, 0, sizeof(TermSelect));



  tsc.isReqPos = isReqPos;

  filter.flags = FTS3_SEGMENT_IGNORE_EMPTY 
        | (isPrefix ? FTS3_SEGMENT_PREFIX : 0)
        | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0)
        | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
  filter.iCol = iColumn;
  filter.zTerm = zTerm;
  filter.nTerm = nTerm;
  rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter,
      fts3TermSelectCb, (void *)&tsc
  );

  if( rc==SQLITE_OK ){
    *ppOut = tsc.aOutput;
    *pnOut = tsc.nOutput;
  }else{
    sqlite3_free(tsc.aOutput);
Changes to ext/fts3/fts3Int.h.
54
55
56
57
58
59
60

61
62
63
64
65
66
67
#define FTS3_VARINT_MAX 10

typedef struct Fts3Table Fts3Table;
typedef struct Fts3Cursor Fts3Cursor;
typedef struct Fts3Expr Fts3Expr;
typedef struct Fts3Phrase Fts3Phrase;
typedef struct Fts3SegReader Fts3SegReader;


/*
** A connection to a fulltext index is an instance of the following
** structure. The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
** arguments.







>







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#define FTS3_VARINT_MAX 10

typedef struct Fts3Table Fts3Table;
typedef struct Fts3Cursor Fts3Cursor;
typedef struct Fts3Expr Fts3Expr;
typedef struct Fts3Phrase Fts3Phrase;
typedef struct Fts3SegReader Fts3SegReader;
typedef struct Fts3SegFilter Fts3SegFilter;

/*
** A connection to a fulltext index is an instance of the following
** structure. The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
** arguments.
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/*
** A "phrase" is a sequence of one or more tokens that must match in
** sequence.  A single token is the base case and the most common case.
** For a sequence of tokens contained in "...", nToken will be the number
** of tokens in the string.
*/
struct Fts3Phrase {
  int nToken;          /* Number of tokens in the phrase */
  int iColumn;         /* Index of column this phrase must match */
  int isNot;           /* Phrase prefixed by unary not (-) operator */
  struct PhraseToken {
    char *z;              /* Text of the token */
    int n;                /* Number of bytes in buffer pointed to by z */
    int isPrefix;         /* True if token ends in with a "*" character */
  } aToken[1];         /* One entry for each token in the phrase */
};

/*
** A tree of these objects forms the RHS of a MATCH operator.
*/
struct Fts3Expr {
  int eType;                 /* One of the FTSQUERY_XXX values defined below */







|
|
|

|
|
|
|







120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/*
** A "phrase" is a sequence of one or more tokens that must match in
** sequence.  A single token is the base case and the most common case.
** For a sequence of tokens contained in "...", nToken will be the number
** of tokens in the string.
*/
struct Fts3Phrase {
  int nToken;                /* Number of tokens in the phrase */
  int iColumn;               /* Index of column this phrase must match */
  int isNot;                 /* Phrase prefixed by unary not (-) operator */
  struct PhraseToken {
    char *z;                 /* Text of the token */
    int n;                   /* Number of bytes in buffer pointed to by z */
    int isPrefix;            /* True if token ends in with a "*" character */
  } aToken[1];               /* One entry for each token in the phrase */
};

/*
** A tree of these objects forms the RHS of a MATCH operator.
*/
struct Fts3Expr {
  int eType;                 /* One of the FTSQUERY_XXX values defined below */
174
175
176
177
178
179
180








181
182
183
184

185
186
187
188
189
190
191
192
193
void sqlite3Fts3PendingTermsClear(Fts3Table *);
int sqlite3Fts3Optimize(Fts3Table *);

/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS   0x00000001
#define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004









int sqlite3Fts3SegReaderNew(Fts3Table *,int, sqlite3_int64,
  sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3SegReader *);

int sqlite3Fts3SegReaderIterate(
  Fts3Table *, Fts3SegReader **, int, int, int, 
  int (*)(Fts3Table *, void *, char *, int, char *, int),  void *
);

/* fts3.c */
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
int sqlite3Fts3GetVarint32(const char *, int *);







>
>
>
>
>
>
>
>




>

|







175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
void sqlite3Fts3PendingTermsClear(Fts3Table *);
int sqlite3Fts3Optimize(Fts3Table *);

/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS   0x00000001
#define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
#define FTS3_SEGMENT_PREFIX        0x00000008

struct Fts3SegFilter {
  const char *zTerm;
  int nTerm;
  int iCol;
  int flags;
};

int sqlite3Fts3SegReaderNew(Fts3Table *,int, sqlite3_int64,
  sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3SegReader *);

int sqlite3Fts3SegReaderIterate(
  Fts3Table *, Fts3SegReader **, int, Fts3SegFilter *,
  int (*)(Fts3Table *, void *, char *, int, char *, int),  void *
);

/* fts3.c */
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
int sqlite3Fts3GetVarint32(const char *, int *);
Changes to ext/fts3/fts3_write.c.
868
869
870
871
872
873
874



























875
876
877
878
879
880
881
    }else{
      rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1;
    }
  }
  assert( pLhs->aNode && pRhs->aNode );
  return rc;
}




























/*
** Argument apSegment is an array of nSegment elements. It is known that
** the final (nSegment-nSuspect) members are already in sorted order
** (according to the comparison function provided). This function shuffles
** the array around until all entries are in sorted order.
*/







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
    }else{
      rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1;
    }
  }
  assert( pLhs->aNode && pRhs->aNode );
  return rc;
}

/*
** Compare the term that the Fts3SegReader object passed as the first argument
** points to with the term specified by arguments zTerm and nTerm. 
**
** If the pSeg iterator is already at EOF, return 0. Otherwise, return
** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are
** equal, or +ve if the pSeg term is greater than zTerm/nTerm.
*/
static int fts3SegReaderTermCmp(
  Fts3SegReader *pSeg,            /* Segment reader object */
  const char *zTerm,              /* Term to compare to */
  int nTerm                       /* Size of term zTerm in bytes */
){
  int res = 0;
  if( pSeg->aNode ){
    if( pSeg->nTerm>nTerm ){
      res = memcmp(pSeg->zTerm, zTerm, nTerm);
    }else{
      res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm);
    }
    if( res==0 ){
      res = pSeg->nTerm-nTerm;
    }
  }
  return res;
}

/*
** Argument apSegment is an array of nSegment elements. It is known that
** the final (nSegment-nSuspect) members are already in sorted order
** (according to the comparison function provided). This function shuffles
** the array around until all entries are in sorted order.
*/
1423
1424
1425
1426
1427
1428
1429
1430
1431


1432
1433
1434
1435
1436
1437
1438
}

static void fts3ColumnFilter(int iCol, char **ppList, int *pnList){
  char *pList = *ppList;
  int nList = *pnList;
  char *pEnd = &pList[nList];
  int iCurrent = 0;

  char *p = pList;


  while( 1 ){
    char c = 0;
    while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80;
  
    if( iCol==iCurrent ){
      nList = (p - pList);
      break;







<

>
>







1450
1451
1452
1453
1454
1455
1456

1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
}

static void fts3ColumnFilter(int iCol, char **ppList, int *pnList){
  char *pList = *ppList;
  int nList = *pnList;
  char *pEnd = &pList[nList];
  int iCurrent = 0;

  char *p = pList;

  assert( iCol>=0 );
  while( 1 ){
    char c = 0;
    while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80;
  
    if( iCol==iCurrent ){
      nList = (p - pList);
      break;
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482



















1483
1484
1485
1486
1487
1488
















1489
1490
1491
1492
1493
1494
1495
  return fts3LeafAdd(p, ppW, 1, zTerm, nTerm, aDoclist, nDoclist);
}

int sqlite3Fts3SegReaderIterate(
  Fts3Table *p,                   /* Virtual table handle */
  Fts3SegReader **apSegment,      /* Array of Fts3SegReader objects */
  int nSegment,                   /* Size of apSegment array */
  int flags,                      /* Flags mask */
  int iCol,                       /* Column to filter for */
  int (*xFunc)(Fts3Table *, void *, char *, int, char *, int),  /* Callback */
  void *pContext                  /* Callback context (2nd argument) */
){
  int i;                          /* Iterator variable */
  char *aBuffer = 0;              /* Buffer to merge doclists in */
  int nAlloc = 0;                 /* Allocated size of aBuffer buffer */
  int rc = SQLITE_OK;             /* Return code */

  int isIgnoreEmpty  = (flags&FTS3_SEGMENT_IGNORE_EMPTY);
  int isRequirePos = (flags&FTS3_SEGMENT_REQUIRE_POS);
  int isColFilter = (flags&FTS3_SEGMENT_COLUMN_FILTER);




















  fts3SegReaderSort(apSegment, nSegment, nSegment, fts3SegReaderCmp);
  while( apSegment[0]->aNode ){
    int nTerm = apSegment[0]->nTerm;
    char *zTerm = apSegment[0]->zTerm;
    int nMerge = 1;

















    while( nMerge<nSegment 
        && apSegment[nMerge]->aNode
        && apSegment[nMerge]->nTerm==nTerm 
        && 0==memcmp(zTerm, apSegment[nMerge]->zTerm, nTerm)
    ){
      nMerge++;







|
<








|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1491
1492
1493
1494
1495
1496
1497
1498

1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
  return fts3LeafAdd(p, ppW, 1, zTerm, nTerm, aDoclist, nDoclist);
}

int sqlite3Fts3SegReaderIterate(
  Fts3Table *p,                   /* Virtual table handle */
  Fts3SegReader **apSegment,      /* Array of Fts3SegReader objects */
  int nSegment,                   /* Size of apSegment array */
  Fts3SegFilter *pFilter,         /* Restrictions on range of iteration */

  int (*xFunc)(Fts3Table *, void *, char *, int, char *, int),  /* Callback */
  void *pContext                  /* Callback context (2nd argument) */
){
  int i;                          /* Iterator variable */
  char *aBuffer = 0;              /* Buffer to merge doclists in */
  int nAlloc = 0;                 /* Allocated size of aBuffer buffer */
  int rc = SQLITE_OK;             /* Return code */

  int isIgnoreEmpty =  (pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY);
  int isRequirePos =   (pFilter->flags & FTS3_SEGMENT_REQUIRE_POS);
  int isColFilter =    (pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER);
  int isPrefix =       (pFilter->flags & FTS3_SEGMENT_PREFIX);

  /* If the Fts3SegFilter defines a specific term (or term prefix) to search 
  ** for, then advance each segment iterator until it points to a term of
  ** equal or greater value than the specified term. This prevents many
  ** unnecessary merge/sort operations for the case where single segment
  ** b-tree leaf nodes contain more than one term.
  */
  if( pFilter->zTerm ){
    int nTerm = pFilter->nTerm;
    char *zTerm = pFilter->zTerm;
    for(i=0; i<nSegment; i++){
      Fts3SegReader *pSeg = apSegment[i];
      while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ){
        rc = fts3SegReaderNext(pSeg);
        if( rc!=SQLITE_OK ) goto finished;
      }
    }
  }

  fts3SegReaderSort(apSegment, nSegment, nSegment, fts3SegReaderCmp);
  while( apSegment[0]->aNode ){
    int nTerm = apSegment[0]->nTerm;
    char *zTerm = apSegment[0]->zTerm;
    int nMerge = 1;

    /* If this is a prefix-search, and if the term that apSegment[0] points
    ** to does not share a suffix with pFilter->zTerm/nTerm, then all 
    ** required callbacks have been made. In this case exit early.
    **
    ** Similarly, if this is a search for an exact match, and the first term
    ** of segment apSegment[0] is not a match, exit early.
    */
    if( pFilter->zTerm ){
      if( nTerm<pFilter->nTerm 
       || (!isPrefix && nTerm>pFilter->nTerm)
       || memcmp(zTerm, pFilter->zTerm, pFilter->nTerm) 
    ){
        goto finished;
      }
    }

    while( nMerge<nSegment 
        && apSegment[nMerge]->aNode
        && apSegment[nMerge]->nTerm==nTerm 
        && 0==memcmp(zTerm, apSegment[nMerge]->zTerm, nTerm)
    ){
      nMerge++;
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
            && apSegment[j]->pOffsetList 
            && apSegment[j]->iDocid==iDocid 
        ){
          fts3SegReaderNextDocid(apSegment[j], 0, 0);
          j++;
        }

        assert( iCol>=0 || isColFilter==0 );
        if( isColFilter ){
          fts3ColumnFilter(iCol, &pList, &nList);
        }

        if( !isIgnoreEmpty || nList>0 ){
          nByte = sqlite3Fts3VarintLen(iDocid-iPrev) + (isRequirePos?nList+1:0);
          if( nDoclist+nByte>nAlloc ){
            char *aNew;
            nAlloc = nDoclist+nByte*2;







<

|







1585
1586
1587
1588
1589
1590
1591

1592
1593
1594
1595
1596
1597
1598
1599
1600
            && apSegment[j]->pOffsetList 
            && apSegment[j]->iDocid==iDocid 
        ){
          fts3SegReaderNextDocid(apSegment[j], 0, 0);
          j++;
        }


        if( isColFilter ){
          fts3ColumnFilter(pFilter->iCol, &pList, &nList);
        }

        if( !isIgnoreEmpty || nList>0 ){
          nByte = sqlite3Fts3VarintLen(iDocid-iPrev) + (isRequirePos?nList+1:0);
          if( nDoclist+nByte>nAlloc ){
            char *aNew;
            nAlloc = nDoclist+nByte*2;
1557
1558
1559
1560
1561
1562
1563








1564
1565
1566
1567
1568
1569
1570
      }

      if( nDoclist>0 ){
        rc = xFunc(p, pContext, zTerm, nTerm, aBuffer, nDoclist);
        if( rc!=SQLITE_OK ) goto finished;
      }
    }









    for(i=0; i<nMerge; i++){
      rc = fts3SegReaderNext(apSegment[i]);
      if( rc!=SQLITE_OK ) goto finished;
    }
    fts3SegReaderSort(apSegment, nSegment, nMerge, fts3SegReaderCmp);
  }







>
>
>
>
>
>
>
>







1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
      }

      if( nDoclist>0 ){
        rc = xFunc(p, pContext, zTerm, nTerm, aBuffer, nDoclist);
        if( rc!=SQLITE_OK ) goto finished;
      }
    }

    /* If there is a term specified to filter on, and this is not a prefix
    ** search, return now. The callback that corresponds to the required
    ** term (if such a term exists in the index) has already been made.
    */
    if( pFilter->zTerm && !isPrefix ){
      goto finished;
    }

    for(i=0; i<nMerge; i++){
      rc = fts3SegReaderNext(apSegment[i]);
      if( rc!=SQLITE_OK ) goto finished;
    }
    fts3SegReaderSort(apSegment, nSegment, nMerge, fts3SegReaderCmp);
  }
1590
1591
1592
1593
1594
1595
1596

1597
1598
1599
1600
1601
1602
1603
  int rc;                         /* Return code */
  int iIdx;                       /* Index of new segment */
  int iNewLevel;                  /* Level to create new segment at */
  sqlite3_stmt *pStmt;
  SegmentWriter *pWriter = 0;
  int nSegment = 0;               /* Number of segments being merged */
  Fts3SegReader **apSegment = 0;  /* Array of Segment iterators */


  if( iLevel<0 ){
    /* This call is to merge all segments in the database to a single
    ** segment. The level of the new segment is equal to the the numerically 
    ** greatest segment level currently present in the database. The index
    ** of the new segment is always 0.
    */







>







1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
  int rc;                         /* Return code */
  int iIdx;                       /* Index of new segment */
  int iNewLevel;                  /* Level to create new segment at */
  sqlite3_stmt *pStmt;
  SegmentWriter *pWriter = 0;
  int nSegment = 0;               /* Number of segments being merged */
  Fts3SegReader **apSegment = 0;  /* Array of Segment iterators */
  Fts3SegFilter filter;           /* Segment term filter condition */

  if( iLevel<0 ){
    /* This call is to merge all segments in the database to a single
    ** segment. The level of the new segment is equal to the the numerically 
    ** greatest segment level currently present in the database. The index
    ** of the new segment is always 0.
    */
1642
1643
1644
1645
1646
1647
1648



1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
      goto finished;
    }
  }
  rc = sqlite3_reset(pStmt);
  pStmt = 0;
  if( rc!=SQLITE_OK ) goto finished;




  rc = sqlite3Fts3SegReaderIterate(
      p, apSegment, nSegment, 
      (iLevel<0 ? FTS3_SEGMENT_IGNORE_EMPTY : 0)|FTS3_SEGMENT_REQUIRE_POS, 
      0, fts3MergeCallback, (void *)&pWriter
  );
  if( rc!=SQLITE_OK ) goto finished;

  rc = fts3DeleteSegdir(p, iLevel, apSegment, nSegment);
  if( rc==SQLITE_OK ){
    rc = fts3LeafWrite(p, pWriter, iNewLevel, iIdx);
  }







>
>
>
|
<
<
|







1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722


1723
1724
1725
1726
1727
1728
1729
1730
      goto finished;
    }
  }
  rc = sqlite3_reset(pStmt);
  pStmt = 0;
  if( rc!=SQLITE_OK ) goto finished;

  memset(&filter, 0, sizeof(Fts3SegFilter));
  filter.flags = FTS3_SEGMENT_REQUIRE_POS;
  filter.flags |= (iLevel<0 ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
  rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment,


      &filter, fts3MergeCallback, (void *)&pWriter
  );
  if( rc!=SQLITE_OK ) goto finished;

  rc = fts3DeleteSegdir(p, iLevel, apSegment, nSegment);
  if( rc==SQLITE_OK ){
    rc = fts3LeafWrite(p, pWriter, iNewLevel, iIdx);
  }
Changes to test/fts3malloc.test.
177
178
179
180
181
182
183
184
185
186
187

188
189
190

191
192
193
194







195
196
197
198
199
200
201
  CREATE VIRTUAL TABLE ft5 USING fts3(a, b, tokenize unknown)
} {unknown tokenizer: unknown}
do_write_test fts3_malloc-1.6 sqlite_master {
  CREATE VIRTUAL TABLE ft6 USING fts3(a, b, tokenize porter)
}

# Test the xConnect/xDisconnect methods:
db eval { ATTACH 'test2.db' AS aux }
do_write_test fts3_malloc-1.6 aux.sqlite_master {
  CREATE VIRTUAL TABLE aux.ft7 USING fts3(a, b, c);
}

do_write_test fts3_malloc-1.6 aux.sqlite_master {
  CREATE VIRTUAL TABLE aux.ft7 USING fts3(a, b, c);
}




do_test fts3_malloc-2.0 {







  execsql { CREATE VIRTUAL TABLE ft USING fts3(a, b) }
  for {set ii 1} {$ii < 32} {incr ii} {
    set a [list]
    set b [list]
    if {$ii & 0x01} {lappend a one   ; lappend b neung}
    if {$ii & 0x02} {lappend a two   ; lappend b song }
    if {$ii & 0x04} {lappend a three ; lappend b sahm }







|
|
|
<
>
|
|
<
>




>
>
>
>
>
>
>







177
178
179
180
181
182
183
184
185
186

187
188
189

190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
  CREATE VIRTUAL TABLE ft5 USING fts3(a, b, tokenize unknown)
} {unknown tokenizer: unknown}
do_write_test fts3_malloc-1.6 sqlite_master {
  CREATE VIRTUAL TABLE ft6 USING fts3(a, b, tokenize porter)
}

# Test the xConnect/xDisconnect methods:
#db eval { ATTACH 'test2.db' AS aux }
#do_write_test fts3_malloc-1.6 aux.sqlite_master {
#  CREATE VIRTUAL TABLE aux.ft7 USING fts3(a, b, c);

#}
#do_write_test fts3_malloc-1.6 aux.sqlite_master {
#  CREATE VIRTUAL TABLE aux.ft7 USING fts3(a, b, c);

#}



do_test fts3_malloc-2.0 {
  execsql { 
    DROP TABLE ft1;
    DROP TABLE ft2;
    DROP TABLE ft3;
    DROP TABLE ft4;
    DROP TABLE ft6;
  }
  execsql { CREATE VIRTUAL TABLE ft USING fts3(a, b) }
  for {set ii 1} {$ii < 32} {incr ii} {
    set a [list]
    set b [list]
    if {$ii & 0x01} {lappend a one   ; lappend b neung}
    if {$ii & 0x02} {lappend a two   ; lappend b song }
    if {$ii & 0x04} {lappend a three ; lappend b sahm }