/ Check-in [fffab4f7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Update fts3 to use the onepass strategy for delete operations.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | vtab-onepass
Files: files | file ages | folders
SHA1:fffab4f70f85eeb2acbb89534064a6e397c39384
User & Date: dan 2015-09-28 15:23:29
Context
2015-09-28
20:03
Also allow UPDATE on virtual tables to use the onepass strategy. check-in: 1aa27d70 user: dan tags: vtab-onepass
15:23
Update fts3 to use the onepass strategy for delete operations. check-in: fffab4f7 user: dan tags: vtab-onepass
15:20
Changes to allow DELETE operations on virtual tables to use the onepass strategy under some circumstances. check-in: e73f919f user: dan tags: vtab-onepass
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

1512
1513
1514
1515
1516
1517
1518













1519
1520
1521
1522
1523
1524
1525
....
1602
1603
1604
1605
1606
1607
1608



1609
1610
1611
1612
1613
1614
1615
static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){
#if SQLITE_VERSION_NUMBER>=3008002
  if( sqlite3_libversion_number()>=3008002 ){
    pIdxInfo->estimatedRows = nRow;
  }
#endif
}














/* 
** Implementation of the xBestIndex method for FTS3 tables. There
** are three possible strategies, in order of preference:
**
**   1. Direct lookup by rowid or docid. 
**   2. Full-text search using a MATCH operator on a non-docid column.
................................................................................
        case SQLITE_INDEX_CONSTRAINT_LE:
        case SQLITE_INDEX_CONSTRAINT_LT:
          iDocidLe = i;
          break;
      }
    }
  }




  iIdx = 1;
  if( iCons>=0 ){
    pInfo->aConstraintUsage[iCons].argvIndex = iIdx++;
    pInfo->aConstraintUsage[iCons].omit = 1;
  } 
  if( iLangidCons>=0 ){







>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>







1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
....
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){
#if SQLITE_VERSION_NUMBER>=3008002
  if( sqlite3_libversion_number()>=3008002 ){
    pIdxInfo->estimatedRows = nRow;
  }
#endif
}

/*
** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this
** extension is currently being used by a version of SQLite too old to
** support index-info flags. In that case this function is a no-op.
*/
static void fts3SetUniqueFlag(sqlite3_index_info *pIdxInfo){
#if SQLITE_VERSION_NUMBER>=3008012
  if( sqlite3_libversion_number()>=3008012 ){
    pIdxInfo->flags |= SQLITE_INDEX_SCAN_UNIQUE;
  }
#endif
}

/* 
** Implementation of the xBestIndex method for FTS3 tables. There
** are three possible strategies, in order of preference:
**
**   1. Direct lookup by rowid or docid. 
**   2. Full-text search using a MATCH operator on a non-docid column.
................................................................................
        case SQLITE_INDEX_CONSTRAINT_LE:
        case SQLITE_INDEX_CONSTRAINT_LT:
          iDocidLe = i;
          break;
      }
    }
  }

  /* If using a docid=? or rowid=? strategy, set the UNIQUE flag. */
  if( pInfo->idxNum==FTS3_DOCID_SEARCH ) fts3SetUniqueFlag(pInfo);

  iIdx = 1;
  if( iCons>=0 ){
    pInfo->aConstraintUsage[iCons].argvIndex = iIdx++;
    pInfo->aConstraintUsage[iCons].omit = 1;
  } 
  if( iLangidCons>=0 ){

Changes to ext/fts3/fts3Int.h.

260
261
262
263
264
265
266

267
268
269
270
271
272
273
    int nPrefix;                  /* Prefix length (0 for main terms index) */
    Fts3Hash hPending;            /* Pending terms table for this index */
  } *aIndex;
  int nMaxPendingData;            /* Max pending data before flush to disk */
  int nPendingData;               /* Current bytes of pending data */
  sqlite_int64 iPrevDocid;        /* Docid of most recently inserted document */
  int iPrevLangid;                /* Langid of recently inserted document */


#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
  /* State variables used for validating that the transaction control
  ** methods of the virtual table are called at appropriate times.  These
  ** values do not contribute to FTS functionality; they are used for
  ** verifying the operation of the SQLite core.
  */







>







260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
    int nPrefix;                  /* Prefix length (0 for main terms index) */
    Fts3Hash hPending;            /* Pending terms table for this index */
  } *aIndex;
  int nMaxPendingData;            /* Max pending data before flush to disk */
  int nPendingData;               /* Current bytes of pending data */
  sqlite_int64 iPrevDocid;        /* Docid of most recently inserted document */
  int iPrevLangid;                /* Langid of recently inserted document */
  int bPrevDelete;                /* True if last operation was a delete */

#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
  /* State variables used for validating that the transaction control
  ** methods of the virtual table are called at appropriate times.  These
  ** values do not contribute to FTS functionality; they are used for
  ** verifying the operation of the SQLite core.
  */

Changes to ext/fts3/fts3_write.c.

856
857
858
859
860
861
862

863
864
865
866

867
868
869
870
871
872
873
874

875
876
877
878
879
880
881
882

883
884
885
886
887
888
889
....
1065
1066
1067
1068
1069
1070
1071
1072

1073
1074
1075
1076
1077
1078
1079
....
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
....
5613
5614
5615
5616
5617
5618
5619
5620
5621
5622
5623
5624
5625
5626
5627
/* 
** Calling this function indicates that subsequent calls to 
** fts3PendingTermsAdd() are to add term/position-list pairs for the
** contents of the document with docid iDocid.
*/
static int fts3PendingTermsDocid(
  Fts3Table *p,                   /* Full-text table handle */

  int iLangid,                    /* Language id of row being written */
  sqlite_int64 iDocid             /* Docid of row being written */
){
  assert( iLangid>=0 );


  /* TODO(shess) Explore whether partially flushing the buffer on
  ** forced-flush would provide better performance.  I suspect that if
  ** we ordered the doclists by size and flushed the largest until the
  ** buffer was half empty, that would let the less frequent terms
  ** generate longer doclists.
  */
  if( iDocid<=p->iPrevDocid 

   || p->iPrevLangid!=iLangid
   || p->nPendingData>p->nMaxPendingData 
  ){
    int rc = sqlite3Fts3PendingTermsFlush(p);
    if( rc!=SQLITE_OK ) return rc;
  }
  p->iPrevDocid = iDocid;
  p->iPrevLangid = iLangid;

  return SQLITE_OK;
}

/*
** Discard the contents of the pending-terms hash tables. 
*/
void sqlite3Fts3PendingTermsClear(Fts3Table *p){
................................................................................
  assert( *pbFound==0 );
  if( *pRC ) return;
  rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
  if( rc==SQLITE_OK ){
    if( SQLITE_ROW==sqlite3_step(pSelect) ){
      int i;
      int iLangid = langidFromSelect(p, pSelect);
      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));

      for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
        int iCol = i-1;
        if( p->abNotindexed[iCol]==0 ){
          const char *zText = (const char *)sqlite3_column_text(pSelect, i);
          rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]);
          aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
        }
................................................................................
        aSzDel = &aSzIns[p->nColumn+1];
      }
    }

    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
      int iCol;
      int iLangid = langidFromSelect(p, pStmt);
      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));
      memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));
      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
        if( p->abNotindexed[iCol]==0 ){
          const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
          rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
          aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
        }
................................................................................
    if( bInsertDone==0 ){
      rc = fts3InsertData(p, apVal, pRowid);
      if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
        rc = FTS_CORRUPT_VTAB;
      }
    }
    if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
      rc = fts3PendingTermsDocid(p, iLangid, *pRowid);
    }
    if( rc==SQLITE_OK ){
      assert( p->iPrevDocid==*pRowid );
      rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
    }
    if( p->bHasDocsize ){
      fts3InsertDocsize(&rc, p, aSzIns);







>




>







|
>








>







 







|
>







 







|







 







|







856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
....
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
....
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
....
5618
5619
5620
5621
5622
5623
5624
5625
5626
5627
5628
5629
5630
5631
5632
/* 
** Calling this function indicates that subsequent calls to 
** fts3PendingTermsAdd() are to add term/position-list pairs for the
** contents of the document with docid iDocid.
*/
static int fts3PendingTermsDocid(
  Fts3Table *p,                   /* Full-text table handle */
  int bDelete,                    /* True if this op is a delete */
  int iLangid,                    /* Language id of row being written */
  sqlite_int64 iDocid             /* Docid of row being written */
){
  assert( iLangid>=0 );
  assert( bDelete==1 || bDelete==0 );

  /* TODO(shess) Explore whether partially flushing the buffer on
  ** forced-flush would provide better performance.  I suspect that if
  ** we ordered the doclists by size and flushed the largest until the
  ** buffer was half empty, that would let the less frequent terms
  ** generate longer doclists.
  */
  if( iDocid<p->iPrevDocid 
   || (iDocid==p->iPrevLangid && p->bPrevDelete==0)
   || p->iPrevLangid!=iLangid
   || p->nPendingData>p->nMaxPendingData 
  ){
    int rc = sqlite3Fts3PendingTermsFlush(p);
    if( rc!=SQLITE_OK ) return rc;
  }
  p->iPrevDocid = iDocid;
  p->iPrevLangid = iLangid;
  p->bPrevDelete = bDelete;
  return SQLITE_OK;
}

/*
** Discard the contents of the pending-terms hash tables. 
*/
void sqlite3Fts3PendingTermsClear(Fts3Table *p){
................................................................................
  assert( *pbFound==0 );
  if( *pRC ) return;
  rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
  if( rc==SQLITE_OK ){
    if( SQLITE_ROW==sqlite3_step(pSelect) ){
      int i;
      int iLangid = langidFromSelect(p, pSelect);
      i64 iDocid = sqlite3_column_int64(pSelect, 0);
      rc = fts3PendingTermsDocid(p, 1, iLangid, iDocid);
      for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
        int iCol = i-1;
        if( p->abNotindexed[iCol]==0 ){
          const char *zText = (const char *)sqlite3_column_text(pSelect, i);
          rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]);
          aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
        }
................................................................................
        aSzDel = &aSzIns[p->nColumn+1];
      }
    }

    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
      int iCol;
      int iLangid = langidFromSelect(p, pStmt);
      rc = fts3PendingTermsDocid(p, 0, iLangid, sqlite3_column_int64(pStmt, 0));
      memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));
      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
        if( p->abNotindexed[iCol]==0 ){
          const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
          rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
          aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
        }
................................................................................
    if( bInsertDone==0 ){
      rc = fts3InsertData(p, apVal, pRowid);
      if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
        rc = FTS_CORRUPT_VTAB;
      }
    }
    if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
      rc = fts3PendingTermsDocid(p, 0, iLangid, *pRowid);
    }
    if( rc==SQLITE_OK ){
      assert( p->iPrevDocid==*pRowid );
      rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
    }
    if( p->bHasDocsize ){
      fts3InsertDocsize(&rc, p, aSzIns);

Changes to test/fts4growth.test.

198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
} {0}
do_test 3.1.3 {
  db transaction { 
    insert_doc 1 2 3 4 5 6 7 8 9
    delete_doc 9 8 7
  }
  execsql { SELECT level, idx, second(end_block) FROM x3_segdir }
} {0 0 591 0 1 65 0 2 72 0 3 76}
do_test 3.1.4 {
  execsql { INSERT INTO x3(x3) VALUES('optimize') }
  execsql { SELECT level, idx, second(end_block) FROM x3_segdir }
} {0 0 412}

do_test 3.2.1 {
  execsql { DELETE FROM x3 }
  insert_doc 8 7 6 5 4 3 2 1
  delete_doc 7 8
  execsql { SELECT count(*) FROM x3_segdir }
} {10}







|



|







198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
} {0}
do_test 3.1.3 {
  db transaction { 
    insert_doc 1 2 3 4 5 6 7 8 9
    delete_doc 9 8 7
  }
  execsql { SELECT level, idx, second(end_block) FROM x3_segdir }
} {0 0 591 0 1 72 0 2 76}
do_test 3.1.4 {
  execsql { INSERT INTO x3(x3) VALUES('optimize') }
  execsql { SELECT level, idx, second(end_block) FROM x3_segdir }
} {0 0 463}

do_test 3.2.1 {
  execsql { DELETE FROM x3 }
  insert_doc 8 7 6 5 4 3 2 1
  delete_doc 7 8
  execsql { SELECT count(*) FROM x3_segdir }
} {10}