SQLite

Check-in [757fa22400]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Implemented UPDATE for full-text tables.

We handle an UPDATE to a row by performing an UPDATE on the content table and by building new position lists for each term which appears in either the old or new versions of the row. We write these position lists all at once; this is presumably more efficient than a delete followed by an insert (which would first write empty position lists, then new position lists). (CVS 3434)

Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 757fa22400b363212b4d5f648bdc9fcbd9a7f152
User & Date: adamd 2006-09-22 00:06:39.000
Context
2006-09-22
23:38
Fix a build problem around sqlite3_overload_function. Only affects so/dll builds. (CVS 3435) (check-in: 791d70936b user: shess tags: trunk)
00:06
Implemented UPDATE for full-text tables.

We handle an UPDATE to a row by performing an UPDATE on the content table and by building new position lists for each term which appears in either the old or new versions of the row. We write these position lists all at once; this is presumably more efficient than a delete followed by an insert (which would first write empty position lists, then new position lists). (CVS 3434) (check-in: 757fa22400 user: adamd tags: trunk)

2006-09-21
20:56
When gathering a doclist for querying, don't discard empty position lists until the end; this allows empty position lists to override non-empty lists encountered later in the gathering process. This fixes #1982, which was caused by the fact that for all-column queries we weren't discarding empty position lists at all. (CVS 3433) (check-in: 111ca61671 user: adamd tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts1/fts1.c.
967
968
969
970
971
972
973

974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993

994
995
996
997
998
999
1000
** in the decision may argue for a larger value.
*/
#define CHUNK_MAX 256

typedef enum fulltext_statement {
  CONTENT_INSERT_STMT,
  CONTENT_SELECT_STMT,

  CONTENT_DELETE_STMT,

  TERM_SELECT_STMT,
  TERM_SELECT_ALL_STMT,
  TERM_INSERT_STMT,
  TERM_UPDATE_STMT,
  TERM_DELETE_STMT,

  MAX_STMT                     /* Always at end! */
} fulltext_statement;

/* These must exactly match the enum above. */
/* TODO(adam): Is there some risk that a statement (in particular,
** pTermSelectStmt) will be used in two cursors at once, e.g.  if a
** query joins a virtual table to itself?  If so perhaps we should
** move some of these to the cursor object.
*/
static const char *const fulltext_zStatement[MAX_STMT] = {
  /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
  /* CONTENT_SELECT */ "select * from %_content where rowid = ?",

  /* CONTENT_DELETE */ "delete from %_content where rowid = ?",

  /* TERM_SELECT */
  "select rowid, doclist from %_term where term = ? and segment = ?",
  /* TERM_SELECT_ALL */
  "select doclist from %_term where term = ? order by segment",
  /* TERM_INSERT */







>




















>







967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
** in the decision may argue for a larger value.
*/
#define CHUNK_MAX 256

typedef enum fulltext_statement {
  CONTENT_INSERT_STMT,
  CONTENT_SELECT_STMT,
  CONTENT_UPDATE_STMT,
  CONTENT_DELETE_STMT,

  TERM_SELECT_STMT,
  TERM_SELECT_ALL_STMT,
  TERM_INSERT_STMT,
  TERM_UPDATE_STMT,
  TERM_DELETE_STMT,

  MAX_STMT                     /* Always at end! */
} fulltext_statement;

/* These must exactly match the enum above. */
/* TODO(adam): Is there some risk that a statement (in particular,
** pTermSelectStmt) will be used in two cursors at once, e.g.  if a
** query joins a virtual table to itself?  If so perhaps we should
** move some of these to the cursor object.
*/
static const char *const fulltext_zStatement[MAX_STMT] = {
  /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
  /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
  /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
  /* CONTENT_DELETE */ "delete from %_content where rowid = ?",

  /* TERM_SELECT */
  "select rowid, doclist from %_term where term = ? and segment = ?",
  /* TERM_SELECT_ALL */
  "select doclist from %_term where term = ? order by segment",
  /* TERM_INSERT */
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
*/
struct fulltext_vtab {
  sqlite3_vtab base;               /* Base class used by SQLite core */
  sqlite3 *db;                     /* The database connection */
  const char *zName;               /* virtual table name */
  int nColumn;                     /* number of columns in virtual table */
  char **azColumn;                 /* column names.  malloced */
  char *zColumnList;               /* comma-separate list of column names */
  sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */

  /* Precompiled statements which we keep as long as the table is
  ** open.
  */
  sqlite3_stmt *pFulltextStatements[MAX_STMT];
};







|







1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
*/
struct fulltext_vtab {
  sqlite3_vtab base;               /* Base class used by SQLite core */
  sqlite3 *db;                     /* The database connection */
  const char *zName;               /* virtual table name */
  int nColumn;                     /* number of columns in virtual table */
  char **azColumn;                 /* column names.  malloced */
  char **azContentColumn;          /* column names in content table; malloced */
  sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */

  /* Precompiled statements which we keep as long as the table is
  ** open.
  */
  sqlite3_stmt *pFulltextStatements[MAX_STMT];
};
1042
1043
1044
1045
1046
1047
1048









1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065





















1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076


1077



1078

1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
} fulltext_cursor;

static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}

static const sqlite3_module fulltextModule;   /* forward declaration */










/* Return a dynamically generated statement of the form
 *   insert into %_content (rowid, ...) values (?, ...)
 */
static const char *contentInsertStatement(fulltext_vtab *v){
  StringBuffer sb;
  int i;

  initStringBuffer(&sb);
  append(&sb, "insert into %_content (rowid, ");
  append(&sb, v->zColumnList);
  append(&sb, ") values (?");
  for(i=0; i<v->nColumn; ++i)
    append(&sb, ", ?");
  append(&sb, ")");
  return sb.s;
}






















/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
** If the indicated statement has never been prepared, it is prepared
** and cached, otherwise the cached version is reset.
*/
static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
                             sqlite3_stmt **ppStmt){
  assert( iStmt<MAX_STMT );
  if( v->pFulltextStatements[iStmt]==NULL ){
    const char *zStmt;
    int rc;


    zStmt = iStmt==CONTENT_INSERT_STMT ? contentInsertStatement(v) : 



                                         fulltext_zStatement[iStmt];

    rc = sql_prepare(v->db, v->zName, &v->pFulltextStatements[iStmt],
                         zStmt);
    if( iStmt==CONTENT_INSERT_STMT ) free((void *) zStmt);
    if( rc!=SQLITE_OK ) return rc;
  } else {
    int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
    if( rc!=SQLITE_OK ) return rc;
  }

  *ppStmt = v->pFulltextStatements[iStmt];







>
>
>
>
>
>
>
>
>










|






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>











>
>
|
>
>
>
|
>


|







1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
} fulltext_cursor;

static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}

static const sqlite3_module fulltextModule;   /* forward declaration */

/* Append a list of strings separated by commas to a StringBuffer. */
static void appendList(StringBuffer *sb, int nString, char **azString){
  int i;
  for(i=0; i<nString; ++i){
    if( i>0 ) append(sb, ", ");
    append(sb, azString[i]);
  }
}

/* Return a dynamically generated statement of the form
 *   insert into %_content (rowid, ...) values (?, ...)
 */
static const char *contentInsertStatement(fulltext_vtab *v){
  StringBuffer sb;
  int i;

  initStringBuffer(&sb);
  append(&sb, "insert into %_content (rowid, ");
  appendList(&sb, v->nColumn, v->azContentColumn);
  append(&sb, ") values (?");
  for(i=0; i<v->nColumn; ++i)
    append(&sb, ", ?");
  append(&sb, ")");
  return sb.s;
}

/* Return a dynamically generated statement of the form
 *   update %_content set [col_0] = ?, [col_1] = ?, ...
 *                    where rowid = ?
 */
static const char *contentUpdateStatement(fulltext_vtab *v){
  StringBuffer sb;
  int i;

  initStringBuffer(&sb);
  append(&sb, "update %_content set ");
  for(i=0; i<v->nColumn; ++i) {
    if( i>0 ){
      append(&sb, ", ");
    }
    append(&sb, v->azContentColumn[i]);
    append(&sb, " = ?");
  }
  append(&sb, " where rowid = ?");
  return sb.s;
}

/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
** If the indicated statement has never been prepared, it is prepared
** and cached, otherwise the cached version is reset.
*/
static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
                             sqlite3_stmt **ppStmt){
  assert( iStmt<MAX_STMT );
  if( v->pFulltextStatements[iStmt]==NULL ){
    const char *zStmt;
    int rc;
    switch( iStmt ){
      case CONTENT_INSERT_STMT:
        zStmt = contentInsertStatement(v); break;
      case CONTENT_UPDATE_STMT:
        zStmt = contentUpdateStatement(v); break;
      default:
        zStmt = fulltext_zStatement[iStmt];
    }
    rc = sql_prepare(v->db, v->zName, &v->pFulltextStatements[iStmt],
                         zStmt);
    if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt);
    if( rc!=SQLITE_OK ) return rc;
  } else {
    int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
    if( rc!=SQLITE_OK ) return rc;
  }

  *ppStmt = v->pFulltextStatements[iStmt];
1154
1155
1156
1157
1158
1159
1160




















1161
1162
1163
1164
1165
1166
1167
  for(i=0; i<v->nColumn; ++i){
    rc = sqlite3_bind_value(s, 2+i, pValues[i]);
    if( rc!=SQLITE_OK ) return rc;
  }

  return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
}





















void freeStringArray(int nString, const char **pString){
  int i;

  for (i=0 ; i < nString ; ++i) {
    free((void *) pString[i]);
  }







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
  for(i=0; i<v->nColumn; ++i){
    rc = sqlite3_bind_value(s, 2+i, pValues[i]);
    if( rc!=SQLITE_OK ) return rc;
  }

  return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
}

/* update %_content set col0 = pValues[0], col1 = pValues[1], ...
 *                  where rowid = [iRowid] */
static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
                          sqlite_int64 iRowid){
  sqlite3_stmt *s;
  int i;
  int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  for(i=0; i<v->nColumn; ++i){
    rc = sqlite3_bind_value(s, 1+i, pValues[i]);
    if( rc!=SQLITE_OK ) return rc;
  }

  rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid);
  if( rc!=SQLITE_OK ) return rc;

  return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);
}

void freeStringArray(int nString, const char **pString){
  int i;

  for (i=0 ; i < nString ; ++i) {
    free((void *) pString[i]);
  }
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394



1395
1396
1397
1398
1399
1400
1401
1402
  return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
}

/*
** Free the memory used to contain a fulltext_vtab structure.
*/
static void fulltext_vtab_destroy(fulltext_vtab *v){
  int iStmt;

  TRACE(("FTS1 Destroy %p\n", v));
  for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
    if( v->pFulltextStatements[iStmt]!=NULL ){
      sqlite3_finalize(v->pFulltextStatements[iStmt]);
      v->pFulltextStatements[iStmt] = NULL;
    }
  }

  if( v->pTokenizer!=NULL ){
    v->pTokenizer->pModule->xDestroy(v->pTokenizer);
    v->pTokenizer = NULL;
  }
  
  free(v->azColumn);



  free(v->zColumnList);
  free(v);
}

/*
** Token types for parsing the arguments to xConnect or xCreate.
*/
#define TOKEN_EOF         0    /* End of file */







|















>
>
>
|







1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
  return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
}

/*
** Free the memory used to contain a fulltext_vtab structure.
*/
static void fulltext_vtab_destroy(fulltext_vtab *v){
  int iStmt, i;

  TRACE(("FTS1 Destroy %p\n", v));
  for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
    if( v->pFulltextStatements[iStmt]!=NULL ){
      sqlite3_finalize(v->pFulltextStatements[iStmt]);
      v->pFulltextStatements[iStmt] = NULL;
    }
  }

  if( v->pTokenizer!=NULL ){
    v->pTokenizer->pModule->xDestroy(v->pTokenizer);
    v->pTokenizer = NULL;
  }
  
  free(v->azColumn);
  for(i = 0; i < v->nColumn; ++i) {
    sqlite3_free(v->azContentColumn[i]);
  }
  free(v->azContentColumn);
  free(v);
}

/*
** Token types for parsing the arguments to xConnect or xCreate.
*/
#define TOKEN_EOF         0    /* End of file */
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
  while( *t ){
    if( tolower(*s++)!=tolower(*t++) ) return 0;
  }
  return *s!='_' && !isalnum(*s);
}

/*
** An instance of this structure defines the "spec" of a the
** full text index.  This structure is populated by parseSpec
** and use by fulltextConnect and fulltextCreate.
*/
typedef struct TableSpec {
  const char *zName;       /* Name of the full-text index */
  int nColumn;             /* Number of columns to be indexed */
  char **azColumn;         /* Original names of columns to be indexed */
  char *zColumnList;       /* Comma-separated list of names for %_content */
  char **azTokenizer;      /* Name of tokenizer and its arguments */
} TableSpec;

/*
** Reclaim all of the memory used by a TableSpec
*/
void clearTableSpec(TableSpec *p) {
  free(p->azColumn);
  free(p->zColumnList);
  free(p->azTokenizer);
}

/* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
 *
 * CREATE VIRTUAL TABLE email
 *        USING fts1(subject, body, tokenize mytokenizer(myarg))







|







|








|







1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
  while( *t ){
    if( tolower(*s++)!=tolower(*t++) ) return 0;
  }
  return *s!='_' && !isalnum(*s);
}

/*
** An instance of this structure defines the "spec" of a
** full text index.  This structure is populated by parseSpec
** and use by fulltextConnect and fulltextCreate.
*/
typedef struct TableSpec {
  const char *zName;       /* Name of the full-text index */
  int nColumn;             /* Number of columns to be indexed */
  char **azColumn;         /* Original names of columns to be indexed */
  char **azContentColumn;  /* Column names for %_content */
  char **azTokenizer;      /* Name of tokenizer and its arguments */
} TableSpec;

/*
** Reclaim all of the memory used by a TableSpec
*/
void clearTableSpec(TableSpec *p) {
  free(p->azColumn);
  free(p->azContentColumn);
  free(p->azTokenizer);
}

/* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
 *
 * CREATE VIRTUAL TABLE email
 *        USING fts1(subject, body, tokenize mytokenizer(myarg))
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758

1759
1760

1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
  /* Identify the column names and the tokenizer and delimiter arguments
  ** in the argv[][] array.
  */
  pSpec->zName = azArg[2];
  pSpec->nColumn = 0;
  pSpec->azColumn = azArg;
  zTokenizer = "tokenize simple";
  n = 0;
  for(i=3, j=0; i<argc; ++i){
    if( startsWith(azArg[i],"tokenize") ){
      zTokenizer = azArg[i];
    }else{
      z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
      pSpec->nColumn++;
      n += strlen(z) + 6;
    }
  }
  if( pSpec->nColumn==0 ){
    azArg[0] = "content";
    pSpec->nColumn = 1;
  }

  /*
  ** Construct the comma-separated list of column names.
  **
  ** Each column name will be of the form cNNAAAA
  ** where NN is the column number and AAAA is the sanitized
  ** column name.  "sanitized" means that special characters are
  ** converted to "_".  The cNN prefix guarantees that all column
  ** names are unique.
  **
  ** The AAAA suffix is not strictly necessary.  It is included
  ** for the convenience of people who might examine the generated
  ** %_content table and wonder what the columns are used for.
  */
  z = pSpec->zColumnList = malloc( n );
  if( z==0 ){
    clearTableSpec(pSpec);
    return SQLITE_NOMEM;
  }
  for(i=0; i<pSpec->nColumn; i++){

    sqlite3_snprintf(n, z, "c%d%s", i, azArg[i]);
    for(j=0; z[j]; j++){

      if( !isalnum(z[j]) ) z[j] = '_';
    }
    z[j] = ',';
    z += j+1;
  }
  z[-1] = 0;

  /*
  ** Parse the tokenizer specification string.
  */
  pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
  tokenListToIdList(pSpec->azTokenizer);








<






<








|

|









|
|




>
|
<
>
|

<
<

<







1779
1780
1781
1782
1783
1784
1785

1786
1787
1788
1789
1790
1791

1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819

1820
1821
1822


1823

1824
1825
1826
1827
1828
1829
1830
  /* Identify the column names and the tokenizer and delimiter arguments
  ** in the argv[][] array.
  */
  pSpec->zName = azArg[2];
  pSpec->nColumn = 0;
  pSpec->azColumn = azArg;
  zTokenizer = "tokenize simple";

  for(i=3, j=0; i<argc; ++i){
    if( startsWith(azArg[i],"tokenize") ){
      zTokenizer = azArg[i];
    }else{
      z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
      pSpec->nColumn++;

    }
  }
  if( pSpec->nColumn==0 ){
    azArg[0] = "content";
    pSpec->nColumn = 1;
  }

  /*
  ** Construct the list of content column names.
  **
  ** Each content column name will be of the form cNNAAAA
  ** where NN is the column number and AAAA is the sanitized
  ** column name.  "sanitized" means that special characters are
  ** converted to "_".  The cNN prefix guarantees that all column
  ** names are unique.
  **
  ** The AAAA suffix is not strictly necessary.  It is included
  ** for the convenience of people who might examine the generated
  ** %_content table and wonder what the columns are used for.
  */
  pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) );
  if( pSpec->azContentColumn==0 ){
    clearTableSpec(pSpec);
    return SQLITE_NOMEM;
  }
  for(i=0; i<pSpec->nColumn; i++){
    char *p;
    pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);

    for (p = pSpec->azContentColumn[i]; *p ; ++p) {
      if( !isalnum(*p) ) *p = '_';
    }


  }


  /*
  ** Parse the tokenizer specification string.
  */
  pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
  tokenListToIdList(pSpec->azTokenizer);

1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
  v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
  if( v==0 ) return SQLITE_NOMEM;
  memset(v, 0, sizeof(*v));
  /* sqlite will initialize v->base */
  v->db = db;
  v->zName = spec->zName;   /* Freed when azColumn is freed */
  v->nColumn = spec->nColumn;
  v->zColumnList = spec->zColumnList;
  spec->zColumnList = 0;
  v->azColumn = spec->azColumn;
  spec->azColumn = 0;

  if( spec->azTokenizer==0 ){
    return SQLITE_NOMEM;
  }
  /* TODO(shess) For now, add new tokenizers as else if clauses. */







|
|







1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
  v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
  if( v==0 ) return SQLITE_NOMEM;
  memset(v, 0, sizeof(*v));
  /* sqlite will initialize v->base */
  v->db = db;
  v->zName = spec->zName;   /* Freed when azColumn is freed */
  v->nColumn = spec->nColumn;
  v->azContentColumn = spec->azContentColumn;
  spec->azContentColumn = 0;
  v->azColumn = spec->azColumn;
  spec->azColumn = 0;

  if( spec->azTokenizer==0 ){
    return SQLITE_NOMEM;
  }
  /* TODO(shess) For now, add new tokenizers as else if clauses. */
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934

1935


1936
1937
1938
1939
1940
1941
1942
1943
1944
  ** practice, though.
  */
static int fulltextCreate(sqlite3 *db, void *pAux,
                          int argc, const char * const *argv,
                          sqlite3_vtab **ppVTab, char **pzErr){
  int rc;
  TableSpec spec;
  char *schema;
  TRACE(("FTS1 Create\n"));

  rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc!=SQLITE_OK ) return rc;


  schema = sqlite3_mprintf("CREATE TABLE %%_content(%s)", spec.zColumnList);


  rc = sql_exec(db, spec.zName, schema);
  sqlite3_free(schema);
  if( rc!=SQLITE_OK ) goto out;

  rc = sql_exec(db, spec.zName,
    "create table %_term(term text, segment integer, doclist blob, "
                        "primary key(term, segment));");
  if( rc!=SQLITE_OK ) goto out;








|





>
|
>
>
|
|







1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
  ** practice, though.
  */
static int fulltextCreate(sqlite3 *db, void *pAux,
                          int argc, const char * const *argv,
                          sqlite3_vtab **ppVTab, char **pzErr){
  int rc;
  TableSpec spec;
  StringBuffer schema;
  TRACE(("FTS1 Create\n"));

  rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc!=SQLITE_OK ) return rc;

  initStringBuffer(&schema);
  append(&schema, "CREATE TABLE %_content(");
  appendList(&schema, spec.nColumn, spec.azContentColumn);
  append(&schema, ")");
  rc = sql_exec(db, spec.zName, schema.s);
  free(schema.s);
  if( rc!=SQLITE_OK ) goto out;

  rc = sql_exec(db, spec.zName,
    "create table %_term(term text, segment integer, doclist blob, "
                        "primary key(term, segment));");
  if( rc!=SQLITE_OK ) goto out;

2779
2780
2781
2782
2783
2784
2785
2786


2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;

  *pRowid = sqlite3_column_int64(c->pStmt, 0);
  return SQLITE_OK;
}

/* Add all terms/positions in [zText] to the given hash table. */


static int buildTerms(fulltext_vtab *v, fts1Hash *terms, int iColumn,
                      const char *zText, int nText, sqlite_int64 iDocid){
  sqlite3_tokenizer *pTokenizer = v->pTokenizer;
  sqlite3_tokenizer_cursor *pCursor;
  const char *pToken;
  int nTokenBytes;
  int iStartOffset, iEndOffset, iPosition;
  int rc;

  rc = pTokenizer->pModule->xOpen(pTokenizer, zText, nText, &pCursor);
  if( rc!=SQLITE_OK ) return rc;

  pCursor->pTokenizer = pTokenizer;
  while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
                                               &pToken, &nTokenBytes,
                                               &iStartOffset, &iEndOffset,
                                               &iPosition) ){







|
>
>
|
|







|







2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;

  *pRowid = sqlite3_column_int64(c->pStmt, 0);
  return SQLITE_OK;
}

/* Add all terms in [zText] to the given hash table.  If [iColumn] > 0,
 * we also store positions and offsets in the hash table using the given
 * column number. */
static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid,
                      const char *zText, int iColumn){
  sqlite3_tokenizer *pTokenizer = v->pTokenizer;
  sqlite3_tokenizer_cursor *pCursor;
  const char *pToken;
  int nTokenBytes;
  int iStartOffset, iEndOffset, iPosition;
  int rc;

  rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
  if( rc!=SQLITE_OK ) return rc;

  pCursor->pTokenizer = pTokenizer;
  while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
                                               &pToken, &nTokenBytes,
                                               &iStartOffset, &iEndOffset,
                                               &iPosition) ){
2811
2812
2813
2814
2815
2816
2817

2818

2819
2820
2821
2822
2823
2824
2825

    p = fts1HashFind(terms, pToken, nTokenBytes);
    if( p==NULL ){
      p = docListNew(DL_POSITIONS_OFFSETS);
      docListAddDocid(p, iDocid);
      fts1HashInsert(terms, pToken, nTokenBytes, p);
    }

    docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);

  }

  /* TODO(shess) Check return?  Should this be able to cause errors at
  ** this point?  Actually, same question about sqlite3_finalize(),
  ** though one could argue that failure there means that the data is
  ** not durable.  *ponder*
  */







>
|
>







2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889

    p = fts1HashFind(terms, pToken, nTokenBytes);
    if( p==NULL ){
      p = docListNew(DL_POSITIONS_OFFSETS);
      docListAddDocid(p, iDocid);
      fts1HashInsert(terms, pToken, nTokenBytes, p);
    }
    if( iColumn>=0 ){
      docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
    }
  }

  /* TODO(shess) Check return?  Should this be able to cause errors at
  ** this point?  Actually, same question about sqlite3_finalize(),
  ** though one could argue that failure there means that the data is
  ** not durable.  *ponder*
  */
2890
2891
2892
2893
2894
2895
2896






























2897
2898
2899





















2900















2901


2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912

2913


2914





2915





2916








2917



2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
  }

 err:
  docListDestroy(&doclist);
  return rc;
}































/* Insert a row into the full-text index; set *piRowid to be the ID of the
 * new row. */
static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,





















                        sqlite3_value **pValues,















                        sqlite_int64 *piRowid){


  int i;
  fts1Hash terms;  /* maps term string -> PosList */
  fts1HashElem *e;
  int rc;

  rc = content_insert(v, pRequestRowid, pValues);
  if( rc!=SQLITE_OK ) return rc;
  *piRowid = sqlite3_last_insert_rowid(v->db);

  fts1HashInit(&terms, FTS1_HASH_STRING, 1);
  for(i = 0; i < v->nColumn ; ++i){

    rc = buildTerms(v, &terms, i, (char*)sqlite3_value_text(pValues[i]), -1,


                    *piRowid);





    if( rc!=SQLITE_OK ) goto out;





  }












  for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
    DocList *p = fts1HashData(e);
    rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p);
    if( rc!=SQLITE_OK ) break;
  }

out:
  for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
    DocList *p = fts1HashData(e);
    docListDelete(p);
  }
  fts1HashClear(&terms);
  return rc;
}

/* Delete a row from the full-text index. */
static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){
  const char **pValues;
  fts1Hash terms;
  int i;
  fts1HashElem *e;
  DocList doclist;

  int rc = content_select(v, iRow, &pValues);
  if( rc!=SQLITE_OK ) return rc;

  fts1HashInit(&terms, FTS1_HASH_STRING, 1);
  for(i = 0 ; i < v->nColumn; ++i) {
    rc = buildTerms(v, &terms, i, pValues[i], -1, iRow);
    if( rc!=SQLITE_OK ) goto out;
  }

  /* Delete by inserting a doclist with no positions.  This will
  ** overwrite existing data as it is merged forward by
  ** index_insert_term().
  */
  docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0);
  docListAddDocid(&doclist, iRow);

  for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
    rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), &doclist);
    if( rc!=SQLITE_OK ) break;
  }

out:
  freeStringArray(v->nColumn, pValues);
  for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
    DocList *p = fts1HashData(e);
    docListDelete(p);
  }
  fts1HashClear(&terms);
  docListDestroy(&doclist);

  if( rc!=SQLITE_OK ) return rc;
  return content_delete(v, iRow);
}

static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
                   sqlite_int64 *pRowid){
  fulltext_vtab *v = (fulltext_vtab *) pVtab;

  TRACE(("FTS1 Update %p\n", pVtab));
  if( nArg<2 ){
    return index_delete(v, sqlite3_value_int64(ppArg[0]));
  }

  if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
    return SQLITE_ERROR;   /* an update; not yet supported */
  }

  /* ppArg[1] = rowid
   * ppArg[2..2+v->nColumn-1] = values
   * ppArg[2+v->nColumn] = value for _all (we ignore this)
   * ppArg[3+v->nColumn] = value of offset (we ignore this too)
   */
  assert( nArg==2+v->nColumn+1);    

  return index_insert(v, ppArg[1], &ppArg[2], pRowid);
}

/*
** Implementation of the snippet() function for FTS1
*/
static void snippetFunc(
  sqlite3_context *pContext,







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
|
<

<

|
<
<
|

|
>
|
>
>
|
>
>
>
>
>
|
>
>
>
>
>
|
>
>
>
>
>
>
>
>
|
>
>
>
|
|
|
|
|
|
<
<
<
<
|
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<





<

|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034

3035

3036
3037


3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075




3076



3077































3078
3079
3080
3081
3082

3083
3084
























3085
3086
3087
3088
3089
3090
3091
  }

 err:
  docListDestroy(&doclist);
  return rc;
}

/* Add doclists for all terms in [pValues] to the hash table [terms]. */
static int insertTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iRowid,
                sqlite3_value **pValues){
  int i;
  for(i = 0; i < v->nColumn ; ++i){
    char *zText = (char*)sqlite3_value_text(pValues[i]);
    int rc = buildTerms(v, terms, iRowid, zText, i);
    if( rc!=SQLITE_OK ) return rc;
  }
  return SQLITE_OK;
}

/* Add empty doclists for all terms in the given row's content to the hash
 * table [pTerms]. */
static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){
  const char **pValues;
  int i;

  int rc = content_select(v, iRowid, &pValues);
  if( rc!=SQLITE_OK ) return rc;

  for(i = 0 ; i < v->nColumn; ++i) {
    rc = buildTerms(v, pTerms, iRowid, pValues[i], -1);
    if( rc!=SQLITE_OK ) break;
  }

  freeStringArray(v->nColumn, pValues);
  return SQLITE_OK;
}

/* Insert a row into the %_content table; set *piRowid to be the ID of the
 * new row.  Fill [pTerms] with new doclists for the %_term table. */
static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
                        sqlite3_value **pValues,
                        sqlite_int64 *piRowid, fts1Hash *pTerms){
  int rc;

  rc = content_insert(v, pRequestRowid, pValues);  /* execute an SQL INSERT */
  if( rc!=SQLITE_OK ) return rc;
  *piRowid = sqlite3_last_insert_rowid(v->db);
  return insertTerms(v, pTerms, *piRowid, pValues);
}

/* Delete a row from the %_content table; fill [pTerms] with empty doclists
 * to be written to the %_term table. */
static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){
  int rc = deleteTerms(v, pTerms, iRow);
  if( rc!=SQLITE_OK ) return rc;
  return content_delete(v, iRow);  /* execute an SQL DELETE */
}

/* Update a row in the %_content table; fill [pTerms] with new doclists for the
 * %_term table. */
static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
                        sqlite3_value **pValues, fts1Hash *pTerms){
  /* Generate an empty doclist for each term that previously appeared in this
   * row. */
  int rc = deleteTerms(v, pTerms, iRow);
  if( rc!=SQLITE_OK ) return rc;

  /* Now add positions for terms which appear in the updated row. */
  rc = insertTerms(v, pTerms, iRow, pValues);
  if( rc!=SQLITE_OK ) return rc;

  return content_update(v, pValues, iRow);  /* execute an SQL UPDATE */
}

/* This function implements the xUpdate callback; it's the top-level entry
 * point for inserting, deleting or updating a row in a full-text table. */
static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
                   sqlite_int64 *pRowid){
  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  fts1Hash terms;   /* maps term string -> PosList */
  int rc;

  fts1HashElem *e;


  TRACE(("FTS1 Update %p\n", pVtab));


  
  fts1HashInit(&terms, FTS1_HASH_STRING, 1);

  if( nArg<2 ){
    rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms);
  } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
    /* An update:
     * ppArg[0] = old rowid
     * ppArg[1] = new rowid
     * ppArg[2..2+v->nColumn-1] = values
     * ppArg[2+v->nColumn] = value for magic column (we ignore this)
     */
    sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
    if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
      sqlite3_value_int64(ppArg[1]) != rowid ){
      rc = SQLITE_ERROR;  /* we don't allow changing the rowid */
    } else {
      assert( nArg==2+v->nColumn+1);
      rc = index_update(v, rowid, &ppArg[2], &terms);
    }
  } else {
    /* An insert:
     * ppArg[1] = requested rowid
     * ppArg[2..2+v->nColumn-1] = values
     * ppArg[2+v->nColumn] = value for magic column (we ignore this)
     */
    assert( nArg==2+v->nColumn+1);
    rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
  }

  if( rc==SQLITE_OK ){
    /* Write updated doclists to disk. */
    for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
      DocList *p = fts1HashData(e);
      rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p);
      if( rc!=SQLITE_OK ) break;
    }
  }








  /* clean up */































  for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
    DocList *p = fts1HashData(e);
    docListDelete(p);
  }
  fts1HashClear(&terms);


  return rc;
























}

/*
** Implementation of the snippet() function for FTS1
*/
static void snippetFunc(
  sqlite3_context *pContext,