SQLite

Check-in [69bffc1632]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Updates so that fts5 API functions xInst, xPhraseFirst and xPhraseNext work with the offsets=0 option.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5-offsets
Files: files | file ages | folders
SHA1: 69bffc1632c8a8f3bfe5bf92607e64fed982e48c
User & Date: dan 2015-12-22 18:54:16.763
Context
2015-12-28
19:55
Change the name of the offsets=0 option to "detail=column". Have the xInst, xPhraseFirst and other API functions work by parsing the original text for detail=column tables. (check-in: 228b4d10e3 user: dan tags: fts5-offsets)
2015-12-22
18:54
Updates so that fts5 API functions xInst, xPhraseFirst and xPhraseNext work with the offsets=0 option. (check-in: 69bffc1632 user: dan tags: fts5-offsets)
2015-12-21
18:45
Fix an fts5 integrity-check problem that affects offsets=0 tables with prefix indexes. (check-in: 609a0bc7f3 user: dan tags: fts5-offsets)
Changes
Side-by-Side Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5.h.
105
106
107
108
109
110
111






112
113
114
115
116
117
118
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124







+
+
+
+
+
+







**   an error code (i.e. SQLITE_NOMEM) if an error occurs.
**
** xInst:
**   Query for the details of phrase match iIdx within the current row.
**   Phrase matches are numbered starting from zero, so the iIdx argument
**   should be greater than or equal to zero and smaller than the value
**   output by xInstCount().
**
**   Usually, output parameter *piPhrase is set to the phrase number, *piCol
**   to the column in which it occurs and *piOff the token offset of the
**   first token of the phrase. The exception is if the table was created
**   with the offsets=0 option specified. In this case *piOff is always
**   set to -1.
**
**   Returns SQLITE_OK if successful, or an error code (i.e. SQLITE_NOMEM) 
**   if an error occurs.
**
** xRowid:
**   Returns the rowid of the current row.
**
192
193
194
195
196
197
198
199

200
201
202
203
204
205
206
207
208
209
210
211
212
213

214
215
216
217
218
219
220
198
199
200
201
202
203
204

205
206
207
208
209
210
211
212
213
214
215
216
217
218

219
220
221
222
223
224
225
226







-
+













-
+







**   xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
**   to use, this API may be faster under some circumstances. To iterate 
**   through instances of phrase iPhrase, use the following code:
**
**       Fts5PhraseIter iter;
**       int iCol, iOff;
**       for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
**           iOff>=0;
**           iCol>=0;
**           pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
**       ){
**         // An instance of phrase iPhrase at offset iOff of column iCol
**       }
**
**   The Fts5PhraseIter structure is defined above. Applications should not
**   modify this structure directly - it should only be used as shown above
**   with the xPhraseFirst() and xPhraseNext() API methods.
**
** xPhraseNext()
**   See xPhraseFirst above.
*/
struct Fts5ExtensionApi {
  int iVersion;                   /* Currently always set to 1 */
  int iVersion;                   /* Currently always set to 2 */

  void *(*xUserData)(Fts5Context*);

  int (*xColumnCount)(Fts5Context*);
  int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
  int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);

Changes to ext/fts5/fts5_main.c.
304
305
306
307
308
309
310







311
312
313
314
315
316
317
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324







+
+
+
+
+
+
+








/*
** Return true if pTab is a contentless table.
*/
static int fts5IsContentless(Fts5Table *pTab){
  return pTab->pConfig->eContent==FTS5_CONTENT_NONE;
}

/*
** Return true if pTab is an offsetless table.
*/
static int fts5IsOffsetless(Fts5Table *pTab){
  return pTab->pConfig->bOffsets==0;
}

/*
** Delete a virtual table handle allocated by fts5InitVtab(). 
*/
static void fts5FreeVtab(Fts5Table *pTab){
  if( pTab ){
    sqlite3Fts5IndexClose(pTab->pIndex);
1745
1746
1747
1748
1749
1750
1751




1752
1753
1754
1755
1756
1757
1758
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769







+
+
+
+







  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  int rc = SQLITE_OK;
  if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0 
   || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) 
  ){
    if( iIdx<0 || iIdx>=pCsr->nInstCount ){
      rc = SQLITE_RANGE;
    }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){
      *piPhrase = pCsr->aInst[iIdx*3];
      *piCol = pCsr->aInst[iIdx*3 + 2];
      *piOff = -1;
    }else{
      *piPhrase = pCsr->aInst[iIdx*3];
      *piCol = pCsr->aInst[iIdx*3 + 1];
      *piOff = pCsr->aInst[iIdx*3 + 2];
    }
  }
  return rc;
1909
1910
1911
1912
1913
1914
1915





1916
1917
1918
1919
1920
1921
1922
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938







+
+
+
+
+







static void fts5ApiPhraseNext(
  Fts5Context *pCtx, 
  Fts5PhraseIter *pIter, 
  int *piCol, int *piOff
){
  if( pIter->a>=pIter->b ){
    *piCol = -1;
    *piOff = -1;
  }else if( fts5IsOffsetless((Fts5Table*)(((Fts5Cursor*)pCtx)->base.pVtab)) ){
    int iVal;
    pIter->a += fts5GetVarint32(pIter->a, iVal);
    *piCol += (iVal-2);
    *piOff = -1;
  }else{
    int iVal;
    pIter->a += fts5GetVarint32(pIter->a, iVal);
    if( iVal==1 ){
      pIter->a += fts5GetVarint32(pIter->a, iVal);
      *piCol = iVal;
Changes to ext/fts5/fts5_tcl.c.
231
232
233
234
235
236
237

238
239
240
241
242
243
244
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245







+







    { "xColumnText",       1, "COL" },                /*  9 */
    { "xColumnSize",       1, "COL" },                /* 10 */
    { "xQueryPhrase",      2, "PHRASE SCRIPT" },      /* 11 */
    { "xSetAuxdata",       1, "VALUE" },              /* 12 */
    { "xGetAuxdata",       1, "CLEAR" },              /* 13 */
    { "xSetAuxdataInt",    1, "INTEGER" },            /* 14 */
    { "xGetAuxdataInt",    1, "CLEAR" },              /* 15 */
    { "xPhraseForeach",    4, "IPHRASE COLVAR OFFVAR SCRIPT" }, /* 16 */
    { 0, 0, 0}
  };

  int rc;
  int iSub = 0;
  F5tApi *p = (F5tApi*)clientData;

424
425
426
427
428
429
430






























431
432
433
434
435
436
437
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







    }
    CASE(15, "xGetAuxdataInt") {
      int iVal;
      int bClear;
      if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ) return TCL_ERROR;
      iVal = ((char*)p->pApi->xGetAuxdata(p->pFts, bClear) - (char*)0);
      Tcl_SetObjResult(interp, Tcl_NewIntObj(iVal));
      break;
    }

    CASE(16, "xPhraseForeach") {
      int iPhrase;
      int iCol;
      int iOff;
      const char *zColvar;
      const char *zOffvar;
      Tcl_Obj *pScript = objv[5];
      Fts5PhraseIter iter;

      if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR;
      zColvar = Tcl_GetString(objv[3]);
      zOffvar = Tcl_GetString(objv[4]);

      for(p->pApi->xPhraseFirst(p->pFts, iPhrase, &iter, &iCol, &iOff);
          iCol>=0;
          p->pApi->xPhraseNext(p->pFts, &iter, &iCol, &iOff)
      ){
        Tcl_SetVar2Ex(interp, zColvar, 0, Tcl_NewIntObj(iCol), 0);
        Tcl_SetVar2Ex(interp, zOffvar, 0, Tcl_NewIntObj(iOff), 0);
        rc = Tcl_EvalObjEx(interp, pScript, 0);
        if( rc==TCL_CONTINUE ) rc = TCL_OK;
        if( rc!=TCL_OK ){
          if( rc==TCL_BREAK ) rc = TCL_OK;
          break;
        }
      }

      break;
    }

    default: 
      assert( 0 );
      break;
  }
Changes to ext/fts5/fts5_test_mi.c.
130
131
132
133
134
135
136
137

138
139
140
141
142
143
144
130
131
132
133
134
135
136

137
138
139
140
141
142
143
144







-
+







){
  Fts5PhraseIter iter;
  int iCol, iOff;
  u32 *aOut = (u32*)pUserData;
  int iPrev = -1;

  for(pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff); 
      iOff>=0; 
      iCol>=0; 
      pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
  ){
    aOut[iCol*3+1]++;
    if( iCol!=iPrev ) aOut[iCol*3 + 2]++;
    iPrev = iCol;
  }

Changes to ext/fts5/test/fts5_common.tcl.
23
24
25
26
27
28
29












30
31
32
33
34
35
36
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48







+
+
+
+
+
+
+
+
+
+
+
+







proc fts5_test_poslist {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
    lappend res [string map {{ } .} [$cmd xInst $i]]
  }
  set res
}

proc fts5_test_poslist2 {cmd} {
  set res [list]

  for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {
    $cmd xPhraseForeach $i c o {
      lappend res $i.$c.$o
    }
  }

  set res
}

proc fts5_test_columnsize {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
    lappend res [$cmd xColumnSize $i]
  }
  set res
109
110
111
112
113
114
115

116
117
118
119
120
121
122
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135







+








proc fts5_aux_test_functions {db} {
  foreach f {
    fts5_test_columnsize
    fts5_test_columntext
    fts5_test_columntotalsize
    fts5_test_poslist
    fts5_test_poslist2
    fts5_test_tokenize
    fts5_test_rowcount
    fts5_test_all

    fts5_test_queryphrase
    fts5_test_phrasecount
  } {
Changes to ext/fts5/test/fts5offsets.test.
70
71
72
73
74
75
76
77
78
79




















































































80
81
82
83
70
71
72
73
74
75
76

77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166







-


+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+




do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, offsets=0, prefix="1");
  INSERT INTO t2(a) VALUES('aa ab');
}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r}

breakpoint
do_execsql_test 2.1 {
  INSERT INTO t2(t2) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
# Check that the xInstCount, xInst, xPhraseFirst and xPhraseNext APIs
# work with offsets=0 tables.
#
set data {
  1  {abb aca aca} {aba bab aab aac caa} {abc cbc ccb bcc bab ccb aca}
  2  {bca aca acb} {ccb bcc bca aab bcc} {bab aaa aac cbb bba aca abc}
  3  {cca abc cab} {aab aba bcc cac baa} {bab cbb acb aba aab ccc cca}
  4  {ccb bcb aba} {aba bbb bcc cac bbb} {cbb aaa bca bcc aab cac aca}
  5  {bca bbc cac} {aba cbb cac cca aca} {cab acb cbc ccb cac bbb bcb}
  6  {acc bba cba} {bab bbc bbb bcb aca} {bca ccc cbb aca bac ccc ccb}
  7  {aba bab aaa} {abb bca aac bcb bcc} {bcb bbc aba aaa cba abc acc}
  8  {cab aba aaa} {ccb aca caa bbc bcc} {aaa abc ccb bbb cac cca abb}
  9  {bcb bab bac} {bcb cba cac bbb abc} {aba aca cbb acb abb ccc ccb}
  10 {aba aab ccc} {abc ccc bcc cab bbb} {aab bcc cbb ccc aaa bac baa}
  11 {bab acb cba} {aac cab cab bca cbc} {aab cbc aac baa ccb acc cac}
  12 {ccc cbb cbc} {aaa aab bcc aac bbc} {cbc cbc bac bac ccc bbc acc}
  13 {cab bbc abc} {bbb bab bba aca bab} {baa bbb aab bbb ccb bbb ccc}
  14 {bbc cab caa} {acb aac abb cba acc} {cba bba bba acb abc abb baa}
  15 {aba cca bcc} {aaa acb abc aab ccb} {cca bcb acc aaa caa cca cbc}
  16 {bcb bba aba} {cbc acb cab caa ccb} {aac aaa bbc cab cca cba abc}
  17 {caa cbb acc} {ccb bcb bca aaa bcc} {bbb aca bcb bca cbc cbc cca}
  18 {cbb bbc aac} {ccc bbc aaa aab baa} {cab cab cac cca bbc abc bbc}
  19 {ccc acc aaa} {aab cbb bca cca caa} {bcb aca aca cab acc bac bcc}
  20 {aab ccc bcb} {bbc cbb bbc aaa bcc} {cbc aab ccc aaa bcb bac cbc}
  21 {aba cab ccc} {bbc cbc cba acc bbb} {acc aab aac acb aca bca acb}
  22 {bcb bca baa} {cca bbc aca ccb cbb} {aab abc bbc aaa cab bcc bcc}
  23 {cac cbb caa} {bbc aba bbb bcc ccb} {bbc bbb cab bbc cac abb acc}
  24 {ccb acb caa} {cab bba cac bbc aac} {aac bca abc cab bca cab bcb}
  25 {bbb aca bca} {bcb acc ccc cac aca} {ccc acb acc cac cac bba bbc}
  26 {bab acc caa} {caa cab cac bac aca} {aba cac caa acc bac ccc aaa}
  27 {bca bca aaa} {ccb aca bca aaa baa} {bab acc aaa cca cba cca bac}
  28 {ccb cac cac} {bca abb bba bbc baa} {aca ccb aac cab ccc cab caa}
  29 {abc bca cab} {cac cbc cbb ccc bcc} {bcc aaa aaa acc aac cac aac}
  30 {aca acc acb} {aab aac cbb caa acb} {acb bbc bbc acc cbb bbc aac}
  31 {aba aca baa} {aca bcc cab bab acb} {bcc acb baa bcb bbc acc aba}
  32 {abb cbc caa} {cba abb bbb cbb aca} {bac aca caa cac caa ccb bbc}
  33 {bcc bcb bcb} {cca cab cbc abb bab} {caa bbc aac bbb cab cba aaa}
  34 {caa cab acc} {ccc ccc bcc acb bcc} {bac bba aca bcb bba bcb cac}
  35 {bac bcb cba} {bcc acb bbc cba bab} {abb cbb abc abc bac acc cbb}
  36 {cab bab ccb} {bca bba bab cca acc} {acc aab bcc bac acb cbb caa}
  37 {aca cbc cab} {bba aac aca aac aaa} {baa cbb cba aba cab bca bcb}
  38 {acb aab baa} {baa bab bca bbc bbb} {abc baa acc aba cab baa cac}
  39 {bcb aac cba} {bcb baa caa cac bbc} {cbc ccc bab ccb bbb caa aba}
  40 {cba ccb abc} {cbb caa cba aac bab} {cbb bbb bca bbb bac cac bca}
}
foreach {tn tbl} {
  1 { CREATE VIRTUAL TABLE t3 USING fts5(x, y, z, offsets=0) }
} {
  reset_db
  fts5_aux_test_functions db
  execsql $tbl
  foreach {id x y z} $data {
    execsql { INSERT INTO t3(rowid, x, y, z) VALUES($id, $x, $y, $z) }
  }
  foreach {tn2 expr} {
    1 aaa    2 ccc    3 bab    4 aac
    5 aa*    6 cc*    7 ba*    8 aa*
    9 a*     10 b*   11 c*
  } {

    set res [list]
    foreach {id x y z} $data {
      if {[lsearch [concat $x $y $z] $expr]>=0} {
        lappend res $id
        set inst [list]
        if {[lsearch $x $expr]>=0} { lappend inst 0.0.-1 }
        if {[lsearch $y $expr]>=0} { lappend inst 0.1.-1 }
        if {[lsearch $z $expr]>=0} { lappend inst 0.2.-1 }
        lappend res $inst
      }
    }

    do_execsql_test 3.$tn.$tn2.1 {
      SELECT rowid, fts5_test_poslist(t3) FROM t3($expr)
    } $res

    do_execsql_test 3.$tn.$tn2.2 {
      SELECT rowid, fts5_test_poslist2(t3) FROM t3($expr)
    } $res
  }

}

finish_test