/ Check-in [69bffc16]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Updates so that fts5 API functions xInst, xPhraseFirst and xPhraseNext work with the offsets=0 option.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5-offsets
Files: files | file ages | folders
SHA1: 69bffc1632c8a8f3bfe5bf92607e64fed982e48c
User & Date: dan 2015-12-22 18:54:16
Context
2015-12-28
19:55
Change the name of the offsets=0 option to "detail=column". Have the xInst, xPhraseFirst and other API functions work by parsing the original text for detail=column tables. check-in: 228b4d10 user: dan tags: fts5-offsets
2015-12-22
18:54
Updates so that fts5 API functions xInst, xPhraseFirst and xPhraseNext work with the offsets=0 option. check-in: 69bffc16 user: dan tags: fts5-offsets
2015-12-21
18:45
Fix an fts5 integrity-check problem that affects offsets=0 tables with prefix indexes. check-in: 609a0bc7 user: dan tags: fts5-offsets
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5.h.

   105    105   **   an error code (e.g. SQLITE_NOMEM) if an error occurs.
   106    106   **
   107    107   ** xInst:
   108    108   **   Query for the details of phrase match iIdx within the current row.
   109    109   **   Phrase matches are numbered starting from zero, so the iIdx argument
   110    110   **   should be greater than or equal to zero and smaller than the value
   111    111   **   output by xInstCount().
          112  +**
          113  +**   Usually, output parameter *piPhrase is set to the phrase number, *piCol
          114  +**   to the column in which it occurs and *piOff to the token offset of the
          115  +**   first token of the phrase. The exception is if the table was created
          116  +**   with the offsets=0 option specified. In this case *piOff is always
          117  +**   set to -1.
   112    118   **
   113    119   **   Returns SQLITE_OK if successful, or an error code (e.g. SQLITE_NOMEM) 
   114    120   **   if an error occurs.
   115    121   **
   116    122   ** xRowid:
   117    123   **   Returns the rowid of the current row.
   118    124   **
................................................................................
   192    198   **   xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
   193    199   **   to use, this API may be faster under some circumstances. To iterate 
   194    200   **   through instances of phrase iPhrase, use the following code:
   195    201   **
   196    202   **       Fts5PhraseIter iter;
   197    203   **       int iCol, iOff;
   198    204   **       for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
   199         -**           iOff>=0;
          205  +**           iCol>=0;
   200    206   **           pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
   201    207   **       ){
   202    208   **         // An instance of phrase iPhrase at offset iOff of column iCol
   203    209   **       }
   204    210   **
   205    211   **   The Fts5PhraseIter structure is defined above. Applications should not
   206    212   **   modify this structure directly - it should only be used as shown above
   207    213   **   with the xPhraseFirst() and xPhraseNext() API methods.
   208    214   **
   209    215   ** xPhraseNext()
   210    216   **   See xPhraseFirst above.
   211    217   */
   212    218   struct Fts5ExtensionApi {
   213         -  int iVersion;                   /* Currently always set to 1 */
          219  +  int iVersion;                   /* Currently always set to 2 */
   214    220   
   215    221     void *(*xUserData)(Fts5Context*);
   216    222   
   217    223     int (*xColumnCount)(Fts5Context*);
   218    224     int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
   219    225     int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
   220    226   

Changes to ext/fts5/fts5_main.c.

   304    304   
   305    305   /*
   306    306   ** Return true if pTab is a contentless table.
   307    307   */
   308    308   static int fts5IsContentless(Fts5Table *pTab){
   309    309     return pTab->pConfig->eContent==FTS5_CONTENT_NONE;
   310    310   }
          311  +
          312  +/*
          313  +** Return true if pTab is an offsetless table.
          314  +*/
          315  +static int fts5IsOffsetless(Fts5Table *pTab){
          316  +  return pTab->pConfig->bOffsets==0;
          317  +}
   311    318   
   312    319   /*
   313    320   ** Delete a virtual table handle allocated by fts5InitVtab(). 
   314    321   */
   315    322   static void fts5FreeVtab(Fts5Table *pTab){
   316    323     if( pTab ){
   317    324       sqlite3Fts5IndexClose(pTab->pIndex);
................................................................................
  1745   1752     Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  1746   1753     int rc = SQLITE_OK;
  1747   1754     if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0 
  1748   1755      || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) 
  1749   1756     ){
  1750   1757       if( iIdx<0 || iIdx>=pCsr->nInstCount ){
  1751   1758         rc = SQLITE_RANGE;
         1759  +    }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){
         1760  +      *piPhrase = pCsr->aInst[iIdx*3];
         1761  +      *piCol = pCsr->aInst[iIdx*3 + 2];
         1762  +      *piOff = -1;
  1752   1763       }else{
  1753   1764         *piPhrase = pCsr->aInst[iIdx*3];
  1754   1765         *piCol = pCsr->aInst[iIdx*3 + 1];
  1755   1766         *piOff = pCsr->aInst[iIdx*3 + 2];
  1756   1767       }
  1757   1768     }
  1758   1769     return rc;
................................................................................
  1909   1920   static void fts5ApiPhraseNext(
  1910   1921     Fts5Context *pCtx, 
  1911   1922     Fts5PhraseIter *pIter, 
  1912   1923     int *piCol, int *piOff
  1913   1924   ){
  1914   1925     if( pIter->a>=pIter->b ){
  1915   1926       *piCol = -1;
         1927  +    *piOff = -1;
         1928  +  }else if( fts5IsOffsetless((Fts5Table*)(((Fts5Cursor*)pCtx)->base.pVtab)) ){
         1929  +    int iVal;
         1930  +    pIter->a += fts5GetVarint32(pIter->a, iVal);
         1931  +    *piCol += (iVal-2);
  1916   1932       *piOff = -1;
  1917   1933     }else{
  1918   1934       int iVal;
  1919   1935       pIter->a += fts5GetVarint32(pIter->a, iVal);
  1920   1936       if( iVal==1 ){
  1921   1937         pIter->a += fts5GetVarint32(pIter->a, iVal);
  1922   1938         *piCol = iVal;

Changes to ext/fts5/fts5_tcl.c.

   231    231       { "xColumnText",       1, "COL" },                /*  9 */
   232    232       { "xColumnSize",       1, "COL" },                /* 10 */
   233    233       { "xQueryPhrase",      2, "PHRASE SCRIPT" },      /* 11 */
   234    234       { "xSetAuxdata",       1, "VALUE" },              /* 12 */
   235    235       { "xGetAuxdata",       1, "CLEAR" },              /* 13 */
   236    236       { "xSetAuxdataInt",    1, "INTEGER" },            /* 14 */
   237    237       { "xGetAuxdataInt",    1, "CLEAR" },              /* 15 */
          238  +    { "xPhraseForeach",    4, "IPHRASE COLVAR OFFVAR SCRIPT" }, /* 16 */
   238    239       { 0, 0, 0}
   239    240     };
   240    241   
   241    242     int rc;
   242    243     int iSub = 0;
   243    244     F5tApi *p = (F5tApi*)clientData;
   244    245   
................................................................................
   424    425       }
   425    426       CASE(15, "xGetAuxdataInt") {
   426    427         int iVal;
   427    428         int bClear;
   428    429         if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ) return TCL_ERROR;
   429    430         iVal = ((char*)p->pApi->xGetAuxdata(p->pFts, bClear) - (char*)0);
   430    431         Tcl_SetObjResult(interp, Tcl_NewIntObj(iVal));
          432  +      break;
          433  +    }
          434  +
          435  +    CASE(16, "xPhraseForeach") {
          436  +      int iPhrase;
          437  +      int iCol;
          438  +      int iOff;
          439  +      const char *zColvar;
          440  +      const char *zOffvar;
          441  +      Tcl_Obj *pScript = objv[5];
          442  +      Fts5PhraseIter iter;
          443  +
          444  +      if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR;
          445  +      zColvar = Tcl_GetString(objv[3]);
          446  +      zOffvar = Tcl_GetString(objv[4]);
          447  +
          448  +      for(p->pApi->xPhraseFirst(p->pFts, iPhrase, &iter, &iCol, &iOff);
          449  +          iCol>=0;
          450  +          p->pApi->xPhraseNext(p->pFts, &iter, &iCol, &iOff)
          451  +      ){
          452  +        Tcl_SetVar2Ex(interp, zColvar, 0, Tcl_NewIntObj(iCol), 0);
          453  +        Tcl_SetVar2Ex(interp, zOffvar, 0, Tcl_NewIntObj(iOff), 0);
          454  +        rc = Tcl_EvalObjEx(interp, pScript, 0);
          455  +        if( rc==TCL_CONTINUE ) rc = TCL_OK;
          456  +        if( rc!=TCL_OK ){
          457  +          if( rc==TCL_BREAK ) rc = TCL_OK;
          458  +          break;
          459  +        }
          460  +      }
          461  +
   431    462         break;
   432    463       }
   433    464   
   434    465       default: 
   435    466         assert( 0 );
   436    467         break;
   437    468     }

Changes to ext/fts5/fts5_test_mi.c.

   130    130   ){
   131    131     Fts5PhraseIter iter;
   132    132     int iCol, iOff;
   133    133     u32 *aOut = (u32*)pUserData;
   134    134     int iPrev = -1;
   135    135   
   136    136     for(pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff); 
   137         -      iOff>=0; 
          137  +      iCol>=0; 
   138    138         pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
   139    139     ){
   140    140       aOut[iCol*3+1]++;
   141    141       if( iCol!=iPrev ) aOut[iCol*3 + 2]++;
   142    142       iPrev = iCol;
   143    143     }
   144    144   

Changes to ext/fts5/test/fts5_common.tcl.

    23     23   proc fts5_test_poslist {cmd} {
    24     24     set res [list]
    25     25     for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
    26     26       lappend res [string map {{ } .} [$cmd xInst $i]]
    27     27     }
    28     28     set res
    29     29   }
           30  +
           31  +proc fts5_test_poslist2 {cmd} {
           32  +  set res [list]
           33  +
           34  +  for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {
           35  +    $cmd xPhraseForeach $i c o {
           36  +      lappend res $i.$c.$o
           37  +    }
           38  +  }
           39  +
           40  +  set res
           41  +}
    30     42   
    31     43   proc fts5_test_columnsize {cmd} {
    32     44     set res [list]
    33     45     for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
    34     46       lappend res [$cmd xColumnSize $i]
    35     47     }
    36     48     set res
................................................................................
   109    121   
   110    122   proc fts5_aux_test_functions {db} {
   111    123     foreach f {
   112    124       fts5_test_columnsize
   113    125       fts5_test_columntext
   114    126       fts5_test_columntotalsize
   115    127       fts5_test_poslist
          128  +    fts5_test_poslist2
   116    129       fts5_test_tokenize
   117    130       fts5_test_rowcount
   118    131       fts5_test_all
   119    132   
   120    133       fts5_test_queryphrase
   121    134       fts5_test_phrasecount
   122    135     } {

Changes to ext/fts5/test/fts5offsets.test.

    70     70   do_execsql_test 2.0 {
    71     71     CREATE VIRTUAL TABLE t2 USING fts5(a, offsets=0, prefix="1");
    72     72     INSERT INTO t2(a) VALUES('aa ab');
    73     73   }
    74     74   
    75     75   #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r}
    76     76   
    77         -breakpoint
    78     77   do_execsql_test 2.1 {
    79     78     INSERT INTO t2(t2) VALUES('integrity-check');
           79  +}
           80  +
           81  +#-------------------------------------------------------------------------
           82  +# Check that the xInstCount, xInst, xPhraseFirst and xPhraseNext APIs
           83  +# work with offsets=0 tables.
           84  +#
           85  +set data {
           86  +  1  {abb aca aca} {aba bab aab aac caa} {abc cbc ccb bcc bab ccb aca}
           87  +  2  {bca aca acb} {ccb bcc bca aab bcc} {bab aaa aac cbb bba aca abc}
           88  +  3  {cca abc cab} {aab aba bcc cac baa} {bab cbb acb aba aab ccc cca}
           89  +  4  {ccb bcb aba} {aba bbb bcc cac bbb} {cbb aaa bca bcc aab cac aca}
           90  +  5  {bca bbc cac} {aba cbb cac cca aca} {cab acb cbc ccb cac bbb bcb}
           91  +  6  {acc bba cba} {bab bbc bbb bcb aca} {bca ccc cbb aca bac ccc ccb}
           92  +  7  {aba bab aaa} {abb bca aac bcb bcc} {bcb bbc aba aaa cba abc acc}
           93  +  8  {cab aba aaa} {ccb aca caa bbc bcc} {aaa abc ccb bbb cac cca abb}
           94  +  9  {bcb bab bac} {bcb cba cac bbb abc} {aba aca cbb acb abb ccc ccb}
           95  +  10 {aba aab ccc} {abc ccc bcc cab bbb} {aab bcc cbb ccc aaa bac baa}
           96  +  11 {bab acb cba} {aac cab cab bca cbc} {aab cbc aac baa ccb acc cac}
           97  +  12 {ccc cbb cbc} {aaa aab bcc aac bbc} {cbc cbc bac bac ccc bbc acc}
           98  +  13 {cab bbc abc} {bbb bab bba aca bab} {baa bbb aab bbb ccb bbb ccc}
           99  +  14 {bbc cab caa} {acb aac abb cba acc} {cba bba bba acb abc abb baa}
          100  +  15 {aba cca bcc} {aaa acb abc aab ccb} {cca bcb acc aaa caa cca cbc}
          101  +  16 {bcb bba aba} {cbc acb cab caa ccb} {aac aaa bbc cab cca cba abc}
          102  +  17 {caa cbb acc} {ccb bcb bca aaa bcc} {bbb aca bcb bca cbc cbc cca}
          103  +  18 {cbb bbc aac} {ccc bbc aaa aab baa} {cab cab cac cca bbc abc bbc}
          104  +  19 {ccc acc aaa} {aab cbb bca cca caa} {bcb aca aca cab acc bac bcc}
          105  +  20 {aab ccc bcb} {bbc cbb bbc aaa bcc} {cbc aab ccc aaa bcb bac cbc}
          106  +  21 {aba cab ccc} {bbc cbc cba acc bbb} {acc aab aac acb aca bca acb}
          107  +  22 {bcb bca baa} {cca bbc aca ccb cbb} {aab abc bbc aaa cab bcc bcc}
          108  +  23 {cac cbb caa} {bbc aba bbb bcc ccb} {bbc bbb cab bbc cac abb acc}
          109  +  24 {ccb acb caa} {cab bba cac bbc aac} {aac bca abc cab bca cab bcb}
          110  +  25 {bbb aca bca} {bcb acc ccc cac aca} {ccc acb acc cac cac bba bbc}
          111  +  26 {bab acc caa} {caa cab cac bac aca} {aba cac caa acc bac ccc aaa}
          112  +  27 {bca bca aaa} {ccb aca bca aaa baa} {bab acc aaa cca cba cca bac}
          113  +  28 {ccb cac cac} {bca abb bba bbc baa} {aca ccb aac cab ccc cab caa}
          114  +  29 {abc bca cab} {cac cbc cbb ccc bcc} {bcc aaa aaa acc aac cac aac}
          115  +  30 {aca acc acb} {aab aac cbb caa acb} {acb bbc bbc acc cbb bbc aac}
          116  +  31 {aba aca baa} {aca bcc cab bab acb} {bcc acb baa bcb bbc acc aba}
          117  +  32 {abb cbc caa} {cba abb bbb cbb aca} {bac aca caa cac caa ccb bbc}
          118  +  33 {bcc bcb bcb} {cca cab cbc abb bab} {caa bbc aac bbb cab cba aaa}
          119  +  34 {caa cab acc} {ccc ccc bcc acb bcc} {bac bba aca bcb bba bcb cac}
          120  +  35 {bac bcb cba} {bcc acb bbc cba bab} {abb cbb abc abc bac acc cbb}
          121  +  36 {cab bab ccb} {bca bba bab cca acc} {acc aab bcc bac acb cbb caa}
          122  +  37 {aca cbc cab} {bba aac aca aac aaa} {baa cbb cba aba cab bca bcb}
          123  +  38 {acb aab baa} {baa bab bca bbc bbb} {abc baa acc aba cab baa cac}
          124  +  39 {bcb aac cba} {bcb baa caa cac bbc} {cbc ccc bab ccb bbb caa aba}
          125  +  40 {cba ccb abc} {cbb caa cba aac bab} {cbb bbb bca bbb bac cac bca}
          126  +}
          127  +foreach {tn tbl} {
          128  +  1 { CREATE VIRTUAL TABLE t3 USING fts5(x, y, z, offsets=0) }
          129  +} {
          130  +  reset_db
          131  +  fts5_aux_test_functions db
          132  +  execsql $tbl
          133  +  foreach {id x y z} $data {
          134  +    execsql { INSERT INTO t3(rowid, x, y, z) VALUES($id, $x, $y, $z) }
          135  +  }
          136  +  foreach {tn2 expr} {
          137  +    1 aaa    2 ccc    3 bab    4 aac
          138  +    5 aa*    6 cc*    7 ba*    8 aa*
          139  +    9 a*     10 b*   11 c*
          140  +  } {
          141  +
          142  +    set res [list]
          143  +    foreach {id x y z} $data {
          144  +      if {[lsearch [concat $x $y $z] $expr]>=0} {
          145  +        lappend res $id
          146  +        set inst [list]
          147  +        if {[lsearch $x $expr]>=0} { lappend inst 0.0.-1 }
          148  +        if {[lsearch $y $expr]>=0} { lappend inst 0.1.-1 }
          149  +        if {[lsearch $z $expr]>=0} { lappend inst 0.2.-1 }
          150  +        lappend res $inst
          151  +      }
          152  +    }
          153  +
          154  +    do_execsql_test 3.$tn.$tn2.1 {
          155  +      SELECT rowid, fts5_test_poslist(t3) FROM t3($expr)
          156  +    } $res
          157  +
          158  +    do_execsql_test 3.$tn.$tn2.2 {
          159  +      SELECT rowid, fts5_test_poslist2(t3) FROM t3($expr)
          160  +    } $res
          161  +  }
          162  +
    80    163   }
    81    164   
    82    165   finish_test
    83    166