/ Check-in [1cd15a17]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Allow the rank column to be remapped on a per-query basis by including a term similar to "rank match 'bm25(10,2)'" in a where clause.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 1cd15a1759004d5d321056905dbb6acff20dc7d9
User & Date: dan 2015-01-02 14:55:22
Context
2015-01-03
20:44
Add support for external content tables to fts5. check-in: 17ef5b59 user: dan tags: fts5
2015-01-02
14:55
Allow the rank column to be remapped on a per-query basis by including a term similar to "rank match 'bm25(10,2)'" in a where clause. check-in: 1cd15a17 user: dan tags: fts5
2015-01-01
18:03
Merge latest trunk changes with this branch. check-in: 4b365167 user: dan tags: fts5
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5.c.

   156    156     Fts5Expr *pExpr;                /* Expression for MATCH queries */
   157    157     Fts5Sorter *pSorter;            /* Sorter for "ORDER BY rank" queries */
   158    158     int csrflags;                   /* Mask of cursor flags (see below) */
   159    159     Fts5Cursor *pNext;              /* Next cursor in Fts5Cursor.pCsr list */
   160    160     char *zSpecial;                 /* Result of special query */
   161    161   
   162    162     /* "rank" function. Populated on demand from vtab.xColumn(). */
          163  +  char *zRank;                    /* Custom rank function */
          164  +  char *zRankArgs;                /* Custom rank function args */
   163    165     Fts5Auxiliary *pRank;           /* Rank callback (or NULL) */
   164    166     int nRankArg;                   /* Number of trailing arguments for rank() */
   165    167     sqlite3_value **apRankArg;      /* Array of trailing arguments */
   166    168     sqlite3_stmt *pRankArgStmt;     /* Origin of objects in apRankArg[] */
   167    169   
   168    170     /* Variables used by auxiliary functions */
   169    171     i64 iCsrId;                     /* Cursor id */
................................................................................
   177    179   
   178    180   /*
   179    181   ** Values for Fts5Cursor.csrflags
   180    182   */
   181    183   #define FTS5CSR_REQUIRE_CONTENT   0x01
   182    184   #define FTS5CSR_REQUIRE_DOCSIZE   0x02
   183    185   #define FTS5CSR_EOF               0x04
          186  +#define FTS5CSR_FREE_ZRANK        0x08
   184    187   
   185    188   /*
   186    189   ** Macros to Set(), Clear() and Test() cursor flags.
   187    190   */
   188    191   #define CsrFlagSet(pCsr, flag)   ((pCsr)->csrflags |= (flag))
   189    192   #define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag))
   190    193   #define CsrFlagTest(pCsr, flag)  ((pCsr)->csrflags & (flag))
................................................................................
   414    417   **   3. A full-table scan.
   415    418   */
   416    419   static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
   417    420     Fts5Table *pTab = (Fts5Table*)pVTab;
   418    421     Fts5Config *pConfig = pTab->pConfig;
   419    422     int iCons;
   420    423     int ePlan = FTS5_PLAN_SCAN;
          424  +  int iRankMatch;
   421    425   
   422    426     iCons = fts5FindConstraint(pInfo,SQLITE_INDEX_CONSTRAINT_MATCH,pConfig->nCol);
   423    427     if( iCons>=0 ){
   424    428       ePlan = FTS5_PLAN_MATCH;
   425    429       pInfo->estimatedCost = 1.0;
   426    430     }else{
   427    431       iCons = fts5FindConstraint(pInfo, SQLITE_INDEX_CONSTRAINT_EQ, -1);
................................................................................
   449    453         ePlan = FTS5_PLAN_SORTED_MATCH;
   450    454       }
   451    455   
   452    456       if( pInfo->orderByConsumed ){
   453    457         ePlan |= pInfo->aOrderBy[0].desc ? FTS5_ORDER_DESC : FTS5_ORDER_ASC;
   454    458       }
   455    459     }
          460  +
          461  +  iRankMatch = fts5FindConstraint(
          462  +      pInfo, SQLITE_INDEX_CONSTRAINT_MATCH, pConfig->nCol+1
          463  +  );
          464  +  if( iRankMatch>=0 ){
          465  +    pInfo->aConstraintUsage[iRankMatch].argvIndex = 1 + (iCons>=0);
          466  +    pInfo->aConstraintUsage[iRankMatch].omit = 1;
          467  +  }
   456    468      
   457    469     pInfo->idxNum = ePlan;
   458    470     return SQLITE_OK;
   459    471   }
   460    472   
   461    473   /*
   462    474   ** Implementation of xOpen method.
................................................................................
   539    551     for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext);
   540    552     *pp = pCsr->pNext;
   541    553   
   542    554     sqlite3_finalize(pCsr->pRankArgStmt);
   543    555     sqlite3_free(pCsr->apRankArg);
   544    556   
   545    557     sqlite3_free(pCsr->zSpecial);
          558  +  if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){
          559  +    sqlite3_free(pCsr->zRank);
          560  +    sqlite3_free(pCsr->zRankArgs);
          561  +  }
   546    562     sqlite3_free(pCsr);
   547    563     return SQLITE_OK;
   548    564   }
   549    565   
   550    566   static int fts5SorterNext(Fts5Cursor *pCsr){
   551    567     Fts5Sorter *pSorter = pCsr->pSorter;
   552    568     int rc;
................................................................................
   632    648   static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){
   633    649     Fts5Config *pConfig = pTab->pConfig;
   634    650     Fts5Sorter *pSorter;
   635    651     int nPhrase;
   636    652     int nByte;
   637    653     int rc = SQLITE_OK;
   638    654     char *zSql;
   639         -  const char *zRank = pConfig->zRank ? pConfig->zRank : FTS5_DEFAULT_RANK;
          655  +  const char *zRank = pCsr->zRank;
          656  +  const char *zRankArgs = pCsr->zRankArgs;
   640    657     
   641    658     nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
   642    659     nByte = sizeof(Fts5Sorter) + sizeof(int) * nPhrase;
   643    660     pSorter = (Fts5Sorter*)sqlite3_malloc(nByte);
   644    661     if( pSorter==0 ) return SQLITE_NOMEM;
   645    662     memset(pSorter, 0, nByte);
   646    663     pSorter->nIdx = nPhrase;
................................................................................
   650    667     ** is not possible as SQLite reference counts the virtual table objects.
   651    668     ** And since the statement required here reads from this very virtual 
   652    669     ** table, saving it creates a circular reference.
   653    670     **
   654    671     ** If SQLite had a built-in statement cache, this wouldn't be a problem. */
   655    672     zSql = sqlite3_mprintf("SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s",
   656    673         pConfig->zDb, pConfig->zName, zRank, pConfig->zName,
   657         -      (pConfig->zRankArgs ? ", " : ""),
   658         -      (pConfig->zRankArgs ? pConfig->zRankArgs : ""),
          674  +      (zRankArgs ? ", " : ""),
          675  +      (zRankArgs ? zRankArgs : ""),
   659    676         bAsc ? "ASC" : "DESC"
   660    677     );
   661    678     if( zSql==0 ){
   662    679       rc = SQLITE_NOMEM;
   663    680     }else{
   664    681       rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pSorter->pStmt, 0);
   665    682       sqlite3_free(zSql);
................................................................................
   743    760     return 0;
   744    761   }
   745    762   
   746    763   
   747    764   static int fts5FindRankFunction(Fts5Cursor *pCsr){
   748    765     Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
   749    766     Fts5Config *pConfig = pTab->pConfig;
   750         -  const char *zRank = pConfig->zRank;
   751    767     int rc = SQLITE_OK;
   752    768     Fts5Auxiliary *pAux;
          769  +  const char *zRank = pCsr->zRank;
          770  +  const char *zRankArgs = pCsr->zRankArgs;
   753    771   
   754         -  if( zRank==0 ) zRank = FTS5_DEFAULT_RANK;
   755         -
   756         -  if( pTab->pConfig->zRankArgs ){
   757         -    char *zSql = sqlite3_mprintf("SELECT %s", pTab->pConfig->zRankArgs);
          772  +  if( zRankArgs ){
          773  +    char *zSql = sqlite3_mprintf("SELECT %s", zRankArgs);
   758    774       if( zSql==0 ){
   759    775         rc = SQLITE_NOMEM;
   760    776       }else{
   761    777         sqlite3_stmt *pStmt = 0;
   762    778         rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pStmt, 0);
   763    779         sqlite3_free(zSql);
   764    780         assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 );
................................................................................
   792    808       }
   793    809     }
   794    810   
   795    811     pCsr->pRank = pAux;
   796    812     return rc;
   797    813   }
   798    814   
          815  +
          816  +static int fts5CursorParseRank(
          817  +  Fts5Config *pConfig,
          818  +  Fts5Cursor *pCsr, 
          819  +  sqlite3_value *pRank
          820  +){
          821  +  int rc = SQLITE_OK;
          822  +  if( pRank ){
          823  +    const char *z = (const char*)sqlite3_value_text(pRank);
          824  +    char *zRank = 0;
          825  +    char *zRankArgs = 0;
          826  +
          827  +    rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs);
          828  +    if( rc==SQLITE_OK ){
          829  +      pCsr->zRank = zRank;
          830  +      pCsr->zRankArgs = zRankArgs;
          831  +      CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK);
          832  +    }else if( rc==SQLITE_ERROR ){
          833  +      pCsr->base.pVtab->zErrMsg = sqlite3_mprintf(
          834  +          "parse error in rank function: %s", z
          835  +      );
          836  +    }
          837  +  }else{
          838  +    if( pConfig->zRank ){
          839  +      pCsr->zRank = (char*)pConfig->zRank;
          840  +      pCsr->zRankArgs = (char*)pConfig->zRankArgs;
          841  +    }else{
          842  +      pCsr->zRank = (char*)FTS5_DEFAULT_RANK;
          843  +      pCsr->zRankArgs = 0;
          844  +    }
          845  +  }
          846  +  return rc;
          847  +}
          848  +
   799    849   /*
   800    850   ** This is the xFilter interface for the virtual table.  See
   801    851   ** the virtual table xFilter method documentation for additional
   802    852   ** information.
          853  +** 
          854  +** There are three possible query strategies:
          855  +**
          856  +**   1. Full-text search using a MATCH operator.
          857  +**   2. A by-rowid lookup.
          858  +**   3. A full-table scan.
   803    859   */
   804    860   static int fts5FilterMethod(
   805    861     sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
   806    862     int idxNum,                     /* Strategy index */
   807    863     const char *idxStr,             /* Unused */
   808    864     int nVal,                       /* Number of elements in apVal */
   809    865     sqlite3_value **apVal           /* Arguments for the indexing scheme */
   810    866   ){
   811    867     Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
   812    868     Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
   813    869     int bAsc = ((idxNum & FTS5_ORDER_ASC) ? 1 : 0);
   814    870     int rc = SQLITE_OK;
   815    871   
          872  +  assert( nVal<=2 );
   816    873     assert( pCsr->pStmt==0 );
   817    874     assert( pCsr->pExpr==0 );
   818    875     assert( pCsr->csrflags==0 );
   819    876     assert( pCsr->pRank==0 );
          877  +  assert( pCsr->zRank==0 );
          878  +  assert( pCsr->zRankArgs==0 );
   820    879   
   821    880     if( pTab->pSortCsr ){
   822    881       /* If pSortCsr is non-NULL, then this call is being made as part of 
   823    882       ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is
   824    883       ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will
   825    884       ** return results to the user for this query. The current cursor 
   826    885       ** (pCursor) is used to execute the query issued by function 
................................................................................
   831    890       rc = fts5CursorFirst(pTab, pCsr, bAsc);
   832    891     }else{
   833    892       int ePlan = FTS5_PLAN(idxNum);
   834    893       pCsr->idxNum = idxNum;
   835    894       if( ePlan==FTS5_PLAN_MATCH || ePlan==FTS5_PLAN_SORTED_MATCH ){
   836    895         const char *zExpr = (const char*)sqlite3_value_text(apVal[0]);
   837    896   
   838         -      if( zExpr[0]=='*' ){
   839         -        /* The user has issued a query of the form "MATCH '*...'". This
   840         -        ** indicates that the MATCH expression is not a full text query,
   841         -        ** but a request for an internal parameter.  */
   842         -        rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]);
   843         -      }else{
   844         -        char **pzErr = &pTab->base.zErrMsg;
   845         -        rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr);
   846         -        if( rc==SQLITE_OK ){
   847         -          if( ePlan==FTS5_PLAN_MATCH ){
   848         -            rc = fts5CursorFirst(pTab, pCsr, bAsc);
   849         -          }else{
   850         -            rc = fts5CursorFirstSorted(pTab, pCsr, bAsc);
          897  +      rc = fts5CursorParseRank(pTab->pConfig, pCsr, (nVal==2 ? apVal[1] : 0));
          898  +      if( rc==SQLITE_OK ){
          899  +        if( zExpr[0]=='*' ){
          900  +          /* The user has issued a query of the form "MATCH '*...'". This
          901  +          ** indicates that the MATCH expression is not a full text query,
          902  +          ** but a request for an internal parameter.  */
          903  +          rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]);
          904  +        }else{
          905  +          char **pzErr = &pTab->base.zErrMsg;
          906  +          rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr);
          907  +          if( rc==SQLITE_OK ){
          908  +            if( ePlan==FTS5_PLAN_MATCH ){
          909  +              rc = fts5CursorFirst(pTab, pCsr, bAsc);
          910  +            }else{
          911  +              rc = fts5CursorFirstSorted(pTab, pCsr, bAsc);
          912  +            }
   851    913             }
   852    914           }
   853    915         }
   854    916       }else{
   855    917         /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup
   856    918         ** by rowid (ePlan==FTS5_PLAN_ROWID).  */
   857    919         int eStmt = fts5StmtType(idxNum);

Changes to ext/fts5/fts5Int.h.

   104    104   void sqlite3Fts5Dequote(char *z);
   105    105   
   106    106   /* Load the contents of the %_config table */
   107    107   int sqlite3Fts5ConfigLoad(Fts5Config*, int);
   108    108   
   109    109   /* Set the value of a single config attribute */
   110    110   int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);
          111  +
          112  +int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
   111    113   
   112    114   /*
   113    115   ** End of interface to code in fts5_config.c.
   114    116   **************************************************************************/
   115    117   
   116    118   /**************************************************************************
   117    119   ** Interface to code in fts5_buffer.c.

Changes to ext/fts5/fts5_config.c.

   548    548   ** this is:
   549    549   **
   550    550   **   + Bareword (function name)
   551    551   **   + Open parenthesis - "("
   552    552   **   + Zero or more SQL literals in a comma separated list
   553    553   **   + Close parenthesis - ")"
   554    554   */
   555         -static int fts5ConfigParseRank(
          555  +int sqlite3Fts5ConfigParseRank(
   556    556     const char *zIn,                /* Input string */
   557    557     char **pzRank,                  /* OUT: Rank function name */
   558    558     char **pzRankArgs               /* OUT: Rank function arguments */
   559    559   ){
   560    560     const char *p = zIn;
   561    561     const char *pRank;
   562    562     char *zRank = 0;
................................................................................
   643    643       }
   644    644     }
   645    645   
   646    646     else if( 0==sqlite3_stricmp(zKey, "rank") ){
   647    647       const char *zIn = (const char*)sqlite3_value_text(pVal);
   648    648       char *zRank;
   649    649       char *zRankArgs;
   650         -    rc = fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
          650  +    rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
   651    651       if( rc==SQLITE_OK ){
   652    652         sqlite3_free(pConfig->zRank);
   653    653         sqlite3_free(pConfig->zRankArgs);
   654    654         pConfig->zRank = zRank;
   655    655         pConfig->zRankArgs = zRankArgs;
   656    656       }else if( rc==SQLITE_ERROR ){
   657    657         rc = SQLITE_OK;

Changes to ext/fts5/test/fts5al.test.

   174    174     SELECT rowid, firstinst(t2) FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC
   175    175   } {
   176    176     1 0 2 4 3 6   5  103
   177    177     6 9 7 0 9 102 10 8
   178    178   }
   179    179   
   180    180   do_execsql_test 4.1.2 {
          181  +  SELECT rowid, rank FROM t2 
          182  +  WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()' 
          183  +  ORDER BY rowid ASC
          184  +} {
          185  +  1 0 2 4 3 6   5  103
          186  +  6 9 7 0 9 102 10 8
          187  +}
          188  +
          189  +do_execsql_test 4.1.3 {
          190  +  SELECT rowid, rank FROM t2 
          191  +  WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()'
          192  +  ORDER BY rank DESC
          193  +} {
          194  +  5 103  9 102  6 9  10 8  3 6  2 4  7 0  1 0 
          195  +}
          196  +
          197  +do_execsql_test 4.1.4 {
   181    198     INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst()');
   182    199     SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC
   183    200   } {
   184    201     1 0 2 4 3 6   5  103
   185    202     6 9 7 0 9 102 10 8
   186    203   }
   187    204   
   188         -do_execsql_test 4.1.3 {
          205  +do_execsql_test 4.1.5 {
   189    206     SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
   190    207   } {
   191    208     5 103  9 102  6 9  10 8  3 6  2 4  7 0  1 0 
   192    209   }
   193    210   
   194         -do_execsql_test 4.1.4 {
          211  +do_execsql_test 4.1.6 {
   195    212     INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst (    ) ');
   196    213     SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
   197    214   } {
   198    215     5 103  9 102  6 9  10 8  3 6  2 4  7 0  1 0 
   199    216   }
   200    217   
   201    218   proc rowidplus {cmd ival} { 
................................................................................
   212    229   do_execsql_test 4.2.2 {
   213    230     INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(111) ');
   214    231     SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g'
   215    232   } {
   216    233     10 121
   217    234   }
   218    235   
          236  +do_execsql_test 4.2.3 {
          237  +  SELECT rowid, rank FROM t2 
          238  +  WHERE t2 MATCH 'o + q + g' AND rank MATCH 'rowidplus(112)'
          239  +} {
          240  +  10 122
          241  +}
          242  +
          243  +proc rowidmod {cmd imod} { 
          244  +  expr [$cmd xRowid] % $imod
          245  +}
          246  +sqlite3_fts5_create_function db rowidmod rowidmod
          247  +do_execsql_test 4.3.1 {
          248  +  CREATE VIRTUAL TABLE t3 USING fts5(x);
          249  +  INSERT INTO t3 VALUES('a one');
          250  +  INSERT INTO t3 VALUES('a two');
          251  +  INSERT INTO t3 VALUES('a three');
          252  +  INSERT INTO t3 VALUES('a four');
          253  +  INSERT INTO t3 VALUES('a five');
          254  +  INSERT INTO t3(t3, rank) VALUES('rank', 'bm25()');
          255  +}
          256  +breakpoint
          257  +
          258  +do_execsql_test 4.3.2 {
          259  +  SELECT * FROM t3
          260  +  WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(4)' 
          261  +  ORDER BY rank ASC
          262  +} {
          263  +  {a four} {a five} {a one} {a two} {a three}
          264  +}
          265  +do_execsql_test 4.3.3 {
          266  +  SELECT *, rank FROM t3
          267  +  WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(3)' 
          268  +  ORDER BY rank ASC
          269  +} {
          270  +  {a three} 0 {a four} 1 {a one} 1 {a five} 2 {a two} 2
          271  +}
   219    272   
   220    273   
   221    274   finish_test
   222    275