SQLite4
Check-in [f3ac136843]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Allow an fts5 tokenizer to split a single document into multiple streams (i.e. sub-fields within a single column value). Modify the matchinfo APIs so that a ranking function may handle streams and/or columns separately or otherwise.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | matchinfo
Files: files | file ages | folders
SHA1: f3ac136843205f618826cb50635631dbf238e2bd
User & Date: dan 2013-01-04 18:37:37
Context
2013-01-07
19:52
Add an implementation of snippet() and its associated mi apis to fts5. check-in: 8d94102cd3 user: dan tags: matchinfo
2013-01-04
18:37
Allow an fts5 tokenizer to split a single document into multiple streams (i.e. sub-fields within a single column value). Modify the matchinfo APIs so that a ranking function may handle streams and/or columns separately or otherwise. check-in: f3ac136843 user: dan tags: matchinfo
2013-01-03
20:35
Add comment describing format of row and global size records. check-in: 7cfa40b5c1 user: dan tags: matchinfo
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/fts5.c.

    12     12   */
    13     13   
    14     14   #include "sqliteInt.h"
    15     15   #include "vdbeInt.h"
    16     16   
    17     17   /* 
    18     18   ** Stream numbers must be lower than this.
           19  +**
           20  +** For optimization purposes, it is assumed that a given tokenizer uses
           21  +** a set of contiguous stream numbers starting with 0. And that most
           22  +** tokens belong to stream 0.
           23  +**
           24  +** The hard limit is 63 (due to the format of "row size" records).
    19     25   */
    20         -#define SQLITE4_FTS5_NSTREAM 60
           26  +#define SQLITE4_FTS5_NSTREAM 32
    21     27   
    22     28   /*
    23     29   ** Records stored within the index:
    24     30   **
    25     31   ** Row size record:
    26     32   **   There is one "row size" record in the index for each row in the
    27     33   **   indexed table. The "row size" record contains the number of tokens
................................................................................
    52     58   **
    53     59   **   The data for this record is a series of varint values. The first 
    54     60   **   varint is the total number of rows in the table. The subsequent
    55     61   **   varints make up a "row size" record containing the total number of
    56     62   **   tokens for each S/C combination in all rows of the table.
    57     63   **
    58     64   ** FTS index records:
    59         -**
    60     65   **   The FTS index records implement the following mapping:
    61     66   **
    62     67   **       (token, document-pk) -> (list of instances)
           68  +**
           69  +**   The key for each index record is in the same format as the keys for
           70  +**   regular text indexes. A 0x24 byte, followed by the UTF-8 representation
           71  +**   of the token, followed by 0x00, followed by the PK blob for the table
           72  +**   row.
           73  +**
           74  +**   TODO: Describe value format.
    63     75   */
    64     76   
    65     77   /*
    66     78   ** Default distance value for NEAR operators.
    67     79   */
    68     80   #define FTS5_DEFAULT_NEAR 10
    69     81   
................................................................................
   133    145   typedef struct Fts5Expr Fts5Expr;
   134    146   typedef struct Fts5ExprNode Fts5ExprNode;
   135    147   typedef struct Fts5List Fts5List;
   136    148   typedef struct Fts5Parser Fts5Parser;
   137    149   typedef struct Fts5ParserToken Fts5ParserToken;
   138    150   typedef struct Fts5Phrase Fts5Phrase;
   139    151   typedef struct Fts5Prefix Fts5Prefix;
          152  +typedef struct Fts5Size Fts5Size;
   140    153   typedef struct Fts5Str Fts5Str;
   141    154   typedef struct Fts5Token Fts5Token;
   142    155   
   143    156   
   144    157   struct Fts5ParserToken {
   145    158     int eType;                      /* Token type */
   146    159     int n;                          /* Size of z[] in bytes */
................................................................................
   236    249     char *zExpr;                    /* Full text of MATCH expression */
   237    250     KVByteArray *aKey;              /* Buffer for primary key */
   238    251     int nKeyAlloc;                  /* Bytes allocated at aKey[] */
   239    252   
   240    253     KVCursor *pCsr;                 /* Cursor used to retrieve values */
   241    254     Mem *aMem;                      /* Array of column values */
   242    255   
   243         -  /* Array of nPhrase*nCol integers. See sqlite4_mi_row_count() for details. */
          256  +  Fts5Size *pSz;                  /* Local size data */
          257  +  Fts5Size *pGlobal;              /* Global size data */
          258  +  i64 nGlobal;                    /* Total number of rows in table */
   244    259     int *anRow;
          260  +
          261  +#if 1
   245    262     i64 *aGlobal;
   246    263   
   247    264     /* Size of each column of current row (in tokens). */
   248    265     int bSzValid;
   249    266     int *aSz;
          267  +#endif
          268  +};
          269  +
          270  +/*
          271  +** A deserialized 'size record' (see above).
          272  +*/
          273  +struct Fts5Size {
          274  +  int nCol;                       /* Number of columns in indexed table */
          275  +  int nStream;                    /* Number of streams */
          276  +  i64 *aSz;                       /* Token count for each C/S */
   250    277   };
   251    278   
   252    279   /*
   253    280   ** This type is used when reading (decoding) an instance-list.
   254    281   */
   255    282   typedef struct InstanceList InstanceList;
   256    283   struct InstanceList {
   257    284     u8 *aList;
   258    285     int nList;
   259    286     int iList;
   260    287   
   261    288     /* The current entry */
   262    289     int iCol;
   263         -  int iWeight;
          290  +  int iStream;
   264    291     int iOff;
   265    292   };
   266    293   
   267    294   /*
   268    295   ** Return true for EOF, or false if the next entry is valid.
   269    296   */
   270    297   static int fts5InstanceListNext(InstanceList *p){
................................................................................
   275    302       u32 iVal;
   276    303       i += getVarint32(&p->aList[i], iVal);
   277    304       if( (iVal & 0x03)==0x01 ){
   278    305         p->iCol = (iVal>>2);
   279    306         p->iOff = 0;
   280    307       }
   281    308       else if( (iVal & 0x03)==0x03 ){
   282         -      p->iWeight = (iVal>>2);
          309  +      p->iStream = (iVal>>2);
   283    310       }
   284    311       else{
   285    312         p->iOff += (iVal>>1);
   286    313         bRet = 0;
   287    314       }
   288    315     }
   289    316     if( bRet ){
................................................................................
   297    324   static int fts5InstanceListEof(InstanceList *p){
   298    325     return (p->aList==0);
   299    326   }
   300    327   
   301    328   static void fts5InstanceListAppend(
   302    329     InstanceList *p,                /* Instance list to append to */
   303    330     int iCol,                       /* Column of new entry */
   304         -  int iWeight,                    /* Weight of new entry */
          331  +  int iStream,                    /* Stream of new entry */
   305    332     int iOff                        /* Offset of new entry */
   306    333   ){
   307    334     assert( iCol>=p->iCol );
   308    335     assert( iCol>p->iCol || iOff>=p->iOff );
   309    336   
   310    337     if( iCol!=p->iCol ){
   311    338       p->iList += putVarint32(&p->aList[p->iList], (iCol<<2)|0x01);
   312    339       p->iCol = iCol;
   313    340       p->iOff = 0;
   314    341     }
   315    342   
   316         -  if( iWeight!=p->iWeight ){
   317         -    p->iList += putVarint32(&p->aList[p->iList], (iWeight<<2)|0x03);
   318         -    p->iWeight = iWeight;
          343  +  if( iStream!=p->iStream ){
          344  +    p->iList += putVarint32(&p->aList[p->iList], (iStream<<2)|0x03);
          345  +    p->iStream = iStream;
   319    346     }
   320    347   
   321    348     p->iList += putVarint32(&p->aList[p->iList], (iOff-p->iOff)<<1);
   322    349     p->iOff = iOff;
   323    350   
   324    351     assert( p->iList<=p->nList );
   325    352   }
................................................................................
   504    531   }
   505    532   
   506    533   /*
   507    534   ** Callback for fts5CountTokens().
   508    535   */
   509    536   static int fts5CountTokensCb(
   510    537     void *pCtx, 
   511         -  int iWeight, 
          538  +  int iStream, 
   512    539     int iOff, 
   513    540     const char *z, int n,
   514    541     int iSrc, int nSrc
   515    542   ){
   516    543     (*((int *)pCtx))++;
   517    544     return 0;
   518    545   }
................................................................................
   539    566   struct AppendTokensCtx {
   540    567     Fts5Parser *pParse;
   541    568     Fts5Str *pStr;
   542    569   };
   543    570   
   544    571   static int fts5AppendTokensCb(
   545    572     void *pCtx, 
   546         -  int iWeight, 
          573  +  int iStream, 
   547    574     int iOff, 
   548    575     const char *z, int n, 
   549    576     int iSrc, int nSrc
   550    577   ){
   551    578     struct AppendTokensCtx *p = (struct AppendTokensCtx *)pCtx;
   552    579     Fts5Parser *pParse = p->pParse;
   553    580     Fts5Token *pToken;
................................................................................
  1155   1182   ** sqlite4DbRealloc().
  1156   1183   */
  1157   1184   typedef struct TokenizeCtx TokenizeCtx;
  1158   1185   typedef struct TokenizeTerm TokenizeTerm;
  1159   1186   struct TokenizeCtx {
  1160   1187     int rc;
  1161   1188     int iCol;
         1189  +  int nCol;                       /* Number of columns in table */
  1162   1190     sqlite4 *db;
  1163   1191     int nMax;
  1164         -  int *aSz;                       /* Number of tokens in each column */
         1192  +  i64 *aSz;                       /* Number of tokens in each column/stream */
         1193  +  int nStream;                    /* Number of streams in document */
  1165   1194     Hash hash;
  1166   1195   };
  1167   1196   struct TokenizeTerm {
  1168         -  int iWeight;                    /* Weight of previous entry */
         1197  +  int iStream;                    /* Stream of previous entry */
  1169   1198     int iCol;                       /* Column containing previous entry */
  1170   1199     int iOff;                       /* Token offset of previous entry */
  1171   1200     int nToken;                     /* Size of token in bytes */
  1172   1201     int nData;                      /* Bytes of data in value */
  1173   1202     int nAlloc;                     /* Bytes of data allocated */
  1174   1203   };
  1175   1204   
................................................................................
  1191   1220     a = &(((unsigned char *)&pTerm[1])[pTerm->nToken+pTerm->nData]);
  1192   1221     pTerm->nData += putVarint32(a, iVal);
  1193   1222     return pTerm;
  1194   1223   }
  1195   1224   
  1196   1225   static int fts5TokenizeCb(
  1197   1226     void *pCtx, 
  1198         -  int iWeight, 
         1227  +  int iStream, 
  1199   1228     int iOff,
  1200   1229     const char *zToken, 
  1201   1230     int nToken, 
  1202   1231     int iSrc, 
  1203   1232     int nSrc
  1204   1233   ){
  1205   1234     TokenizeCtx *p = (TokenizeCtx *)pCtx;
         1235  +  sqlite4 *db = p->db;
  1206   1236     TokenizeTerm *pTerm = 0;
  1207   1237     TokenizeTerm *pOrig = 0;
  1208   1238   
         1239  +  /* TODO: Error here if iStream is out of range */
         1240  +
  1209   1241     if( nToken>p->nMax ) p->nMax = nToken;
  1210         -  p->aSz[p->iCol]++;
         1242  +
         1243  +  if( iStream>=p->nStream ){
         1244  +    int nOld = p->nStream;
         1245  +    int nNew = 4;
         1246  +    while( nNew<=iStream ) nNew = nNew*2;
         1247  +    p->aSz = (i64*)sqlite4DbReallocOrFree(db, p->aSz, nNew*p->nCol*sizeof(i64));
         1248  +    if( p->aSz==0 ) goto tokenize_cb_out;
         1249  +    memset(&p->aSz[p->nStream * p->nCol], 0, (nNew-nOld)*p->nCol*sizeof(i64));
         1250  +  }
         1251  +  p->aSz[iStream*p->nCol + p->iCol]++;
  1211   1252   
  1212   1253     pTerm = (TokenizeTerm *)sqlite4HashFind(&p->hash, zToken, nToken);
  1213   1254     if( pTerm==0 ){
  1214   1255       /* Size the initial allocation so that it fits in the lookaside buffer */
  1215   1256       int nAlloc = sizeof(TokenizeTerm) + nToken + 32;
  1216   1257   
  1217   1258       pTerm = sqlite4DbMallocZero(p->db, nAlloc);
................................................................................
  1226   1267           pTerm = 0;
  1227   1268         }
  1228   1269         if( pTerm==0 ) goto tokenize_cb_out;
  1229   1270       }
  1230   1271     }
  1231   1272     pOrig = pTerm;
  1232   1273   
  1233         -  if( iWeight!=pTerm->iWeight ){
  1234         -    pTerm = fts5TokenizeAppendInt(p, pTerm, (iWeight << 2) | 0x00000003);
         1274  +  if( iStream!=pTerm->iStream ){
         1275  +    pTerm = fts5TokenizeAppendInt(p, pTerm, (iStream << 2) | 0x00000003);
  1235   1276       if( !pTerm ) goto tokenize_cb_out;
  1236         -    pTerm->iWeight = iWeight;
         1277  +    pTerm->iStream = iStream;
  1237   1278     }
  1238   1279   
  1239   1280     if( pTerm && p->iCol!=pTerm->iCol ){
  1240   1281       pTerm = fts5TokenizeAppendInt(p, pTerm, (p->iCol << 2) | 0x00000001);
  1241   1282       if( !pTerm ) goto tokenize_cb_out;
  1242   1283       pTerm->iCol = p->iCol;
  1243   1284       pTerm->iOff = 0;
................................................................................
  1255   1296       p->rc = SQLITE4_NOMEM;
  1256   1297       return 1;
  1257   1298     }
  1258   1299   
  1259   1300     return 0;
  1260   1301   }
  1261   1302   
  1262         -static int fts5LoadGlobal(sqlite4 *db, Fts5Info *pInfo, i64 *aVal){
  1263         -  int rc;
  1264         -  int nVal = pInfo->nCol + 1;
  1265         -  u8 aKey[10];                    /* Global record key */
  1266         -  int nKey;                       /* Bytes in key aKey */
         1303  +static int fts5LoadSizeRecord(
         1304  +  sqlite4 *db,                    /* Database handle */
         1305  +  u8 *aKey, int nKey,             /* KVStore key */
         1306  +  int nMinStream,                 /* Space for at least this many streams */
         1307  +  Fts5Info *pInfo,                /* Info record */
         1308  +  i64 *pnRow,                     /* non-NULL when reading global record */
         1309  +  Fts5Size **ppSz                 /* OUT: Loaded size record */
         1310  +){
         1311  +  Fts5Size *pSz = 0;              /* Size object */
  1267   1312     KVCursor *pCsr = 0;             /* Cursor used to read size record */
  1268         -
  1269         -  nKey = putVarint32(aKey, pInfo->iRoot);
  1270         -  aKey[nKey++] = 0x00;
         1313  +  int rc;
  1271   1314   
  1272   1315     rc = sqlite4KVStoreOpenCursor(db->aDb[pInfo->iDb].pKV, &pCsr);
  1273   1316     if( rc==SQLITE4_OK ){
  1274   1317       rc = sqlite4KVCursorSeek(pCsr, aKey, nKey, 0);
  1275   1318       if( rc==SQLITE4_NOTFOUND ){
  1276         -      rc = SQLITE4_OK;
  1277         -      memset(aVal, 0, sizeof(i64)*nVal);
         1319  +      rc = SQLITE4_CORRUPT_BKPT;
  1278   1320       }else if( rc==SQLITE4_OK ){
  1279   1321         const u8 *aData = 0;
  1280   1322         int nData = 0;
  1281   1323         rc = sqlite4KVCursorData(pCsr, 0, -1, &aData, &nData);
  1282   1324         if( rc==SQLITE4_OK ){
  1283         -        int i;
  1284   1325           int iOff = 0;
  1285         -        for(i=0; i<nVal; i++){
  1286         -          iOff += sqlite4GetVarint(&aData[iOff], (u64 *)&aVal[i]);
         1326  +        int nStream = 0;
         1327  +        int nAlloc;
         1328  +
         1329  +        /* If pnRow is not NULL, then this is the global record. Read the
         1330  +        ** number of documents in the table from the start of the record. */
         1331  +        if( pnRow ){
         1332  +          iOff += sqlite4GetVarint(&aData[iOff], (u64 *)pnRow);
         1333  +        }
         1334  +        iOff += getVarint32(&aData[iOff], nStream);
         1335  +        nAlloc = (nStream < nMinStream ? nMinStream : nStream);
         1336  +
         1337  +        pSz = sqlite4DbMallocZero(db, 
         1338  +            sizeof(Fts5Size) + sizeof(i64) * pInfo->nCol * nAlloc
         1339  +        );
         1340  +        if( pSz==0 ){
         1341  +          rc = SQLITE4_NOMEM;
         1342  +        }else{
         1343  +          int iCol = 0;
         1344  +          pSz->nCol = pInfo->nCol;
         1345  +          pSz->nStream = nAlloc;
         1346  +          while( iOff<nData ){
         1347  +            int i;
         1348  +            i64 *aSz = &pSz->aSz[iCol*nAlloc];
         1349  +            for(i=0; i<nStream; i++){
         1350  +              iOff += sqlite4GetVarint(&aData[iOff], (u64*)&aSz[i]);
         1351  +            }
         1352  +            iCol++;
         1353  +          }
  1287   1354           }
  1288   1355         }
  1289   1356       }
  1290   1357       sqlite4KVCursorClose(pCsr);
  1291   1358     }
  1292   1359   
         1360  +  *ppSz = pSz;
  1293   1361     return rc;
  1294   1362   }
         1363  +
         1364  +static int fts5StoreSizeRecord(
         1365  +  KVStore *p,
         1366  +  u8 *aKey, int nKey,
         1367  +  Fts5Size *pSz, 
         1368  +  i64 nRow, 
         1369  +  u8 *a                           /* Space to serialize record in */
         1370  +){
         1371  +  int iOff = 0;
         1372  +  int iCol;
         1373  +
         1374  +  if( nRow>=0 ){
         1375  +    iOff += sqlite4PutVarint(&a[iOff], nRow);
         1376  +  }
         1377  +  iOff += sqlite4PutVarint(&a[iOff], pSz->nStream);
         1378  +  for(iCol=0; iCol<pSz->nCol; iCol++){
         1379  +    int i;
         1380  +    for(i=0; i<pSz->nStream; i++){
         1381  +      iOff += sqlite4PutVarint(&a[iOff], pSz->aSz[iCol*pSz->nCol+i]);
         1382  +    }
         1383  +  }
         1384  +
         1385  +  return sqlite4KVStoreReplace(p, aKey, nKey, a, iOff);
         1386  +}
  1295   1387   
  1296   1388   static int fts5CsrLoadGlobal(Fts5Cursor *pCsr){
  1297   1389     int rc = SQLITE4_OK;
  1298         -  if( pCsr->aGlobal==0 ){
  1299         -    int nByte = sizeof(i64) * (pCsr->pInfo->nCol + 1);
  1300         -    pCsr->aGlobal = (i64 *)sqlite4DbMallocZero(pCsr->db, nByte);
  1301         -    if( pCsr->aGlobal==0 ){
  1302         -      rc = SQLITE4_NOMEM;
  1303         -    }else{
  1304         -      rc = fts5LoadGlobal(pCsr->db, pCsr->pInfo, pCsr->aGlobal);
  1305         -    }
         1390  +  if( pCsr->pGlobal==0 ){
         1391  +    int nKey;
         1392  +    u8 aKey[10];
         1393  +    nKey = putVarint32(aKey, pCsr->pInfo->iRoot);
         1394  +    aKey[nKey++] = 0x00;
         1395  +    rc = fts5LoadSizeRecord(
         1396  +        pCsr->db, aKey, nKey, 0, pCsr->pInfo, &pCsr->nGlobal, &pCsr->pGlobal
         1397  +    );
  1306   1398     }
  1307   1399     return rc;
  1308   1400   }
  1309   1401   
  1310   1402   static int fts5CsrLoadSz(Fts5Cursor *pCsr){
  1311         -  sqlite4 *db = pCsr->db;
  1312         -  Fts5Info *pInfo = pCsr->pInfo;
  1313         -  int nVal = pInfo->nCol;
  1314         -  int rc;
  1315         -  u8 *aKey;
  1316         -  int nKey = 0;
  1317         -  int nPk = pCsr->pExpr->pRoot->nPk;
  1318         -  KVCursor *pKVCsr = 0;           /* Cursor used to read global record */
  1319         -
  1320         -  aKey = (u8 *)sqlite4DbMallocZero(db, 10 + nPk);
  1321         -  if( !aKey ) return SQLITE4_NOMEM;
  1322         -
  1323         -  nKey = putVarint32(aKey, pInfo->iRoot);
  1324         -  aKey[nKey++] = 0x00;
  1325         -  memcpy(&aKey[nKey], pCsr->pExpr->pRoot->aPk, nPk);
  1326         -  nKey += nPk;
  1327         -
  1328         -  rc = sqlite4KVStoreOpenCursor(db->aDb[pInfo->iDb].pKV, &pKVCsr);
  1329         -  if( rc==SQLITE4_OK ){
  1330         -    rc = sqlite4KVCursorSeek(pKVCsr, aKey, nKey, 0);
  1331         -    if( rc==SQLITE4_NOTFOUND ){
  1332         -      rc = SQLITE4_CORRUPT_BKPT;
  1333         -    }else if( rc==SQLITE4_OK ){
  1334         -      const u8 *aData = 0;
  1335         -      int nData = 0;
  1336         -      rc = sqlite4KVCursorData(pKVCsr, 0, -1, &aData, &nData);
  1337         -      if( rc==SQLITE4_OK ){
  1338         -        int i;
  1339         -        int iOff = 0;
  1340         -        for(i=0; i<nVal; i++){
  1341         -          iOff += getVarint32(&aData[iOff], pCsr->aSz[i]);
  1342         -        }
  1343         -      }
  1344         -      pCsr->bSzValid = 1;
  1345         -    }
  1346         -    sqlite4KVCursorClose(pKVCsr);
         1403  +  int rc = SQLITE4_OK;
         1404  +  if( pCsr->pSz==0 ){
         1405  +    sqlite4 *db = pCsr->db;
         1406  +    Fts5Info *pInfo = pCsr->pInfo;
         1407  +    u8 *aKey;
         1408  +    int nKey = 0;
         1409  +    int nPk = pCsr->pExpr->pRoot->nPk;
         1410  +
         1411  +    aKey = (u8 *)sqlite4DbMallocZero(db, 10 + nPk);
         1412  +    if( !aKey ) return SQLITE4_NOMEM;
         1413  +
         1414  +    nKey = putVarint32(aKey, pInfo->iRoot);
         1415  +    aKey[nKey++] = 0x00;
         1416  +    memcpy(&aKey[nKey], pCsr->pExpr->pRoot->aPk, nPk);
         1417  +    nKey += nPk;
         1418  +
         1419  +    rc = fts5LoadSizeRecord(pCsr->db, aKey, nKey, 0, pInfo, 0, &pCsr->pSz);
         1420  +    sqlite4DbFree(db, aKey);
  1347   1421     }
  1348   1422   
  1349   1423     return rc;
  1350   1424   }
  1351   1425   
  1352   1426   
  1353   1427   /*
................................................................................
  1361   1435     int bDel,                       /* True for a delete, false for insert */
  1362   1436     char **pzErr                    /* OUT: Error message */
  1363   1437   ){
  1364   1438     int i;
  1365   1439     int rc = SQLITE4_OK;
  1366   1440     KVStore *pStore;
  1367   1441     TokenizeCtx sCtx;
  1368         -  u8 *aKey = 0;
  1369         -  int nKey = 0;
  1370   1442     int nTnum = 0;
  1371   1443     u32 dummy = 0;
         1444  +
         1445  +  u8 *aSpace = 0;
         1446  +  int nSpace = 0;
  1372   1447   
  1373   1448     const u8 *pPK;
  1374   1449     int nPK;
  1375   1450     HashElem *pElem;
  1376   1451   
  1377   1452     pStore = db->aDb[pInfo->iDb].pKV;
  1378         -  sCtx.rc = SQLITE4_OK;
         1453  +
         1454  +  memset(&sCtx, 0, sizeof(sCtx));
  1379   1455     sCtx.db = db;
  1380         -  sCtx.nMax = 0;
         1456  +  sCtx.nCol = pInfo->nCol;
  1381   1457     sqlite4HashInit(db->pEnv, &sCtx.hash, 1);
  1382   1458   
  1383   1459     pPK = (const u8 *)sqlite4_value_blob(pKey);
  1384   1460     nPK = sqlite4_value_bytes(pKey);
  1385   1461     
  1386   1462     nTnum = getVarint32(pPK, dummy);
  1387   1463     nPK -= nTnum;
  1388   1464     pPK += nTnum;
  1389   1465   
  1390         -  sCtx.aSz = (int *)sqlite4DbMallocZero(db, pInfo->nCol * sizeof(int));
  1391         -  if( sCtx.aSz==0 ) rc = SQLITE4_NOMEM;
  1392         -
  1393   1466     for(i=0; rc==SQLITE4_OK && i<pInfo->nCol; i++){
  1394   1467       sqlite4_value *pArg = (sqlite4_value *)(&aArg[i]);
  1395   1468       if( pArg->flags & MEM_Str ){
  1396   1469         const char *zText;
  1397   1470         int nText;
  1398   1471   
  1399   1472         zText = (const char *)sqlite4_value_text(pArg);
................................................................................
  1401   1474         sCtx.iCol = i;
  1402   1475         rc = pInfo->pTokenizer->xTokenize(
  1403   1476             &sCtx, pInfo->p, zText, nText, fts5TokenizeCb
  1404   1477         );
  1405   1478       }
  1406   1479     }
  1407   1480   
  1408         -  nKey = sqlite4VarintLen(pInfo->iRoot)+2+sCtx.nMax+nPK + 10*(pInfo->nCol+1);
  1409         -  aKey = sqlite4DbMallocRaw(db, nKey);
  1410         -  if( aKey==0 ) rc = SQLITE4_NOMEM;
         1481  +  /* Allocate enough space to serialize all the stuff that needs to
         1482  +  ** be inserted into the database. Specifically:
         1483  +  **
         1484  +  **   * Space for index record keys,
         1485  +  **   * space for the size record and key for this document, and
         1486  +  **   * space for the updated global size record for the document set.
         1487  +  **
         1488  +  ** To make it easier, the below allocates enough space to simultaneously
         1489  +  ** store the largest index record key and the largest possible global
         1490  +  ** size record.
         1491  +  */
         1492  +  nSpace = (sqlite4VarintLen(pInfo->iRoot) + 2 + sCtx.nMax + nPK) + 
         1493  +           (9 * (2 + pInfo->nCol * sCtx.nStream));
         1494  +  aSpace = sqlite4DbMallocRaw(db, nSpace);
         1495  +  if( aSpace==0 ) rc = SQLITE4_NOMEM;
  1411   1496   
  1412   1497     for(pElem=sqliteHashFirst(&sCtx.hash); pElem; pElem=sqliteHashNext(pElem)){
  1413   1498       TokenizeTerm *pTerm = (TokenizeTerm *)sqliteHashData(pElem);
  1414   1499       if( rc==SQLITE4_OK ){
  1415   1500         int nToken = sqliteHashKeysize(pElem);
  1416   1501         char *zToken = (char *)sqliteHashKey(pElem);
         1502  +      u8 *aKey = aSpace;
         1503  +      int nKey;
  1417   1504   
  1418   1505         nKey = putVarint32(aKey, pInfo->iRoot);
  1419   1506         aKey[nKey++] = 0x24;
  1420   1507         memcpy(&aKey[nKey], zToken, nToken);
  1421   1508         nKey += nToken;
  1422   1509         aKey[nKey++] = 0x00;
  1423   1510         memcpy(&aKey[nKey], pPK, nPK);
................................................................................
  1432   1519           aData += pTerm->nToken;
  1433   1520           rc = sqlite4KVStoreReplace(pStore, aKey, nKey, aData, pTerm->nData);
  1434   1521         }
  1435   1522       }
  1436   1523       sqlite4DbFree(db, pTerm);
  1437   1524     }
  1438   1525   
  1439         -  /* Write the "sizes" record into the db */
         1526  +  /* Write the size record into the db */
  1440   1527     if( rc==SQLITE4_OK ){
         1528  +    u8 *aKey = aSpace;
         1529  +    int nKey;
         1530  +
  1441   1531       nKey = putVarint32(aKey, pInfo->iRoot);
  1442   1532       aKey[nKey++] = 0x00;
  1443   1533       memcpy(&aKey[nKey], pPK, nPK);
  1444   1534       nKey += nPK;
  1445   1535   
  1446         -    if( bDel ){
  1447         -      rc = sqlite4KVStoreReplace(pStore, aKey, nKey, 0, -1);
         1536  +    if( bDel==0 ){
         1537  +      Fts5Size sSz;
         1538  +      sSz.nCol = pInfo->nCol;
         1539  +      sSz.nStream = sCtx.nStream;
         1540  +      sSz.aSz = sCtx.aSz;
         1541  +      rc = fts5StoreSizeRecord(pStore, aKey, nKey, &sSz, -1, &aKey[nKey]);
  1448   1542       }else{
  1449         -      u8 *aData = &aKey[nKey];
  1450         -      int nData = 0;
  1451         -      for(i=0; i<pInfo->nCol; i++){
  1452         -        nData += putVarint32(&aData[nData], sCtx.aSz[i]);
  1453         -      }
  1454         -      rc = sqlite4KVStoreReplace(pStore, aKey, nKey, aData, nData);
         1543  +      rc = sqlite4KVStoreReplace(pStore, aKey, nKey, 0, -1);
  1455   1544       }
  1456   1545     }
  1457   1546   
  1458   1547     /* Update the global record */
  1459   1548     if( rc==SQLITE4_OK ){
  1460         -    i64 *aGlobal = (i64 *)aKey;
  1461         -    u8 *aData = (u8 *)&aGlobal[pInfo->nCol+1];
  1462         -    int nData = 0;
         1549  +    Fts5Size *pSz;                /* Deserialized global size record */
         1550  +    i64 nRow;                     /* Number of rows in indexed table */
         1551  +    u8 *aKey = aSpace;            /* Space to format the global record key */
         1552  +    int nKey;                     /* Size of global record key in bytes */
  1463   1553   
  1464         -    rc = fts5LoadGlobal(db, pInfo, aGlobal);
         1554  +    nKey = putVarint32(aKey, pInfo->iRoot);
         1555  +    aKey[nKey++] = 0x00;
         1556  +    rc = fts5LoadSizeRecord(db, aKey, nKey, sCtx.nStream, pInfo, &nRow, &pSz);
         1557  +    assert( rc!=SQLITE4_OK || pSz->nStream>=sCtx.nStream );
         1558  +
  1465   1559       if( rc==SQLITE4_OK ){
  1466         -      u8 aDbKey[10];
  1467         -      int nDbKey;
  1468         -      nDbKey = putVarint32(aDbKey, pInfo->iRoot);
  1469         -      aDbKey[nDbKey++] = 0x00;
         1560  +      int iCol;
         1561  +      for(iCol=0; iCol<pSz->nCol; iCol++){
         1562  +        int iStr;
         1563  +        i64 *aIn = &sCtx.aSz[iCol * sCtx.nStream];
         1564  +        i64 *aOut = &pSz->aSz[iCol * pSz->nStream];
         1565  +        for(iStr=0; iStr<sCtx.nStream; iStr++){
         1566  +          aOut[iStr] += (aIn[iStr] * (bDel?-1:1));
         1567  +        }
         1568  +      }
         1569  +      nRow += (bDel?-1:1);
         1570  +      rc = fts5StoreSizeRecord(pStore, aKey, nKey, pSz, nRow, &aKey[nKey]);
         1571  +    }
  1470   1572   
  1471         -      nData += sqlite4PutVarint(&aData[nData], aGlobal[0] + (bDel?-1:1));
  1472         -      for(i=0; i<pInfo->nCol; i++){
  1473         -        i64 iNew = aGlobal[i+1] + (i64)sCtx.aSz[i] * (bDel?-1:1);
  1474         -        nData += sqlite4PutVarint(&aData[nData], iNew);
  1475         -      }
  1476         -
  1477         -      rc = sqlite4KVStoreReplace(pStore, aDbKey, nDbKey, aData, nData);
  1478         -    }
         1573  +    sqlite4DbFree(db, pSz);
  1479   1574     }
  1480   1575     
  1481         -  sqlite4DbFree(db, aKey);
         1576  +  sqlite4DbFree(db, aSpace);
  1482   1577     sqlite4DbFree(db, sCtx.aSz);
  1483   1578     sqlite4HashClear(&sCtx.hash);
  1484   1579     return rc;
  1485   1580   }
  1486   1581   
  1487   1582   static Fts5Info *fts5InfoCreate(Parse *pParse, Index *pIdx, int bCol){
  1488   1583     sqlite4 *db = pParse->db;
................................................................................
  1602   1697   **   * the stream number assigned to the instance,
  1603   1698   **   * the column number, and
  1604   1699   **   * the term offset.
  1605   1700   */
  1606   1701   static i64 fts5TermInstanceCksum(
  1607   1702     const u8 *aTerm, int nTerm,
  1608   1703     const u8 *aPk, int nPk,
  1609         -  int iWeight,
         1704  +  int iStream,
  1610   1705     int iCol,
  1611   1706     int iOff
  1612   1707   ){
  1613   1708     int i;
  1614   1709     i64 cksum = 0;
  1615   1710   
  1616   1711     /* Add the term to the checksum */
................................................................................
  1620   1715   
  1621   1716     /* Add the primary key blob to the checksum */
  1622   1717     for(i=0; i<nPk; i++){
  1623   1718       cksum += (cksum << 3) + aPk[i];
  1624   1719     }
  1625   1720   
  1626   1721     /* Add the stream number, column number and offset (in that order) to the checksum */
  1627         -  cksum += (cksum << 3) + iWeight;
         1722  +  cksum += (cksum << 3) + iStream;
  1628   1723     cksum += (cksum << 3) + iCol;
  1629   1724     cksum += (cksum << 3) + iOff;
  1630   1725   
  1631   1726     return cksum;
  1632   1727   }
  1633   1728   
  1634   1729   
................................................................................
  1660   1755     nToken = sqlite4Strlen30((const char *)aToken);
  1661   1756     aPk = &aToken[nToken+1];
  1662   1757     nPk = (&aKey[nKey] - aPk);
  1663   1758   
  1664   1759     fts5InstanceListInit((u8 *)aVal, nVal, &sList);
  1665   1760     while( 0==fts5InstanceListNext(&sList) ){
  1666   1761       i64 v = fts5TermInstanceCksum(
  1667         -        aPk, nPk, aToken, nToken, sList.iWeight, sList.iCol, sList.iOff
         1762  +        aPk, nPk, aToken, nToken, sList.iStream, sList.iCol, sList.iOff
  1668   1763       );
  1669   1764       cksum = cksum ^ v;
  1670   1765     }
  1671   1766   
  1672   1767     *piCksum = cksum;
  1673   1768     return SQLITE4_OK;
  1674   1769   }
................................................................................
  1679   1774     int nPK;
  1680   1775     int iCol;
  1681   1776     i64 cksum;
  1682   1777   };
  1683   1778   
  1684   1779   static int fts5CksumCb(           /* Fold one token instance into the running checksum in *pCtx; always returns 0 */
  1685   1780     void *pCtx,                     /* Cast to (CksumCtx*) below */
  1686         -  int iWeight, 
         1781  +  int iStream,                    /* Forwarded as the iStream argument of the checksum */
  1687   1782     int iOff,                       /* Forwarded as the iOff argument of the checksum */
  1688   1783     const char *zToken,             /* Token bytes */
  1689   1784     int nToken,                     /* Length passed along with zToken */
  1690   1785     int iSrc,                       /* Not used by this callback */
  1691   1786     int nSrc                        /* Not used by this callback */
  1692   1787   ){
  1693   1788     CksumCtx *p = (CksumCtx *)pCtx;
  1694   1789     i64 cksum;
  1695   1790   
  1696   1791     cksum = fts5TermInstanceCksum(p->pPK, p->nPK,  /* NOTE(review): the PK fills the (aTerm,nTerm) slots and the token the (aPk,nPk) slots - confirm intended */
  1697         -      (const u8 *)zToken, nToken, iWeight, p->iCol, iOff
         1792  +      (const u8 *)zToken, nToken, iStream, p->iCol, iOff
  1698   1793     );
  1699   1794   
  1700   1795     p->cksum = (p->cksum ^ cksum);  /* XOR-accumulate: the result does not depend on instance order */
  1701   1796     return 0;
  1702   1797   }
  1703   1798   
  1704   1799   int sqlite4Fts5RowCksum(
................................................................................
  1863   1958         fts5InstanceListNext(&in2);
  1864   1959       }else if( in1.iCol<in2.iCol || (in1.iCol==in2.iCol && in1.iOff<in2.iOff) ){
  1865   1960         pAdv = &in1;
  1866   1961       }else{
  1867   1962         pAdv = &in2;
  1868   1963       }
  1869   1964   
  1870         -    fts5InstanceListAppend(&out, pAdv->iCol, pAdv->iWeight, pAdv->iOff);
         1965  +    fts5InstanceListAppend(&out, pAdv->iCol, pAdv->iStream, pAdv->iOff);
  1871   1966       fts5InstanceListNext(pAdv);
  1872   1967     }
  1873   1968   
  1874   1969     if( bFree ){
  1875   1970       sqlite4DbFree(db, p1->aData);
  1876   1971       sqlite4DbFree(db, p2->aData);
  1877   1972     }
................................................................................
  2174   2269     while( rc==SQLITE4_OK && bEof==0 ){
  2175   2270       for(i=1; i<pStr->nToken; i++){
  2176   2271         int bMatch = fts5TokenAdvanceToMatch(&aIn[i], &aIn[0], i, &bEof);
  2177   2272         if( bMatch==0 || bEof ) break;
  2178   2273       }
  2179   2274       if( i==pStr->nToken && (iCol<0 || aIn[0].iCol==iCol) ){
  2180   2275         /* Record a match here */
  2181         -      fts5InstanceListAppend(&out, aIn[0].iCol, aIn[0].iWeight, aIn[0].iOff);
         2276  +      fts5InstanceListAppend(&out, aIn[0].iCol, aIn[0].iStream, aIn[0].iOff);
  2182   2277       }
  2183   2278       bEof = fts5InstanceListNext(&aIn[0]);
  2184   2279     }
  2185   2280   
  2186   2281     pStr->nList = out.iList;
  2187   2282     sqlite4DbFree(db, aIn);
  2188   2283   
................................................................................
  2220   2315   
  2221   2316       while( bEof==0 ){
  2222   2317         if( fts5IsNear(&near, &in, nTrail) 
  2223   2318          || fts5IsNear(&in, &near, nLead)
  2224   2319         ){
  2225   2320           /* The current position is a match. Append an entry to the output
  2226   2321           ** and advance the input cursor. */
  2227         -        fts5InstanceListAppend(&out, in.iCol, in.iWeight, in.iOff);
         2322  +        fts5InstanceListAppend(&out, in.iCol, in.iStream, in.iOff);
  2228   2323           bEof = fts5InstanceListNext(&in);
  2229   2324         }else{
  2230   2325           if( near.iCol<in.iCol || (near.iCol==in.iCol && near.iOff<in.iOff) ){
  2231   2326             bEof = fts5InstanceListNext(&near);
  2232   2327           }else if( near.iCol==in.iCol && near.iOff==in.iOff ){
  2233   2328             bEof = fts5InstanceListNext(&in);
  2234   2329             if( fts5IsNear(&near, &in, nTrail) ){
  2235         -            fts5InstanceListAppend(&out, near.iCol, near.iWeight, near.iOff);
         2330  +            fts5InstanceListAppend(&out, near.iCol, near.iStream, near.iOff);
  2236   2331             }
  2237   2332           }else{
  2238   2333             bEof = fts5InstanceListNext(&in);
  2239   2334           }
  2240   2335         }
  2241   2336       }
  2242   2337   
................................................................................
  2422   2517     }
  2423   2518   
  2424   2519     assert( rc!=SQLITE4_NOTFOUND );
  2425   2520     return rc;
  2426   2521   }
  2427   2522   
  2428   2523   int sqlite4Fts5Next(Fts5Cursor *pCsr){  /* Advance the cursor's expression; returns the fts5ExprAdvance() result */
  2429         -  pCsr->bSzValid = 0;
         2524  +  sqlite4DbFree(pCsr->db, pCsr->pSz);  /* Drop the cached size record (presumably that of the row being left - confirm) */
         2525  +  pCsr->pSz = 0;                       /* Clear the pointer so the freed record is never reused */
  2430   2526     return fts5ExprAdvance(pCsr->db, pCsr->pExpr->pRoot, 0);
  2431   2527   }
  2432   2528   
  2433   2529   int sqlite4Fts5Open(
  2434   2530     sqlite4 *db,                    /* Database handle */
  2435   2531     Fts5Info *pInfo,                /* Index description */
  2436   2532     const char *zMatch,             /* Match expression */
................................................................................
  2504   2600     memcpy(&pCsr->aKey[i], aPk, nPk);
  2505   2601   
  2506   2602     *paKey = pCsr->aKey;
  2507   2603     *pnKey = nReq;
  2508   2604     return SQLITE4_OK;
  2509   2605   }
  2510   2606   
  2511         -int sqlite4_mi_column_count(sqlite4_context *pCtx, int *pnCol){
         2607  +int sqlite4_mi_column_count(sqlite4_context *pCtx, int *pn){  /* Set *pn to pInfo->nCol of the FTS cursor attached to pCtx */
         2608  +  int rc = SQLITE4_OK;
         2609  +  if( pCtx->pFts ){
         2610  +    *pn = pCtx->pFts->pInfo->nCol;
         2611  +  }else{
         2612  +    rc = SQLITE4_MISUSE;        /* No FTS cursor on this context; *pn is left untouched */
         2613  +  }
         2614  +  return rc;
         2615  +}
         2616  +
         2617  +int sqlite4_mi_phrase_count(sqlite4_context *pCtx, int *pn){  /* Set *pn to pExpr->nPhrase of the FTS cursor attached to pCtx */
  2512   2618     int rc = SQLITE4_OK;
  2513   2619     if( pCtx->pFts ){
  2514         -    *pnCol = pCtx->pFts->pInfo->nCol;
         2620  +    *pn = pCtx->pFts->pExpr->nPhrase;
         2621  +  }else{
         2622  +    rc = SQLITE4_MISUSE;        /* No FTS cursor on this context; *pn is left untouched */
         2623  +  }
         2624  +  return rc;
         2625  +}
         2626  +
         2627  +int sqlite4_mi_stream_count(sqlite4_context *pCtx, int *pn){  /* Set *pn to the nStream value of the cursor's global record */
         2628  +  int rc = SQLITE4_OK;
         2629  +  Fts5Cursor *pCsr = pCtx->pFts;
         2630  +  if( pCsr ){
         2631  +    rc = fts5CsrLoadGlobal(pCtx->pFts);   /* pCtx->pFts is the same pointer as pCsr */
         2632  +    if( rc==SQLITE4_OK ) *pn = pCsr->pGlobal->nStream;  /* *pn is written only if the load succeeded */
         2633  +  }else{
         2634  +    rc = SQLITE4_MISUSE;        /* No FTS cursor on this context */
         2635  +  }
         2636  +  return rc;
         2637  +}
         2638  +
         2639  +static int fts5GetSize(Fts5Size *pSz, int iC, int iS){  /* Sum pSz->aSz[] for column iC and stream iS; a negative index means "all" */
         2640  +  int nToken = 0;             /* Result; stays 0 if a non-negative index is out of range */
         2641  +  int i;
         2642  +
         2643  +  if( iC<0 && iS<0 ){         /* Every cell: aSz[] holds nCol*nStream entries */
         2644  +    int nFin = pSz->nCol * pSz->nStream;
         2645  +    for(i=0; i<nFin; i++) nToken += pSz->aSz[i];
         2646  +  }else if( iC<0 && iS<pSz->nStream ){  /* Stream iS of every column; an oversized iS falls through to 0 */
         2647  +    for(i=0; i<pSz->nCol; i++) nToken += pSz->aSz[i*pSz->nStream + iS];
         2648  +  }else if( iS<0 && iC<pSz->nCol ){     /* Every stream of column iC; an oversized iC falls through to 0 */
         2649  +    for(i=0; i<pSz->nStream; i++) nToken += pSz->aSz[pSz->nStream*iC + i];  /* index by i: iS is negative in this branch */
         2650  +  }else if( iC<pSz->nCol && iS<pSz->nStream ){
         2651  +    nToken = pSz->aSz[iC * pSz->nStream + iS];  /* Cell layout is [column * nStream + stream] */
         2652  +  }
         2653  +
         2654  +  return nToken;
         2655  +}
         2656  +
         2657  +int sqlite4_mi_size(sqlite4_context *pCtx, int iC, int iS, int *pn){  /* Set *pn to fts5GetSize() of the cursor's pSz record for column iC / stream iS */
         2658  +  int rc = SQLITE4_OK;
         2659  +  Fts5Cursor *pCsr = pCtx->pFts;
         2660  +
         2661  +  if( pCsr==0 ){
         2662  +    rc = SQLITE4_MISUSE;        /* No FTS cursor on this context */
         2663  +  }else{
         2664  +    rc = fts5CsrLoadSz(pCsr);   /* pCsr->pSz is read only after this call succeeds */
         2665  +    if( rc==SQLITE4_OK ){
         2666  +      *pn = fts5GetSize(pCsr->pSz, iC, iS);
         2667  +    }
         2668  +  }
         2669  +  return rc;
         2670  +}
         2671  +
         2672  +int sqlite4_mi_total_size(sqlite4_context *pCtx, int iC, int iS, int *pn){  /* Set *pn to fts5GetSize() of the cursor's pGlobal record for column iC / stream iS */
         2673  +  int rc = SQLITE4_OK;
         2674  +  Fts5Cursor *pCsr = pCtx->pFts;
         2675  +
         2676  +  if( pCsr==0 ){
         2677  +    rc = SQLITE4_MISUSE;        /* No FTS cursor on this context */
  2515   2678     }else{
         2679  +    rc = fts5CsrLoadGlobal(pCsr);  /* pCsr->pGlobal is read only after this call succeeds */
         2680  +    if( rc==SQLITE4_OK ){
         2681  +      *pn = fts5GetSize(pCsr->pGlobal, iC, iS);
         2682  +    }
         2683  +  }
         2684  +  return rc;
         2685  +}
         2686  +
         2687  +int sqlite4_mi_total_rows(sqlite4_context *pCtx, int *pn){  /* Set *pn to pCsr->nGlobal once the global record has been loaded */
         2688  +  int rc = SQLITE4_OK;
         2689  +  Fts5Cursor *pCsr = pCtx->pFts;
         2690  +  if( pCsr==0 ){
  2516   2691       rc = SQLITE4_MISUSE;
         2692  +  }else{
         2693  +    rc = fts5CsrLoadGlobal(pCsr);
         2694  +    if( rc==SQLITE4_OK ) *pn = pCsr->nGlobal;  /* *pn is written only if the load succeeded */
  2517   2695     }
  2518   2696     return rc;
  2519   2697   }
  2520   2698   
  2521         -int sqlite4_mi_column_size(sqlite4_context *pCtx, int iCol, int *pnToken){
  2522         -  int rc = SQLITE4_OK;
  2523         -  Fts5Cursor *pCsr = pCtx->pFts;
  2524         -
  2525         -  if( pCsr==0 ){
  2526         -    rc = SQLITE4_MISUSE;
  2527         -  }else if( iCol>=pCsr->pInfo->nCol ){
  2528         -    rc = SQLITE4_ERROR;
  2529         -  }else{
  2530         -    if( pCsr->aSz==0 ){
  2531         -      pCsr->aSz = (int *)sqlite4DbMallocZero(
  2532         -          pCsr->db, sizeof(int)*pCsr->pInfo->nCol
  2533         -      );
  2534         -      if( pCsr->aSz==0 ) rc = SQLITE4_NOMEM;
  2535         -    }
  2536         -    if( rc==SQLITE4_OK && pCsr->bSzValid==0 ){
  2537         -      rc = fts5CsrLoadSz(pCsr);
  2538         -    }
  2539         -    if( rc==SQLITE4_OK ){
  2540         -      assert( pCsr->bSzValid );
  2541         -      if( iCol>=0 ){
  2542         -        *pnToken = pCsr->aSz[iCol];
  2543         -      }else{
  2544         -        int i;
  2545         -        int nToken = 0;
  2546         -        for(i=0; i<pCsr->pInfo->nCol; i++){
  2547         -          nToken += pCsr->aSz[i];
  2548         -        }
  2549         -        *pnToken = nToken;
  2550         -      }
  2551         -    }
  2552         -  }
  2553         -  return rc;
  2554         -}
  2555   2699   
  2556   2700   int sqlite4_mi_column_value(      /* Stub: validates the context only; *ppVal is never written */
  2557   2701     sqlite4_context *pCtx, 
  2558   2702     int iCol,                       /* Currently unused */
  2559   2703     sqlite4_value **ppVal           /* Currently unused */
  2560   2704   ){
  2561   2705     int rc = SQLITE4_OK;
  2562   2706     if( pCtx->pFts ){               /* Intentionally empty for now: returns SQLITE4_OK without producing a value */
  2563   2707     }else{
  2564         -    rc = SQLITE4_MISUSE;
  2565         -  }
  2566         -  return rc;
  2567         -}
  2568         -
  2569         -int sqlite4_mi_phrase_count(sqlite4_context *pCtx, int *pnPhrase){
  2570         -  int rc = SQLITE4_OK;
  2571         -  if( pCtx->pFts ){
  2572         -    *pnPhrase = pCtx->pFts->pExpr->nPhrase;
  2573         -  }else{
  2574   2708       rc = SQLITE4_MISUSE;
  2575   2709     }
  2576   2710     return rc;
  2577   2711   }
  2578   2712   
  2579   2713   static Fts5Str *fts5FindStr(Fts5ExprNode *p, int *piStr){
  2580   2714     Fts5Str *pRet = 0;
................................................................................
  2590   2724       if( pRet==0 ) pRet = fts5FindStr(p->pRight, piStr);
  2591   2725     }
  2592   2726     return pRet;
  2593   2727   }
  2594   2728   
  2595   2729   int sqlite4_mi_match_count(       /* Count entries of phrase iPhrase's instance list that match the iC / iS filters */
  2596   2730     sqlite4_context *pCtx, 
  2597         -  int iCol,
         2731  +  int iC,                         /* Column filter; negative accepts every column */
         2732  +  int iS,                         /* Stream filter; negative accepts every stream */
  2598   2733     int iPhrase,                    /* Phrase index handed to fts5FindStr() */
  2599   2734     int *pnMatch                    /* OUT: number of matching instances */
  2600   2735   ){
  2601   2736     int rc = SQLITE4_OK;
  2602   2737     Fts5Cursor *pCsr = pCtx->pFts;
  2603   2738     if( pCsr ){
  2604   2739       int nMatch = 0;
  2605   2740       Fts5Str *pStr;
  2606         -    int iCopy = iCol;
         2741  +    int iCopy = iPhrase;          /* Scratch copy: fts5FindStr() receives it by pointer */
  2607   2742       InstanceList sList;
  2608   2743   
  2609   2744       pStr = fts5FindStr(pCsr->pExpr->pRoot, &iCopy);
  2610   2745       assert( pStr );               /* NOTE(review): an unmatched iPhrase is only caught by this assert - confirm callers validate it */
  2611   2746   
  2612   2747       fts5InstanceListInit(pStr->aList, pStr->nList, &sList);
  2613   2748       while( 0==fts5InstanceListNext(&sList) ){
  2614         -      if( iCol<0 || sList.iCol==iCol ) nMatch++;
         2749  +      if( (iC<0 || sList.iCol==iC) && (iS<0 || sList.iStream==iS) ) nMatch++;
  2615   2750       }
  2616   2751       *pnMatch = nMatch;
  2617   2752     }else{
  2618   2753       rc = SQLITE4_MISUSE;          /* No FTS cursor on this context */
  2619   2754     }
  2620   2755     return rc;
  2621   2756   }
................................................................................
  2637   2772     int *pnMatch,
  2638   2773     int *pnDoc,
  2639   2774     int *pnRelevant
  2640   2775   ){
  2641   2776     return SQLITE4_OK;
  2642   2777   }
  2643   2778   
  2644         -int sqlite4_mi_total_size(sqlite4_context *pCtx, int iCol, int *pnToken){
  2645         -  int rc = SQLITE4_OK;
  2646         -  if( pCtx->pFts ){
  2647         -    Fts5Cursor *pCsr = pCtx->pFts;
  2648         -    int nCol = pCsr->pInfo->nCol;
  2649         -
  2650         -    if( iCol>=nCol ){
  2651         -      rc = SQLITE4_ERROR;
  2652         -    }else{
  2653         -      rc = fts5CsrLoadGlobal(pCsr);
  2654         -      if( rc==SQLITE4_OK ){
  2655         -        if( iCol<0 ){
  2656         -          int i;
  2657         -          int nToken = 0;
  2658         -          for(i=0; i<nCol; i++){
  2659         -            nToken += pCsr->aGlobal[i+1];
  2660         -          }
  2661         -          *pnToken = nToken;
  2662         -        }else{
  2663         -          *pnToken = pCsr->aGlobal[iCol+1];
  2664         -        }
  2665         -      }
  2666         -    }
  2667         -  }else{
  2668         -    rc = SQLITE4_MISUSE;
  2669         -  }
  2670         -  return rc;
  2671         -}
  2672         -
  2673         -static void fts5StrLoadRowcounts(Fts5Str *pStr, int *anRow){
         2779  +static void fts5StrLoadRowcounts(Fts5Str *pStr, int nStream, int *anRow){  /* Bump anRow[col*nStream+stream] once for each (column, stream) pair in pStr's instance list */
         2780  +  u32 mask = 0;               /* Bit s set: stream s already counted for the current column */
         2781  +  int iPrevCol = 0;           /* Column that mask currently describes */
  2674   2782     InstanceList sList;
  2675   2783   
  2676   2784     fts5InstanceListInit(pStr->aList, pStr->nList, &sList);
  2677   2785     while( 0==fts5InstanceListNext(&sList) ){
  2678         -    anRow[sList.iCol]++;
         2786  +    if( sList.iCol!=iPrevCol ) mask = 0;   /* New column: forget streams seen so far (assumes instances are grouped by column - confirm) */
         2787  +    if( (mask & ((u32)1<<sList.iStream))==0 ){  /* Unsigned shift: 1<<31 on a signed int is undefined */
         2788  +      anRow[sList.iCol * nStream + sList.iStream]++;
         2789  +      mask |= ((u32)1<<sList.iStream);
         2790  +      iPrevCol = sList.iCol;
         2791  +    }
  2679   2792     }
  2680   2793   }
  2681         -
  2682   2794   
  2683   2795   static int fts5ExprLoadRowcounts( /* Walk the expression tree, filling *panRow with counts for each primitive node */
  2684   2796     sqlite4 *db, 
  2685   2797     Fts5Info *pInfo,                /* Supplies nCol */
         2798  +  int nStream,                    /* Number of streams per column in the counts array */
  2686   2799     Fts5ExprNode *pNode,            /* Subtree to process; NULL is a no-op */
  2687   2800     int **panRow                    /* IN/OUT: next free slot in the counts array */
  2688   2801   ){
  2689   2802     int rc = SQLITE4_OK;
  2690   2803   
  2691   2804     if( pNode ){
  2692   2805       if( pNode->eType==TOKEN_PRIMITIVE ){
  2693   2806         int *anRow = *panRow;
  2694   2807         Fts5Phrase *pPhrase = pNode->pPhrase;
  2695   2808   
  2696   2809         rc = fts5ExprAdvance(db, pNode, 1);
  2697   2810         while( rc==SQLITE4_OK ){    /* NOTE(review): this loop can only exit with rc!=SQLITE4_OK, which then skips the recursion below and is returned - confirm how end-of-scan is reported */
         2811  +        int nIncr =  pInfo->nCol * nStream;      /* Values for each Fts5Str */
  2698   2812           int i;
  2699   2813           for(i=0; i<pPhrase->nStr; i++){
  2700         -          fts5StrLoadRowcounts(&pPhrase->aStr[i], &anRow[i*pInfo->nCol]);
         2814  +          fts5StrLoadRowcounts(&pPhrase->aStr[i], nStream, &anRow[i*nIncr]);
  2701   2815           }
  2702   2816           rc = fts5ExprAdvance(db, pNode, 0);
  2703   2817         }
  2704   2818   
  2705         -      *panRow = &anRow[pInfo->nCol * pPhrase->nStr];
         2819  +      *panRow = &anRow[pInfo->nCol * nStream * pPhrase->nStr];  /* Step past the slots used by this node's strings */
  2706   2820       }
  2707   2821   
  2708   2822       if( rc==SQLITE4_OK ){
  2709         -      rc = fts5ExprLoadRowcounts(db, pInfo, pNode->pLeft, panRow);
         2823  +      rc = fts5ExprLoadRowcounts(db, pInfo, nStream, pNode->pLeft, panRow);
  2710   2824       }
  2711   2825       if( rc==SQLITE4_OK ){
  2712         -      rc = fts5ExprLoadRowcounts(db, pInfo, pNode->pLeft, panRow);
         2826  +      rc = fts5ExprLoadRowcounts(db, pInfo, nStream, pNode->pRight, panRow);
  2713   2827       }
  2714   2828     }
  2715   2829   
  2716   2830     return rc;
  2717   2831   }
  2718   2832   
  2719   2833   static int fts5CsrLoadRowcounts(Fts5Cursor *pCsr){  /* Populate pCsr->anRow on first use by scanning a second copy of the query expression */
  2720   2834     int rc = SQLITE4_OK;
  2721   2835   
  2722   2836     if( pCsr->anRow==0 ){         /* Already-allocated counts are reused as-is */
         2837  +    int nStream = pCsr->pGlobal->nStream;  /* Dereferences pGlobal: it must have been loaded before this call */
  2723   2838       sqlite4 *db = pCsr->db;
  2724   2839       Fts5Expr *pCopy;
  2725   2840       Fts5Expr *pExpr = pCsr->pExpr;
  2726   2841       Fts5Info *pInfo = pCsr->pInfo;
  2727   2842       int *anRow;
  2728   2843   
  2729   2844       pCsr->anRow = anRow = (int *)sqlite4DbMallocZero(db,  /* One int per (phrase, column, stream) */
  2730         -        pExpr->nPhrase * pInfo->nCol * sizeof(int)
         2845  +        pExpr->nPhrase * pInfo->nCol * pCsr->pGlobal->nStream * sizeof(int)
  2731   2846       );
  2732   2847       if( !anRow ) return SQLITE4_NOMEM;
  2733   2848   
  2734   2849       rc = fts5ParseExpression(db, pInfo->pTokenizer, pInfo->p,  /* Re-parse pCsr->zExpr into pCopy */
  2735   2850           pInfo->iRoot, pInfo->azCol, pInfo->nCol, pCsr->zExpr, &pCopy, 0
  2736   2851       );
  2737   2852       if( rc==SQLITE4_OK ){
  2738   2853         rc = fts5OpenExprCursors(db, pInfo, pExpr->pRoot);  /* NOTE(review): opens cursors on pExpr, yet pCopy->pRoot is what gets scanned below - confirm */
  2739   2854       }
  2740         -
  2741   2855       if( rc==SQLITE4_OK ){
  2742         -      rc = fts5ExprLoadRowcounts(db, pInfo, pCopy->pRoot, &anRow);
         2856  +      rc = fts5ExprLoadRowcounts(db, pInfo, nStream, pCopy->pRoot, &anRow);
  2743   2857       }
  2744   2858   
  2745   2859       fts5ExpressionFree(db, pCopy);  /* NOTE(review): pCopy has no initializer; relies on fts5ParseExpression() setting it even on failure - confirm */
  2746   2860     }
  2747   2861   
  2748   2862     return rc;                    /* NOTE(review): after a failure pCsr->anRow stays non-NULL, so later calls skip the reload */
  2749   2863   }
  2750   2864   
  2751   2865   int sqlite4_mi_row_count(         /* Sum the cached anRow[] counts of phrase iP over the selected column(s) and stream(s) */
  2752   2866     sqlite4_context *pCtx,          /* Context object passed to mi function */
  2753         -  int iCol,                       /* Specific column (or -1) */
  2754         -  int iPhrase,                    /* Specific phrase (or -1) */
  2755         -  int *pnRow                      /* Total number of rows */
         2867  +  int iC,                         /* Specific column (or -ve for all columns) */
         2868  +  int iS,                         /* Specific stream (or -ve for all streams) */
         2869  +  int iP,                         /* Specific phrase */
         2870  +  int *pn                         /* Total number of rows containing C/S/P */
  2756   2871   ){
  2757   2872     int rc = SQLITE4_OK;
  2758         -  if( pCtx->pFts ){
  2759         -    Fts5Cursor *pCsr = pCtx->pFts;
  2760         -    Fts5Expr *pExpr = pCsr->pExpr;
  2761         -    int nCol = pCsr->pInfo->nCol;
  2762         -    int nPhrase = pExpr->nPhrase;
  2763         -
  2764         -    if( iCol>=nCol || iPhrase>=nPhrase ){
  2765         -      rc = SQLITE4_ERROR;
  2766         -    }
  2767         -
  2768         -    else if( iPhrase>=0 ){
  2769         -      int iIdx = iPhrase * pCsr->pInfo->nCol;
  2770         -
  2771         -      rc = fts5CsrLoadRowcounts(pCsr);
  2772         -      if( rc==SQLITE4_OK ){
  2773         -        if( iCol>0 ){
  2774         -          *pnRow = pCsr->anRow[iIdx + iCol];
  2775         -        }else{
  2776         -          int i;
  2777         -          int nRow = 0;
  2778         -          for(i=0; i<pCsr->pInfo->nCol; i++){
  2779         -            nRow += pCsr->anRow[iIdx + i];
  2780         -          }
  2781         -          *pnRow = nRow;
  2782         -        }
         2873  +  Fts5Cursor *pCsr = pCtx->pFts;
         2874  +  if( pCsr==0 ){
         2875  +    rc = SQLITE4_MISUSE;        /* No FTS cursor on this context */
         2876  +  }else if( iP<0 || iP>=pCsr->pExpr->nPhrase ){ rc = SQLITE4_ERROR; }else{  /* iP must index an existing phrase: it offsets into anRow[] below */
         2877  +    rc = fts5CsrLoadGlobal(pCsr);   /* Must come first: the row-count loader reads pCsr->pGlobal */
         2878  +    if( rc==SQLITE4_OK ) rc = fts5CsrLoadRowcounts(pCsr);
         2879  +
         2880  +    if( rc==SQLITE4_OK ){
         2881  +      int i;
         2882  +      int nRow = 0;             /* Stays 0 if a non-negative iC or iS is out of range */
         2883  +      int nStream = pCsr->pGlobal->nStream;
         2884  +      int nCol = pCsr->pInfo->nCol;
         2885  +      int *aRow = &pCsr->anRow[iP * nStream * nCol];  /* The nCol*nStream counters belonging to phrase iP */
         2886  +
         2887  +      if( iC<0 && iS<0 ){       /* NOTE(review): sums per-(column,stream) counters, so a row matching in several slots is counted once per slot */
         2888  +        int nFin = nCol * nStream;
         2889  +        for(i=0; i<nFin; i++) nRow += aRow[i];
         2890  +      }else if( iC<0 && iS<nStream ){   /* Stream iS of every column; an oversized iS falls through to 0 */
         2891  +        for(i=0; i<nCol; i++) nRow += aRow[i*nStream + iS];
         2892  +      }else if( iS<0 && iC<nCol ){      /* Every stream of column iC; an oversized iC falls through to 0 */
         2893  +        for(i=0; i<nStream; i++) nRow += aRow[nStream*iC + i];  /* index by i: iS is negative in this branch */
         2894  +      }else if( iC<nCol && iS<nStream ){
         2895  +        nRow = aRow[iC * nStream + iS];
  2783   2896         }
  2784         -    }else{
  2785         -      /* Total number of rows in table... */
  2786         -      rc = fts5CsrLoadGlobal(pCsr);
  2787         -      if( rc==SQLITE4_OK ){
  2788         -        *pnRow = (int)pCsr->aGlobal[0];
  2789         -      }
         2897  +
         2898  +      *pn = nRow;
  2790   2899       }
  2791         -  }else{
  2792         -    rc = SQLITE4_MISUSE;
  2793   2900     }
  2794   2901     return rc;
  2795   2902   }
  2796   2903   
  2797   2904   /**************************************************************************
  2798   2905   ***************************************************************************
  2799   2906   ** Below this point is test code.

Changes to src/fts5func.c.

   101    101         int ni;                     /* Number of docs with phrase i */
   102    102   
   103    103         p->db = db;
   104    104         p->nPhrase = nPhrase;
   105    105         p->aIdf = (double *)&p[1];
   106    106   
   107    107         /* Determine the IDF weight for each phrase in the query. */
   108         -      rc = sqlite4_mi_row_count(pCtx, -1, -1, &N);
          108  +      rc = sqlite4_mi_total_rows(pCtx, &N);
   109    109         for(i=0; rc==SQLITE4_OK && i<nPhrase; i++){
   110         -        rc = sqlite4_mi_row_count(pCtx, -1, i, &ni);
          110  +        rc = sqlite4_mi_row_count(pCtx, -1, -1, i, &ni);
   111    111           if( rc==SQLITE4_OK ){
   112    112             p->aIdf[i] = log((0.5 + N - ni) / (0.5 + ni));
   113    113           }
   114    114         }
   115    115   
   116    116         /* Determine the average document length */
   117    117         if( rc==SQLITE4_OK ){
   118    118           int nTotal;
   119         -        rc = sqlite4_mi_total_size(pCtx, -1, &nTotal);
          119  +        rc = sqlite4_mi_total_size(pCtx, -1, -1, &nTotal);
   120    120           if( rc==SQLITE4_OK ){
   121    121             p->avgdl = (double)nTotal / (double)N;
   122    122           }
   123    123         }
   124    124       }
   125    125     }
   126    126   
................................................................................
   129    129       int dl;                     /* Tokens in this row (document length) */
   130    130       double L;                   /* Normalized document length */
   131    131       double prank;               /* Contribution to rank of this phrase */
   132    132   
   133    133       /* Set variable tf to the total number of occurrences of phrase iPhrase
   134    134       ** in this row (within any column). And dl to the number of tokens in
   135    135       ** the current row (again, in any column).  */
   136         -    rc = sqlite4_mi_match_count(pCtx, -1, i, &tf); 
   137         -    if( rc==SQLITE4_OK ) rc = sqlite4_mi_column_size(pCtx, -1, &dl); 
          136  +    rc = sqlite4_mi_match_count(pCtx, -1, -1, i, &tf); 
          137  +    if( rc==SQLITE4_OK ) rc = sqlite4_mi_size(pCtx, -1, -1, &dl); 
   138    138   
   139    139       /* Calculate the normalized document length */
   140    140       L = (double)dl / p->avgdl;
   141    141   
   142    142       /* Calculate the contribution to the rank made by this phrase. Then
   143    143       ** add it to variable rank.  */
   144    144       prank = (p->aIdf[i] * tf) / (k1 * ( (1.0 - b) + b * L) + tf);

Changes to src/sqlite.h.in.

  4419   4419   
  4420   4420   /*
  4421   4421   ** Special functions that may be called from within matchinfo UDFs. All
  4422   4422   ** return an SQLite error code - SQLITE4_OK if successful, or some other
  4423   4423   ** error code otherwise.
  4424   4424   **
  4425   4425   ** sqlite4_mi_column_count():
  4426         -**   Set *pnCol to the number of columns in the queried table.
         4426  +**   Set *pn to the number of columns in the queried table.
         4427  +**
         4428  +** sqlite4_mi_phrase_count():
         4429  +**   Set *pn to the number of phrases in the query.
         4430  +**
         4431  +** sqlite4_mi_stream_count():
         4432  +**   Set *pn to the number of streams in the FTS index.
         4433  +**
         4434  +** sqlite4_mi_size():
         4435  +**   Set *pn to the number of tokens belonging to stream iS in the value 
         4436  +**   stored in column iC of the current row. 
         4437  +**
         4438  +**   Either or both of iS and iC may be negative. If iC is negative, then the
         4439  +**   output value is the total number of tokens for the specified stream (or
         4440  +**   streams) across all table columns. Similarly, if iS is negative, the 
         4441  +**   output value is the total number of tokens in the specified column or 
         4442  +**   columns, regardless of stream.
         4443  +**
         4444  +** sqlite4_mi_total_size():
         4445  +**   Similar to sqlite4_mi_size(), except the output parameter is set to
         4446  +**   the total number of tokens belonging to the specified column(s) 
         4447  +**   and stream(s) in all rows of the table, not just the current row.
         4448  +**
         4449  +** sqlite4_mi_total_rows():
         4450  +**   Set *pn to the total number of rows in the indexed table.
         4451  +**
         4452  +** sqlite4_mi_row_count():
         4453  +**   Set the output parameter to the total number of rows in the table that
         4454  +**   contain at least one instance of the phrase identified by parameter
         4455  +**   iP in the column(s) and stream(s) identified by parameters iC and iS.
         4456  +**
         4457  +** sqlite4_mi_match_count():
         4458  +**   Set the output parameter to the total number of occurrences of phrase
         4459  +**   iP in the current row that belong to the column(s) and stream(s) 
         4460  +**   identified by parameters iC and iS.
         4461  +**
         4462  +**   Parameter iP may also be negative. In this case, the output value is
         4463  +**   set to the total number of occurrences of all query phrases in the
         4464  +**   current row, subject to the constraints imposed by iC and iS.
  4427   4465   **
  4428         -** sqlite4_mi_column_size():
  4429         -**   Set *pnToken to the number of tokens in the value stored in column iCol 
  4430         -**   of the current row.
         4466  +** sqlite4_mi_match_detail():
         4467  +**   This function may be used to iterate through all matches in the
         4468  +**   current row in order of occurrence.
  4431   4469   **
  4432   4470   ** sqlite4_mi_column_value():
  4433   4471   **   Set *ppVal to point to an sqlite4_value object containing the value
  4434   4472   **   read from column iCol of the current row. This object is valid until
  4435   4473   **   the function callback returns.
  4436         -**
  4437         -** sqlite4_mi_phrase_count():
  4438         -**   Set *pnPhrase to the number of phrases in the query.
  4439         -**
  4440         -** sqlite4_mi_match_count():
  4441         -**   Set *pn to the number of occurences of phrase iPhrase in column iCol of
  4442         -**   the current row.
  4443         -**
  4444         -** sqlite4_mi_total_match_count():
  4445         -**   Set *pnMatch to the total number of occurrences of phrase iPhrase
  4446         -**   in column iCol of all rows in the indexed table. Set *pnDoc to the
  4447         -**   number of rows that contain at least one match for phrase iPhrase in
  4448         -**   column iCol.
  4449         -**
  4450         -** sqlite4_mi_match_offset():
  4451         -**   Set *piOff to the token offset of the iMatch'th instance of phrase
  4452         -**   iPhrase in column iCol of the current row. If any parameter is out
  4453         -**   of range (i.e. too large) it is not an error. In this case *piOff is 
  4454         -**   set to -1 before returning.
  4455         -**   
  4456         -** sqlite4_mi_total_size():
  4457         -**   Set *pnToken to the total number of tokens in column iCol of all rows
  4458         -**   in the indexed table.
  4459         -**
  4460         -** sqlite4_mi_row_count():
  4461         -**   If parameter iPhrase is negative, this function sets the output 
  4462         -**   parameter to the total number of documents in the collection (rows 
  4463         -**   in the indexed table).
  4464         -**
  4465         -**   Otherwise, if iPhrase is not negative, then the output is set to the
  4466         -**   total number of rows that contain at least one instance of phrase iPhrase
  4467         -**   in column iCol, or in any column if iCol is negative.
  4468         -**
  4469         -**   If parameter iPhrase is equal to or greater than the number of phrases
  4470         -**   in the current query, or if iCol is equal to or greater than the number
  4471         -**   of columns in the indexed table, SQLITE4_MISUSE is returned. The value
  4472         -**   of the output parameter is undefined in this case.
  4473   4474   */
         4475  +int sqlite4_mi_column_count(sqlite4_context *, int *pn);
         4476  +int sqlite4_mi_phrase_count(sqlite4_context *, int *pn);
         4477  +int sqlite4_mi_stream_count(sqlite4_context *, int *pn);
         4478  +
         4479  +int sqlite4_mi_total_size(sqlite4_context *, int iC, int iS, int *pn);
         4480  +int sqlite4_mi_total_rows(sqlite4_context *, int *pn);
         4481  +
         4482  +int sqlite4_mi_row_count(sqlite4_context *, int iC, int iS, int iP, int *pn);
  4474   4483   
  4475         -int sqlite4_mi_column_count(sqlite4_context *, int *pnCol);
  4476         -int sqlite4_mi_phrase_count(sqlite4_context *, int *pnPhrase);
  4477         -
  4478         -int sqlite4_mi_column_size(sqlite4_context *, int iCol, int *pnToken);
  4479         -int sqlite4_mi_match_count(sqlite4_context *, int iCol, int iPhrase, int *pn);
  4480         -int sqlite4_mi_total_size(sqlite4_context *, int iCol, int *pnToken);
  4481         -int sqlite4_mi_row_count(sqlite4_context *, int iCol, int iPhrase, int *pnRow);
  4482         -
         4484  +int sqlite4_mi_size(sqlite4_context *, int iC, int iS, int *pn);
         4485  +int sqlite4_mi_match_count(sqlite4_context *, int iC, int iS, int iP, int *pn);
         4486  +int sqlite4_mi_match_detail(
         4487  +    sqlite4_context *, int iMatch, int *piOff, int *piC, int *piS, int *piP
         4488  +);
  4483   4489   int sqlite4_mi_column_value(sqlite4_context *, int iCol, sqlite4_value **ppVal);
  4484         -int sqlite4_mi_match_detail(sqlite4_context *, 
  4485         -    int iCol, int iPhrase, int iMatch, int *piOff, int *piWeight
  4486         -);
         4490  +
  4487   4491   
  4488   4492   
  4489   4493   /*
  4490   4494   ** Undo the hack that converts floating point types to integer for
  4491   4495   ** builds on processors without floating point support.
  4492   4496   */
  4493   4497   #ifdef SQLITE4_OMIT_FLOATING_POINT