Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fill in more of the matchinfo functions so that the BM25 function works.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | matchinfo
Files: files | file ages | folders
SHA1: 0e439483d78ef92af02d98bc98cfb28b7feb73a3
User & Date: dan 2013-01-03 18:13:33.409
Context
2013-01-03
20:35
Add comment describing format of row and global size records. check-in: 7cfa40b5c1 user: dan tags: matchinfo
18:13
Fill in more of the matchinfo functions so that the BM25 function works. check-in: 0e439483d7 user: dan tags: matchinfo
2013-01-02
20:01
Add an implementation of BM25 to fts5func.c. Other changes to matchinfo related things. check-in: 03f26d8c60 user: dan tags: matchinfo
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/fts5.c.
201
202
203
204
205
206
207




208
209
210
211
212
213
214

  KVCursor *pCsr;                 /* Cursor used to retrive values */
  Mem *aMem;                      /* Array of column values */

  /* Array of nPhrase*nCol integers. See sqlite4_mi_row_count() for details. */
  int *anRow;
  i64 *aGlobal;




};

/*
** This type is used when reading (decoding) an instance-list.
*/
typedef struct InstanceList InstanceList;
struct InstanceList {







>
>
>
>







201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218

  KVCursor *pCsr;                 /* Cursor used to retrive values */
  Mem *aMem;                      /* Array of column values */

  /* Array of nPhrase*nCol integers. See sqlite4_mi_row_count() for details. */
  int *anRow;
  i64 *aGlobal;

  /* Size of each column of current row (in tokens). */
  int bSzValid;
  int *aSz;
};

/*
** This type is used when reading (decoding) an instance-list.
*/
typedef struct InstanceList InstanceList;
struct InstanceList {
1261
1262
1263
1264
1265
1266
1267










































1268
1269
1270
1271
1272
1273
1274
    }else{
      rc = fts5LoadGlobal(pCsr->db, pCsr->pInfo, pCsr->aGlobal);
    }
  }
  return rc;
}












































/*
** Update an fts index.
*/
int sqlite4Fts5Update(
  sqlite4 *db,                    /* Database handle */
  Fts5Info *pInfo,                /* Description of fts index to update */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
    }else{
      rc = fts5LoadGlobal(pCsr->db, pCsr->pInfo, pCsr->aGlobal);
    }
  }
  return rc;
}

/*
** Load the per-column token counts for the row identified by the primary
** key of the root expression node (pCsr->pExpr->pRoot->aPk) into the
** pCsr->aSz[] array. The caller must have allocated aSz[] with room for
** pInfo->nCol entries.
**
** The size record is looked up under the key:
**
**     varint(pInfo->iRoot) 0x00 <primary key blob>
**
** and is decoded as pInfo->nCol consecutive varints.
**
** On success SQLITE4_OK is returned and pCsr->bSzValid is set. If the
** record is missing or too short SQLITE4_CORRUPT is returned. bSzValid
** is left untouched on any error so that stale sizes are never reported
** as valid.
*/
static int fts5CsrLoadSz(Fts5Cursor *pCsr){
  sqlite4 *db = pCsr->db;
  Fts5Info *pInfo = pCsr->pInfo;
  int nVal = pInfo->nCol;
  int rc;
  u8 *aKey;
  int nKey = 0;
  int nPk = pCsr->pExpr->pRoot->nPk;
  KVCursor *pKVCsr = 0;           /* Cursor used to read the size record */

  /* 10 bytes is enough for the root-number varint plus the 0x00 byte. */
  aKey = (u8 *)sqlite4DbMallocZero(db, 10 + nPk);
  if( !aKey ) return SQLITE4_NOMEM;

  nKey = putVarint32(aKey, pInfo->iRoot);
  aKey[nKey++] = 0x00;
  memcpy(&aKey[nKey], pCsr->pExpr->pRoot->aPk, nPk);
  nKey += nPk;

  rc = sqlite4KVStoreOpenCursor(db->aDb[pInfo->iDb].pKV, &pKVCsr);
  if( rc==SQLITE4_OK ){
    rc = sqlite4KVCursorSeek(pKVCsr, aKey, nKey, 0);
    if( rc==SQLITE4_NOTFOUND ){
      /* Every indexed row is expected to have a size record. */
      rc = SQLITE4_CORRUPT_BKPT;
    }else if( rc==SQLITE4_OK ){
      const u8 *aData = 0;
      int nData = 0;
      rc = sqlite4KVCursorData(pKVCsr, 0, -1, &aData, &nData);
      if( rc==SQLITE4_OK ){
        int i;
        int iOff = 0;
        /* Stop early if the record is shorter than nVal varints. */
        for(i=0; i<nVal && iOff<nData; i++){
          iOff += getVarint32(&aData[iOff], pCsr->aSz[i]);
        }
        if( i<nVal ){
          rc = SQLITE4_CORRUPT_BKPT;
        }else{
          /* Only mark the cache valid once every column was decoded. */
          pCsr->bSzValid = 1;
        }
      }
    }
    sqlite4KVCursorClose(pKVCsr);
  }

  /* The key buffer is only needed for the seek; release it on all paths. */
  sqlite4DbFree(db, aKey);
  return rc;
}


/*
** Update an fts index.
*/
int sqlite4Fts5Update(
  sqlite4 *db,                    /* Database handle */
  Fts5Info *pInfo,                /* Description of fts index to update */
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
      return 0;
    }
  }

  return (p->iCol==pFirst->iCol && p->iOff==iReq);
}

static int fts5StringFindInstances(Fts5Cursor *pCsr, int iCol, Fts5Str *pStr){
  sqlite4 *db = pCsr->db;
  int i;
  int rc = SQLITE4_OK;
  int bEof = 0;
  int nByte = sizeof(InstanceList) * pStr->nToken;
  InstanceList *aIn;
  InstanceList out;








|
<







2093
2094
2095
2096
2097
2098
2099
2100

2101
2102
2103
2104
2105
2106
2107
      return 0;
    }
  }

  return (p->iCol==pFirst->iCol && p->iOff==iReq);
}

static int fts5StringFindInstances(sqlite4 *db, int iCol, Fts5Str *pStr){

  int i;
  int rc = SQLITE4_OK;
  int bEof = 0;
  int nByte = sizeof(InstanceList) * pStr->nToken;
  InstanceList *aIn;
  InstanceList out;

2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
  if( p1->iCol==p2->iCol && p1->iOff<p2->iOff && (p1->iOff+nNear)>=p2->iOff ){
    return 1;
  }
  return 0;
}

static int fts5StringNearTrim(
  Fts5Cursor *pCsr,               /* Cursor object that owns both strings */
  Fts5Str *pTrim,                 /* Trim this instance list */
  Fts5Str *pNext,                 /* According to this one */
  int nNear
){
  if( pNext->nList==0 ){
    pTrim->nList = 0;
  }else{







<







2155
2156
2157
2158
2159
2160
2161

2162
2163
2164
2165
2166
2167
2168
  if( p1->iCol==p2->iCol && p1->iOff<p2->iOff && (p1->iOff+nNear)>=p2->iOff ){
    return 1;
  }
  return 0;
}

static int fts5StringNearTrim(

  Fts5Str *pTrim,                 /* Trim this instance list */
  Fts5Str *pNext,                 /* According to this one */
  int nNear
){
  if( pNext->nList==0 ){
    pTrim->nList = 0;
  }else{
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
** is set to true before returning.
**
** If the cursors do not point to a match, then *ppAdvance is set to
** the token of the individual cursor that should be advanced before
** retrying this function.
*/
static int fts5PhraseIsMatch(
  Fts5Cursor *pCsr,               /* Cursor that owns this string */
  Fts5Phrase *pPhrase,            /* Phrase to test */
  int *pbMatch,                   /* OUT: True for a match, false otherwise */
  Fts5Token **ppAdvance           /* OUT: Token to advance before retrying */
){
  const u8 *aPk1 = 0;
  int nPk1 = 0;
  int rc = SQLITE4_OK;







|







2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
** is set to true before returning.
**
** If the cursors do not point to a match, then *ppAdvance is set to
** the token of the individual cursor that should be advanced before
** retrying this function.
*/
static int fts5PhraseIsMatch(
  sqlite4 *db,                    /* Database handle */
  Fts5Phrase *pPhrase,            /* Phrase to test */
  int *pbMatch,                   /* OUT: True for a match, false otherwise */
  Fts5Token **ppAdvance           /* OUT: Token to advance before retrying */
){
  const u8 *aPk1 = 0;
  int nPk1 = 0;
  int rc = SQLITE4_OK;
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303

  /* At this point, it is established that all of the token cursors in the
  ** phrase point to an entry with the same primary key. Now figure out if
  ** the various string constraints are met. Along the way, synthesize a 
  ** position list for each Fts5Str object.  */
  for(i=0; rc==SQLITE4_OK && i<pPhrase->nStr; i++){
    Fts5Str *pStr = &pPhrase->aStr[i];
    rc = fts5StringFindInstances(pCsr, pPhrase->iCol, pStr);
  }

  /* Trim the instance lists according to any NEAR constraints.  */
  for(i=1; rc==SQLITE4_OK && i<pPhrase->nStr; i++){
    int n = pPhrase->aiNear[i-1];
    rc = fts5StringNearTrim(pCsr, &pPhrase->aStr[i], &pPhrase->aStr[i-1], n);
  }
  for(i=pPhrase->nStr-1; rc==SQLITE4_OK && i>0; i--){
    int n = pPhrase->aiNear[i-1];
    rc = fts5StringNearTrim(pCsr, &pPhrase->aStr[i-1], &pPhrase->aStr[i], n);
  }

  *pbMatch = (pPhrase->aStr[0].nList>0);
  return rc;
}

/*
** Repeatedly test whether the token cursors of pPhrase currently point at
** a match. While they do not, advance the token nominated by
** fts5PhraseIsMatch() and try again. Returns as soon as a match is found
** or any call returns something other than SQLITE4_OK.
*/
static int fts5PhraseAdvanceToMatch(Fts5Cursor *pCsr, Fts5Phrase *pPhrase){
  int rc = SQLITE4_OK;
  while( rc==SQLITE4_OK ){
    int isMatch;
    Fts5Token *pNextTok = 0;      /* Token to step if there is no match */

    rc = fts5PhraseIsMatch(pCsr, pPhrase, &isMatch, &pNextTok);
    if( rc!=SQLITE4_OK || isMatch ){
      break;
    }
    rc = fts5TokenAdvance(pCsr->db, pNextTok);
  }
  return rc;
}

static int fts5ExprAdvance(Fts5Cursor *pCsr, Fts5ExprNode *p, int bFirst){
  int rc = SQLITE4_OK;

  switch( p->eType ){
    case TOKEN_PRIMITIVE: {
      Fts5Phrase *pPhrase = p->pPhrase;
      if( bFirst==0 ){
        rc = fts5TokenAdvance(pCsr->db, &pPhrase->aStr[0].aToken[0]);
      }
      if( rc==SQLITE4_OK ) rc = fts5PhraseAdvanceToMatch(pCsr, pPhrase);
      if( rc==SQLITE4_OK ){
        rc = fts5TokenPk(&pPhrase->aStr[0].aToken[0], &p->aPk, &p->nPk);
      }else{
        p->aPk = 0;
        p->nPk = 0;
        if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK;
      }
      break;
    }

    case TOKEN_AND:
      p->aPk = 0;
      p->nPk = 0;
      rc = fts5ExprAdvance(pCsr, p->pLeft, bFirst);
      if( rc==SQLITE4_OK ) rc = fts5ExprAdvance(pCsr, p->pRight, bFirst);
      while( rc==SQLITE4_OK && p->pLeft->aPk && p->pRight->aPk ){
        int res = fts5KeyCompare(
            p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk
        );
        if( res<0 ){
          rc = fts5ExprAdvance(pCsr, p->pLeft, 0);
        }else if( res>0 ){
          rc = fts5ExprAdvance(pCsr, p->pRight, 0);
        }else{
          p->aPk = p->pLeft->aPk;
          p->nPk = p->pLeft->nPk;
          break;
        }
      }
      break;

    case TOKEN_OR: {
      int res = 0;
      if( bFirst==0 ){
        res = fts5KeyCompare(
            p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk
        );
      }
        
      if( res<=0 ) rc = fts5ExprAdvance(pCsr, p->pLeft, bFirst);
      if( rc==SQLITE4_OK && res>=0 ){
        rc = fts5ExprAdvance(pCsr, p->pRight, bFirst);
      }

      res = fts5KeyCompare(
          p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk
      );
      if( res>0 ){
        p->aPk = p->pRight->aPk;







|





|



|






|




|

|




|






|

|













|
|





|

|
















|

|







2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347

  /* At this point, it is established that all of the token cursors in the
  ** phrase point to an entry with the same primary key. Now figure out if
  ** the various string constraints are met. Along the way, synthesize a 
  ** position list for each Fts5Str object.  */
  for(i=0; rc==SQLITE4_OK && i<pPhrase->nStr; i++){
    Fts5Str *pStr = &pPhrase->aStr[i];
    rc = fts5StringFindInstances(db, pPhrase->iCol, pStr);
  }

  /* Trim the instance lists according to any NEAR constraints.  */
  for(i=1; rc==SQLITE4_OK && i<pPhrase->nStr; i++){
    int n = pPhrase->aiNear[i-1];
    rc = fts5StringNearTrim(&pPhrase->aStr[i], &pPhrase->aStr[i-1], n);
  }
  for(i=pPhrase->nStr-1; rc==SQLITE4_OK && i>0; i--){
    int n = pPhrase->aiNear[i-1];
    rc = fts5StringNearTrim(&pPhrase->aStr[i-1], &pPhrase->aStr[i], n);
  }

  *pbMatch = (pPhrase->aStr[0].nList>0);
  return rc;
}

/*
** Repeatedly test whether the token cursors of pPhrase currently point at
** a match. While they do not, advance the token nominated by
** fts5PhraseIsMatch() and try again. Returns as soon as a match is found
** or any call returns something other than SQLITE4_OK.
*/
static int fts5PhraseAdvanceToMatch(sqlite4 *db, Fts5Phrase *pPhrase){
  int rc = SQLITE4_OK;
  while( rc==SQLITE4_OK ){
    int isMatch;
    Fts5Token *pNextTok = 0;      /* Token to step if there is no match */

    rc = fts5PhraseIsMatch(db, pPhrase, &isMatch, &pNextTok);
    if( rc!=SQLITE4_OK || isMatch ){
      break;
    }
    rc = fts5TokenAdvance(db, pNextTok);
  }
  return rc;
}

static int fts5ExprAdvance(sqlite4 *db, Fts5ExprNode *p, int bFirst){
  int rc = SQLITE4_OK;

  switch( p->eType ){
    case TOKEN_PRIMITIVE: {
      Fts5Phrase *pPhrase = p->pPhrase;
      if( bFirst==0 ){
        rc = fts5TokenAdvance(db, &pPhrase->aStr[0].aToken[0]);
      }
      if( rc==SQLITE4_OK ) rc = fts5PhraseAdvanceToMatch(db, pPhrase);
      if( rc==SQLITE4_OK ){
        rc = fts5TokenPk(&pPhrase->aStr[0].aToken[0], &p->aPk, &p->nPk);
      }else{
        p->aPk = 0;
        p->nPk = 0;
        if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK;
      }
      break;
    }

    case TOKEN_AND:
      p->aPk = 0;
      p->nPk = 0;
      rc = fts5ExprAdvance(db, p->pLeft, bFirst);
      if( rc==SQLITE4_OK ) rc = fts5ExprAdvance(db, p->pRight, bFirst);
      while( rc==SQLITE4_OK && p->pLeft->aPk && p->pRight->aPk ){
        int res = fts5KeyCompare(
            p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk
        );
        if( res<0 ){
          rc = fts5ExprAdvance(db, p->pLeft, 0);
        }else if( res>0 ){
          rc = fts5ExprAdvance(db, p->pRight, 0);
        }else{
          p->aPk = p->pLeft->aPk;
          p->nPk = p->pLeft->nPk;
          break;
        }
      }
      break;

    case TOKEN_OR: {
      int res = 0;
      if( bFirst==0 ){
        res = fts5KeyCompare(
            p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk
        );
      }
        
      if( res<=0 ) rc = fts5ExprAdvance(db, p->pLeft, bFirst);
      if( rc==SQLITE4_OK && res>=0 ){
        rc = fts5ExprAdvance(db, p->pRight, bFirst);
      }

      res = fts5KeyCompare(
          p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk
      );
      if( res>0 ){
        p->aPk = p->pRight->aPk;
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346

2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392


    default: assert( p->eType==TOKEN_NOT );

      p->aPk = 0;
      p->nPk = 0;

      rc = fts5ExprAdvance(pCsr, p->pLeft, bFirst);
      if( bFirst && rc==SQLITE4_OK ){
        rc = fts5ExprAdvance(pCsr, p->pRight, bFirst);
      }

      while( rc==SQLITE4_OK && p->pLeft->aPk && p->pRight->aPk ){
        int res = fts5KeyCompare(
            p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk
        );
        if( res<0 ){
          break;
        }else if( res>0 ){
          rc = fts5ExprAdvance(pCsr, p->pRight, 0);
        }else{
          rc = fts5ExprAdvance(pCsr, p->pLeft, 0);
        }
      }

      p->aPk = p->pLeft->aPk;
      p->nPk = p->pLeft->nPk;
      break;
  }

  assert( rc!=SQLITE4_NOTFOUND );
  return rc;
}

/*
** Advance the cursor by stepping the root node of its parsed expression
** (a non-initial advance, bFirst==0).
*/
int sqlite4Fts5Next(Fts5Cursor *pCsr){
  Fts5ExprNode *pRoot = pCsr->pExpr->pRoot;
  return fts5ExprAdvance(pCsr, pRoot, 0);
}

/*
** Open a new FTS cursor for match expression zMatch against the index
** described by pInfo.
**
** The cursor object and a private copy of the expression text are
** allocated as one block; the copy lives directly after the Fts5Cursor
** structure and is nul-terminated because the allocation is zeroed.
**
** If allocation, parsing or opening the term cursors fails, the cursor is
** closed and *ppCsr is set to 0. Otherwise the expression is advanced to
** its first position (bFirst==1), *ppCsr is set to the new cursor and the
** result of that advance is returned.
*/
int sqlite4Fts5Open(
  sqlite4 *db,                    /* Database handle */
  Fts5Info *pInfo,                /* Index description */
  const char *zMatch,             /* Match expression */
  int bDesc,                      /* True to iterate in desc. order of PK */
  Fts5Cursor **ppCsr,             /* OUT: New FTS cursor object */
  char **pzErr                    /* OUT: Error message */
){
  int rc = SQLITE4_OK;
  Fts5Cursor *pCsr;
  int nMatch = sqlite4Strlen30(zMatch);

  pCsr = sqlite4DbMallocZero(db, sizeof(Fts5Cursor) + nMatch + 1);

  if( !pCsr ){
    rc = SQLITE4_NOMEM;
  }else{
    pCsr->zExpr = (char *)&pCsr[1];
    /* memcpy() takes (dest, src, n): copy nMatch bytes of zMatch. */
    memcpy(pCsr->zExpr, zMatch, nMatch);
    pCsr->pInfo = pInfo;
    pCsr->db = db;
    rc = fts5ParseExpression(db, pInfo->pTokenizer, pInfo->p, 
        pInfo->iRoot, pInfo->azCol, pInfo->nCol, zMatch, &pCsr->pExpr, pzErr
    );
  }

  if( rc==SQLITE4_OK ){
    /* Open a KV cursor for each term in the expression. Set each cursor
    ** to point to the first entry in the range it will scan.  */
    rc = fts5OpenCursors(db, pInfo, pCsr);
  }
  if( rc!=SQLITE4_OK ){
    sqlite4Fts5Close(db, pCsr);
    pCsr = 0;
  }else{
    rc = fts5ExprAdvance(pCsr, pCsr->pExpr->pRoot, 1);
  }
  *ppCsr = pCsr;
  return rc;
}

/*
** Return true if the cursor passed as the second argument currently points







|

|









|

|













>
|




















|
















|







2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437


    default: assert( p->eType==TOKEN_NOT );

      p->aPk = 0;
      p->nPk = 0;

      rc = fts5ExprAdvance(db, p->pLeft, bFirst);
      if( bFirst && rc==SQLITE4_OK ){
        rc = fts5ExprAdvance(db, p->pRight, bFirst);
      }

      while( rc==SQLITE4_OK && p->pLeft->aPk && p->pRight->aPk ){
        int res = fts5KeyCompare(
            p->pLeft->aPk, p->pLeft->nPk, p->pRight->aPk, p->pRight->nPk
        );
        if( res<0 ){
          break;
        }else if( res>0 ){
          rc = fts5ExprAdvance(db, p->pRight, 0);
        }else{
          rc = fts5ExprAdvance(db, p->pLeft, 0);
        }
      }

      p->aPk = p->pLeft->aPk;
      p->nPk = p->pLeft->nPk;
      break;
  }

  assert( rc!=SQLITE4_NOTFOUND );
  return rc;
}

/*
** Advance the cursor to its next position. The cached column sizes in
** pCsr->aSz[] are marked invalid first, then the root node of the parsed
** expression is stepped (a non-initial advance, bFirst==0).
*/
int sqlite4Fts5Next(Fts5Cursor *pCsr){
  Fts5ExprNode *pRoot;

  pCsr->bSzValid = 0;
  pRoot = pCsr->pExpr->pRoot;
  return fts5ExprAdvance(pCsr->db, pRoot, 0);
}

/*
** Open a new FTS cursor for match expression zMatch against the index
** described by pInfo.
**
** The cursor object and a private copy of the expression text share one
** zeroed allocation; the copy sits directly after the Fts5Cursor struct.
**
** If allocation, parsing or opening the term cursors fails, the cursor is
** passed to sqlite4Fts5Close() and *ppCsr is set to 0. Otherwise the
** expression is advanced to its first position (bFirst==1), *ppCsr is set
** to the new cursor and the result of that advance is returned.
*/
int sqlite4Fts5Open(
  sqlite4 *db,                    /* Database handle */
  Fts5Info *pInfo,                /* Index description */
  const char *zMatch,             /* Match expression */
  int bDesc,                      /* True to iterate in desc. order of PK */
  Fts5Cursor **ppCsr,             /* OUT: New FTS cursor object */
  char **pzErr                    /* OUT: Error message */
){
  int rc;
  int nMatch = sqlite4Strlen30(zMatch);
  Fts5Cursor *pNew = sqlite4DbMallocZero(db, sizeof(Fts5Cursor) + nMatch + 1);

  if( pNew==0 ){
    rc = SQLITE4_NOMEM;
  }else{
    /* The expression text is stored immediately after the structure. */
    pNew->zExpr = (char *)&pNew[1];
    memcpy(pNew->zExpr, zMatch, nMatch);
    pNew->pInfo = pInfo;
    pNew->db = db;

    rc = fts5ParseExpression(db, pInfo->pTokenizer, pInfo->p, 
        pInfo->iRoot, pInfo->azCol, pInfo->nCol, zMatch, &pNew->pExpr, pzErr
    );
    if( rc==SQLITE4_OK ){
      /* Open a KV cursor for each term in the expression, each one
      ** positioned on the first entry of the range it will scan. */
      rc = fts5OpenCursors(db, pInfo, pNew);
    }
  }

  if( rc==SQLITE4_OK ){
    rc = fts5ExprAdvance(db, pNew->pExpr->pRoot, 1);
  }else{
    sqlite4Fts5Close(db, pNew);
    pNew = 0;
  }

  *ppCsr = pNew;
  return rc;
}

/*
** Return true if the cursor passed as the second argument currently points
2433
2434
2435
2436
2437
2438
2439
2440





2441




2442

















2443
2444
2445
2446
2447
2448
2449
    rc = SQLITE4_MISUSE;
  }
  return rc;
}

/*
** Placeholder implementation. Returns SQLITE4_MISUSE if the context has
** no FTS cursor attached (pCtx->pFts is NULL). Otherwise returns
** SQLITE4_OK without reading iCol or writing to *pnToken.
*/
int sqlite4_mi_column_size(sqlite4_context *pCtx, int iCol, int *pnToken){
  int rc = SQLITE4_OK;
  if( pCtx->pFts ){
    /* Not yet implemented: *pnToken is left unmodified. */
  }else{
    rc = SQLITE4_MISUSE;
  }
  return rc;
}

int sqlite4_mi_column_value(
  sqlite4_context *pCtx, 
  int iCol, 







|
>
>
>
>
>

>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
    rc = SQLITE4_MISUSE;
  }
  return rc;
}

/*
** Set *pnToken to the size in tokens of column iCol of the current row,
** or to the sum over all columns if iCol is negative.
**
** The per-column sizes are cached in pCsr->aSz[]. The array is allocated
** on first use and reloaded via fts5CsrLoadSz() whenever bSzValid is
** clear.
**
** Returns SQLITE4_MISUSE if the context has no FTS cursor, SQLITE4_ERROR
** if iCol is not less than the number of indexed columns, SQLITE4_NOMEM
** on allocation failure, or any error from fts5CsrLoadSz(). *pnToken is
** only written when SQLITE4_OK is returned.
*/
int sqlite4_mi_column_size(sqlite4_context *pCtx, int iCol, int *pnToken){
  Fts5Cursor *pCsr = pCtx->pFts;
  int nCol;
  int rc = SQLITE4_OK;

  if( pCsr==0 ) return SQLITE4_MISUSE;
  nCol = pCsr->pInfo->nCol;
  if( iCol>=nCol ) return SQLITE4_ERROR;

  /* Lazily allocate the size cache. */
  if( pCsr->aSz==0 ){
    pCsr->aSz = (int *)sqlite4DbMallocZero(pCsr->db, sizeof(int)*nCol);
    if( pCsr->aSz==0 ) return SQLITE4_NOMEM;
  }

  /* Populate the cache for the current row if it is stale. */
  if( pCsr->bSzValid==0 ){
    rc = fts5CsrLoadSz(pCsr);
    if( rc!=SQLITE4_OK ) return rc;
  }
  assert( pCsr->bSzValid );

  if( iCol<0 ){
    int iSum = 0;
    int k;
    for(k=0; k<nCol; k++){
      iSum += pCsr->aSz[k];
    }
    *pnToken = iSum;
  }else{
    *pnToken = pCsr->aSz[iCol];
  }
  return rc;
}

int sqlite4_mi_column_value(
  sqlite4_context *pCtx, 
  int iCol, 
2462
2463
2464
2465
2466
2467
2468
















2469
2470
2471
2472
2473
2474
2475
2476

2477













2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490

2491
2492
2493
2494
2495
2496
2497
2498
2499
2500

2501
2502
2503
2504
2505
2506
2507
  if( pCtx->pFts ){
    *pnPhrase = pCtx->pFts->pExpr->nPhrase;
  }else{
    rc = SQLITE4_MISUSE;
  }
  return rc;
}

















/*
** Placeholder implementation. Returns SQLITE4_MISUSE if the context has
** no FTS cursor attached (pCtx->pFts is NULL). Otherwise returns
** SQLITE4_OK without reading iCol or iPhrase and without writing to
** *pnMatch.
*/
int sqlite4_mi_match_count(
  sqlite4_context *pCtx, 
  int iCol, 
  int iPhrase, 
  int *pnMatch
){
  int rc = SQLITE4_OK;

  if( pCtx->pFts ){
    /* Not yet implemented: *pnMatch is left unmodified. */
  }else{
    rc = SQLITE4_MISUSE;
  }
  return rc;
}

/*
** Placeholder implementation. No argument is examined and *piOff is not
** written. An explicit SQLITE4_OK is returned so that callers never read
** an indeterminate result from a function that falls off its end.
*/
int sqlite4_mi_match_offset(
  sqlite4_context *pCtx, 
  int iCol, 
  int iPhrase, 
  int iMatch, 
  int *piOff
){
  return SQLITE4_OK;
}

/*
** Placeholder implementation. No argument is examined and none of the
** output parameters are written. An explicit SQLITE4_OK is returned so
** that callers never read an indeterminate result from a function that
** falls off its end.
*/
int sqlite4_mi_total_match_count(
  sqlite4_context *pCtx,
  int iCol,
  int iPhrase,
  int *pnMatch,
  int *pnDoc,
  int *pnRelevant
){
  return SQLITE4_OK;
}

int sqlite4_mi_total_size(sqlite4_context *pCtx, int iCol, int *pnToken){
  int rc = SQLITE4_OK;
  if( pCtx->pFts ){
    Fts5Cursor *pCsr = pCtx->pFts;
    int nCol = pCsr->pInfo->nCol;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



|
|



>
|
>
>
>
>
>
>
>
>
>
>
>
>
>













>










>







2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
  if( pCtx->pFts ){
    *pnPhrase = pCtx->pFts->pExpr->nPhrase;
  }else{
    rc = SQLITE4_MISUSE;
  }
  return rc;
}

/*
** Locate the (*piStr)'th Fts5Str object in the expression tree rooted at
** p, counting strings in left-to-right order across primitive nodes.
**
** For a primitive node, if *piStr is less than the number of strings in
** its phrase the matching entry is returned. Otherwise *piStr is reduced
** by that number and 0 is returned so the search can continue elsewhere.
** For any other node type the left sub-tree is searched first, then the
** right one.
*/
static Fts5Str *fts5FindStr(Fts5ExprNode *p, int *piStr){
  Fts5Str *pFound;

  if( p->eType!=TOKEN_PRIMITIVE ){
    pFound = fts5FindStr(p->pLeft, piStr);
    return pFound ? pFound : fts5FindStr(p->pRight, piStr);
  }

  if( *piStr < p->pPhrase->nStr ){
    return &p->pPhrase->aStr[*piStr];
  }

  /* Not in this phrase: skip over its strings. */
  *piStr -= p->pPhrase->nStr;
  return 0;
}

/*
** Set *pnMatch to the number of instances of phrase iPhrase found in the
** current row, restricted to column iCol, or counted across all columns
** if iCol is negative.
**
** The phrase is located with fts5FindStr() and its instance list is
** iterated to count entries whose column matches.
**
** Returns SQLITE4_MISUSE if the context has no FTS cursor attached,
** otherwise SQLITE4_OK.
**
** NOTE(review): iPhrase is assumed to be a valid, non-negative phrase
** index; an out-of-range value trips the assert() below. Confirm whether
** callers may pass -1 here.
*/
int sqlite4_mi_match_count(
  sqlite4_context *pCtx, 
  int iCol,
  int iPhrase,
  int *pnMatch
){
  int rc = SQLITE4_OK;
  Fts5Cursor *pCsr = pCtx->pFts;
  if( pCsr ){
    int nMatch = 0;
    Fts5Str *pStr;
    int iCopy = iPhrase;          /* fts5FindStr() modifies its index arg */
    InstanceList sList;

    /* Look the string up by phrase index, not by column number. */
    pStr = fts5FindStr(pCsr->pExpr->pRoot, &iCopy);
    assert( pStr );

    fts5InstanceListInit(pStr->aList, pStr->nList, &sList);
    while( 0==fts5InstanceListNext(&sList) ){
      if( iCol<0 || sList.iCol==iCol ) nMatch++;
    }
    *pnMatch = nMatch;
  }else{
    rc = SQLITE4_MISUSE;
  }
  return rc;
}

/*
** Placeholder implementation: always returns SQLITE4_OK. No argument is
** examined and *piOff is not written.
*/
int sqlite4_mi_match_offset(
  sqlite4_context *pCtx, 
  int iCol, 
  int iPhrase, 
  int iMatch, 
  int *piOff
){
  return SQLITE4_OK;
}

/*
** Placeholder implementation: always returns SQLITE4_OK. No argument is
** examined and none of *pnMatch, *pnDoc or *pnRelevant is written.
*/
int sqlite4_mi_total_match_count(
  sqlite4_context *pCtx,
  int iCol,
  int iPhrase,
  int *pnMatch,
  int *pnDoc,
  int *pnRelevant
){
  return SQLITE4_OK;
}

int sqlite4_mi_total_size(sqlite4_context *pCtx, int iCol, int *pnToken){
  int rc = SQLITE4_OK;
  if( pCtx->pFts ){
    Fts5Cursor *pCsr = pCtx->pFts;
    int nCol = pCsr->pInfo->nCol;
2524
2525
2526
2527
2528
2529
2530
2531














































2532


2533


2534
2535
2536
2537
2538
2539
2540
2541
2542





2543









2544
2545
2546
2547
2548
2549
2550
      }
    }
  }else{
    rc = SQLITE4_MISUSE;
  }
  return rc;
}















































/*
** Ensure that the pCsr->anRow[] array has been allocated. The array holds
** nPhrase*nCol zero-initialised integers. The counts themselves are not
** populated by this version of the function.
**
** Returns SQLITE4_NOMEM if the allocation fails, otherwise SQLITE4_OK.
*/
static int fts5CsrLoadRowcounts(Fts5Cursor *pCsr){
  if( pCsr->anRow==0 ){
    Fts5Expr *pExpr = pCsr->pExpr;
    Fts5Info *pInfo = pCsr->pInfo;
    int *anRow;

    pCsr->anRow = anRow = (int *)sqlite4DbMallocZero(pCsr->db, 
        pExpr->nPhrase * pInfo->nCol * sizeof(int)
    );
    if( !anRow ) return SQLITE4_NOMEM;
  }

  /* A non-void function must return a value on every path. */
  return SQLITE4_OK;
}

int sqlite4_mi_row_count(
  sqlite4_context *pCtx,          /* Context object passed to mi function */
  int iCol,                       /* Specific column (or -1) */
  int iPhrase,                    /* Specific phrase (or -1) */
  int *pnRow                      /* Total number of rows */








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>

>
>




|




>
>
>
>
>
|
>
>
>
>
>
>
>
>
>







2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
      }
    }
  }else{
    rc = SQLITE4_MISUSE;
  }
  return rc;
}

/*
** Iterate through the instance list currently held by pStr and, for each
** entry, increment the slot of anRow[] indexed by that entry's column.
**
** NOTE(review): this adds one per instance, so a row with several hits in
** the same column bumps the counter several times. If anRow[] is meant to
** hold row counts, this may over-count - confirm intended semantics.
*/
static void fts5StrLoadRowcounts(Fts5Str *pStr, int *anRow){
  InstanceList iter;

  fts5InstanceListInit(pStr->aList, pStr->nList, &iter);
  for(;;){
    if( fts5InstanceListNext(&iter)!=0 ) break;
    anRow[iter.iCol] += 1;
  }
}


/*
** Walk the expression tree rooted at pNode and accumulate counts for
** every string of every primitive node into the array at *panRow.
**
** For each primitive node the phrase is scanned from its first match to
** EOF. At each match, fts5StrLoadRowcounts() is invoked for every string
** in the phrase, each with its own nCol-sized slice of the array. *panRow
** is then moved past the nStr*nCol slots used by the node, so that the
** next primitive visited writes to the following slice.
**
** Both children of the node are visited, left before right. A NULL pNode
** is a no-op. Returns SQLITE4_OK or the first error encountered.
*/
static int fts5ExprLoadRowcounts(
  sqlite4 *db,                    /* Database handle */
  Fts5Info *pInfo,                /* Description of the fts index */
  Fts5ExprNode *pNode,            /* Root of (sub-)expression to process */
  int **panRow                    /* IN/OUT: Next unused slot of count array */
){
  int rc = SQLITE4_OK;

  if( pNode ){
    if( pNode->eType==TOKEN_PRIMITIVE ){
      int *anRow = *panRow;
      Fts5Phrase *pPhrase = pNode->pPhrase;

      /* fts5ExprAdvance() reports EOF as SQLITE4_OK with aPk set to 0,
      ** so the loop must also test aPk or it would never terminate. */
      rc = fts5ExprAdvance(db, pNode, 1);
      while( rc==SQLITE4_OK && pNode->aPk ){
        int i;
        for(i=0; i<pPhrase->nStr; i++){
          fts5StrLoadRowcounts(&pPhrase->aStr[i], &anRow[i*pInfo->nCol]);
        }
        rc = fts5ExprAdvance(db, pNode, 0);
      }

      *panRow = &anRow[pInfo->nCol * pPhrase->nStr];
    }

    if( rc==SQLITE4_OK ){
      rc = fts5ExprLoadRowcounts(db, pInfo, pNode->pLeft, panRow);
    }
    if( rc==SQLITE4_OK ){
      /* Visit the right-hand child as well as the left-hand one. */
      rc = fts5ExprLoadRowcounts(db, pInfo, pNode->pRight, panRow);
    }
  }

  return rc;
}

/*
** Populate pCsr->anRow[] if this has not already been done.
**
** The array (nPhrase*nCol zeroed integers) is allocated, then a second,
** private copy of the cursor's expression is parsed from pCsr->zExpr.
** Cursors are opened on that copy and it is scanned by
** fts5ExprLoadRowcounts(), so the position of the cursor's own expression
** (pCsr->pExpr) is not disturbed. The copy is freed before returning.
**
** Returns SQLITE4_OK on success (or if the array already existed),
** SQLITE4_NOMEM on allocation failure, or the first error returned by
** parsing, opening or scanning the copy.
**
** NOTE(review): on a failure after the allocation, pCsr->anRow stays
** non-NULL, so a later call will treat the partially filled array as
** loaded - confirm whether callers retry after an error.
*/
static int fts5CsrLoadRowcounts(Fts5Cursor *pCsr){
  int rc = SQLITE4_OK;

  if( pCsr->anRow==0 ){
    sqlite4 *db = pCsr->db;
    Fts5Expr *pCopy = 0;          /* Private copy of the expression */
    Fts5Expr *pExpr = pCsr->pExpr;
    Fts5Info *pInfo = pCsr->pInfo;
    int *anRow;

    pCsr->anRow = anRow = (int *)sqlite4DbMallocZero(db, 
        pExpr->nPhrase * pInfo->nCol * sizeof(int)
    );
    if( !anRow ) return SQLITE4_NOMEM;

    rc = fts5ParseExpression(db, pInfo->pTokenizer, pInfo->p, 
        pInfo->iRoot, pInfo->azCol, pInfo->nCol, pCsr->zExpr, &pCopy, 0
    );
    if( rc==SQLITE4_OK ){
      /* Open cursors on the copy that is about to be scanned, not on
      ** the live expression that the user cursor is iterating. */
      rc = fts5OpenExprCursors(db, pInfo, pCopy->pRoot);
    }

    if( rc==SQLITE4_OK ){
      rc = fts5ExprLoadRowcounts(db, pInfo, pCopy->pRoot, &anRow);
    }

    /* pCopy is still 0 if the parse failed before producing a tree. */
    if( pCopy ) fts5ExpressionFree(db, pCopy);
  }

  return rc;
}

int sqlite4_mi_row_count(
  sqlite4_context *pCtx,          /* Context object passed to mi function */
  int iCol,                       /* Specific column (or -1) */
  int iPhrase,                    /* Specific phrase (or -1) */
  int *pnRow                      /* Total number of rows */
Changes to src/fts5func.c.
19
20
21
22
23
24
25

26
27
28
29
30
31
32
**
**   http://xapian.org/docs/bm25.html
**
**   http://en.wikipedia.org/wiki/Okapi_BM25
*/

#include "sqliteInt.h"


/*
** Map the upper-case letters 'A'..'Z' to their lower-case equivalents.
** Every other character value is returned unchanged.
*/
static char fts5Tolower(char c){
  return (c>='A' && c<='Z') ? (char)(c + ('a' - 'A')) : c;
}

static int fts5SimpleCreate(







>







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
**
**   http://xapian.org/docs/bm25.html
**
**   http://en.wikipedia.org/wiki/Okapi_BM25
*/

#include "sqliteInt.h"
#include <math.h>                 /* temporary: For log() */

/*
** Fold a single character to lower case. Only the letters 'A' through
** 'Z' are altered; any other value passes through untouched.
*/
static char fts5Tolower(char c){
  const int isUpper = (c>='A' && c<='Z');
  if( isUpper ){
    c = (char)(c - 'A' + 'a');
  }
  return c;
}

static int fts5SimpleCreate(
Changes to src/sqlite.h.in.
4467
4468
4469
4470
4471
4472
4473

4474


4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
**   in column iCol, or in any column if iCol is negative.
**
**   If parameter iPhrase is equal to or greater than the number of phrases
**   in the current query, or if iCol is equal to or greater than the number
**   of columns in the indexed table, SQLITE4_MISUSE is returned. The value
**   of the output parameter is undefined in this case.
*/

int sqlite4_mi_column_count(sqlite4_context *, int *pnCol);


int sqlite4_mi_column_size(sqlite4_context *, int iCol, int *pnToken);
int sqlite4_mi_column_value(sqlite4_context *, int iCol, sqlite4_value **ppVal);

int sqlite4_mi_phrase_count(sqlite4_context *, int *pnPhrase);

int sqlite4_mi_match_count(sqlite4_context *, int iCol, int iPhrase, int *pn);
int sqlite4_mi_match_detail(sqlite4_context *, 
    int iCol, int iPhrase, int iMatch, int *piOff, int *piWeight
); 

int sqlite4_mi_total_size(sqlite4_context *, int iCol, int *pnToken);
int sqlite4_mi_row_count(sqlite4_context *, int iCol, int iPhrase, int *pnRow);

/*
** Undo the hack that converts floating point types to integer for
** builds on processors without floating point support.
*/
#ifdef SQLITE4_OMIT_FLOATING_POINT
# undef double







>

>
>

|
|
|

|


|

<
<







4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487


4488
4489
4490
4491
4492
4493
4494
**   in column iCol, or in any column if iCol is negative.
**
**   If parameter iPhrase is equal to or greater than the number of phrases
**   in the current query, or if iCol is equal to or greater than the number
**   of columns in the indexed table, SQLITE4_MISUSE is returned. The value
**   of the output parameter is undefined in this case.
*/

int sqlite4_mi_column_count(sqlite4_context *, int *pnCol);
int sqlite4_mi_phrase_count(sqlite4_context *, int *pnPhrase);

int sqlite4_mi_column_size(sqlite4_context *, int iCol, int *pnToken);
int sqlite4_mi_match_count(sqlite4_context *, int iCol, int iPhrase, int *pn);
int sqlite4_mi_total_size(sqlite4_context *, int iCol, int *pnToken);
int sqlite4_mi_row_count(sqlite4_context *, int iCol, int iPhrase, int *pnRow);

int sqlite4_mi_column_value(sqlite4_context *, int iCol, sqlite4_value **ppVal);
int sqlite4_mi_match_detail(sqlite4_context *, 
    int iCol, int iPhrase, int iMatch, int *piOff, int *piWeight
);




/*
** Undo the hack that converts floating point types to integer for
** builds on processors without floating point support.
*/
#ifdef SQLITE4_OMIT_FLOATING_POINT
# undef double