Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Changes to improve performance and support LIMIT clauses on fts3 tables. This branch is unstable for now. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts3-prefix-search |
Files: | files | file ages | folders |
SHA1: |
28149a7882a1e9dfe4a75ec5b91d176e |
User & Date: | dan 2011-06-02 19:57:24.733 |
Context
2011-06-03
| ||
18:00 | FTS changes: Remove unreachable code. Fix bugs. When processing a large doclist incrementally, read from disk incrementally too. (check-in: a4c7e28208 user: dan tags: fts3-prefix-search) | |
2011-06-02
| ||
19:57 | Changes to improve performance and support LIMIT clauses on fts3 tables. This branch is unstable for now. (check-in: 28149a7882 user: dan tags: fts3-prefix-search) | |
2011-05-28
| ||
15:57 | Minor changes made while planning a larger change. (check-in: 84097a4c75 user: dan tags: fts3-prefix-search) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
309 310 311 312 313 314 315 316 317 318 319 320 321 322 | #include "fts3.h" #ifndef SQLITE_CORE # include "sqlite3ext.h" SQLITE_EXTENSION_INIT1 #endif /* ** Write a 64-bit variable-length integer to memory starting at p[0]. ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes. ** The number of bytes written is returned. */ int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){ unsigned char *q = (unsigned char *) p; | > > > | 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 | #include "fts3.h" #ifndef SQLITE_CORE # include "sqlite3ext.h" SQLITE_EXTENSION_INIT1 #endif static char *fts3EvalPhrasePoslist(Fts3Phrase *, int *); static sqlite3_int64 fts3EvalPhraseDocid(Fts3Phrase *); /* ** Write a 64-bit variable-length integer to memory starting at p[0]. ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes. ** The number of bytes written is returned. */ int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){ unsigned char *q = (unsigned char *) p; |
︙ | ︙ | |||
1203 1204 1205 1206 1207 1208 1209 | } /* Regardless of the strategy selected, FTS can deliver rows in rowid (or ** docid) order. Both ascending and descending are possible. */ if( pInfo->nOrderBy==1 ){ struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0]; | > | > < | > | 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 | } /* Regardless of the strategy selected, FTS can deliver rows in rowid (or ** docid) order. Both ascending and descending are possible. */ if( pInfo->nOrderBy==1 ){ struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0]; if( pOrder->desc==0 && (pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1) ){ if( pOrder->desc ){ pInfo->idxStr = "DESC"; }else{ pInfo->idxStr = "ASC"; } pInfo->orderByConsumed = 1; } } return SQLITE_OK; } /* ** Implementation of xOpen method. |
︙ | ︙ | |||
1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 | sqlite3Fts3FreeDeferredTokens(pCsr); sqlite3_free(pCsr->aDoclist); sqlite3_free(pCsr->aMatchinfo); sqlite3_free(pCsr); return SQLITE_OK; } /* ** Position the pCsr->pStmt statement so that it is on the row ** of the %_content table that contains the last match. Return ** SQLITE_OK on success. */ static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ if( pCsr->isRequireSeek ){ | > > < > | 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 | sqlite3Fts3FreeDeferredTokens(pCsr); sqlite3_free(pCsr->aDoclist); sqlite3_free(pCsr->aMatchinfo); sqlite3_free(pCsr); return SQLITE_OK; } static int fts3RowidMethod(sqlite3_vtab_cursor *, sqlite3_int64*); /* ** Position the pCsr->pStmt statement so that it is on the row ** of the %_content table that contains the last match. Return ** SQLITE_OK on success. */ static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ if( pCsr->isRequireSeek ){ sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId); pCsr->isRequireSeek = 0; if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){ return SQLITE_OK; }else{ int rc = sqlite3_reset(pCsr->pStmt); if( rc==SQLITE_OK ){ /* If no row was found and no error has occurred, then the %_content ** table is missing a row that is present in the full-text index. |
︙ | ︙ | |||
2216 2217 2218 2219 2220 2221 2222 | return SQLITE_OK; } /* ** Append SegReader object pNew to the end of the pCsr->apSegment[] array. */ static int fts3SegReaderCursorAppend( | | | 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 | return SQLITE_OK; } /* ** Append SegReader object pNew to the end of the pCsr->apSegment[] array. */ static int fts3SegReaderCursorAppend( Fts3MultiSegReader *pCsr, Fts3SegReader *pNew ){ if( (pCsr->nSegment%16)==0 ){ Fts3SegReader **apNew; int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); if( !apNew ){ |
︙ | ︙ | |||
2241 2242 2243 2244 2245 2246 2247 | Fts3Table *p, /* FTS3 table handle */ int iIndex, /* Index to search (from 0 to p->nIndex-1) */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ int isScan, /* True to scan from zTerm to EOF */ | | | 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 | Fts3Table *p, /* FTS3 table handle */ int iIndex, /* Index to search (from 0 to p->nIndex-1) */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ int isScan, /* True to scan from zTerm to EOF */ Fts3MultiSegReader *pCsr /* Cursor object to populate */ ){ int rc = SQLITE_OK; int rc2; sqlite3_stmt *pStmt = 0; /* If iLevel is less than 0 and this is not a scan, include a seg-reader ** for the pending-terms. If this is a scan, then this call must be being |
︙ | ︙ | |||
2312 2313 2314 2315 2316 2317 2318 | Fts3Table *p, /* FTS3 table handle */ int iIndex, /* Index to search (from 0 to p->nIndex-1) */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ int isScan, /* True to scan from zTerm to EOF */ | | | | | | | | > > | | 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 | Fts3Table *p, /* FTS3 table handle */ int iIndex, /* Index to search (from 0 to p->nIndex-1) */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ int isScan, /* True to scan from zTerm to EOF */ Fts3MultiSegReader *pCsr /* Cursor object to populate */ ){ assert( iIndex>=0 && iIndex<p->nIndex ); assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel==FTS3_SEGCURSOR_PENDING || iLevel>=0 ); assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 ); assert( isPrefix==0 || isScan==0 ); /* "isScan" is only set to true by the ft4aux module, an ordinary ** full-text tables. 
*/ assert( isScan==0 || p->aIndex==0 ); memset(pCsr, 0, sizeof(Fts3MultiSegReader)); return fts3SegReaderCursor( p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr ); } static int fts3SegReaderCursorAddZero( Fts3Table *p, const char *zTerm, int nTerm, Fts3MultiSegReader *pCsr ){ return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr); } int sqlite3Fts3TermSegReaderCursor( Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ ){ Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ int rc = SQLITE_NOMEM; /* Return code */ pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader)); if( pSegcsr ){ int i; int nCost = 0; int bFound = 0; /* True once an index has been found */ Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; if( isPrefix ){ for(i=1; bFound==0 && i<p->nIndex; i++){ if( p->aIndex[i].nPrefix==nTerm ){ bFound = 1; rc = sqlite3Fts3SegReaderCursor( p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr); pSegcsr->bLookup = 1; } } for(i=1; bFound==0 && i<p->nIndex; i++){ if( p->aIndex[i].nPrefix==nTerm+1 ){ bFound = 1; rc = sqlite3Fts3SegReaderCursor( p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr ); if( rc==SQLITE_OK ){ rc = fts3SegReaderCursorAddZero(p, zTerm, nTerm, pSegcsr); } } } } if( bFound==0 ){ rc = sqlite3Fts3SegReaderCursor( p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr ); pSegcsr->bLookup = !isPrefix; } for(i=0; rc==SQLITE_OK && i<pSegcsr->nSegment; i++){ rc = sqlite3Fts3SegReaderCost(pCsr, pSegcsr->apSegment[i], &nCost); } pSegcsr->nCost = nCost; } *ppSegcsr = pSegcsr; return rc; } static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ sqlite3Fts3SegReaderFinish(pSegcsr); sqlite3_free(pSegcsr); } /* ** This function retreives the doclist for the specified term (or term ** prefix) from the 
database. |
︙ | ︙ | |||
2423 2424 2425 2426 2427 2428 2429 | Fts3PhraseToken *pTok, /* Token to query for */ int iColumn, /* Column to query (or -ve for all columns) */ int isReqPos, /* True to include position lists in output */ int *pnOut, /* OUT: Size of buffer at *ppOut */ char **ppOut /* OUT: Malloced result buffer */ ){ int rc; /* Return code */ | | | 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 | Fts3PhraseToken *pTok, /* Token to query for */ int iColumn, /* Column to query (or -ve for all columns) */ int isReqPos, /* True to include position lists in output */ int *pnOut, /* OUT: Size of buffer at *ppOut */ char **ppOut /* OUT: Malloced result buffer */ ){ int rc; /* Return code */ Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ TermSelect tsc; /* Context object for fts3TermSelectCb() */ Fts3SegFilter filter; /* Segment term filter configuration */ pSegcsr = pTok->pSegcsr; memset(&tsc, 0, sizeof(TermSelect)); tsc.isReqPos = isReqPos; |
︙ | ︙ | |||
2550 2551 2552 2553 2554 2555 2556 | pOut += sqlite3Fts3PutVarint(pOut, delta); } *pnList = (int)(pOut - aList); } } | | | 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 | pOut += sqlite3Fts3PutVarint(pOut, delta); } *pnList = (int)(pOut - aList); } } /* ** Return a DocList corresponding to the phrase *pPhrase. ** ** If this function returns SQLITE_OK, but *pnOut is set to a negative value, ** then no tokens in the phrase were looked up in the full-text index. This ** is only possible when this function is called from within xFilter(). The ** caller should assume that all documents match the phrase. The actual ** filtering will take place in xNext(). |
︙ | ︙ | |||
2578 2579 2580 2581 2582 2583 2584 | int isTermPos = (pPhrase->nToken>1 || isReqPos); Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; int isFirst = 1; int iPrevTok = 0; int nDoc = 0; | < < < < < < < < < < < < < < < < < < < < | 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 | int isTermPos = (pPhrase->nToken>1 || isReqPos); Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; int isFirst = 1; int iPrevTok = 0; int nDoc = 0; for(ii=0; ii<pPhrase->nToken; ii++){ Fts3PhraseToken *pTok; /* Token to find doclist for */ int iTok = 0; /* The token being queried this iteration */ char *pList = 0; /* Pointer to token doclist */ int nList = 0; /* Size of buffer at pList */ /* Select a token to process. If this is an xFilter() call, then tokens |
︙ | ︙ | |||
2622 2623 2624 2625 2626 2627 2628 | pTok = &pPhrase->aToken[iTok]; }else{ int nMinCost = 0x7FFFFFFF; int jj; /* Find the remaining token with the lowest cost. */ for(jj=0; jj<pPhrase->nToken; jj++){ | | | 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 | pTok = &pPhrase->aToken[iTok]; }else{ int nMinCost = 0x7FFFFFFF; int jj; /* Find the remaining token with the lowest cost. */ for(jj=0; jj<pPhrase->nToken; jj++){ Fts3MultiSegReader *pSegcsr = pPhrase->aToken[jj].pSegcsr; if( pSegcsr && pSegcsr->nCost<nMinCost ){ iTok = jj; nMinCost = pSegcsr->nCost; } } pTok = &pPhrase->aToken[iTok]; |
︙ | ︙ | |||
2824 2825 2826 2827 2828 2829 2830 2831 | assert( pCsr->eEvalmode==FTS3_EVAL_FILTER ); if( pnExpr && pExpr->eType!=FTSQUERY_AND ){ (*pnExpr)++; pnExpr = 0; } if( pExpr->eType==FTSQUERY_PHRASE ){ Fts3Phrase *pPhrase = pExpr->pPhrase; | > < | | 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 | assert( pCsr->eEvalmode==FTS3_EVAL_FILTER ); if( pnExpr && pExpr->eType!=FTSQUERY_AND ){ (*pnExpr)++; pnExpr = 0; } if( pExpr->eType==FTSQUERY_PHRASE ){ int ii; /* Used to iterate through phrase tokens */ Fts3Phrase *pPhrase = pExpr->pPhrase; for(ii=0; rc==SQLITE_OK && ii<pPhrase->nToken; ii++){ Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; if( pTok->pSegcsr==0 ){ rc = sqlite3Fts3TermSegReaderCursor( pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pSegcsr ); } } }else{ rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pLeft, pnExpr); if( rc==SQLITE_OK ){ |
︙ | ︙ | |||
2876 2877 2878 2879 2880 2881 2882 | static int fts3ExprCost(Fts3Expr *pExpr){ int nCost; /* Return value */ if( pExpr->eType==FTSQUERY_PHRASE ){ Fts3Phrase *pPhrase = pExpr->pPhrase; int ii; nCost = 0; for(ii=0; ii<pPhrase->nToken; ii++){ | | | 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 | static int fts3ExprCost(Fts3Expr *pExpr){ int nCost; /* Return value */ if( pExpr->eType==FTSQUERY_PHRASE ){ Fts3Phrase *pPhrase = pExpr->pPhrase; int ii; nCost = 0; for(ii=0; ii<pPhrase->nToken; ii++){ Fts3MultiSegReader *pSegcsr = pPhrase->aToken[ii].pSegcsr; if( pSegcsr ) nCost += pSegcsr->nCost; } }else{ nCost = fts3ExprCost(pExpr->pLeft) + fts3ExprCost(pExpr->pRight); } return nCost; } |
︙ | ︙ | |||
3171 3172 3173 3174 3175 3176 3177 | ** subsequently to determine whether or not an EOF was hit. */ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ int res; int rc = SQLITE_OK; /* Return code */ Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; | > > > | | | | | | | | | | | | | | | | | | | | | | | | | | | | > | 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 | ** subsequently to determine whether or not an EOF was hit. */ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ int res; int rc = SQLITE_OK; /* Return code */ Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; if( pCsr->bIncremental ){ rc = sqlite3Fts3EvalNext(pCsr, pCsr->pExpr); }else{ pCsr->eEvalmode = FTS3_EVAL_NEXT; do { if( pCsr->aDoclist==0 ){ if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ pCsr->isEof = 1; rc = sqlite3_reset(pCsr->pStmt); break; } pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); }else{ if( pCsr->desc==0 ){ if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){ pCsr->isEof = 1; break; } fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId); }else{ fts3GetReverseDeltaVarint(&pCsr->pNextId,pCsr->aDoclist,&pCsr->iPrevId); if( pCsr->pNextId<=pCsr->aDoclist ){ pCsr->isEof = 1; break; } } sqlite3_reset(pCsr->pStmt); pCsr->isRequireSeek = 1; pCsr->isMatchinfoNeeded = 1; } }while( SQLITE_OK==(rc = fts3EvalDeferred(pCsr, &res)) && res==0 ); } return rc; } /* ** This is the xFilter interface for the virtual table. See ** the virtual table xFilter method documentation for additional |
︙ | ︙ | |||
3226 3227 3228 3229 3230 3231 3232 | static int fts3FilterMethod( sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ int idxNum, /* Strategy index */ const char *idxStr, /* Unused */ int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ | < < < < | | 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 | static int fts3FilterMethod( sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ int idxNum, /* Strategy index */ const char *idxStr, /* Unused */ int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ int rc; char *zSql; /* SQL statement used to access %_content */ Fts3Table *p = (Fts3Table *)pCursor->pVtab; Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; UNUSED_PARAMETER(idxStr); UNUSED_PARAMETER(nVal); |
︙ | ︙ | |||
3262 3263 3264 3265 3266 3267 3268 | } rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr ); if( rc!=SQLITE_OK ){ if( rc==SQLITE_ERROR ){ | > | < > | > | > > | > > | < > | < < | | > | | > < < > | 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 | } rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr ); if( rc!=SQLITE_OK ){ if( rc==SQLITE_ERROR ){ static const char *zErr = "malformed MATCH expression: [%s]"; p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery); } return rc; } rc = sqlite3Fts3ReadLock(p); if( rc!=SQLITE_OK ) return rc; pCsr->bIncremental = 1; rc = sqlite3Fts3EvalStart(pCsr, pCsr->pExpr, 1); sqlite3Fts3SegmentsClose(p); if( rc!=SQLITE_OK ) return rc; pCsr->pNextId = pCsr->aDoclist; pCsr->iPrevId = 0; } /* Compile a SELECT statement for this cursor. For a full-table-scan, the ** statement loops through all rows of the %_content table. For a ** full-text query or docid lookup, the statement retrieves a single ** row by docid. */ if( idxNum==FTS3_FULLSCAN_SEARCH ){ const char *zSort = (idxStr ? 
idxStr : "ASC"); const char *zTmpl = "SELECT %s FROM %Q.'%q_content' AS x ORDER BY docid %s"; zSql = sqlite3_mprintf(zTmpl, p->zReadExprlist, p->zDb, p->zName, zSort); }else{ const char *zTmpl = "SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?"; zSql = sqlite3_mprintf(zTmpl, p->zReadExprlist, p->zDb, p->zName); } if( !zSql ) return SQLITE_NOMEM; rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); sqlite3_free(zSql); if( rc!=SQLITE_OK ) return rc; if( idxNum==FTS3_DOCID_SEARCH ){ rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); if( rc!=SQLITE_OK ) return rc; } assert( pCsr->desc==0 ); pCsr->eSearch = (i16)idxNum; if( rc==SQLITE_OK && pCsr->nDoclist>0 && idxStr && idxStr[0]=='D' ){ sqlite3_int64 iDocid = 0; char *csr = pCsr->aDoclist; while( csr<&pCsr->aDoclist[pCsr->nDoclist] ){ fts3GetDeltaVarint(&csr, &iDocid); } pCsr->pNextId = csr; |
︙ | ︙ | |||
3333 3334 3335 3336 3337 3338 3339 | ** This is the xRowid method. The SQLite core calls this routine to ** retrieve the rowid for the current row of the result set. fts3 ** exposes %_content.docid as the rowid for the virtual table. The ** rowid should be written to *pRowid. */ static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; | > > | | 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 | ** This is the xRowid method. The SQLite core calls this routine to ** retrieve the rowid for the current row of the result set. fts3 ** exposes %_content.docid as the rowid for the virtual table. The ** rowid should be written to *pRowid. */ static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; if( pCsr->bIncremental ){ *pRowid = sqlite3Fts3EvalDocid(pCsr, pCsr->pExpr); }else if( pCsr->aDoclist ){ *pRowid = pCsr->iPrevId; }else{ /* This branch runs if the query is implemented using a full-table scan ** (not using the full-text index). In this case grab the rowid from the ** SELECT statement. */ assert( pCsr->isRequireSeek==0 ); |
︙ | ︙ | |||
3456 3457 3458 3459 3460 3461 3462 | /* ** Load the doclist associated with expression pExpr to pExpr->aDoclist. ** The loaded doclist contains positions as well as the document ids. ** This is used by the matchinfo(), snippet() and offsets() auxiliary ** functions. */ int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){ | | > | | | | > | > | | 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 | /* ** Load the doclist associated with expression pExpr to pExpr->aDoclist. ** The loaded doclist contains positions as well as the document ids. ** This is used by the matchinfo(), snippet() and offsets() auxiliary ** functions. */ int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){ int rc = SQLITE_OK; if( pCsr->bIncremental==0 ){ Fts3Phrase *pPhrase = pExpr->pPhrase; assert( pExpr->eType==FTSQUERY_PHRASE && pPhrase ); assert( pCsr->eEvalmode==FTS3_EVAL_NEXT ); rc = fts3EvalExpr(pCsr, pExpr, &pPhrase->aDoclist, &pPhrase->nDoclist, 1); } return rc; } /* ** TODO: This is something to do with matchinfo(). Similar to ** sqlite3ExprLoadDoclists() but slightly different. ** ** UPDATE: Only used when there are deferred tokens. */ int sqlite3Fts3ExprLoadFtDoclist( Fts3Cursor *pCsr, Fts3Expr *pExpr, char **paDoclist, int *pnDoclist ){ int rc = SQLITE_OK; assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase ); assert( pCsr->eEvalmode==FTS3_EVAL_NEXT ); assert( pCsr->bIncremental==0 ); pCsr->eEvalmode = FTS3_EVAL_MATCHINFO; rc = fts3EvalExpr(pCsr, pExpr, paDoclist, pnDoclist, 1); pCsr->eEvalmode = FTS3_EVAL_NEXT; return rc; } |
︙ | ︙ | |||
3503 3504 3505 3506 3507 3508 3509 | c = *p--; } if( p>pStart ){ p = &p[2]; } while( *p++&0x80 ); *ppPoslist = p; } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 | c = *p--; } if( p>pStart ){ p = &p[2]; } while( *p++&0x80 ); *ppPoslist = p; } /* ** Helper function used by the implementation of the overloaded snippet(), ** offsets() and optimize() SQL functions. ** ** If the value passed as the third argument is a blob of size ** sizeof(Fts3Cursor*), then the blob contents are copied to the ** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error |
︙ | ︙ | |||
3981 3982 3983 3984 3985 3986 3987 3988 3989 | char **pzErrMsg, const sqlite3_api_routines *pApi ){ SQLITE_EXTENSION_INIT2(pApi) return sqlite3Fts3Init(db); } #endif #endif | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 
3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 
4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 
4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 | char **pzErrMsg, const sqlite3_api_routines *pApi ){ SQLITE_EXTENSION_INIT2(pApi) return sqlite3Fts3Init(db); } #endif /************************************************************************* ************************************************************************** ************************************************************************** ************************************************************************** *************************************************************************/ /* ** Allocate an Fts3MultiSegReader for each token in the expression headed ** by pExpr. ** ** An Fts3SegReader object is a cursor that can seek or scan a range of ** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple ** Fts3SegReader objects internally to provide an interface to seek or scan ** within the union of all segments of a b-tree. Hence the name. ** ** If the allocated Fts3MultiSegReader just seeks to a single entry in a ** segment b-tree (if the term is not a prefix or it is a prefix for which ** there exists prefix b-tree of the right length) then it may be traversed ** and merged incrementally. Otherwise, it has to be merged into an in-memory ** doclist and then traversed. */ static void fts3EvalAllocateReaders( Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pnToken, /* OUT: Total number of tokens in phrase. 
*/ int *pRc ){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_PHRASE ){ int i; int nToken = pExpr->pPhrase->nToken; *pnToken += nToken; for(i=0; i<nToken; i++){ Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; int rc = sqlite3Fts3TermSegReaderCursor(pCsr, pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr ); if( rc!=SQLITE_OK ){ *pRc = rc; return; } } }else{ fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pRc); fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pRc); } } } static int fts3EvalPhraseLoad( Fts3Cursor *pCsr, Fts3Phrase *p ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int iToken; int rc = SQLITE_OK; char *aDoclist = 0; int nDoclist = 0; int iPrev = -1; for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){ Fts3PhraseToken *pToken = &p->aToken[iToken]; assert( pToken->pSegcsr || pToken->pDeferred ); if( pToken->pDeferred==0 ){ int nThis = 0; char *pThis = 0; rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis); if( rc==SQLITE_OK ){ if( pThis==0 ){ sqlite3_free(aDoclist); aDoclist = 0; nDoclist = 0; break; }else if( aDoclist==0 ){ aDoclist = pThis; nDoclist = nThis; }else{ assert( iPrev>=0 ); fts3DoclistMerge(MERGE_POS_PHRASE, iToken-iPrev, 0, pThis, &nThis, aDoclist, nDoclist, pThis, nThis, 0 ); sqlite3_free(aDoclist); aDoclist = pThis; nDoclist = nThis; } iPrev = iToken; } } } if( rc==SQLITE_OK ){ p->doclist.aAll = aDoclist; p->doclist.nAll = nDoclist; }else{ sqlite3_free(aDoclist); } return rc; } static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int iToken; int rc = SQLITE_OK; int nMaxUndeferred = -1; char *aPoslist = 0; int nPoslist = 0; int iPrev = -1; for(iToken=0; rc==SQLITE_OK && iToken<pPhrase->nToken; iToken++){ Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; Fts3DeferredToken *pDeferred = pToken->pDeferred; if( pDeferred ){ char *pList; int nList; rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList); if( 
rc!=SQLITE_OK ) return rc; if( pList==0 ){ sqlite3_free(aPoslist); pPhrase->doclist.pList = 0; pPhrase->doclist.nList = 0; return SQLITE_OK; }else if( aPoslist==0 ){ aPoslist = pList; nPoslist = nList; }else{ assert( iPrev>=0 ); char *aOut = pList; char *p1 = aPoslist; char *p2 = aOut; fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2); sqlite3_free(aPoslist); aPoslist = pList; nPoslist = aOut - aPoslist; if( nPoslist==0 ){ sqlite3_free(aPoslist); pPhrase->doclist.pList = 0; pPhrase->doclist.nList = 0; return SQLITE_OK; } } iPrev = iToken; }else{ nMaxUndeferred = iToken; } } if( iPrev>=0 ){ if( nMaxUndeferred<0 ){ pPhrase->doclist.pList = aPoslist; pPhrase->doclist.nList = nPoslist; pPhrase->doclist.iDocid = pCsr->iPrevId; }else{ int nDistance; char *p1; char *p2; char *aOut; if( nMaxUndeferred>iPrev ){ p1 = aPoslist; p2 = pPhrase->doclist.pList; nDistance = nMaxUndeferred - iPrev; }else{ p1 = pPhrase->doclist.pList; p2 = aPoslist; nDistance = iPrev - nMaxUndeferred; } aOut = (char *)sqlite3_malloc(nPoslist+8); if( !aOut ){ sqlite3_free(aPoslist); return SQLITE_NOMEM; } pPhrase->doclist.pList = aOut; if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){ pPhrase->doclist.nList = (aOut - pPhrase->doclist.pList); sqlite3_free(aPoslist); }else{ sqlite3_free(aOut); pPhrase->doclist.pList = 0; pPhrase->doclist.nList = 0; } } } return SQLITE_OK; } /* ** The following three functions: ** ** fts3EvalPhraseStart() ** fts3EvalPhraseNext() ** fts3EvalPhraseReset() ** ** May be used with a phrase object after fts3EvalAllocateReaders() has been ** called to iterate through the set of docids that match the phrase. ** ** After a successful call to fts3EvalPhraseNext(), the following two ** functions may be called to access the current docid and position-list. 
** ** fts3EvalPhraseDocid() ** fts3EvalPhrasePoslist() */ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ int rc; Fts3Doclist *pList = &p->doclist; Fts3PhraseToken *pFirst = &p->aToken[0]; assert( pList->aAll==0 ); if( p->nToken==1 && bOptOk==1 && pFirst->pSegcsr && pFirst->pSegcsr->bLookup ){ /* Use the incremental approach. */ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); rc = sqlite3Fts3MsrIncrStart( pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n); p->bIncr = 1; }else{ /* Load the full doclist for the phrase into memory. */ rc = fts3EvalPhraseLoad(pCsr, p); p->bIncr = 0; } assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr ); return rc; } /* ** Attempt to move the phrase iterator to point to the next matching docid. ** If an error occurs, return an SQLite error code. Otherwise, return ** SQLITE_OK. ** ** If there is no "next" entry and no error occurs, then *pbEof is set to ** 1 before returning. Otherwise, if no error occurs and the iterator is ** successfully advanced, *pbEof is set to 0. */ static int fts3EvalPhraseNext(Fts3Cursor *pCsr, Fts3Phrase *p, u8 *pbEof){ int rc = SQLITE_OK; if( p->bIncr ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; assert( p->nToken==1 ); rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, &p->doclist.iDocid, &p->doclist.pList, &p->doclist.nList ); if( rc==SQLITE_OK && !p->doclist.pList ){ *pbEof = 1; } }else{ char *pIter; Fts3Doclist *pDL = &p->doclist; if( pDL->pNextDocid ){ pIter = pDL->pNextDocid; }else{ pIter = pDL->aAll; } if( pIter>=&pDL->aAll[pDL->nAll] ){ /* We have already reached the end of this doclist. EOF. 
*/ *pbEof = 1; }else{ fts3GetDeltaVarint(&pIter, &pDL->iDocid); pDL->pList = pIter; fts3PoslistCopy(0, &pIter); pDL->nList = (pIter - pDL->pList); pDL->pNextDocid = pIter; *pbEof = 0; } } return rc; } static int fts3EvalPhraseReset(Fts3Cursor *pCsr, Fts3Phrase *p){ return SQLITE_OK; } static sqlite3_int64 fts3EvalPhraseDocid(Fts3Phrase *p){ return p->doclist.iDocid; } static char *fts3EvalPhrasePoslist(Fts3Phrase *p, int *pnList){ if( pnList ){ *pnList = p->doclist.nList; } return p->doclist.pList; } static void fts3EvalStartReaders( Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk, int *pRc ){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_PHRASE ){ int i; int nToken = pExpr->pPhrase->nToken; for(i=0; i<nToken; i++){ if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break; } pExpr->bDeferred = (i==nToken); *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase); }else{ if( pExpr->eType==FTSQUERY_NEAR ){ bOptOk = 0; } fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } } static void fts3EvalNearMerge( Fts3Expr *p1, Fts3Expr *p2, int nNear, int *pRc ){ if( *pRc==SQLITE_OK ){ int rc; /* Return code */ Fts3Phrase *pLeft = p1->pPhrase; Fts3Phrase *pRight = p2->pPhrase; assert( p2->eType==FTSQUERY_PHRASE && pLeft ); assert( p2->eType==FTSQUERY_PHRASE && pRight ); if( pLeft->doclist.aAll==0 ){ sqlite3_free(pRight->doclist.aAll); pRight->doclist.aAll = 0; pRight->doclist.nAll = 0; }else if( pRight->doclist.aAll ){ char *aOut; /* Buffer in which to assemble new doclist */ int nOut; /* Size of buffer aOut in bytes */ *pRc = fts3NearMerge(MERGE_POS_NEAR, nNear, pLeft->nToken, pLeft->doclist.aAll, pLeft->doclist.nAll, pRight->nToken, pRight->doclist.aAll, pRight->doclist.nAll, &aOut, &nOut ); sqlite3_free(pRight->doclist.aAll); pRight->doclist.aAll = aOut; pRight->doclist.nAll = nOut; } } } static void fts3EvalNearTrim(Fts3Cursor *pCsr, 
Fts3Expr *pExpr, int *pRc){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_NEAR ){ Fts3Expr *pLeft = pExpr->pLeft; int nPhrase = 2; Fts3Expr **aPhrase; assert( pLeft ); assert( pExpr->pRight ); assert( pExpr->pRight->eType==FTSQUERY_PHRASE ); while( pLeft->eType!=FTSQUERY_PHRASE ){ assert( pLeft->eType==FTSQUERY_NEAR ); assert( pLeft->pRight->eType==FTSQUERY_PHRASE ); pLeft = pLeft->pLeft; nPhrase++; } aPhrase = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nPhrase); if( !aPhrase ){ *pRc = SQLITE_NOMEM; }else{ int i = 1; aPhrase[0] = pLeft; do { pLeft = pLeft->pParent; aPhrase[i++] = pLeft->pRight; }while( pLeft!=pExpr ); for(i=0; i<(nPhrase-1); i++){ int nNear = aPhrase[i+1]->pParent->nNear; fts3EvalNearMerge(aPhrase[i], aPhrase[i+1], nNear, pRc); } for(i=nPhrase-2; i>=0; i--){ int nNear = aPhrase[i+1]->pParent->nNear; fts3EvalNearMerge(aPhrase[i+1], aPhrase[i], nNear, pRc); } sqlite3_free(aPhrase); } }else{ fts3EvalNearTrim(pCsr, pExpr->pLeft, pRc); fts3EvalNearTrim(pCsr, pExpr->pRight, pRc); } } } typedef struct Fts3TokenAndCost Fts3TokenAndCost; struct Fts3TokenAndCost { Fts3PhraseToken *pToken; int nOvfl; int iCol; }; static void fts3EvalTokenCosts( Fts3Cursor *pCsr, Fts3Expr *pExpr, Fts3TokenAndCost **ppTC, int *pRc ){ if( *pRc==SQLITE_OK && pExpr ){ if( pExpr->eType==FTSQUERY_PHRASE ){ Fts3Phrase *pPhrase = pExpr->pPhrase; int i; for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){ Fts3TokenAndCost *pTC = (*ppTC)++; pTC->pToken = &pPhrase->aToken[i]; pTC->iCol = pPhrase->iColumn; *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); } }else if( pExpr->eType==FTSQUERY_AND ){ fts3EvalTokenCosts(pCsr, pExpr->pLeft, ppTC, pRc); fts3EvalTokenCosts(pCsr, pExpr->pRight, ppTC, pRc); } } } static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ if( pCsr->nRowAvg==0 ){ /* The average document size, which is required to calculate the cost ** of each doclist, has not yet been determined. 
Read the required ** data from the %_stat table to calculate it. ** ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 ** varints, where nCol is the number of columns in the FTS3 table. ** The first varint is the number of documents currently stored in ** the table. The following nCol varints contain the total amount of ** data stored in all rows of each column of the table, from left ** to right. */ int rc; Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; sqlite3_stmt *pStmt; sqlite3_int64 nDoc = 0; sqlite3_int64 nByte = 0; const char *pEnd; const char *a; rc = sqlite3Fts3SelectDoctotal(p, &pStmt); if( rc!=SQLITE_OK ) return rc; a = sqlite3_column_blob(pStmt, 0); assert( a ); pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; a += sqlite3Fts3GetVarint(a, &nDoc); while( a<pEnd ){ a += sqlite3Fts3GetVarint(a, &nByte); } if( nDoc==0 || nByte==0 ){ sqlite3_reset(pStmt); return SQLITE_CORRUPT_VTAB; } pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz); assert( pCsr->nRowAvg>0 ); rc = sqlite3_reset(pStmt); if( rc!=SQLITE_OK ) return rc; } *pnPage = pCsr->nRowAvg; return SQLITE_OK; } int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc = SQLITE_OK; int nToken = 0; /* Allocate a MultiSegReader for each token in the expression. */ fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &rc); /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO: ** This call will eventually also be responsible for determining which ** tokens are 'deferred' until the document text is loaded into memory. ** ** Each token in each phrase is dealt with using one of the following ** three strategies: ** ** 1. Entire doclist loaded into memory as part of the ** fts3EvalStartReaders() call. ** ** 2. Doclist loaded into memory incrementally, as part of each ** sqlite3Fts3EvalNext() call. ** ** 3. Token doclist is never loaded. 
Instead, documents are loaded into ** memory and scanned for the token as part of the sqlite3Fts3EvalNext() ** call. This is known as a "deferred" token. */ /* If bOptOk is true, check if there are any tokens that can be ** deferred (strategy 3). */ if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){ Fts3TokenAndCost *aTC; aTC = (Fts3TokenAndCost *)sqlite3_malloc(sizeof(Fts3TokenAndCost) * nToken); if( !aTC ){ rc = SQLITE_NOMEM; }else{ int ii; int nDocEst = 0; int nDocSize; Fts3TokenAndCost *pTC = aTC; rc = fts3EvalAverageDocsize(pCsr, &nDocSize); fts3EvalTokenCosts(pCsr, pExpr, &pTC, &rc); nToken = pTC-aTC; for(ii=0; rc==SQLITE_OK && ii<nToken; ii++){ int jj; pTC = 0; for(jj=0; jj<nToken; jj++){ if( aTC[jj].pToken && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) ){ pTC = &aTC[jj]; } } assert( pTC ); /* At this point pTC points to the cheapest remaining token. */ if( ii==0 ){ if( pTC->nOvfl ){ nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10; }else{ /* TODO: Fix this so that the doclist need not be read twice. */ Fts3PhraseToken *pToken = pTC->pToken; int nList = 0; char *pList = 0; rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList); if( rc==SQLITE_OK ){ nDocEst = fts3DoclistCountDocids(1, pList, nList); } sqlite3_free(pList); if( rc==SQLITE_OK ){ rc = sqlite3Fts3TermSegReaderCursor(pCsr, pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr ); } } }else{ if( pTC->nOvfl>=(nDocEst*nDocSize) ){ Fts3PhraseToken *pToken = pTC->pToken; rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); fts3SegReaderCursorFree(pToken->pSegcsr); pToken->pSegcsr = 0; } nDocEst = 1 + (nDocEst/4); } pTC->pToken = 0; } sqlite3_free(aTC); } } fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc); /* Fix the results of NEAR expressions. 
*/ fts3EvalNearTrim(pCsr, pExpr, &rc); return rc; } static void fts3EvalNext( Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc ){ if( *pRc==SQLITE_OK ){ pExpr->bStart = 1; switch( pExpr->eType ){ case FTSQUERY_NEAR: case FTSQUERY_AND: { Fts3Expr *pLeft = pExpr->pLeft; Fts3Expr *pRight = pExpr->pRight; assert( !pLeft->bDeferred || !pRight->bDeferred ); if( pLeft->bDeferred ){ fts3EvalNext(pCsr, pRight, pRc); pExpr->iDocid = pRight->iDocid; pExpr->bEof = pRight->bEof; }else if( pRight->bDeferred ){ fts3EvalNext(pCsr, pLeft, pRc); pExpr->iDocid = pLeft->iDocid; pExpr->bEof = pLeft->bEof; }else{ fts3EvalNext(pCsr, pLeft, pRc); fts3EvalNext(pCsr, pRight, pRc); while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ int iDiff = pLeft->iDocid - pRight->iDocid; if( iDiff==0 ) break; if( iDiff<0 ){ fts3EvalNext(pCsr, pLeft, pRc); }else{ fts3EvalNext(pCsr, pRight, pRc); } } pExpr->iDocid = pLeft->iDocid; pExpr->bEof = (pLeft->bEof || pRight->bEof); } break; } case FTSQUERY_OR: { Fts3Expr *pLeft = pExpr->pLeft; Fts3Expr *pRight = pExpr->pRight; assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid ); assert( pRight->bStart || pLeft->iDocid==pRight->iDocid ); if( pLeft->iDocid==pRight->iDocid ){ fts3EvalNext(pCsr, pLeft, pRc); fts3EvalNext(pCsr, pRight, pRc); }else if( pRight->bEof || (pLeft->bEof==0 && pLeft->iDocid<pRight->iDocid) ){ fts3EvalNext(pCsr, pLeft, pRc); }else{ fts3EvalNext(pCsr, pRight, pRc); } pExpr->bEof = (pLeft->bEof && pRight->bEof); if( pRight->bEof || (pLeft->bEof==0 && pLeft->iDocid<pRight->iDocid) ){ pExpr->iDocid = pLeft->iDocid; }else{ pExpr->iDocid = pRight->iDocid; } break; } case FTSQUERY_NOT: { Fts3Expr *pLeft = pExpr->pLeft; Fts3Expr *pRight = pExpr->pRight; if( pRight->bStart==0 ){ fts3EvalNext(pCsr, pRight, pRc); assert( *pRc!=SQLITE_OK || pRight->bStart ); } do { fts3EvalNext(pCsr, pLeft, pRc); if( pLeft->bEof ) break; while( !*pRc && !pRight->bEof && pRight->iDocid<pLeft->iDocid ){ fts3EvalNext(pCsr, pRight, pRc); } }while( !pRight->bEof && 
pRight->iDocid==pLeft->iDocid && !*pRc ); pExpr->iDocid = pLeft->iDocid; pExpr->bEof = pLeft->bEof; break; } default: assert( pExpr->eType==FTSQUERY_PHRASE ); *pRc = fts3EvalPhraseNext(pCsr, pExpr->pPhrase, &pExpr->bEof); pExpr->iDocid = fts3EvalPhraseDocid(pExpr->pPhrase); break; } } } static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){ int bHit = 0; if( *pRc==SQLITE_OK ){ switch( pExpr->eType ){ case FTSQUERY_NEAR: case FTSQUERY_AND: bHit = ( fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) ); break; case FTSQUERY_OR: bHit = ( fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) || fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) ); break; case FTSQUERY_NOT: bHit = ( fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) ); break; default: assert( pExpr->eType==FTSQUERY_PHRASE ); *pRc = fts3EvalDeferredPhrase(pCsr, pExpr->pPhrase); bHit = (pExpr->pPhrase->doclist.pList!=0); pExpr->iDocid = pCsr->iPrevId; break; } } return bHit; } /* ** Return 1 if both of the following are true: ** ** 1. *pRc is SQLITE_OK when this function returns, and ** ** 2. After scanning the current FTS table row for the deferred tokens, ** it is determined that the row does not match the query. */ static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){ int rc = *pRc; int bMiss = 0; if( rc==SQLITE_OK && pCsr->pDeferred ){ rc = fts3CursorSeek(0, pCsr); if( rc==SQLITE_OK ){ sqlite3Fts3FreeDeferredDoclists(pCsr); rc = sqlite3Fts3CacheDeferredDoclists(pCsr); } bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc)); sqlite3Fts3FreeDeferredDoclists(pCsr); *pRc = rc; } return (rc==SQLITE_OK && bMiss); } /* ** Advance to the next document that matches the expression passed as an ** argument. 
*/ int sqlite3Fts3EvalNext(Fts3Cursor *pCsr, Fts3Expr *pExpr){ int rc = SQLITE_OK; /* Return Code */ assert( pCsr->isEof==0 ); assert( pCsr->bIncremental ); if( pExpr==0 ){ pCsr->isEof = 1; }else{ do { sqlite3_reset(pCsr->pStmt); fts3EvalNext(pCsr, pExpr, &rc); pCsr->isEof = pExpr->bEof; pCsr->isRequireSeek = 1; pCsr->isMatchinfoNeeded = 1; pCsr->iPrevId = pExpr->iDocid; }while( pCsr->isEof==0 && fts3EvalLoadDeferred(pCsr, &rc) ); } return rc; } int sqlite3Fts3EvalFinish(Fts3Cursor *pCsr, Fts3Expr *pExpr){ return SQLITE_OK; } sqlite3_int64 sqlite3Fts3EvalDocid(Fts3Cursor *pCsr, Fts3Expr *pExpr){ return pExpr->iDocid; } /* ** Return a pointer to the entire doclist, including positions, associated ** with the phrase passed as the second argument. */ int sqlite3Fts3EvalPhraseDoclist( Fts3Cursor *pCsr, /* FTS3 cursor object */ Fts3Expr *pExpr, /* Phrase to return doclist for */ const char **ppList, /* OUT: Buffer containing doclist */ int *pnList /* OUT: Size of returned buffer, in bytes */ ){ int rc = SQLITE_OK; Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase->bIncr ){ /* This phrase was being loaded from disk incrementally. But the ** matchinfo() function requires that the entire doclist be loaded into ** memory. This block loads the doclist into memory and modifies the ** Fts3Phrase structure so that it does not use the incremental strategy. 
*/ TESTONLY( int bEof = pExpr->bEof; ) TESTONLY( int bStart = pExpr->bStart; ) sqlite3_int64 iDocid = pExpr->iDocid; sqlite3Fts3EvalPhraseCleanup(pPhrase); pExpr->iDocid = 0; rc = sqlite3Fts3EvalStart(pCsr, pExpr, 0); assert( pExpr->bEof==bEof ); assert( pExpr->bStart==bStart ); assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); if( pExpr->bStart && !pExpr->bEof ){ pExpr->bStart = 0; while( rc==SQLITE_OK && pExpr->bEof==0 && pExpr->iDocid!=iDocid ){ fts3EvalNext(pCsr, pExpr, &rc); } } } *pnList = pPhrase->doclist.nAll; *ppList = pPhrase->doclist.aAll; return rc; } char *sqlite3Fts3EvalPhrasePoslist( Fts3Cursor *pCsr, /* FTS3 cursor object */ Fts3Expr *pExpr, /* Phrase to return doclist for */ sqlite3_int64 iDocid, /* Docid to return position list for */ int iCol /* Column to return position list for */ ){ Fts3Phrase *pPhrase = pExpr->pPhrase; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; char *pIter = pPhrase->doclist.pList; int iThis; assert( iCol>=0 && iCol<pTab->nColumn ); if( !pIter || pExpr->bEof || pExpr->iDocid!=iDocid || (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){ return 0; } assert( pPhrase->doclist.nList>0 ); if( *pIter==0x01 ){ pIter++; pIter += sqlite3Fts3GetVarint32(pIter, &iThis); }else{ iThis = 0; } while( iThis<iCol ){ fts3ColumnlistCopy(0, &pIter); if( *pIter==0x00 ) return 0; pIter++; pIter += sqlite3Fts3GetVarint32(pIter, &iThis); } return ((iCol==iThis)?pIter:0); } void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ int i; sqlite3_free(pPhrase->doclist.aAll); memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); for(i=0; i<pPhrase->nToken; i++){ fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); pPhrase->aToken[i].pSegcsr = 0; } } #endif |
Changes to ext/fts3/fts3Int.h.
︙ | ︙ | |||
43 44 45 46 47 48 49 50 51 52 53 54 55 56 | /* ** Macro to return the number of elements in an array. SQLite has a ** similar macro called ArraySize(). Use a different name to avoid ** a collision when building an amalgamation with built-in FTS3. */ #define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0]))) /* ** Maximum length of a varint encoded integer. The varint format is different ** from that used by SQLite, so the maximum length is 10, not 9. */ #define FTS3_VARINT_MAX 10 /* | > > > > > | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | /* ** Macro to return the number of elements in an array. SQLite has a ** similar macro called ArraySize(). Use a different name to avoid ** a collision when building an amalgamation with built-in FTS3. */ #define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0]))) #ifndef MIN # define MIN(x,y) ((x)<(y)?(x):(y)) #endif /* ** Maximum length of a varint encoded integer. The varint format is different ** from that used by SQLite, so the maximum length is 10, not 9. */ #define FTS3_VARINT_MAX 10 /* |
︙ | ︙ | |||
138 139 140 141 142 143 144 145 146 147 | typedef struct Fts3Table Fts3Table; typedef struct Fts3Cursor Fts3Cursor; typedef struct Fts3Expr Fts3Expr; typedef struct Fts3Phrase Fts3Phrase; typedef struct Fts3PhraseToken Fts3PhraseToken; typedef struct Fts3SegFilter Fts3SegFilter; typedef struct Fts3DeferredToken Fts3DeferredToken; typedef struct Fts3SegReader Fts3SegReader; | > | | 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 | typedef struct Fts3Table Fts3Table; typedef struct Fts3Cursor Fts3Cursor; typedef struct Fts3Expr Fts3Expr; typedef struct Fts3Phrase Fts3Phrase; typedef struct Fts3PhraseToken Fts3PhraseToken; typedef struct Fts3Doclist Fts3Doclist; typedef struct Fts3SegFilter Fts3SegFilter; typedef struct Fts3DeferredToken Fts3DeferredToken; typedef struct Fts3SegReader Fts3SegReader; typedef struct Fts3MultiSegReader Fts3MultiSegReader; /* ** A connection to a fulltext index is an instance of the following ** structure. The xCreate and xConnect methods create an instance ** of this structure and xDestroy and xDisconnect free that instance. ** All other methods receive a pointer to the structure as one of their ** arguments. |
︙ | ︙ | |||
220 221 222 223 224 225 226 227 228 229 230 231 232 233 | struct Fts3Cursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ i16 eSearch; /* Search strategy (see below) */ u8 isEof; /* True if at End Of Results */ u8 isRequireSeek; /* True if must seek pStmt to %_content row */ sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ Fts3Expr *pExpr; /* Parsed MATCH query string */ int nPhrase; /* Number of matchable phrases in query */ Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ char *pNextId; /* Pointer into the body of aDoclist */ char *aDoclist; /* List of docids for full-text queries */ int nDoclist; /* Size of buffer at aDoclist */ int desc; /* True to sort in descending order */ | > | 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 | struct Fts3Cursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ i16 eSearch; /* Search strategy (see below) */ u8 isEof; /* True if at End Of Results */ u8 isRequireSeek; /* True if must seek pStmt to %_content row */ sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ Fts3Expr *pExpr; /* Parsed MATCH query string */ int bIncremental; /* True to use incremental querying */ int nPhrase; /* Number of matchable phrases in query */ Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ char *pNextId; /* Pointer into the body of aDoclist */ char *aDoclist; /* List of docids for full-text queries */ int nDoclist; /* Size of buffer at aDoclist */ int desc; /* True to sort in descending order */ |
︙ | ︙ | |||
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 | ** indicating that all columns should be searched, ** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4. */ #define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ #define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ #define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ /* ** A "phrase" is a sequence of one or more tokens that must match in ** sequence. A single token is the base case and the most common case. ** For a sequence of tokens contained in double-quotes (i.e. "one two three") ** nToken will be the number of tokens in the string. */ struct Fts3PhraseToken { char *z; /* Text of the token */ int n; /* Number of bytes in buffer z */ int isPrefix; /* True if token ends with a "*" character */ /* Variables above this point are populated when the expression is ** parsed (by code in fts3_expr.c). Below this point the variables are ** used when evaluating the expression. */ | > > > > > > > > > > > < < > < | > | > > > > > > > | 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 | ** indicating that all columns should be searched, ** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4. 
*/ #define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ #define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ #define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ struct Fts3Doclist { char *aAll; /* Array containing doclist (or NULL) */ int nAll; /* Size of a[] in bytes */ sqlite3_int64 iDocid; /* Current docid (if p!=0) */ char *pNextDocid; /* Pointer to next docid */ char *pList; /* Pointer to position list following iDocid */ int nList; /* Length of position list */ } doclist; /* ** A "phrase" is a sequence of one or more tokens that must match in ** sequence. A single token is the base case and the most common case. ** For a sequence of tokens contained in double-quotes (i.e. "one two three") ** nToken will be the number of tokens in the string. */ struct Fts3PhraseToken { char *z; /* Text of the token */ int n; /* Number of bytes in buffer z */ int isPrefix; /* True if token ends with a "*" character */ /* Variables above this point are populated when the expression is ** parsed (by code in fts3_expr.c). Below this point the variables are ** used when evaluating the expression. */ int bFulltext; /* True if full-text index was used */ Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */ }; struct Fts3Phrase { /* Cache of doclist for this phrase. */ Fts3Doclist doclist; int bIncr; /* True if doclist is loaded incrementally */ #if 1 int isLoaded; /* True if aDoclist/nDoclist are initialized. */ char *aDoclist; /* Buffer containing doclist */ int nDoclist; /* Size of aDoclist in bytes */ sqlite3_int64 iCurrent; char *pCurrent; #endif /* Variables below this point are populated by fts3_expr.c when parsing ** a MATCH expression. Everything above is part of the evaluation phase. 
*/ int nToken; /* Number of tokens in the phrase */ int iColumn; /* Index of column this phrase must match */ Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ }; /* ** A tree of these objects forms the RHS of a MATCH operator. ** ** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist |
︙ | ︙ | |||
313 314 315 316 317 318 319 320 321 322 323 324 325 326 | struct Fts3Expr { int eType; /* One of the FTSQUERY_XXX values defined below */ int nNear; /* Valid if eType==FTSQUERY_NEAR */ Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ Fts3Expr *pLeft; /* Left operand */ Fts3Expr *pRight; /* Right operand */ Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ }; /* ** Candidate values for Fts3Query.eType. Note that the order of the first ** four values is in order of precedence when parsing expressions. For ** example, the following: ** | > > > > > > | 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 | struct Fts3Expr { int eType; /* One of the FTSQUERY_XXX values defined below */ int nNear; /* Valid if eType==FTSQUERY_NEAR */ Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ Fts3Expr *pLeft; /* Left operand */ Fts3Expr *pRight; /* Right operand */ Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ /* The following are used by the fts3_eval.c module. */ sqlite3_int64 iDocid; /* Current docid */ u8 bEof; /* True this expression is at EOF already */ u8 bStart; /* True if iDocid is valid */ u8 bDeferred; /* True if this expression is entirely deferred */ }; /* ** Candidate values for Fts3Query.eType. Note that the order of the first ** four values is in order of precedence when parsing expressions. For ** example, the following: ** |
︙ | ︙ | |||
362 363 364 365 366 367 368 | char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *); void sqlite3Fts3SegmentsClose(Fts3Table *); /* Special values interpreted by sqlite3SegReaderCursor() */ #define FTS3_SEGCURSOR_PENDING -1 #define FTS3_SEGCURSOR_ALL -2 | | | | > | | > > | | > < | 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 | char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *); void sqlite3Fts3SegmentsClose(Fts3Table *); /* Special values interpreted by sqlite3SegReaderCursor() */ #define FTS3_SEGCURSOR_PENDING -1 #define FTS3_SEGCURSOR_ALL -2 int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*); int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *); void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *); int sqlite3Fts3SegReaderCursor( Fts3Table *, int, int, const char *, int, int, int, Fts3MultiSegReader *); /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 #define FTS3_SEGMENT_PREFIX 0x00000008 #define FTS3_SEGMENT_SCAN 0x00000010 /* Type passed as 4th argument to SegmentReaderIterate() */ struct Fts3SegFilter { const char *zTerm; int nTerm; int iCol; int flags; }; struct Fts3MultiSegReader { /* Used internally by sqlite3Fts3SegReaderXXX() calls */ Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */ int nSegment; /* Size of apSegment array */ int nAdvance; /* How many seg-readers to advance */ Fts3SegFilter *pFilter; /* Pointer to filter object */ char *aBuffer; /* Buffer to merge doclists in */ int nBuffer; /* Allocated size of aBuffer[] in bytes */ int iColFilter; /* If >=0, filter for this column */ /* Used by fts3.c only. 
*/ int nCost; /* Cost of running iterator */ int bLookup; /* True if a lookup of a single entry. */ /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */ char *zTerm; /* Pointer to term buffer */ int nTerm; /* Size of zTerm in bytes */ char *aDoclist; /* Pointer to doclist buffer */ int nDoclist; /* Size of aDoclist[] in bytes */ }; /* fts3.c */ int sqlite3Fts3PutVarint(char *, sqlite3_int64); int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *); int sqlite3Fts3ExprLoadFtDoclist(Fts3Cursor *, Fts3Expr *, char **, int *); int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int); /* fts3_tokenizer.c */ const char *sqlite3Fts3NextToken(const char *, int *); int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); |
︙ | ︙ | |||
442 443 444 445 446 447 448 449 | int sqlite3Fts3ExprInitTestInterface(sqlite3 *db); int sqlite3Fts3InitTerm(sqlite3 *db); #endif /* fts3_aux.c */ int sqlite3Fts3InitAux(sqlite3 *db); #endif /* _FTSINT_H */ | > > > > > > > > > > > > > > > > > > > > > > > > > | 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 | int sqlite3Fts3ExprInitTestInterface(sqlite3 *db); int sqlite3Fts3InitTerm(sqlite3 *db); #endif /* fts3_aux.c */ int sqlite3Fts3InitAux(sqlite3 *db); int sqlite3Fts3TermSegReaderCursor( Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ ); int sqlite3Fts3EvalPhraseCache(Fts3Cursor *, Fts3Phrase *); sqlite3_int64 sqlite3Fts3EvalDocid(Fts3Cursor *, Fts3Expr *); int sqlite3Fts3EvalPhraseDoclist(Fts3Cursor*, Fts3Expr*, const char**,int*); void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *); int sqlite3Fts3EvalStart(Fts3Cursor *, Fts3Expr *, int); int sqlite3Fts3EvalNext(Fts3Cursor *pCsr, Fts3Expr *pExpr); int sqlite3Fts3MsrIncrStart( Fts3Table*, Fts3MultiSegReader*, int, const char*, int); int sqlite3Fts3MsrIncrNext( Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *); char *sqlite3Fts3EvalPhrasePoslist( Fts3Cursor *, Fts3Expr *, sqlite3_int64, int iCol); int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *); int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *); #endif /* _FTSINT_H */ |
Changes to ext/fts3/fts3_aux.c.
︙ | ︙ | |||
24 25 26 27 28 29 30 | struct Fts3auxTable { sqlite3_vtab base; /* Base class used by SQLite core */ Fts3Table *pFts3Tab; }; struct Fts3auxCursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ | | | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | struct Fts3auxTable { sqlite3_vtab base; /* Base class used by SQLite core */ Fts3Table *pFts3Tab; }; struct Fts3auxCursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ Fts3MultiSegReader csr; /* Must be right after "base" */ Fts3SegFilter filter; char *zStop; int nStop; /* Byte-length of string zStop */ int isEof; /* True if cursor is at EOF */ sqlite3_int64 iRowid; /* Current rowid */ int iCol; /* Current value of 'col' column */ |
︙ | ︙ |
Changes to ext/fts3/fts3_expr.c.
︙ | ︙ | |||
764 765 766 767 768 769 770 | ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). */ void sqlite3Fts3ExprFree(Fts3Expr *p){ if( p ){ assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); sqlite3Fts3ExprFree(p->pLeft); sqlite3Fts3ExprFree(p->pRight); | > > | > | 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 | ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). */ void sqlite3Fts3ExprFree(Fts3Expr *p){ if( p ){ assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); sqlite3Fts3ExprFree(p->pLeft); sqlite3Fts3ExprFree(p->pRight); if( p->pPhrase ){ sqlite3Fts3EvalPhraseCleanup(p->pPhrase); sqlite3_free(p->pPhrase->aDoclist); } sqlite3_free(p); } } /**************************************************************************** ***************************************************************************** ** Everything after this point is just test code. |
︙ | ︙ |
Changes to ext/fts3/fts3_snippet.c.
︙ | ︙ | |||
412 413 414 415 416 417 418 | static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ SnippetIter *p = (SnippetIter *)ctx; SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; char *pCsr; pPhrase->nToken = pExpr->pPhrase->nToken; | | | 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 | static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ SnippetIter *p = (SnippetIter *)ctx; SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; char *pCsr; pPhrase->nToken = pExpr->pPhrase->nToken; pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->pCsr->iPrevId,p->iCol); if( pCsr ){ int iFirst = 0; pPhrase->pList = pCsr; fts3GetDeltaPosition(&pCsr, &iFirst); pPhrase->pHead = pCsr; pPhrase->pTail = pCsr; pPhrase->iHead = iFirst; |
︙ | ︙ | |||
822 823 824 825 826 827 828 | */ static int fts3ExprGlobalHitsCb( Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, /* Phrase number (numbered from zero) */ void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; | < < < < < < < | < < < < < < < < < < < | | | | | < < | > > > > > > | | < | | | | | | | > | < < | < > > | | 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 | */ static int fts3ExprGlobalHitsCb( Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, /* Phrase number (numbered from zero) */ void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol]; if( pExpr->bDeferred ){ int iCol; /* Column index */ for(iCol=0; iCol<p->nCol; iCol++){ aOut[iCol*3 + 1] = (u32)p->nDoc; aOut[iCol*3 + 2] = (u32)p->nDoc; } }else{ char *pIter; char *pEnd; int n; int rc = sqlite3Fts3EvalPhraseDoclist( p->pCursor, pExpr, (const char **)&pIter, &n ); if( rc!=SQLITE_OK ) return rc; pEnd = &pIter[n]; /* Fill in the global hit count matrix row for this phrase. */ while( pIter<pEnd ){ while( *pIter++ & 0x80 ); /* Skip past docid. */ fts3LoadColumnlistCounts(&pIter, &aOut[1], 1); } } return SQLITE_OK; } /* ** fts3ExprIterate() callback used to collect the "local" part of the ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the ** array that are different for each row returned by the query. 
*/ static int fts3ExprLocalHitsCb( Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, /* Phrase number */ void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; int iStart = iPhrase * p->nCol * 3; int i; sqlite3_int64 iDocid = p->pCursor->iPrevId; for(i=0; i<p->nCol; i++){ char *pCsr; pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, iDocid, i); if( pCsr ){ p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); }else{ p->aMatchinfo[iStart+i*3] = 0; } } return SQLITE_OK; } static int fts3MatchinfoCheck( |
︙ | ︙ | |||
972 973 974 975 976 977 978 | ** iterating through a multi-column position-list corresponding to the ** hits for a single phrase on a single row in order to calculate the ** values for a matchinfo() FTS3_MATCHINFO_LCS request. */ typedef struct LcsIterator LcsIterator; struct LcsIterator { Fts3Expr *pExpr; /* Pointer to phrase expression */ | < | | 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 | ** iterating through a multi-column position-list corresponding to the ** hits for a single phrase on a single row in order to calculate the ** values for a matchinfo() FTS3_MATCHINFO_LCS request. */ typedef struct LcsIterator LcsIterator; struct LcsIterator { Fts3Expr *pExpr; /* Pointer to phrase expression */ int iPosOffset; /* Tokens count up to end of this phrase */ char *pRead; /* Cursor used to iterate through aDoclist */ int iPos; /* Current position */ }; /* ** If LcsIterator.iCol is set to the following value, the iterator has ** finished iterating through all offsets for all columns. */ |
︙ | ︙ | |||
1005 1006 1007 1008 1009 1010 1011 | */ static int fts3LcsIteratorAdvance(LcsIterator *pIter){ char *pRead = pIter->pRead; sqlite3_int64 iRead; int rc = 0; pRead += sqlite3Fts3GetVarint(pRead, &iRead); | | | < < < < < < < | 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 | */ static int fts3LcsIteratorAdvance(LcsIterator *pIter){ char *pRead = pIter->pRead; sqlite3_int64 iRead; int rc = 0; pRead += sqlite3Fts3GetVarint(pRead, &iRead); if( iRead==0 || iRead==1 ){ pRead = 0; rc = 1; }else{ pIter->iPos += (int)(iRead-2); } pIter->pRead = pRead; return rc; } |
︙ | ︙ | |||
1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 | ** undefined. */ static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ LcsIterator *aIter; int i; int iCol; int nToken = 0; /* Allocate and populate the array of LcsIterator objects. The array ** contains one element for each matchable phrase in the query. **/ aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); if( !aIter ) return SQLITE_NOMEM; memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); for(i=0; i<pInfo->nPhrase; i++){ LcsIterator *pIter = &aIter[i]; nToken -= pIter->pExpr->pPhrase->nToken; pIter->iPosOffset = nToken; | > > < < < < < < < < < < > > > > | | | | | < < < | | 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 | ** undefined. */ static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ LcsIterator *aIter; int i; int iCol; int nToken = 0; sqlite3_int64 iDocid = pCsr->iPrevId; /* Allocate and populate the array of LcsIterator objects. The array ** contains one element for each matchable phrase in the query. 
**/ aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); if( !aIter ) return SQLITE_NOMEM; memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); for(i=0; i<pInfo->nPhrase; i++){ LcsIterator *pIter = &aIter[i]; nToken -= pIter->pExpr->pPhrase->nToken; pIter->iPosOffset = nToken; } for(iCol=0; iCol<pInfo->nCol; iCol++){ int nLcs = 0; /* LCS value for this column */ int nLive = 0; /* Number of iterators in aIter not at EOF */ for(i=0; i<pInfo->nPhrase; i++){ LcsIterator *pIt = &aIter[i]; pIt->pRead = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iDocid, iCol); if( pIt->pRead ){ pIt->iPos = pIt->iPosOffset; fts3LcsIteratorAdvance(&aIter[i]); nLive++; } } while( nLive>0 ){ LcsIterator *pAdv = 0; /* The iterator to advance by one position */ int nThisLcs = 0; /* LCS for the current iterator positions */ for(i=0; i<pInfo->nPhrase; i++){ LcsIterator *pIter = &aIter[i]; if( pIter->pRead==0 ){ /* This iterator is already at EOF for this column. */ nThisLcs = 0; }else{ if( pAdv==0 || pIter->iPos<pAdv->iPos ){ pAdv = pIter; } if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ |
︙ | ︙ | |||
1422 1423 1424 1425 1426 1427 1428 | TermOffsetCtx *p = (TermOffsetCtx *)ctx; int nTerm; /* Number of tokens in phrase */ int iTerm; /* For looping through nTerm phrase terms */ char *pList; /* Pointer to position list for phrase */ int iPos = 0; /* First position in position-list */ UNUSED_PARAMETER(iPhrase); | | | 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 | TermOffsetCtx *p = (TermOffsetCtx *)ctx; int nTerm; /* Number of tokens in phrase */ int iTerm; /* For looping through nTerm phrase terms */ char *pList; /* Pointer to position list for phrase */ int iPos = 0; /* First position in position-list */ UNUSED_PARAMETER(iPhrase); pList = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iDocid, p->iCol); nTerm = pExpr->pPhrase->nToken; if( pList ){ fts3GetDeltaPosition(&pList, &iPos); assert( iPos>=0 ); } for(iTerm=0; iTerm<nTerm; iTerm++){ |
︙ | ︙ |
Changes to ext/fts3/fts3_term.c.
︙ | ︙ | |||
29 30 31 32 33 34 35 | sqlite3_vtab base; /* Base class used by SQLite core */ int iIndex; /* Index for Fts3Table.aIndex[] */ Fts3Table *pFts3Tab; }; struct Fts3termCursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ | | | 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | sqlite3_vtab base; /* Base class used by SQLite core */ int iIndex; /* Index for Fts3Table.aIndex[] */ Fts3Table *pFts3Tab; }; struct Fts3termCursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ Fts3MultiSegReader csr; /* Must be right after "base" */ Fts3SegFilter filter; int isEof; /* True if cursor is at EOF */ char *pNext; sqlite3_int64 iRowid; /* Current 'rowid' value */ sqlite3_int64 iDocid; /* Current 'docid' value */ |
︙ | ︙ |
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 | Fts3SegReader *pReader, /* Segment-reader handle */ int *pnCost /* IN/OUT: Number of bytes read */ ){ Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; int rc = SQLITE_OK; /* Return code */ int nCost = 0; /* Cost in bytes to return */ int pgsz = p->nPgsz; /* Database page size */ /* If this seg-reader is reading the pending-terms table, or if all data ** for the segment is stored on the root page of the b-tree, then the cost ** is zero. In this case all required data is already in main memory. */ if( p->bHasStat && !fts3SegReaderIsPending(pReader) | > > | 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 | Fts3SegReader *pReader, /* Segment-reader handle */ int *pnCost /* IN/OUT: Number of bytes read */ ){ Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; int rc = SQLITE_OK; /* Return code */ int nCost = 0; /* Cost in bytes to return */ int pgsz = p->nPgsz; /* Database page size */ assert( pgsz>0 ); /* If this seg-reader is reading the pending-terms table, or if all data ** for the segment is stored on the root page of the b-tree, then the cost ** is zero. In this case all required data is already in main memory. */ if( p->bHasStat && !fts3SegReaderIsPending(pReader) |
︙ | ︙ | |||
1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 | } } } *pnCost += nCost; return rc; } /* ** Free all allocations associated with the iterator passed as the ** second argument. */ void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){ if( pReader && !fts3SegReaderIsPending(pReader) ){ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 | } } } *pnCost += nCost; return rc; } int sqlite3Fts3MsrOvfl( Fts3Cursor *pCsr, Fts3MultiSegReader *pMsr, int *pnOvfl ){ Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; int nOvfl = 0; int ii; int rc = SQLITE_OK; int pgsz = p->nPgsz; assert( p->bHasStat ); assert( pgsz>0 ); for(ii=0; rc==SQLITE_OK && ii<pMsr->nSegment; ii++){ Fts3SegReader *pReader = pMsr->apSegment[ii]; if( !fts3SegReaderIsPending(pReader) && !fts3SegReaderIsRootOnly(pReader) ){ int jj; for(jj=pReader->iStartBlock; jj<=pReader->iLeafEndBlock; jj++){ int nBlob; rc = sqlite3Fts3ReadBlock(p, jj, 0, &nBlob); if( rc!=SQLITE_OK ) break; if( (nBlob+35)>pgsz ){ nOvfl += (nBlob + 34)/pgsz; } } } } *pnOvfl = nOvfl; return rc; } /* ** Free all allocations associated with the iterator passed as the ** second argument. */ void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){ if( pReader && !fts3SegReaderIsPending(pReader) ){ |
︙ | ︙ | |||
2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 | p = &pList[1]; p += sqlite3Fts3GetVarint32(p, &iCurrent); } *ppList = pList; *pnList = nList; } int sqlite3Fts3SegReaderStart( Fts3Table *p, /* Virtual table handle */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 | p = &pList[1]; p += sqlite3Fts3GetVarint32(p, &iCurrent); } *ppList = pList; *pnList = nList; } int sqlite3Fts3MsrIncrStart( Fts3Table *p, /* Virtual table handle */ Fts3MultiSegReader *pCsr, /* Cursor object */ int iCol, /* Column to match on. */ const char *zTerm, /* Term to iterate through a doclist for */ int nTerm /* Number of bytes in zTerm */ ){ int i; int nSegment = pCsr->nSegment; assert( pCsr->pFilter==0 ); assert( zTerm && nTerm>0 ); /* Advance each segment iterator until it points to the term zTerm/nTerm. */ for(i=0; i<nSegment; i++){ Fts3SegReader *pSeg = pCsr->apSegment[i]; do { int rc = fts3SegReaderNext(p, pSeg); if( rc!=SQLITE_OK ) return rc; }while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ); } fts3SegReaderSort(pCsr->apSegment, nSegment, nSegment, fts3SegReaderCmp); /* Determine how many of the segments actually point to zTerm/nTerm. 
*/ for(i=0; i<nSegment; i++){ Fts3SegReader *pSeg = pCsr->apSegment[i]; if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ){ break; } } pCsr->nAdvance = i; /* Advance each of the segments to point to the first docid. */ for(i=0; i<pCsr->nAdvance; i++){ fts3SegReaderFirstDocid(pCsr->apSegment[i]); } assert( iCol<0 || iCol<p->nColumn ); pCsr->iColFilter = iCol; return SQLITE_OK; } int sqlite3Fts3MsrIncrNext( Fts3Table *p, /* Virtual table handle */ Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ sqlite3_int64 *piDocid, /* OUT: Docid value */ char **paPoslist, /* OUT: Pointer to position list */ int *pnPoslist /* OUT: Size of position list in bytes */ ){ int rc = SQLITE_OK; int nMerge = pMsr->nAdvance; Fts3SegReader **apSegment = pMsr->apSegment; if( nMerge==0 ){ *paPoslist = 0; return SQLITE_OK; } while( 1 ){ Fts3SegReader *pSeg; fts3SegReaderSort(pMsr->apSegment, nMerge, nMerge, fts3SegReaderDoclistCmp); pSeg = pMsr->apSegment[0]; if( pSeg->pOffsetList==0 ){ *paPoslist = 0; break; }else{ char *pList; int nList; int j; sqlite3_int64 iDocid = apSegment[0]->iDocid; fts3SegReaderNextDocid(apSegment[0], &pList, &nList); j = 1; while( j<nMerge && apSegment[j]->pOffsetList && apSegment[j]->iDocid==iDocid ){ fts3SegReaderNextDocid(apSegment[j], 0, 0); } if( pMsr->iColFilter>=0 ){ fts3ColumnFilter(pMsr->iColFilter, &pList, &nList); } if( nList>0 ){ *piDocid = iDocid; *paPoslist = pList; *pnPoslist = nList; break; } } } return rc; } int sqlite3Fts3SegReaderStart( Fts3Table *p, /* Virtual table handle */ Fts3MultiSegReader *pCsr, /* Cursor object */ Fts3SegFilter *pFilter /* Restrictions on range of iteration */ ){ int i; /* Initialize the cursor object */ pCsr->pFilter = pFilter; |
︙ | ︙ | |||
2169 2170 2171 2172 2173 2174 2175 | pCsr->apSegment, pCsr->nSegment, pCsr->nSegment, fts3SegReaderCmp); return SQLITE_OK; } int sqlite3Fts3SegReaderStep( Fts3Table *p, /* Virtual table handle */ | | | 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 | pCsr->apSegment, pCsr->nSegment, pCsr->nSegment, fts3SegReaderCmp); return SQLITE_OK; } int sqlite3Fts3SegReaderStep( Fts3Table *p, /* Virtual table handle */ Fts3MultiSegReader *pCsr /* Cursor object */ ){ int rc = SQLITE_OK; int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); |
︙ | ︙ | |||
2303 2304 2305 2306 2307 2308 2309 2310 2311 | } } pCsr->nAdvance = nMerge; }while( rc==SQLITE_OK ); return rc; } void sqlite3Fts3SegReaderFinish( | > | | 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 | } } pCsr->nAdvance = nMerge; }while( rc==SQLITE_OK ); return rc; } void sqlite3Fts3SegReaderFinish( Fts3MultiSegReader *pCsr /* Cursor object */ ){ if( pCsr ){ int i; for(i=0; i<pCsr->nSegment; i++){ sqlite3Fts3SegReaderFree(pCsr->apSegment[i]); } sqlite3_free(pCsr->apSegment); |
︙ | ︙ | |||
2338 2339 2340 2341 2342 2343 2344 | */ static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){ int rc; /* Return code */ int iIdx = 0; /* Index of new segment */ int iNewLevel = 0; /* Level/index to create new segment at */ SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ Fts3SegFilter filter; /* Segment term filter condition */ | | | 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 | */ static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){ int rc; /* Return code */ int iIdx = 0; /* Index of new segment */ int iNewLevel = 0; /* Level/index to create new segment at */ SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ Fts3SegFilter filter; /* Segment term filter condition */ Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ int bIgnoreEmpty = 0; /* True to ignore empty segments */ assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel==FTS3_SEGCURSOR_PENDING || iLevel>=0 ); assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); |
︙ | ︙ | |||
2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 | rc = fts3PendingListAppendVarint(&pDef->pList, 0); } } } return rc; } /* ** Add an entry for token pToken to the pCsr->pDeferred list. */ int sqlite3Fts3DeferToken( Fts3Cursor *pCsr, /* Fts3 table cursor */ Fts3PhraseToken *pToken, /* Token to defer */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 | rc = fts3PendingListAppendVarint(&pDef->pList, 0); } } } return rc; } int sqlite3Fts3DeferredTokenList( Fts3DeferredToken *p, char **ppData, int *pnData ){ char *pRet; int nSkip; sqlite3_int64 dummy; *ppData = 0; *pnData = 0; if( p->pList==0 ){ return SQLITE_OK; } pRet = (char *)sqlite3_malloc(p->pList->nData); if( !pRet ) return SQLITE_NOMEM; nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy); *pnData = p->pList->nData - nSkip; *ppData = pRet; memcpy(pRet, &p->pList->aData[nSkip], *pnData); return SQLITE_OK; } /* ** Add an entry for token pToken to the pCsr->pDeferred list. */ int sqlite3Fts3DeferToken( Fts3Cursor *pCsr, /* Fts3 table cursor */ Fts3PhraseToken *pToken, /* Token to defer */ |
︙ | ︙ |
Changes to test/fts3defer.test.
︙ | ︙ | |||
16 17 18 19 20 21 22 23 24 25 26 27 28 29 | ifcapable !fts3 { finish_test return } set sqlite_fts3_enable_parentheses 1 set ::testprefix fts3defer #-------------------------------------------------------------------------- # Test cases fts3defer-1.* are the "warm body" cases. The database contains # one row with 15000 instances of the token "a". This makes the doclist for # "a" so large that FTS3 will avoid loading it in most cases. # | > > | 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | ifcapable !fts3 { finish_test return } set sqlite_fts3_enable_parentheses 1 set fts3_simple_deferred_tokens_only 1 set ::testprefix fts3defer #-------------------------------------------------------------------------- # Test cases fts3defer-1.* are the "warm body" cases. The database contains # one row with 15000 instances of the token "a". This makes the doclist for # "a" so large that FTS3 will avoid loading it in most cases. # |
︙ | ︙ | |||
253 254 255 256 257 258 259 | do_select_test 1.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'jk xnxhf' } {13 29 40 47 48 52 63 92} do_select_test 1.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'jk eh' } {100} | < | 255 256 257 258 259 260 261 262 263 264 265 266 267 268 | do_select_test 1.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'jk xnxhf' } {13 29 40 47 48 52 63 92} do_select_test 1.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'jk eh' } {100} do_select_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf' } {7 70 98} do_select_test 1.4 { SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl jk' } {3 5 8 10 13 18 20 23 32 37 41 43 55 60 65 67 72 74 76 81 94 96 97} do_select_test 1.5 { |
︙ | ︙ | |||
278 279 280 281 282 283 284 | } {68 100} do_select_test 1.9 { SELECT rowid FROM t1 WHERE t1 MATCH 'zm ubwrfqnbjf' } {7 70 98} do_select_test 1.10 { SELECT rowid FROM t1 WHERE t1 MATCH 'z* vgsld' } {10 13 17 31 35 51 58 88 89 90 93 100} | > > | | | | | | | > | 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 | } {68 100} do_select_test 1.9 { SELECT rowid FROM t1 WHERE t1 MATCH 'zm ubwrfqnbjf' } {7 70 98} do_select_test 1.10 { SELECT rowid FROM t1 WHERE t1 MATCH 'z* vgsld' } {10 13 17 31 35 51 58 88 89 90 93 100} if { $fts3_simple_deferred_tokens_only==0 } { do_select_test 1.11 { SELECT rowid FROM t1 WHERE t1 MATCH '( zdu OR zexh OR zf OR zhbrzadb OR zidhxhbtv OR zk OR zkhdvkw OR zm OR zsmhnf ) vgsld' } {10 13 17 31 35 51 58 88 89 90 93 100} } do_select_test 2.1 { SELECT rowid FROM t1 WHERE t1 MATCH '"zm agmckuiu"' } {3 24 52 53} do_select_test 2.2 { SELECT rowid FROM t1 WHERE t1 MATCH '"zm zf"' } {33 53 75 88 101} |
︙ | ︙ | |||
360 361 362 363 364 365 366 367 368 369 370 371 372 373 | # The following block of tests runs normally with FTS3 or FTS4 without the # long doclists zeroed. And with OOM-injection for FTS4 with long doclists # zeroed. Change this by messing with the [set dmt_modes] commands above. # foreach DO_MALLOC_TEST $dmt_modes { # Phrase search. do_select_test 5.$DO_MALLOC_TEST.1 { SELECT rowid FROM t1 WHERE t1 MATCH '"jk mjpavjuhw"' } {8 15 36 64 67 72} # Multiple tokens search. do_select_test 5.$DO_MALLOC_TEST.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl zm' | > | 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 | # The following block of tests runs normally with FTS3 or FTS4 without the # long doclists zeroed. And with OOM-injection for FTS4 with long doclists # zeroed. Change this by messing with the [set dmt_modes] commands above. # foreach DO_MALLOC_TEST $dmt_modes { # Phrase search. # do_select_test 5.$DO_MALLOC_TEST.1 { SELECT rowid FROM t1 WHERE t1 MATCH '"jk mjpavjuhw"' } {8 15 36 64 67 72} # Multiple tokens search. do_select_test 5.$DO_MALLOC_TEST.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl zm' |
︙ | ︙ | |||
412 413 414 415 416 417 418 | } {10} do_select_test 6.2.1 { SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk"' } {8} do_select_test 6.2.2 { SELECT rowid FROM t1 WHERE t1 MATCH '"zm azavwm"' } {15 26 92 96} | > | | | > | 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 | } {10} do_select_test 6.2.1 { SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk"' } {8} do_select_test 6.2.2 { SELECT rowid FROM t1 WHERE t1 MATCH '"zm azavwm"' } {15 26 92 96} if {$fts3_simple_deferred_tokens_only==0} { do_select_test 6.2.3 { SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk" OR "zm azavwm"' } {8 15 26 92 96} } } set testprefix fts3defer do_execsql_test 3.1 { CREATE VIRTUAL TABLE x1 USING fts4(a, b); INSERT INTO x1 VALUES('a b c', 'd e f'); |
︙ | ︙ |
Changes to test/permutations.test.
︙ | ︙ | |||
175 176 177 178 179 180 181 182 183 184 185 186 187 188 | } -files { fts3aa.test fts3ab.test fts3ac.test fts3ad.test fts3ae.test fts3af.test fts3ag.test fts3ah.test fts3ai.test fts3aj.test fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test fts3defer.test fts3defer2.test fts3e.test fts3expr.test fts3expr2.test fts3near.test fts3query.test fts3shared.test fts3snippet.test fts3fault.test fts3malloc.test fts3matchinfo.test fts3aux1.test fts3comp1.test } | > | 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 | } -files { fts3aa.test fts3ab.test fts3ac.test fts3ad.test fts3ae.test fts3af.test fts3ag.test fts3ah.test fts3ai.test fts3aj.test fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test fts3defer.test fts3defer2.test fts3e.test fts3expr.test fts3expr2.test fts3near.test fts3query.test fts3shared.test fts3snippet.test fts3sort.test fts3fault.test fts3malloc.test fts3matchinfo.test fts3aux1.test fts3comp1.test } |
︙ | ︙ |