Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Avoid redundant string comparisons while merging fts5 segment b-trees. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
5c46820d9b4aae791a8704b69145bd81 |
User & Date: | dan 2015-03-10 19:24:30.225 |
Context
2015-03-11
| ||
14:51 | Add an optimization to the fts5 unicode tokenizer code. (check-in: f5db489250 user: dan tags: fts5) | |
2015-03-10
| ||
19:24 | Avoid redundant string comparisons while merging fts5 segment b-trees. (check-in: 5c46820d9b user: dan tags: fts5) | |
2015-03-07
| ||
15:46 | Fix some compiler warnings caused by signed/unsigned pointer conversions. (check-in: cccee7b5b1 user: dan tags: fts5) | |
Changes
Changes to ext/fts5/fts5_index.c.
︙ | |||
406 407 408 409 410 411 412 413 414 415 416 417 | 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 | + + + + + + + - + | ** comparison in this context is the index of the iterator that currently ** points to the smaller term/rowid combination. Iterators at EOF are ** considered to be greater than all other iterators. ** ** aFirst[1] contains the index in aSeg[] of the iterator that points to ** the smallest key overall. aFirst[0] is unused. */ typedef struct Fts5CResult Fts5CResult; struct Fts5CResult { u16 iFirst; /* aSeg[] index of firstest iterator */ u8 bTermEq; /* True if the terms are equal */ }; struct Fts5MultiSegIter { int nSeg; /* Size of aSeg[] array */ int bRev; /* True to iterate in reverse order */ int bSkipEmpty; /* True to skip deleted entries */ Fts5SegIter *aSeg; /* Array of segment iterators */ |
︙ | |||
1740 1741 1742 1743 1744 1745 1746 | 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 | - + + + | ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It ** is not considered an error if the iterator reaches EOF. If an error has ** already occurred when this function is called, it is a no-op. */ static void fts5SegIterNext( Fts5Index *p, /* FTS5 backend object */ |
︙ | |||
1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 | 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 | + | /* Check if the iterator is now at EOF. If so, return early. */ if( pIter->pLeaf && bNewTerm ){ if( pIter->flags & FTS5_SEGITER_ONETERM ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; }else{ fts5SegIterLoadTerm(p, pIter, nKeep); if( pbNewTerm ) *pbNewTerm = 1; } } } } } /* |
︙ | |||
2029 2030 2031 2032 2033 2034 2035 | 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 | - + | if( pIter->pLeaf ){ int res; pIter->iLeafOffset = fts5GetU16(&pIter->pLeaf->p[2]); fts5SegIterLoadTerm(p, pIter, 0); do { res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm); if( res>=0 ) break; |
︙ | |||
2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 | 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + + + - + | fts5BufferFree(&pIter->term); fts5DataRelease(pIter->pLeaf); fts5DlidxIterFree(pIter->pDlidx); sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); } #ifdef SQLITE_DEBUG /* ** This function is used as part of the big assert() procedure implemented by ** fts5AssertMultiIterSetup(). It ensures that the result currently stored ** in *pRes is the correct result of comparing the current positions of the ** two iterators. */ static void fts5AssertComparisonResult( Fts5MultiSegIter *pIter, Fts5SegIter *p1, Fts5SegIter *p2, Fts5CResult *pRes ){ int i1 = p1 - pIter->aSeg; int i2 = p2 - pIter->aSeg; if( p1->pLeaf || p2->pLeaf ){ if( p1->pLeaf==0 ){ assert( pRes->iFirst==i2 ); }else if( p2->pLeaf==0 ){ assert( pRes->iFirst==i1 ); }else{ int nMin = MIN(p1->term.n, p2->term.n); int res = memcmp(p1->term.p, p2->term.p, nMin); if( res==0 ) res = p1->term.n - p2->term.n; if( res==0 ){ assert( pRes->bTermEq==1 ); assert( p1->iRowid!=p2->iRowid ); res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; }else{ assert( pRes->bTermEq==0 ); } if( res<0 ){ assert( pRes->iFirst==i1 ); }else{ assert( pRes->iFirst==i2 ); } } } } /* ** This function is a no-op unless SQLITE_DEBUG is defined when this module ** is compiled. In that case, this function is essentially an assert() ** statement used to verify that the contents of the pIter->aFirst[] array ** are correct. */ static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5MultiSegIter *pIter){ if( p->rc==SQLITE_OK ){ int i; for(i=0; i<pIter->nSeg; i+=2){ Fts5SegIter *p1 = &pIter->aSeg[i]; Fts5SegIter *p2 = &pIter->aSeg[i+1]; Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; fts5AssertComparisonResult(pIter, p1, p2, pRes); } for(i=1; i<(pIter->nSeg / 2); i+=2){ Fts5CResult *pRes = &pIter->aFirst[i]; Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; fts5AssertComparisonResult(pIter, p1, p2, pRes); } } } #else # define fts5AssertMultiIterSetup(x,y) #endif /* ** Do the comparison necessary to populate pIter->aFirst[iOut]. ** ** If the returned value is non-zero, then it is the index of an entry ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing ** to a key that is a duplicate of another, higher priority, ** segment-iterator in the pSeg->aSeg[] array. */ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ int i1; /* Index of left-hand Fts5SegIter */ int i2; /* Index of right-hand Fts5SegIter */ int iRes; Fts5SegIter *p1; /* Left-hand Fts5SegIter */ Fts5SegIter *p2; /* Right-hand Fts5SegIter */ Fts5CResult *pRes = &pIter->aFirst[iOut]; assert( iOut<pIter->nSeg && iOut>0 ); assert( pIter->bRev==0 || pIter->bRev==1 ); if( iOut>=(pIter->nSeg/2) ){ i1 = (iOut - pIter->nSeg/2) * 2; i2 = i1 + 1; }else{ |
︙ | |||
2248 2249 2250 2251 2252 2253 2254 | 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 | - + | pIter->iLeafPgno = iLeafPgno+1; fts5SegIterReverseNewPage(p, pIter); bMove = 0; } } while( 1 ){ |
︙ | |||
2280 2281 2282 2283 2284 2285 2286 | 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 | - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + - + + + + + - + + + + - + - + | int iChanged, /* Index of sub-iterator just advanced */ int iMinset /* Minimum entry in aFirst[] to set */ ){ int i; for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ int iEq; if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ |
︙ | |||
2359 2360 2361 2362 2363 2364 2365 | 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 | - + - + | }else{ nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); } for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); *ppOut = pNew = fts5IdxMalloc(p, sizeof(Fts5MultiSegIter) + /* pNew */ sizeof(Fts5SegIter) * nSlot + /* pNew->aSeg[] */ |
︙ | |||
2403 2404 2405 2406 2407 2408 2409 | 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 | - + + - + - + - - + + | ** to the first entry in its segment. In this case initialize the ** aFirst[] array. Or, if an error has occurred, free the iterator ** object and set the output variable to NULL. */ if( p->rc==SQLITE_OK ){ for(iIter=nSlot-1; iIter>0; iIter--){ int iEq; if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ |
︙ | |||
2460 2461 2462 2463 2464 2465 2466 | 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 | - + | } /* ** Return a pointer to a buffer containing the term associated with the ** entry that the iterator currently points to. */ static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){ |
︙ | |||
2589 2590 2591 2592 2593 2594 2595 | 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 | - + | */ static void fts5PosIterInit( Fts5Index *p, /* FTS5 backend object */ Fts5MultiSegIter *pMulti, /* Multi-seg iterator to read pos-list from */ Fts5PosIter *pIter /* Initialize this object */ ){ if( p->rc==SQLITE_OK ){ |
︙ | |||
3210 3211 3212 3213 3214 3215 3216 | 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 | - + | #endif assert( iLvl>=0 ); for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ |
︙ | |||
3936 3937 3938 3939 3940 3941 3942 | 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 | - + | Fts5Index *p, Fts5MultiSegIter *pMulti, int bSz, Fts5Buffer *pBuf ){ if( p->rc==SQLITE_OK ){ Fts5ChunkIter iter; |
︙ |