Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Have the fts5 integrity-check verify that doclist indexes match the contents of the leaf pages that they index. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
37a7d3035eb4bbad7e32fe550321ac9f |
User & Date: | dan 2014-08-01 19:27:07.492 |
Context
2014-08-01
| ||
20:13 | Add a special case to the integrity-check code to check that the final integer in a doclist index is as expected. (check-in: c98934155c user: dan tags: fts5) | |
19:27 | Have the fts5 integrity-check verify that doclist indexes match the contents of the leaf pages that they index. (check-in: 37a7d3035e user: dan tags: fts5) | |
11:16 | Add "doclist index" records to the database. These are to make navigating within very large doclists faster. They are not yet used by queries. (check-in: 89377421ff user: dan tags: fts5) | |
Changes
Changes to ext/fts5/fts5_index.c.
︙ | |||
251 252 253 254 255 256 257 258 259 260 261 262 263 264 | 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 | + + + + + + | #ifdef SQLITE_DEBUG static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } # define FTS5_CORRUPT fts5Corrupt() #else # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB #endif #ifdef SQLITE_DEBUG static int fts5MissingData() { return 0; } #else # define fts5MissingData() #endif typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5ChunkIter Fts5ChunkIter; typedef struct Fts5Data Fts5Data; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; |
︙ | |||
526 527 528 529 530 531 532 533 534 535 536 537 538 539 | 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 | + | int nData; int iOff; /* Output variables */ Fts5Buffer term; int nEmpty; int iChild; int bDlidx; }; /* ** An Fts5BtreeIter object is used to iterate through all entries in the ** b-tree hierarchy belonging to a single fts5 segment. In this case the ** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the ** b-tree hierarchy consists of the following: |
︙ | |||
562 563 564 565 566 567 568 569 570 571 572 573 574 575 | 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 | + | Fts5BtreeIterLevel *aLvl; /* Level for each tier of b-tree */ /* Output variables */ Fts5Buffer term; /* Current term */ int iLeaf; /* Leaf containing terms >= current term */ int nEmpty; /* Number of "empty" leaves following iLeaf */ int bEof; /* Set to true at EOF */ int bDlidx; /* True if there exists a dlidx */ }; static void fts5PutU16(u8 *aOut, u16 iVal){ aOut[0] = (iVal>>8); aOut[1] = (iVal&0xFF); } |
︙ | |||
665 666 667 668 669 670 671 672 673 674 675 676 677 678 | 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 | + + | Fts5Config *pConfig = p->pConfig; rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader ); }else{ rc = sqlite3_blob_reopen(p->pReader, iRowid); } if( rc ) fts5MissingData(); if( rc==SQLITE_OK ){ int nByte = sqlite3_blob_bytes(p->pReader); if( pBuf ){ fts5BufferZero(pBuf); fts5BufferGrow(&rc, pBuf, nByte); rc = sqlite3_blob_read(p->pReader, pBuf->p, nByte, 0); |
︙ | |||
976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 | 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 | + + | /* ** If the pIter->iOff offset currently points to an entry indicating one ** or more term-less nodes, advance past it and set pIter->nEmpty to ** the number of empty child nodes. */ static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ if( pIter->iOff<pIter->nData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01; pIter->iOff++; pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); }else{ pIter->nEmpty = 0; pIter->bDlidx = 0; } } /* ** Advance to the next entry within the node. */ static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){ |
︙ | |||
2078 2079 2080 2081 2082 2083 2084 | 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 | + - - + + + + + + + + + + + + + + + + + + + - + - - - - + - - - - - - - - | /* ** If an "nEmpty" record must be written to the b-tree before the next ** term, write it now. */ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ if( pWriter->nEmpty ){ int bFlag = 0; |
︙ | |||
2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 | 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 | + | static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; Fts5PageWriter *pPage = &pWriter->aWriter[0]; i64 iRowid; if( pPage->term.n==0 ){ /* No term was written to this page. */ assert( 0==fts5GetU16(&pPage->buf.p[2]) ); fts5WriteBtreeNoTerm(p, pWriter); } /* Write the current page to the db. */ iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, 0, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); |
︙ | |||
2375 2376 2377 2378 2379 2380 2381 | 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 | - + + + + + | Fts5Index *p, Fts5SegWriter *pWriter, /* Writer object */ int *pnHeight, /* OUT: Height of the b-tree */ int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; *pnLeaf = pWriter->aWriter[0].pgno; |
︙ | |||
2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 | 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 | + | if( pIter->nLvl==0 || p->rc ){ pIter->bEof = 1; pIter->iLeaf = pSeg->pgnoLast; }else{ pIter->nEmpty = pIter->aLvl[0].s.nEmpty; pIter->iLeaf = pIter->aLvl[0].s.iChild; pIter->bDlidx = pIter->aLvl[0].s.bDlidx; } } static void fts5BtreeIterNext(Fts5BtreeIter *pIter){ Fts5Index *p = pIter->p; int i; |
︙ | |||
2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 | 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + | if( pLvl->pData ){ fts5NodeIterInit(pLvl->pData->p, pLvl->pData->n, &pLvl->s); } } } pIter->nEmpty = pIter->aLvl[0].s.nEmpty; pIter->bDlidx = pIter->aLvl[0].s.bDlidx; pIter->iLeaf = pIter->aLvl[0].s.iChild; assert( p->rc==SQLITE_OK || pIter->bEof ); } static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ int i; for(i=0; i<pIter->nLvl; i++){ Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; fts5NodeIterFree(&pLvl->s); if( pLvl->pData ){ fts5DataRelease(pLvl->pData); pLvl->pData = 0; } } sqlite3_free(pIter->aLvl); fts5BufferFree(&pIter->term); } typedef struct DoclistIdxIter DoclistIdxIter; struct DoclistIdxIter { Fts5Data *pDlidx; /* Data for doclist index, if any */ int iOff; /* Current offset into pDlidx */ int bRowidValid; /* iRowid is valid */ int bZero; /* True if current leaf has no rowid */ i64 iRowid; /* If bZero==0, first rowid on leaf */ }; /* ** Return non-zero if EOF is reached. */ static int fts5IndexDoclistIterNext(DoclistIdxIter *pIter){ i64 iVal; if( pIter->iOff>=pIter->pDlidx->n ) return 1; pIter->iOff += getVarint(&pIter->pDlidx->p[pIter->iOff], (u64*)&iVal); if( iVal==0 ){ pIter->bZero = 1; }else{ pIter->bZero = 0; if( pIter->bRowidValid ){ pIter->iRowid -= iVal; }else{ pIter->bRowidValid = 1; pIter->iRowid = iVal; } } return 0; } static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ int iIdx, /* Index that pSeg is a part of */ Fts5StructureSegment *pSeg /* Segment to check internal consistency */ ){ Fts5BtreeIter iter; /* Used to iterate through b-tree hierarchy */ /* Iterate through the b-tree hierarchy. */ for(fts5BtreeIterInit(p, iIdx, pSeg, &iter); iter.bEof==0; fts5BtreeIterNext(&iter) ){ i64 iRow; /* Rowid for this leaf */ Fts5Data *pLeaf; /* Data for this leaf */ int iOff; /* Offset of first term on leaf */ int i; /* Used to iterate through empty leaves */ DoclistIdxIter dliter; /* For iterating through any doclist index */ /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ if( iter.iLeaf<pSeg->pgnoFirst ) continue; iRow = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, iter.iLeaf); pLeaf = fts5DataRead(p, iRow); if( pLeaf==0 ) break; |
︙ | |||
2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 | 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 | + + + + + + + + + + + + + + + + + + + + + | if( res==0 ) res = nTerm - iter.term.n; if( res<0 ){ p->rc = FTS5_CORRUPT; } } fts5DataRelease(pLeaf); if( p->rc ) break; memset(&dliter, 0, sizeof(DoclistIdxIter)); if( iter.bDlidx ){ i64 iDlidxRowid = FTS5_DOCLIST_IDX_ROWID(iIdx, pSeg->iSegid, iter.iLeaf); dliter.pDlidx = fts5DataRead(p, iDlidxRowid); } /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ for(i=1; i<=iter.nEmpty; i++){ pLeaf = fts5DataRead(p, iRow+i); if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){ p->rc = FTS5_CORRUPT; } if( pLeaf && dliter.pDlidx ){ if( fts5IndexDoclistIterNext(&dliter) ){ p->rc = FTS5_CORRUPT; }else{ int iRowidOff = fts5GetU16(&pLeaf->p[0]); if( dliter.bZero ){ if( iRowidOff!=0 ) p->rc = FTS5_CORRUPT; }else{ i64 iRowid; getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT; } } } fts5DataRelease(pLeaf); } fts5DataRelease(dliter.pDlidx); } if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ p->rc = FTS5_CORRUPT; } fts5BtreeIterFree(&iter); |
︙ | |||
3214 3215 3216 3217 3218 3219 3220 | 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 | - + | a = sqlite3_value_blob(apVal[1]); fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno); if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ int i = 0; i64 iPrev; sqlite3Fts5BufferAppendPrintf(&rc, &s, "(dlidx idx=%d segid=%d pgno=%d)", |
︙ | |||
3301 3302 3303 3304 3305 3306 3307 | 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 | - + + + | sqlite3Fts5BufferAppendPrintf(&rc, &s, " left=%d", ss.iChild); }else{ sqlite3Fts5BufferAppendPrintf(&rc,&s, " \"%.*s\"", ss.term.n, ss.term.p ); } if( ss.nEmpty ){ |
︙ |
Changes to test/fts5ah.test.
︙ | |||
45 46 47 48 49 50 51 | 45 46 47 48 49 50 51 52 53 54 55 | - - - | SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' } [lsort -integer -decr $Y] do_execsql_test 1.3 { INSERT INTO t1(t1) VALUES('integrity-check'); } |
Changes to test/permutations.test.
︙ | |||
222 223 224 225 226 227 228 | 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 | - + | fts4growth.test fts4growth2.test } test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files { fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test |
︙ |