Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Improve performance of fts5 prefix queries on detail=col tables. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
ca11f46db047e7f131cef3893f738247 |
User & Date: | dan 2016-02-03 20:04:59.805 |
Context
2016-02-03
| ||
22:14 | In the VDBE loop, only check for OOM errors at jumps rather than after every opcode, for about a 0.5% performance increase. (check-in: 632071bac5 user: drh tags: trunk) | |
20:04 | Improve performance of fts5 prefix queries on detail=col tables. (check-in: ca11f46db0 user: dan tags: trunk) | |
19:52 | Fix markup errors in comments used to generate the documentation - specifically in the documentation on the OP_Seek opcode. (check-in: ef252bc4b5 user: drh tags: trunk) | |
Changes
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
327 328 329 330 331 332 333 334 335 336 337 338 339 340 | /* ** Values used as part of the flags argument passed to IndexQuery(). */ #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ /* ** Create/destroy an Fts5Index object. */ int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**); int sqlite3Fts5IndexClose(Fts5Index *p); | > > > > > > | 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | /* ** Values used as part of the flags argument passed to IndexQuery(). */ #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ /* The following are used internally by the fts5_index.c module. They are ** defined here only to make it easier to avoid clashes with the flags ** above. */ #define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 #define FTS5INDEX_QUERY_NOOUTPUT 0x0020 /* ** Create/destroy an Fts5Index object. */ int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**); int sqlite3Fts5IndexClose(Fts5Index *p); |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
2260 2261 2262 2263 2264 2265 2266 | Fts5StructureSegment *pSeg, /* Description of segment */ Fts5SegIter *pIter /* Object to populate */ ){ int iPg = 1; int bGe = (flags & FTS5INDEX_QUERY_SCAN); int bDlidx = 0; /* True if there is a doclist-index */ | < < < | 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 | Fts5StructureSegment *pSeg, /* Description of segment */ Fts5SegIter *pIter /* Object to populate */ ){ int iPg = 1; int bGe = (flags & FTS5INDEX_QUERY_SCAN); int bDlidx = 0; /* True if there is a doclist-index */ assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); assert( pTerm && nTerm ); memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; /* This block sets stack variable iPg to the leaf page number that may ** contain term (pTerm/nTerm), if it is present in the segment. */ |
︙ | ︙ | |||
2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 | pNew->nSeg = nSlot; pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; pNew->pIndex = p; pNew->xSetOutputs = fts5IterSetOutputs_Noop; } return pNew; } /* ** Allocate a new Fts5Iter object. ** ** The new object will be used to iterate through data in structure pStruct. ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel ** is zero or greater, data from the first nSegment segments on level iLevel ** is merged. ** ** The iterator initially points to the first term/rowid entry in the ** iterated data. */ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < > | 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 | pNew->nSeg = nSlot; pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; pNew->pIndex = p; pNew->xSetOutputs = fts5IterSetOutputs_Noop; } return pNew; } static void fts5PoslistCallback( Fts5Index *p, void *pContext, const u8 *pChunk, int nChunk ){ assert_nc( nChunk>=0 ); if( nChunk>0 ){ fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk); } } typedef struct PoslistCallbackCtx PoslistCallbackCtx; struct PoslistCallbackCtx { Fts5Buffer *pBuf; /* Append to this buffer */ Fts5Colset *pColset; /* Restrict matches to this column */ int eState; /* See above */ }; typedef struct PoslistOffsetsCtx PoslistOffsetsCtx; struct PoslistOffsetsCtx { Fts5Buffer *pBuf; /* Append to this buffer */ Fts5Colset *pColset; /* Restrict matches to this column */ int iRead; int iWrite; }; /* ** TODO: Make this more efficient! */ static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ int i; for(i=0; i<pColset->nCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } static void fts5PoslistOffsetsCallback( Fts5Index *p, void *pContext, const u8 *pChunk, int nChunk ){ PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext; assert_nc( nChunk>=0 ); if( nChunk>0 ){ int i = 0; while( i<nChunk ){ int iVal; i += fts5GetVarint32(&pChunk[i], iVal); iVal += pCtx->iRead - 2; pCtx->iRead = iVal; if( fts5IndexColsetTest(pCtx->pColset, iVal) ){ fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite); pCtx->iWrite = iVal; } } } } static void fts5PoslistFilterCallback( Fts5Index *p, void *pContext, const u8 *pChunk, int nChunk ){ PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; assert_nc( nChunk>=0 ); if( nChunk>0 ){ /* Search through to find the first varint with value 1. This is the ** start of the next columns hits. */ int i = 0; int iStart = 0; if( pCtx->eState==2 ){ int iCol; fts5FastGetVarint32(pChunk, i, iCol); if( fts5IndexColsetTest(pCtx->pColset, iCol) ){ pCtx->eState = 1; fts5BufferSafeAppendVarint(pCtx->pBuf, 1); }else{ pCtx->eState = 0; } } do { while( i<nChunk && pChunk[i]!=0x01 ){ while( pChunk[i] & 0x80 ) i++; i++; } if( pCtx->eState ){ fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); } if( i<nChunk ){ int iCol; iStart = i; i++; if( i>=nChunk ){ pCtx->eState = 2; }else{ fts5FastGetVarint32(pChunk, i, iCol); pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol); if( pCtx->eState ){ fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); iStart = i; } } } }while( i<nChunk ); } } static void fts5ChunkIterate( Fts5Index *p, /* Index object */ Fts5SegIter *pSeg, /* Poslist of this iterator */ void *pCtx, /* Context pointer for xChunk callback */ void (*xChunk)(Fts5Index*, void*, const u8*, int) ){ int nRem = pSeg->nPos; /* Number of bytes still to come */ Fts5Data *pData = 0; u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); int pgno = pSeg->iLeafPgno; int pgnoSave = 0; /* This function does notmwork with detail=none databases. */ assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ pgnoSave = pgno+1; } while( 1 ){ xChunk(p, pCtx, pChunk, nChunk); nRem -= nChunk; fts5DataRelease(pData); if( nRem<=0 ){ break; }else{ pgno++; pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)); if( pData==0 ) break; pChunk = &pData->p[4]; nChunk = MIN(nRem, pData->szLeaf - 4); if( pgno==pgnoSave ){ assert( pSeg->pNextLeaf==0 ); pSeg->pNextLeaf = pData; pData = 0; } } } } /* ** Iterator pIter currently points to a valid entry (not EOF). This ** function appends the position list data for the current entry to ** buffer pBuf. It does not make a copy of the position-list size ** field. */ static void fts5SegiterPoslist( Fts5Index *p, Fts5SegIter *pSeg, Fts5Colset *pColset, Fts5Buffer *pBuf ){ if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){ if( pColset==0 ){ fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); }else{ if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){ PoslistCallbackCtx sCtx; sCtx.pBuf = pBuf; sCtx.pColset = pColset; sCtx.eState = fts5IndexColsetTest(pColset, 0); assert( sCtx.eState==0 || sCtx.eState==1 ); fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); }else{ PoslistOffsetsCtx sCtx; memset(&sCtx, 0, sizeof(sCtx)); sCtx.pBuf = pBuf; sCtx.pColset = pColset; fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback); } } } } /* ** IN/OUT parameter (*pa) points to a position list n bytes in size. If ** the position list contains entries for column iCol, then (*pa) is set ** to point to the sub-position-list for that column and the number of ** bytes in it returned. Or, if the argument position list does not ** contain any entries for column iCol, return 0. */ static int fts5IndexExtractCol( const u8 **pa, /* IN/OUT: Pointer to poslist */ int n, /* IN: Size of poslist in bytes */ int iCol /* Column to extract from poslist */ ){ int iCurrent = 0; /* Anything before the first 0x01 is col 0 */ const u8 *p = *pa; const u8 *pEnd = &p[n]; /* One byte past end of position list */ while( iCol>iCurrent ){ /* Advance pointer p until it points to pEnd or an 0x01 byte that is ** not part of a varint. Note that it is not possible for a negative ** or extremely large varint to occur within an uncorrupted position ** list. So the last byte of each varint may be assumed to have a clear ** 0x80 bit. */ while( *p!=0x01 ){ while( *p++ & 0x80 ); if( p>=pEnd ) return 0; } *pa = p++; iCurrent = *p++; if( iCurrent & 0x80 ){ p--; p += fts5GetVarint32(p, iCurrent); } } if( iCol!=iCurrent ) return 0; /* Advance pointer p until it points to pEnd or an 0x01 byte that is ** not part of a varint */ while( p<pEnd && *p!=0x01 ){ while( *p++ & 0x80 ); } return p - (*pa); } static int fts5IndexExtractColset ( Fts5Colset *pColset, /* Colset to filter on */ const u8 *pPos, int nPos, /* Position list */ Fts5Buffer *pBuf /* Output buffer */ ){ int rc = SQLITE_OK; int i; fts5BufferZero(pBuf); for(i=0; i<pColset->nCol; i++){ const u8 *pSub = pPos; int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]); if( nSub ){ fts5BufferAppendBlob(&rc, pBuf, nSub, pSub); } } return rc; } /* ** xSetOutputs callback used by detail=none tables. */ static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){ assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE ); pIter->base.iRowid = pSeg->iRowid; pIter->base.nData = pSeg->nPos; } /* ** xSetOutputs callback used by detail=full and detail=col tables when no ** column filters are specified. */ static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){ pIter->base.iRowid = pSeg->iRowid; pIter->base.nData = pSeg->nPos; assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE ); assert( pIter->pColset==0 || pIter->bFiltered ); if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ /* All data is stored on the current page. Populate the output ** variables to point into the body of the page object. */ pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset]; }else{ /* The data is distributed over two or more pages. Copy it into the ** Fts5Iter.poslist buffer and then set the output pointer to point ** to this buffer. */ fts5BufferZero(&pIter->poslist); fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); pIter->base.pData = pIter->poslist.p; } } /* ** xSetOutputs callback used by detail=col when there is a column filter ** and there are 100 or more columns. Also called as a fallback from ** fts5IterSetOutputs_Col100 if the column-list spans more than one page. */ static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){ fts5BufferZero(&pIter->poslist); fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist); pIter->base.iRowid = pSeg->iRowid; pIter->base.pData = pIter->poslist.p; pIter->base.nData = pIter->poslist.n; } /* ** xSetOutputs callback used when: ** ** * detail=col, ** * there is a column filter, and ** * the table contains 100 or fewer columns. ** ** The last point is to ensure all column numbers are stored as ** single-byte varints. */ static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){ assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); assert( pIter->pColset ); if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){ fts5IterSetOutputs_Col(pIter, pSeg); }else{ u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset]; u8 *pEnd = (u8*)&a[pSeg->nPos]; int iPrev = 0; int *aiCol = pIter->pColset->aiCol; int *aiColEnd = &aiCol[pIter->pColset->nCol]; u8 *aOut = pIter->poslist.p; int iPrevOut = 0; pIter->base.iRowid = pSeg->iRowid; while( a<pEnd ){ iPrev += (int)a++[0] - 2; while( *aiCol<iPrev ){ aiCol++; if( aiCol==aiColEnd ) goto setoutputs_col_out; } if( *aiCol==iPrev ){ *aOut++ = (iPrev - iPrevOut) + 2; iPrevOut = iPrev; } } setoutputs_col_out: pIter->base.pData = pIter->poslist.p; pIter->base.nData = aOut - pIter->poslist.p; } } /* ** xSetOutputs callback used by detail=full when there is a column filter. */ static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){ Fts5Colset *pColset = pIter->pColset; pIter->base.iRowid = pSeg->iRowid; assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL ); assert( pColset ); if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ /* All data is stored on the current page. Populate the output ** variables to point into the body of the page object. */ const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset]; if( pColset->nCol==1 ){ pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]); pIter->base.pData = a; }else{ fts5BufferZero(&pIter->poslist); fts5IndexExtractColset(pColset, a, pSeg->nPos, &pIter->poslist); pIter->base.pData = pIter->poslist.p; pIter->base.nData = pIter->poslist.n; } }else{ /* The data is distributed over two or more pages. Copy it into the ** Fts5Iter.poslist buffer and then set the output pointer to point ** to this buffer. */ fts5BufferZero(&pIter->poslist); fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); pIter->base.pData = pIter->poslist.p; pIter->base.nData = pIter->poslist.n; } } static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ if( *pRc==SQLITE_OK ){ Fts5Config *pConfig = pIter->pIndex->pConfig; if( pConfig->eDetail==FTS5_DETAIL_NONE ){ pIter->xSetOutputs = fts5IterSetOutputs_None; } else if( pIter->pColset==0 || pIter->bFiltered ){ pIter->xSetOutputs = fts5IterSetOutputs_Nocolset; } else if( pConfig->eDetail==FTS5_DETAIL_FULL ){ pIter->xSetOutputs = fts5IterSetOutputs_Full; } else{ assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS ); if( pConfig->nCol<=100 ){ pIter->xSetOutputs = fts5IterSetOutputs_Col100; sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol); }else{ pIter->xSetOutputs = fts5IterSetOutputs_Col; } } } } /* ** Allocate a new Fts5Iter object. ** ** The new object will be used to iterate through data in structure pStruct. ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel ** is zero or greater, data from the first nSegment segments on level iLevel ** is merged. ** ** The iterator initially points to the first term/rowid entry in the ** iterated data. */ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ int flags, /* FTS5INDEX_QUERY_XXX flags */ Fts5Colset *pColset, /* Colset to filter on (or NULL) */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */ Fts5Iter **ppOut /* New object */ ){ int nSeg = 0; /* Number of segment-iters in use */ int iIter = 0; /* */ |
︙ | ︙ | |||
2871 2872 2873 2874 2875 2876 2877 | }else{ nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); } } *ppOut = pNew = fts5MultiIterAlloc(p, nSeg); if( pNew==0 ) return; pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); | | > > > | > > | | | | | | | | | | | | | | | | | | | | | | | | | > > > > > > | 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 | }else{ nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); } } *ppOut = pNew = fts5MultiIterAlloc(p, nSeg); if( pNew==0 ) return; pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY)); pNew->pStruct = pStruct; pNew->pColset = pColset; fts5StructureRef(pStruct); if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){ fts5IterSetOutputCb(&p->rc, pNew); } /* Initialize each of the component segment iterators. */ if( p->rc==SQLITE_OK ){ if( iLevel<0 ){ Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; if( p->pHash ){ /* Add a segment iterator for the current contents of the hash table. */ Fts5SegIter *pIter = &pNew->aSeg[iIter++]; fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter); } for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){ for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; Fts5SegIter *pIter = &pNew->aSeg[iIter++]; if( pTerm==0 ){ fts5SegIterInit(p, pSeg, pIter); }else{ fts5SegIterSeekInit(p, &buf, pTerm, nTerm, flags, pSeg, pIter); } } } }else{ pLvl = &pStruct->aLevel[iLevel]; for(iSeg=nSeg-1; iSeg>=0; iSeg--){ fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); } } assert( iIter==nSeg ); } /* If the above was successful, each component iterators now points ** to the first entry in its segment. In this case initialize the ** aFirst[] array. Or, if an error has occurred, free the iterator ** object and set the output variable to NULL. */ if( p->rc==SQLITE_OK ){ for(iIter=pNew->nSeg-1; iIter>0; iIter--){ int iEq; if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ Fts5SegIter *pSeg = &pNew->aSeg[iEq]; if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0); fts5MultiIterAdvanced(p, pNew, iEq, iIter); } } fts5MultiIterSetEof(pNew); fts5AssertMultiIterSetup(p, pNew); if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){ fts5MultiIterNext(p, pNew, 0, 0); }else if( pNew->base.bEof==0 ){ Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst]; pNew->xSetOutputs(pNew, pSeg); } }else{ fts5MultiIterFree(p, pNew); *ppOut = 0; } fts5BufferFree(&buf); } /* ** Create an Fts5Iter that iterates through the doclist provided ** as the second argument. */ static void fts5MultiIterNew2( |
︙ | ︙ | |||
3017 3018 3019 3020 3021 3022 3023 | ** entry that the iterator currently points to. */ static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; *pn = p->term.n; return p->term.p; } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 | ** entry that the iterator currently points to. */ static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; *pn = p->term.n; return p->term.p; } /* ** Allocate a new segment-id for the structure pStruct. The new segment ** id must be between 1 and 65335 inclusive, and must not be used by ** any currently existing segment. If a free segment id cannot be found, ** SQLITE_FULL is returned. ** |
︙ | ︙ | |||
3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 | int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; int bOldest; /* True if the output segment is the oldest */ int eDetail = p->pConfig->eDetail; assert( iLvl<pStruct->nLevel ); assert( pLvl->nMerge<=pLvl->nSeg ); memset(&writer, 0, sizeof(Fts5SegWriter)); memset(&term, 0, sizeof(Fts5Buffer)); if( pLvl->nMerge ){ | > | 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 | int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; int bOldest; /* True if the output segment is the oldest */ int eDetail = p->pConfig->eDetail; const int flags = FTS5INDEX_QUERY_NOOUTPUT; assert( iLvl<pStruct->nLevel ); assert( pLvl->nMerge<=pLvl->nSeg ); memset(&writer, 0, sizeof(Fts5SegWriter)); memset(&term, 0, sizeof(Fts5Buffer)); if( pLvl->nMerge ){ |
︙ | ︙ | |||
3724 3725 3726 3727 3728 3729 3730 | /* Read input from all segments in the input level */ nInput = pLvl->nSeg; } bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); assert( iLvl>=0 ); | | | 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 | /* Read input from all segments in the input level */ nInput = pLvl->nSeg; } bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); assert( iLvl>=0 ); for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; int nPos; /* position-list size field value */ int nTerm; const u8 *pTerm; |
︙ | ︙ | |||
4169 4170 4171 4172 4173 4174 4175 | fts5StructureWrite(p, pStruct); } fts5StructureRelease(pStruct); return fts5IndexReturn(p); } | | | < < < < < < < < | < < < < < < | < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | < < < < < < < < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | < < < < < < | < < | < | | < < < < < < < < < < < < < | | | < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 | fts5StructureWrite(p, pStruct); } fts5StructureRelease(pStruct); return fts5IndexReturn(p); } static void fts5AppendRowid( Fts5Index *p, i64 iDelta, Fts5Iter *pMulti, Fts5Buffer *pBuf ){ fts5BufferAppendVarint(&p->rc, pBuf, iDelta); } static void fts5AppendPoslist( Fts5Index *p, i64 iDelta, Fts5Iter *pMulti, Fts5Buffer *pBuf ){ int nData = pMulti->base.nData; assert( nData>0 ); if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nData+9+9) ){ fts5BufferSafeAppendVarint(pBuf, iDelta); fts5BufferSafeAppendVarint(pBuf, nData*2); fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData); } } static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; assert( pIter->aPoslist ); |
︙ | ︙ | |||
4670 4671 4672 4673 4674 4675 4676 | Fts5Iter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; const int nBuf = 32; void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*); | | | > > | > > | | | > > > > | | | < | 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 | Fts5Iter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; const int nBuf = 32; void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*); void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*); if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ xMerge = fts5MergeRowidLists; xAppend = fts5AppendRowid; }else{ xMerge = fts5MergePrefixLists; xAppend = fts5AppendPoslist; } aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); if( aBuf && pStruct ){ const int flags = FTS5INDEX_QUERY_SCAN | FTS5INDEX_QUERY_SKIPEMPTY | FTS5INDEX_QUERY_NOOUTPUT; int i; i64 iLastRowid = 0; Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ Fts5Data *pData; Fts5Buffer doclist; int bNewTerm = 1; memset(&doclist, 0, sizeof(doclist)); fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); fts5IterSetOutputCb(&p->rc, p1); for( /* no-op */ ; fts5MultiIterEof(p, p1)==0; fts5MultiIterNext2(p, p1, &bNewTerm) ){ Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; int nTerm = pSeg->term.n; const u8 *pTerm = pSeg->term.p; p1->xSetOutputs(p1, pSeg); assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); if( bNewTerm ){ if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break; } if( p1->base.nData==0 ) continue; if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ assert( i<nBuf ); if( aBuf[i].n==0 ){ fts5BufferSwap(&doclist, &aBuf[i]); fts5BufferZero(&doclist); }else{ xMerge(p, &doclist, &aBuf[i]); fts5BufferZero(&aBuf[i]); } } iLastRowid = 0; } xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist); iLastRowid = p1->base.iRowid; } for(i=0; i<nBuf; i++){ if( p->rc==SQLITE_OK ){ xMerge(p, &doclist, &aBuf[i]); } fts5BufferFree(&aBuf[i]); |
︙ | ︙ | |||
4950 4951 4952 4953 4954 4955 4956 | ); } } return rc; } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 | ); } } return rc; } /* ** Open a new iterator to iterate though all rowid that match the ** specified token or token prefix. */ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ |
︙ | ︙ | |||
5173 5174 5175 5176 5177 5178 5179 | } if( iIdx<=pConfig->nPrefix ){ /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); if( pStruct ){ | > | > < | | | < < | | > > | 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 | } if( iIdx<=pConfig->nPrefix ){ /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); if( pStruct ){ fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY, pColset, buf.p, nToken+1, -1, 0, &pRet ); fts5StructureRelease(pStruct); } }else{ /* Scan multiple terms in the main index */ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; buf.p[0] = FTS5_MAIN_PREFIX; fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet); fts5IterSetOutputCb(&p->rc, pRet); if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; if( p->rc==SQLITE_OK && pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); } } if( p->rc ){ sqlite3Fts5IterClose(&pRet->base); pRet = 0; fts5CloseReader(p); } *ppIter = &pRet->base; |
︙ | ︙ | |||
5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 | Fts5Structure *pStruct; /* Index structure */ #ifdef SQLITE_DEBUG /* Used by extra internal tests only run if NDEBUG is not defined */ u64 cksum3 = 0; /* Checksum based on contents of indexes */ Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ #endif /* Load the FTS index structure */ pStruct = fts5StructureRead(p); /* Check that the internal nodes of each segment match the leaves */ if( pStruct ){ int iLvl, iSeg; | > | 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 | Fts5Structure *pStruct; /* Index structure */ #ifdef SQLITE_DEBUG /* Used by extra internal tests only run if NDEBUG is not defined */ u64 cksum3 = 0; /* Checksum based on contents of indexes */ Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ #endif const int flags = FTS5INDEX_QUERY_NOOUTPUT; /* Load the FTS index structure */ pStruct = fts5StructureRead(p); /* Check that the internal nodes of each segment match the leaves */ if( pStruct ){ int iLvl, iSeg; |
︙ | ︙ | |||
5793 5794 5795 5796 5797 5798 5799 | ** variable cksum2) based on entries extracted from the full-text index ** while doing a linear scan of each individual index in turn. ** ** As each term visited by the linear scans, a separate query for the ** same term is performed. cksum3 is calculated based on the entries ** extracted by these queries. */ | | | 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 | ** variable cksum2) based on entries extracted from the full-text index ** while doing a linear scan of each individual index in turn. ** ** As each term visited by the linear scans, a separate query for the ** same term is performed. cksum3 is calculated based on the entries ** extracted by these queries. */ for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ int n; /* Size of term in bytes */ i64 iPos = 0; /* Position read from poslist */ int iOff = 0; /* Offset within poslist */ i64 iRowid = fts5MultiIterRowid(pIter); |
︙ | ︙ |