Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | If the fts4 option prefix=1 is specified, have the fts4 module maintain an index of prefixes as well as terms. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts3-prefix-search |
Files: | files | file ages | folders |
SHA1: |
b5bdc639898ee22eebedeb560810e94e |
User & Date: | dan 2011-05-24 18:49:45.786 |
Context
2011-05-25
| ||
18:34 | Change fts4 so that the prefix= parameter is passed a comma-separated list of integers. For each integer N, a separate index of all prefixes of length N bytes is created. (check-in: be59bf4940 user: dan tags: fts3-prefix-search) | |
2011-05-24
| ||
18:49 | If the fts4 option prefix=1 is specified, have the fts4 module maintain an index of prefixes as well as terms. (check-in: b5bdc63989 user: dan tags: fts3-prefix-search) | |
15:36 | Do not invoke the xRollbackTo or xRelease methods of a virtual table without having first invoked an appropriate xSavepoint method. Add assert() statements to FTS3/4 to verify that this is happening in all cases. (check-in: 651ef24249 user: drh tags: trunk) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
861 862 863 864 865 866 867 868 869 870 871 872 873 874 | int nString = 0; /* Bytes required to hold all column names */ int nCol = 0; /* Number of columns in the FTS table */ char *zCsr; /* Space for holding column names */ int nDb; /* Bytes required to hold database name */ int nName; /* Bytes required to hold table name */ int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */ int bNoDocsize = 0; /* True to omit %_docsize table */ const char **aCol; /* Array of column names */ sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */ char *zCompress = 0; char *zUncompress = 0; assert( strlen(argv[0])==4 ); | > | 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 | int nString = 0; /* Bytes required to hold all column names */ int nCol = 0; /* Number of columns in the FTS table */ char *zCsr; /* Space for holding column names */ int nDb; /* Bytes required to hold database name */ int nName; /* Bytes required to hold table name */ int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */ int bNoDocsize = 0; /* True to omit %_docsize table */ int bPrefix = 0; /* True to include a prefix-search index */ const char **aCol; /* Array of column names */ sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */ char *zCompress = 0; char *zUncompress = 0; assert( strlen(argv[0])==4 ); |
︙ | ︙ | |||
923 924 925 926 927 928 929 930 931 932 933 934 935 936 | } }else if( nKey==8 && 0==sqlite3_strnicmp(z, "compress", 8) ){ zCompress = zVal; zVal = 0; }else if( nKey==10 && 0==sqlite3_strnicmp(z, "uncompress", 10) ){ zUncompress = zVal; zVal = 0; }else{ *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z); rc = SQLITE_ERROR; } sqlite3_free(zVal); } | > > | 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 | } }else if( nKey==8 && 0==sqlite3_strnicmp(z, "compress", 8) ){ zCompress = zVal; zVal = 0; }else if( nKey==10 && 0==sqlite3_strnicmp(z, "uncompress", 10) ){ zUncompress = zVal; zVal = 0; }else if( nKey==6 && 0==sqlite3_strnicmp(z, "prefix", 6) ){ bPrefix = 1; }else{ *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z); rc = SQLITE_ERROR; } sqlite3_free(zVal); } |
︙ | ︙ | |||
975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 | p->pTokenizer = pTokenizer; p->nNodeSize = 1000; p->nMaxPendingData = FTS3_MAX_PENDING_DATA; p->bHasDocsize = (isFts4 && bNoDocsize==0); p->bHasStat = isFts4; TESTONLY( p->inTransaction = -1 ); TESTONLY( p->mxSavepoint = -1 ); fts3HashInit(&p->pendingTerms, FTS3_HASH_STRING, 1); /* Fill in the zName and zDb fields of the vtab structure. */ zCsr = (char *)&p->azColumn[nCol]; p->zName = zCsr; memcpy(zCsr, argv[2], nName); zCsr += nName; p->zDb = zCsr; | > > | 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 | p->pTokenizer = pTokenizer; p->nNodeSize = 1000; p->nMaxPendingData = FTS3_MAX_PENDING_DATA; p->bHasDocsize = (isFts4 && bNoDocsize==0); p->bHasStat = isFts4; TESTONLY( p->inTransaction = -1 ); TESTONLY( p->mxSavepoint = -1 ); p->bPrefix = bPrefix; fts3HashInit(&p->pendingTerms, FTS3_HASH_STRING, 1); fts3HashInit(&p->pendingPrefixes, FTS3_HASH_STRING, 1); /* Fill in the zName and zDb fields of the vtab structure. */ zCsr = (char *)&p->azColumn[nCol]; p->zName = zCsr; memcpy(zCsr, argv[2], nName); zCsr += nName; p->zDb = zCsr; |
︙ | ︙ | |||
2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 | if( !*ppOut ) return SQLITE_NOMEM; sqlite3Fts3PutVarint(*ppOut, docid); } return SQLITE_OK; } int sqlite3Fts3SegReaderCursor( Fts3Table *p, /* FTS3 table handle */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ int isScan, /* True to scan from zTerm to EOF */ Fts3SegReaderCursor *pCsr /* Cursor object to populate */ ){ int rc = SQLITE_OK; int rc2; int iAge = 0; sqlite3_stmt *pStmt = 0; | > > > > > > > > > > > > > > > > > > > > > > < | > > > | > | > | > > > < > | > > > > < < < < < | | | < > | | > > > | > < < < < < < < < < < < < | | < < > > > | > > > > | > | 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 | if( !*ppOut ) return SQLITE_NOMEM; sqlite3Fts3PutVarint(*ppOut, docid); } return SQLITE_OK; } static int fts3SegReaderCursorAppend( Fts3SegReaderCursor *pCsr, Fts3SegReader *pNew ){ if( (pCsr->nSegment%16)==0 ){ Fts3SegReader **apNew; int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); if( !apNew ){ sqlite3Fts3SegReaderFree(pNew); return SQLITE_NOMEM; } 
pCsr->apSegment = apNew; } pCsr->apSegment[pCsr->nSegment++] = pNew; return SQLITE_OK; } /* ** Set up a cursor object for iterating through the full-text index or ** a single level therein. */ int sqlite3Fts3SegReaderCursor( Fts3Table *p, /* FTS3 table handle */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ int isScan, /* True to scan from zTerm to EOF */ Fts3SegReaderCursor *pCsr /* Cursor object to populate */ ){ int rc = SQLITE_OK; int rc2; int iAge = 0; sqlite3_stmt *pStmt = 0; assert( iLevel==FTS3_SEGCURSOR_ALL_TERM || iLevel==FTS3_SEGCURSOR_PENDING || iLevel==FTS3_SEGCURSOR_PENDING_PREFIX || iLevel==FTS3_SEGCURSOR_ALL_PREFIX || iLevel>=0 ); assert( 0>FTS3_SEGCURSOR_ALL_TERM && 0>FTS3_SEGCURSOR_PENDING && 0>FTS3_SEGCURSOR_PENDING_PREFIX && 0>FTS3_SEGCURSOR_ALL_PREFIX ); assert( iLevel==FTS3_SEGCURSOR_ALL_TERM || iLevel==FTS3_SEGCURSOR_ALL_PREFIX || (zTerm==0 && isPrefix==1) ); assert( isPrefix==0 || isScan==0 ); memset(pCsr, 0, sizeof(Fts3SegReaderCursor)); /* "isScan" is only set to true by the ft4aux module, not an ordinary ** full-text table. The pendingTerms and pendingPrefixes tables must be ** empty in this case. */ assert( isScan==0 || fts3HashCount(&p->pendingTerms)==0 ); assert( isScan==0 || fts3HashCount(&p->pendingPrefixes)==0 ); /* If iLevel is less than 0, include a seg-reader for the pending-terms. 
*/ if( iLevel<0 && isScan==0 ){ int bPrefix = ( iLevel==FTS3_SEGCURSOR_PENDING_PREFIX || iLevel==FTS3_SEGCURSOR_ALL_PREFIX ); Fts3SegReader *pPending = 0; rc = sqlite3Fts3SegReaderPending(p,zTerm,nTerm,isPrefix,bPrefix,&pPending); if( rc==SQLITE_OK && pPending ){ rc = fts3SegReaderCursorAppend(pCsr, pPending); } } if( iLevel!=FTS3_SEGCURSOR_PENDING && iLevel!=FTS3_SEGCURSOR_PENDING_PREFIX ){ if( rc==SQLITE_OK ){ rc = sqlite3Fts3AllSegdirs(p, iLevel, &pStmt); } while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ Fts3SegReader *pSeg = 0; /* Read the values returned by the SELECT into local variables. */ sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1); sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2); sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3); int nRoot = sqlite3_column_bytes(pStmt, 4); char const *zRoot = sqlite3_column_blob(pStmt, 4); /* If zTerm is not NULL, and this segment is not stored entirely on its ** root node, the range of leaves scanned can be reduced. Do this. */ if( iStartBlock && zTerm ){ sqlite3_int64 *pi = (isPrefix ? 
&iLeavesEndBlock : 0); rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi); if( rc!=SQLITE_OK ) goto finished; if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock; } rc = sqlite3Fts3SegReaderNew(iAge, iStartBlock, iLeavesEndBlock, iEndBlock, zRoot, nRoot, &pSeg ); if( rc!=SQLITE_OK ) goto finished; rc = fts3SegReaderCursorAppend(pCsr, pSeg); iAge++; } } finished: rc2 = sqlite3_reset(pStmt); if( rc==SQLITE_DONE ) rc = rc2; return rc; } static int fts3TermSegReaderCursor( Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ Fts3SegReaderCursor **ppSegcsr /* OUT: Allocated seg-reader cursor */ ){ Fts3SegReaderCursor *pSegcsr; /* Object to allocate and return */ int rc = SQLITE_NOMEM; /* Return code */ pSegcsr = sqlite3_malloc(sizeof(Fts3SegReaderCursor)); if( pSegcsr ){ int i; int nCost = 0; Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; if( isPrefix && p->bPrefix && nTerm<=FTS3_MAX_PREFIX ){ rc = sqlite3Fts3SegReaderCursor( p, FTS3_SEGCURSOR_ALL_PREFIX, zTerm, nTerm, 0, 0, pSegcsr); }else{ rc = sqlite3Fts3SegReaderCursor( p, FTS3_SEGCURSOR_ALL_TERM, zTerm, nTerm, isPrefix, 0, pSegcsr); } for(i=0; rc==SQLITE_OK && i<pSegcsr->nSegment; i++){ rc = sqlite3Fts3SegReaderCost(pCsr, pSegcsr->apSegment[i], &nCost); } pSegcsr->nCost = nCost; } |
︙ | ︙ | |||
3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 | */ static int fts3RollbackMethod(sqlite3_vtab *pVtab){ Fts3Table *p = (Fts3Table*)pVtab; sqlite3Fts3PendingTermsClear(p); assert( p->inTransaction!=0 ); TESTONLY( p->inTransaction = 0 ); TESTONLY( p->mxSavepoint = -1; ); return SQLITE_OK; } /* ** Load the doclist associated with expression pExpr to pExpr->aDoclist. ** The loaded doclist contains positions as well as the document ids. ** This is used by the matchinfo(), snippet() and offsets() auxillary | > | 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 | */ static int fts3RollbackMethod(sqlite3_vtab *pVtab){ Fts3Table *p = (Fts3Table*)pVtab; sqlite3Fts3PendingTermsClear(p); assert( p->inTransaction!=0 ); TESTONLY( p->inTransaction = 0 ); TESTONLY( p->mxSavepoint = -1; ); sqlite3Fts3PendingPrefixesClear((Fts3Table *)pVtab); return SQLITE_OK; } /* ** Load the doclist associated with expression pExpr to pExpr->aDoclist. ** The loaded doclist contains positions as well as the document ids. ** This is used by the matchinfo(), snippet() and offsets() auxillary |
︙ | ︙ | |||
3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 | } static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts3Table *p = (Fts3Table*)pVtab; assert( p->inTransaction ); assert( p->mxSavepoint >= iSavepoint ); TESTONLY( p->mxSavepoint = iSavepoint ); sqlite3Fts3PendingTermsClear(p); return SQLITE_OK; } static const sqlite3_module fts3Module = { /* iVersion */ 2, /* xCreate */ fts3CreateMethod, /* xConnect */ fts3ConnectMethod, | > | 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 | } static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts3Table *p = (Fts3Table*)pVtab; assert( p->inTransaction ); assert( p->mxSavepoint >= iSavepoint ); TESTONLY( p->mxSavepoint = iSavepoint ); sqlite3Fts3PendingTermsClear(p); sqlite3Fts3PendingPrefixesClear((Fts3Table *)pVtab); return SQLITE_OK; } static const sqlite3_module fts3Module = { /* iVersion */ 2, /* xCreate */ fts3CreateMethod, /* xConnect */ fts3ConnectMethod, |
︙ | ︙ |
Changes to ext/fts3/fts3Int.h.
︙ | ︙ | |||
19 20 21 22 23 24 25 26 27 28 29 30 31 32 | # define NDEBUG 1 #endif #include "sqlite3.h" #include "fts3_tokenizer.h" #include "fts3_hash.h" /* ** This constant controls how often segments are merged. Once there are ** FTS3_MERGE_COUNT segments of level N, they are merged into a single ** segment of level N+1. */ #define FTS3_MERGE_COUNT 16 | > > | 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | # define NDEBUG 1 #endif #include "sqlite3.h" #include "fts3_tokenizer.h" #include "fts3_hash.h" #define FTS3_MAX_PREFIX 8 /* ** This constant controls how often segments are merged. Once there are ** FTS3_MERGE_COUNT segments of level N, they are merged into a single ** segment of level N+1. */ #define FTS3_MERGE_COUNT 16 |
︙ | ︙ | |||
49 50 51 52 53 54 55 56 57 58 59 60 61 62 | /* ** Maximum length of a varint encoded integer. The varint format is different ** from that used by SQLite, so the maximum length is 10, not 9. */ #define FTS3_VARINT_MAX 10 /* ** The testcase() macro is only used by the amalgamation. If undefined, ** make it a no-op. */ #ifndef testcase # define testcase(X) #endif | > > > > > > > > > > > > > > | 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | /* ** Maximum length of a varint encoded integer. The varint format is different ** from that used by SQLite, so the maximum length is 10, not 9. */ #define FTS3_VARINT_MAX 10 /* ** FTS4 virtual tables may maintain two separate indexes. One that indexes ** all document terms (the same index FTS3 tables maintain) and another used ** for prefixes. B+-trees that are part of the prefix index have values for ** the %_segdir.level column that are equal to or greater than the following ** value. ** ** It is considered impossible for the regular index to use levels this large. ** In theory it could, but that would require that at least 2^1024 separate ** write operations to be made within the lifetime of the database. */ #define FTS3_SEGDIR_PREFIXLEVEL 1024 #define FTS3_SEGDIR_PREFIXLEVEL_STR "1024" /* ** The testcase() macro is only used by the amalgamation. If undefined, ** make it a no-op. */ #ifndef testcase # define testcase(X) #endif |
︙ | ︙ | |||
144 145 146 147 148 149 150 | int nColumn; /* number of named columns in virtual table */ char **azColumn; /* column names. malloced */ sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ /* Precompiled statements used by the implementation. Each of these ** statements is run and reset within a single virtual table API call. */ | | > > | 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | int nColumn; /* number of named columns in virtual table */ char **azColumn; /* column names. malloced */ sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ /* Precompiled statements used by the implementation. Each of these ** statements is run and reset within a single virtual table API call. */ sqlite3_stmt *aStmt[27]; char *zReadExprlist; char *zWriteExprlist; int nNodeSize; /* Soft limit for node size */ u8 bHasStat; /* True if %_stat table exists */ u8 bHasDocsize; /* True if %_docsize table exists */ u8 bPrefix; /* True if there is a prefix index */ int nPgsz; /* Page size for host database */ char *zSegmentsTbl; /* Name of %_segments table */ sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ /* The following hash table is used to buffer pending index updates during ** transactions. Variable nPendingData estimates the memory size of the ** pending data, including hash table overhead, but not malloc overhead. ** When nPendingData exceeds nMaxPendingData, the buffer is flushed ** automatically. Variable iPrevDocid is the docid of the most recently ** inserted record. */ int nMaxPendingData; int nPendingData; sqlite_int64 iPrevDocid; Fts3Hash pendingTerms; Fts3Hash pendingPrefixes; #if defined(SQLITE_DEBUG) /* State variables used for validating that the transaction control ** methods of the virtual table are called at appropriate times. 
These ** values do not contribute to the FTS computation; they are used for ** verifying the SQLite core. */ |
︙ | ︙ | |||
313 314 315 316 317 318 319 | /* fts3_write.c */ int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*); int sqlite3Fts3PendingTermsFlush(Fts3Table *); void sqlite3Fts3PendingTermsClear(Fts3Table *); int sqlite3Fts3Optimize(Fts3Table *); int sqlite3Fts3SegReaderNew(int, sqlite3_int64, sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); | | > > | | > > | 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 | /* fts3_write.c */ int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*); int sqlite3Fts3PendingTermsFlush(Fts3Table *); void sqlite3Fts3PendingTermsClear(Fts3Table *); int sqlite3Fts3Optimize(Fts3Table *); int sqlite3Fts3SegReaderNew(int, sqlite3_int64, sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); int sqlite3Fts3SegReaderPending( Fts3Table*,const char*,int,int,int,Fts3SegReader**); void sqlite3Fts3SegReaderFree(Fts3SegReader *); int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *); int sqlite3Fts3AllSegdirs(Fts3Table*, int, sqlite3_stmt **); int sqlite3Fts3ReadLock(Fts3Table *); int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*); int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **); void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *); void sqlite3Fts3SegmentsClose(Fts3Table *); /* Special values interpreted by sqlite3SegReaderCursor() */ #define FTS3_SEGCURSOR_PENDING -1 #define FTS3_SEGCURSOR_PENDING_PREFIX -2 #define FTS3_SEGCURSOR_ALL_PREFIX -3 #define FTS3_SEGCURSOR_ALL_TERM -4 int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3SegReaderCursor*, 
Fts3SegFilter*); int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3SegReaderCursor *); void sqlite3Fts3SegReaderFinish(Fts3SegReaderCursor *); int sqlite3Fts3SegReaderCursor( Fts3Table *, int, const char *, int, int, int, Fts3SegReaderCursor *); |
︙ | ︙ |
Changes to ext/fts3/fts3_aux.c.
︙ | ︙ | |||
371 372 373 374 375 376 377 | if( idxNum&FTS4AUX_LE_CONSTRAINT ){ int iIdx = (idxNum&FTS4AUX_GE_CONSTRAINT) ? 1 : 0; pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iIdx])); pCsr->nStop = sqlite3_value_bytes(apVal[iIdx]); if( pCsr->zStop==0 ) return SQLITE_NOMEM; } | | | 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 | if( idxNum&FTS4AUX_LE_CONSTRAINT ){ int iIdx = (idxNum&FTS4AUX_GE_CONSTRAINT) ? 1 : 0; pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iIdx])); pCsr->nStop = sqlite3_value_bytes(apVal[iIdx]); if( pCsr->zStop==0 ) return SQLITE_NOMEM; } rc = sqlite3Fts3SegReaderCursor(pFts3, FTS3_SEGCURSOR_ALL_TERM, pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr ); if( rc==SQLITE_OK ){ rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter); } if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor); |
︙ | ︙ |
Changes to ext/fts3/fts3_term.c.
︙ | ︙ | |||
23 24 25 26 27 28 29 30 31 32 33 34 35 36 | #include <assert.h> typedef struct Fts3termTable Fts3termTable; typedef struct Fts3termCursor Fts3termCursor; struct Fts3termTable { sqlite3_vtab base; /* Base class used by SQLite core */ Fts3Table *pFts3Tab; }; struct Fts3termCursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ Fts3SegReaderCursor csr; /* Must be right after "base" */ Fts3SegFilter filter; | > | 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | #include <assert.h> typedef struct Fts3termTable Fts3termTable; typedef struct Fts3termCursor Fts3termCursor; struct Fts3termTable { sqlite3_vtab base; /* Base class used by SQLite core */ int bPrefix; /* True for an fts4prefix table */ Fts3Table *pFts3Tab; }; struct Fts3termCursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ Fts3SegReaderCursor csr; /* Must be right after "base" */ Fts3SegFilter filter; |
︙ | ︙ | |||
52 53 54 55 56 57 58 | /* ** This function does all the work for both the xConnect and xCreate methods. ** These tables have no persistent representation of their own, so xConnect ** and xCreate are identical operations. */ static int fts3termConnectMethod( sqlite3 *db, /* Database connection */ | | < < | 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | /* ** This function does all the work for both the xConnect and xCreate methods. ** These tables have no persistent representation of their own, so xConnect ** and xCreate are identical operations. */ static int fts3termConnectMethod( sqlite3 *db, /* Database connection */ void *pCtx, /* Non-zero for an fts4prefix table */ int argc, /* Number of elements in argv array */ const char * const *argv, /* xCreate/xConnect argument array */ sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ char **pzErr /* OUT: sqlite3_malloc'd error message */ ){ char const *zDb; /* Name of database (e.g. "main") */ char const *zFts3; /* Name of fts3 table */ int nDb; /* Result of strlen(zDb) */ int nFts3; /* Result of strlen(zFts3) */ int nByte; /* Bytes of space to allocate here */ int rc; /* value returned by declare_vtab() */ Fts3termTable *p; /* Virtual table object to return */ /* The user should specify a single argument - the name of an fts3 table. */ if( argc!=4 ){ *pzErr = sqlite3_mprintf( "wrong number of arguments to fts4term constructor" ); return SQLITE_ERROR; } |
︙ | ︙ | |||
93 94 95 96 97 98 99 100 101 102 103 104 105 106 | if( !p ) return SQLITE_NOMEM; memset(p, 0, nByte); p->pFts3Tab = (Fts3Table *)&p[1]; p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1]; p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1]; p->pFts3Tab->db = db; memcpy((char *)p->pFts3Tab->zDb, zDb, nDb); memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3); sqlite3Fts3Dequote((char *)p->pFts3Tab->zName); *ppVtab = (sqlite3_vtab *)p; return SQLITE_OK; | > | 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | if( !p ) return SQLITE_NOMEM; memset(p, 0, nByte); p->pFts3Tab = (Fts3Table *)&p[1]; p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1]; p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1]; p->pFts3Tab->db = db; p->bPrefix = (int)pCtx; memcpy((char *)p->pFts3Tab->zDb, zDb, nDb); memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3); sqlite3Fts3Dequote((char *)p->pFts3Tab->zName); *ppVtab = (sqlite3_vtab *)p; return SQLITE_OK; |
︙ | ︙ | |||
240 241 242 243 244 245 246 | sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ int idxNum, /* Strategy index */ const char *idxStr, /* Unused */ int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ Fts3termCursor *pCsr = (Fts3termCursor *)pCursor; | > | | > | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 | sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ int idxNum, /* Strategy index */ const char *idxStr, /* Unused */ int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ Fts3termCursor *pCsr = (Fts3termCursor *)pCursor; Fts3termTable *p = (Fts3termTable *)pCursor->pVtab; Fts3Table *pFts3 = p->pFts3Tab; int rc; UNUSED_PARAMETER(nVal); UNUSED_PARAMETER(idxNum); UNUSED_PARAMETER(idxStr); UNUSED_PARAMETER(apVal); assert( idxStr==0 && idxNum==0 ); /* In case this cursor is being reused, close and zero it. */ testcase(pCsr->filter.zTerm); sqlite3Fts3SegReaderFinish(&pCsr->csr); memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr); pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; pCsr->filter.flags |= FTS3_SEGMENT_SCAN; rc = sqlite3Fts3SegReaderCursor(pFts3, p->bPrefix ? FTS3_SEGCURSOR_ALL_PREFIX : FTS3_SEGCURSOR_ALL_TERM, pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr ); if( rc==SQLITE_OK ){ rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter); } if( rc==SQLITE_OK ){ rc = fts3termNextMethod(pCursor); |
︙ | ︙ | |||
349 350 351 352 353 354 355 356 357 358 359 360 | 0, /* xRollback */ 0, /* xFindFunction */ 0 /* xRename */ }; int rc; /* Return code */ rc = sqlite3_create_module(db, "fts4term", &fts3term_module, 0); return rc; } #endif #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ | > > > | 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 | 0, /* xRollback */ 0, /* xFindFunction */ 0 /* xRename */ }; int rc; /* Return code */ rc = sqlite3_create_module(db, "fts4term", &fts3term_module, 0); if( rc==SQLITE_OK ){ rc = sqlite3_create_module(db, "fts4prefix", &fts3term_module, (void*)1); } return rc; } #endif #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
141 142 143 144 145 146 147 148 149 150 151 152 153 154 | ** the interior part of the segment b+-tree structures (everything except ** the leaf nodes). These functions and type are only ever used by code ** within the fts3SegWriterXXX() family of functions described above. ** ** fts3NodeAddTerm() ** fts3NodeWrite() ** fts3NodeFree() */ struct SegmentNode { SegmentNode *pParent; /* Parent node (or NULL for root node) */ SegmentNode *pRight; /* Pointer to right-sibling */ SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */ int nEntry; /* Number of terms written to node so far */ char *zTerm; /* Pointer to previous term buffer */ | > > > > > > > > | 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | ** the interior part of the segment b+-tree structures (everything except ** the leaf nodes). These functions and type are only ever used by code ** within the fts3SegWriterXXX() family of functions described above. ** ** fts3NodeAddTerm() ** fts3NodeWrite() ** fts3NodeFree() ** ** When a b+tree is written to the database (either as a result of a merge ** or the pending-terms table being flushed), leaves are written into the ** database file as soon as they are completely populated. The interior of ** the tree is assembled in memory and written out only once all leaves have ** been populated and stored. This is Ok, as the b+-tree fanout is usually ** very large, meaning that the interior of the tree consumes relatively ** little memory. */ struct SegmentNode { SegmentNode *pParent; /* Parent node (or NULL for root node) */ SegmentNode *pRight; /* Pointer to right-sibling */ SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */ int nEntry; /* Number of terms written to node so far */ char *zTerm; /* Pointer to previous term buffer */ |
︙ | ︙ | |||
173 174 175 176 177 178 179 | #define SQL_NEXT_SEGMENT_INDEX 8 #define SQL_INSERT_SEGMENTS 9 #define SQL_NEXT_SEGMENTS_ID 10 #define SQL_INSERT_SEGDIR 11 #define SQL_SELECT_LEVEL 12 #define SQL_SELECT_ALL_LEVEL 13 #define SQL_SELECT_LEVEL_COUNT 14 | | > > > > | 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | #define SQL_NEXT_SEGMENT_INDEX 8 #define SQL_INSERT_SEGMENTS 9 #define SQL_NEXT_SEGMENTS_ID 10 #define SQL_INSERT_SEGDIR 11 #define SQL_SELECT_LEVEL 12 #define SQL_SELECT_ALL_LEVEL 13 #define SQL_SELECT_LEVEL_COUNT 14 #define SQL_SELECT_SEGDIR_MAX_LEVEL 15 #define SQL_DELETE_SEGDIR_BY_LEVEL 16 #define SQL_DELETE_SEGMENTS_RANGE 17 #define SQL_CONTENT_INSERT 18 #define SQL_DELETE_DOCSIZE 19 #define SQL_REPLACE_DOCSIZE 20 #define SQL_SELECT_DOCSIZE 21 #define SQL_SELECT_DOCTOTAL 22 #define SQL_REPLACE_DOCTOTAL 23 #define SQL_SELECT_ALL_PREFIX_LEVEL 24 #define SQL_DELETE_ALL_TERMS_SEGDIR 25 #define SQL_DELETE_ALL_PREFIX_SEGDIR 26 /* ** This function is used to obtain an SQLite prepared statement handle ** for the statement identified by the second argument. If successful, ** *pp is set to the requested statement handle and SQLITE_OK returned. ** Otherwise, an SQLite error code is returned and *pp is set to 0. ** |
︙ | ︙ | |||
218 219 220 221 222 223 224 | /* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)", /* 11 */ "INSERT INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)", /* Return segments in order from oldest to newest.*/ /* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root " "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC", /* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root " | > | | > > > > > > | 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 | /* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)", /* 11 */ "INSERT INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)", /* Return segments in order from oldest to newest.*/ /* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root " "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC", /* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root " "FROM %Q.'%q_segdir' WHERE level < " FTS3_SEGDIR_PREFIXLEVEL_STR " ORDER BY level DESC, idx ASC", /* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?", /* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level < (?+1)*" FTS3_SEGDIR_PREFIXLEVEL_STR, /* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?", /* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? 
AND ?", /* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", /* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?", /* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", /* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?", /* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=0", /* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(0,?)", /* 24 */ "SELECT idx, start_block, leaves_end_block, end_block, root " "FROM %Q.'%q_segdir' WHERE level >= " FTS3_SEGDIR_PREFIXLEVEL_STR " ORDER BY level DESC, idx ASC", /* 25 */ "DELETE FROM %Q.'%q_segdir' WHERE level<" FTS3_SEGDIR_PREFIXLEVEL_STR, /* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level>=" FTS3_SEGDIR_PREFIXLEVEL_STR, }; int rc = SQLITE_OK; sqlite3_stmt *pStmt; assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); assert( eStmt<SizeofArray(azSql) && eStmt>=0 ); |
︙ | ︙ | |||
389 390 391 392 393 394 395 | ** 2: leaves_end_block ** 3: end_block ** 4: root */ int sqlite3Fts3AllSegdirs(Fts3Table *p, int iLevel, sqlite3_stmt **ppStmt){ int rc; sqlite3_stmt *pStmt = 0; | | > > > | 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 | ** 2: leaves_end_block ** 3: end_block ** 4: root */ int sqlite3Fts3AllSegdirs(Fts3Table *p, int iLevel, sqlite3_stmt **ppStmt){ int rc; sqlite3_stmt *pStmt = 0; if( iLevel==FTS3_SEGCURSOR_ALL_PREFIX ){ rc = fts3SqlStmt(p, SQL_SELECT_ALL_PREFIX_LEVEL, &pStmt, 0); }else if( iLevel==FTS3_SEGCURSOR_ALL_TERM ){ rc = fts3SqlStmt(p, SQL_SELECT_ALL_LEVEL, &pStmt, 0); }else{ assert( iLevel>=0 ); rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); if( rc==SQLITE_OK ) sqlite3_bind_int(pStmt, 1, iLevel); } *ppStmt = pStmt; return rc; } |
︙ | ︙ | |||
507 508 509 510 511 512 513 514 515 516 517 518 519 520 | *pRc = rc; if( p!=*pp ){ *pp = p; return 1; } return 0; } /* ** Tokenize the nul-terminated string zText and add all tokens to the ** pending-terms hash-table. The docid used is that currently stored in ** p->iPrevDocid, and the column is specified by argument iCol. ** ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 | *pRc = rc; if( p!=*pp ){ *pp = p; return 1; } return 0; } static int fts3PendingTermsAddOne( Fts3Table *p, int iCol, int iPos, Fts3Hash *pHash, const char *zToken, int nToken ){ PendingList *pList; int rc = SQLITE_OK; pList = (PendingList *)fts3HashFind(pHash, zToken, nToken); if( pList ){ p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem)); } if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){ if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){ /* Malloc failed while inserting the new entry. This can only ** happen if there was no previous entry for this token. */ assert( 0==fts3HashFind(pHash, zToken, nToken) ); sqlite3_free(pList); rc = SQLITE_NOMEM; } } if( rc==SQLITE_OK ){ p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem)); } return rc; } /* ** Tokenize the nul-terminated string zText and add all tokens to the ** pending-terms hash-table. The docid used is that currently stored in ** p->iPrevDocid, and the column is specified by argument iCol. ** ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. |
︙ | ︙ | |||
556 557 558 559 560 561 562 | } pCsr->pTokenizer = pTokenizer; xNext = pModule->xNext; while( SQLITE_OK==rc && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) ){ | < < | | | < < < < < < < < | > < < < | 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 | } pCsr->pTokenizer = pTokenizer; xNext = pModule->xNext; while( SQLITE_OK==rc && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) ){ if( iPos>=nWord ) nWord = iPos+1; /* Positions cannot be negative; we use -1 as a terminator internally. ** Tokens must have a non-zero length. */ if( iPos<0 || !zToken || nToken<=0 ){ rc = SQLITE_ERROR; break; } rc = fts3PendingTermsAddOne(p,iCol,iPos,&p->pendingTerms,zToken,nToken); if( p->bPrefix ){ int n = (nToken > FTS3_MAX_PREFIX ? FTS3_MAX_PREFIX : nToken); for(; n>0 && rc==SQLITE_OK; n--){ rc = fts3PendingTermsAddOne(p,iCol,iPos,&p->pendingPrefixes,zToken,n); } } } pModule->xClose(pCsr); *pnWord = nWord; return (rc==SQLITE_DONE ? SQLITE_OK : rc); } |
︙ | ︙ | |||
623 624 625 626 627 628 629 630 631 632 633 634 635 636 | Fts3HashElem *pElem; for(pElem=fts3HashFirst(&p->pendingTerms); pElem; pElem=fts3HashNext(pElem)){ sqlite3_free(fts3HashData(pElem)); } fts3HashClear(&p->pendingTerms); p->nPendingData = 0; } /* ** This function is called by the xUpdate() method as part of an INSERT ** operation. It adds entries for each term in the new record to the ** pendingTerms hash table. ** ** Argument apVal is the same as the similarly named argument passed to | > > > > > > > > > > > | 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 | Fts3HashElem *pElem; for(pElem=fts3HashFirst(&p->pendingTerms); pElem; pElem=fts3HashNext(pElem)){ sqlite3_free(fts3HashData(pElem)); } fts3HashClear(&p->pendingTerms); p->nPendingData = 0; } /* ** Discard the contents of the pending-prefixes hash table. */ void sqlite3Fts3PendingPrefixesClear(Fts3Table *p){ Fts3HashElem *pElem; for(pElem=fts3HashFirst(&p->pendingPrefixes); pElem; pElem=fts3HashNext(pElem)){ sqlite3_free(fts3HashData(pElem)); } fts3HashClear(&p->pendingPrefixes); } /* ** This function is called by the xUpdate() method as part of an INSERT ** operation. It adds entries for each term in the new record to the ** pendingTerms hash table. ** ** Argument apVal is the same as the similarly named argument passed to |
︙ | ︙ | |||
721 722 723 724 725 726 727 728 729 730 731 732 733 734 | ** pending terms. */ static int fts3DeleteAll(Fts3Table *p){ int rc = SQLITE_OK; /* Return code */ /* Discard the contents of the pending-terms hash table. */ sqlite3Fts3PendingTermsClear(p); /* Delete everything from the %_content, %_segments and %_segdir tables. */ fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0); fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0); fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); if( p->bHasDocsize ){ fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0); | > | 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 | ** pending terms. */ static int fts3DeleteAll(Fts3Table *p){ int rc = SQLITE_OK; /* Return code */ /* Discard the contents of the pending-terms hash table. */ sqlite3Fts3PendingTermsClear(p); sqlite3Fts3PendingPrefixesClear(p); /* Delete everything from the %_content, %_segments and %_segdir tables. */ fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0); fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0); fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); if( p->bHasDocsize ){ fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0); |
︙ | ︙ | |||
1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 | } return c; } /* ** This function is used to allocate an Fts3SegReader that iterates through ** a subset of the terms stored in the Fts3Table.pendingTerms array. */ int sqlite3Fts3SegReaderPending( Fts3Table *p, /* Virtual table handle */ const char *zTerm, /* Term to search for */ int nTerm, /* Size of buffer zTerm */ | > > > > > > > > > > > > > > > | > | > > > | > > > | | 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 | } return c; } /* ** This function is used to allocate an Fts3SegReader that iterates through ** a subset of the terms stored in the Fts3Table.pendingTerms array. ** ** If the isPrefixIter parameter is zero, then the returned SegReader iterates ** through each term in the pending-terms table. Or, if isPrefixIter is ** non-zero, it iterates through each term and its prefixes. 
For example, if ** the pending terms hash table contains the terms "sqlite", "mysql" and ** "firebird", then the iterator visits the following 'terms' (in the order ** shown): ** ** f fi fir fire fireb firebi firebir firebird ** m my mys mysq mysql ** s sq sql sqli sqlit sqlite ** ** Whereas if isPrefixIter is zero, the terms visited are: ** ** firebird mysql sqlite */ int sqlite3Fts3SegReaderPending( Fts3Table *p, /* Virtual table handle */ const char *zTerm, /* Term to search for */ int nTerm, /* Size of buffer zTerm */ int isMultiTerm, /* True to visit multiple terms */ int isPrefixIter, /* 0->pendingTerms, 1->pendingPrefixes */ Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */ ){ Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */ Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */ int nElem = 0; /* Size of array at aElem */ int rc = SQLITE_OK; /* Return Code */ if( isMultiTerm ){ int nAlloc = 0; /* Size of allocated array at aElem */ Fts3HashElem *pE = 0; /* Iterator variable */ Fts3Hash *pHash; pHash = (isPrefixIter ? &p->pendingPrefixes : &p->pendingTerms); for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ char *zKey = (char *)fts3HashKey(pE); int nKey = fts3HashKeysize(pE); if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){ if( nElem==nAlloc ){ Fts3HashElem **aElem2; nAlloc += 16; aElem2 = (Fts3HashElem **)sqlite3_realloc( aElem, nAlloc*sizeof(Fts3HashElem *) ); if( !aElem2 ){ rc = SQLITE_NOMEM; nElem = 0; break; } aElem = aElem2; } aElem[nElem++] = pE; } } /* If more than one term matches the prefix, sort the Fts3HashElem ** objects in term order using qsort(). This uses the same comparison ** callback as is used when flushing terms to disk. */ if( nElem>1 ){ qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm); } }else{ /* The query is a simple term lookup that matches at most one term in ** the index. All that is required is a straight hash-lookup. 
*/ Fts3HashElem *pE = fts3HashFindElem(&p->pendingTerms, zTerm, nTerm); if( pE ){ aElem = &pE; nElem = 1; } } if( nElem>0 ){ int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); pReader = (Fts3SegReader *)sqlite3_malloc(nByte); if( !pReader ){ rc = SQLITE_NOMEM; }else{ memset(pReader, 0, nByte); pReader->iIdx = 0x7FFFFFFF; pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); } } if( isMultiTerm ){ sqlite3_free(aElem); } *ppReader = pReader; return rc; } /* |
︙ | ︙ | |||
1910 1911 1912 1913 1914 1915 1916 | } rc = sqlite3_reset(pStmt); } return rc; } /* | < | > > > | | > > > > > > > | > < | | 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 | } rc = sqlite3_reset(pStmt); } return rc; } /* ** Set *pnMax to the largest segment level in the database for either the ** terms index (if parameter bPrefixIndex is 0) or the prefixes index (if ** parameter bPrefixIndex is 1). ** ** Segment levels are stored in the 'level' column of the %_segdir table. ** ** Return SQLITE_OK if successful, or an SQLite error code if not. */ static int fts3SegmentMaxLevel(Fts3Table *p, int bPrefixIndex, int *pnMax){ sqlite3_stmt *pStmt; int rc; assert( bPrefixIndex==0 || bPrefixIndex==1 ); /* Set pStmt to the compiled version of: ** ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level < (?+1) * 1024 ** ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). */ rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); if( rc!=SQLITE_OK ) return rc; sqlite3_bind_int(pStmt, 1, bPrefixIndex); if( SQLITE_ROW==sqlite3_step(pStmt) ){ *pnMax = sqlite3_column_int(pStmt, 0); } return sqlite3_reset(pStmt); } /* ** This function is used after merging multiple segments into a single large ** segment to delete the old, now redundant, segment b-trees. Specifically, |
︙ | ︙ | |||
1967 1968 1969 1970 1971 1972 1973 | rc = sqlite3_reset(pDelete); } } if( rc!=SQLITE_OK ){ return rc; } | > > > > > > | | > > > > < | | 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 | rc = sqlite3_reset(pDelete); } } if( rc!=SQLITE_OK ){ return rc; } assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL_TERM || iLevel==FTS3_SEGCURSOR_ALL_PREFIX || iLevel==FTS3_SEGCURSOR_PENDING || iLevel==FTS3_SEGCURSOR_PENDING_PREFIX ); if( iLevel==FTS3_SEGCURSOR_ALL_TERM ){ fts3SqlExec(&rc, p, SQL_DELETE_ALL_TERMS_SEGDIR, 0); }else if( iLevel==FTS3_SEGCURSOR_ALL_PREFIX ){ fts3SqlExec(&rc, p, SQL_DELETE_ALL_PREFIX_SEGDIR, 0); }else if( iLevel==FTS3_SEGCURSOR_PENDING_PREFIX ){ sqlite3Fts3PendingPrefixesClear(p); }else if( iLevel==FTS3_SEGCURSOR_PENDING ){ sqlite3Fts3PendingTermsClear(p); }else if( iLevel>=0 ){ rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_BY_LEVEL, &pDelete, 0); if( rc==SQLITE_OK ){ sqlite3_bind_int(pDelete, 1, iLevel); sqlite3_step(pDelete); rc = sqlite3_reset(pDelete); } } |
︙ | ︙ | |||
2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 | static int fts3SegmentMerge(Fts3Table *p, int iLevel){ int rc; /* Return code */ int iIdx = 0; /* Index of new segment */ int iNewLevel = 0; /* Level to create new segment at */ SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ Fts3SegFilter filter; /* Segment term filter condition */ Fts3SegReaderCursor csr; /* Cursor to iterate through level(s) */ rc = sqlite3Fts3SegReaderCursor(p, iLevel, 0, 0, 1, 0, &csr); if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; | > | < | > | > > > > > | > | | 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 | static int fts3SegmentMerge(Fts3Table *p, int iLevel){ int rc; /* Return code */ int iIdx = 0; /* Index of new segment */ int iNewLevel = 0; /* Level to create new segment at */ SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ Fts3SegFilter filter; /* Segment term filter condition */ Fts3SegReaderCursor csr; /* Cursor to iterate through level(s) */ int bIgnoreEmpty = 0; /* True to ignore empty segments */ rc = sqlite3Fts3SegReaderCursor(p, iLevel, 0, 0, 1, 0, &csr); if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; if( iLevel==FTS3_SEGCURSOR_ALL_TERM || iLevel==FTS3_SEGCURSOR_ALL_PREFIX ){ /* This call is to merge all segments in the database to a single ** segment. The level of the new segment is equal to the the numerically ** greatest segment level currently present in the database. The index ** of the new segment is always 0. */ if( csr.nSegment==1 ){ rc = SQLITE_DONE; goto finished; } rc = fts3SegmentMaxLevel(p, iLevel==FTS3_SEGCURSOR_ALL_PREFIX, &iNewLevel); bIgnoreEmpty = 1; }else{ /* This call is to merge all segments at level iLevel. find the next ** available segment index at level iLevel+1. 
The call to ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to ** a single iLevel+2 segment if necessary. */ if( iLevel==FTS3_SEGCURSOR_PENDING ){ iNewLevel = 0; }else if( iLevel==FTS3_SEGCURSOR_PENDING_PREFIX ){ iNewLevel = FTS3_SEGDIR_PREFIXLEVEL; }else{ iNewLevel = iLevel+1; } rc = fts3AllocateSegdirIdx(p, iNewLevel, &iIdx); } if( rc!=SQLITE_OK ) goto finished; assert( csr.nSegment>0 ); assert( iNewLevel>=0 ); memset(&filter, 0, sizeof(Fts3SegFilter)); filter.flags = FTS3_SEGMENT_REQUIRE_POS; filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0); rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); while( SQLITE_OK==rc ){ rc = sqlite3Fts3SegReaderStep(p, &csr); if( rc!=SQLITE_ROW ) break; rc = fts3SegWriterAdd(p, &pWriter, 1, csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist); |
︙ | ︙ | |||
2286 2287 2288 2289 2290 2291 2292 | } /* ** Flush the contents of pendingTerms to a level 0 segment. */ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ | > > > > > | > > > > > | 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 | } /* ** Flush the contents of pendingTerms to a level 0 segment. */ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ int rc = SQLITE_OK; if( p->bPrefix ){ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_PENDING_PREFIX); } if( rc==SQLITE_OK || rc==SQLITE_DONE ){ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_PENDING); } if( rc==SQLITE_DONE ){ rc = SQLITE_OK; } return rc; } /* ** Encode N integers as varints into a blob. */ static void fts3EncodeIntArray( int N, /* The number of integers to encode */ |
︙ | ︙ | |||
2453 2454 2455 2456 2457 2458 2459 | int rc; /* Return Code */ const char *zVal = (const char *)sqlite3_value_text(pVal); int nVal = sqlite3_value_bytes(pVal); if( !zVal ){ return SQLITE_NOMEM; }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){ | | | | < < > > | 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 | int rc; /* Return Code */ const char *zVal = (const char *)sqlite3_value_text(pVal); int nVal = sqlite3_value_bytes(pVal); if( !zVal ){ return SQLITE_NOMEM; }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL_PREFIX); if( rc==SQLITE_OK ){ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL_TERM); } #ifdef SQLITE_TEST }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ p->nNodeSize = atoi(&zVal[9]); rc = SQLITE_OK; }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ p->nMaxPendingData = atoi(&zVal[11]); rc = SQLITE_OK; #endif }else{ rc = SQLITE_ERROR; } sqlite3Fts3SegmentsClose(p); sqlite3Fts3PendingTermsClear(p); sqlite3Fts3PendingPrefixesClear(p); return rc; } /* ** Return the deferred doclist associated with deferred token pDeferred. ** This function assumes that sqlite3Fts3CacheDeferredDoclists() has already ** been called to allocate and populate the doclist. |
︙ | ︙ | |||
2788 2789 2790 2791 2792 2793 2794 2795 2796 | /* ** Flush any data in the pending-terms hash table to disk. If successful, ** merge all segments in the database (including the new segment, if ** there was any data to flush) into a single segment. */ int sqlite3Fts3Optimize(Fts3Table *p){ int rc; rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0); if( rc==SQLITE_OK ){ | > | > > > > > > > > | | 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 | /* ** Flush any data in the pending-terms hash table to disk. If successful, ** merge all segments in the database (including the new segment, if ** there was any data to flush) into a single segment. */ int sqlite3Fts3Optimize(Fts3Table *p){ int rc; int bReturnDone = 0; rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0); if( rc==SQLITE_OK ){ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL_PREFIX); if( rc==SQLITE_OK ){ rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL_TERM); } if( rc==SQLITE_DONE ){ bReturnDone = 1; rc = SQLITE_OK; } if( rc==SQLITE_OK ){ rc = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); if( rc==SQLITE_OK ){ sqlite3Fts3PendingTermsClear(p); sqlite3Fts3PendingPrefixesClear(p); } }else{ sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0); sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); } } sqlite3Fts3SegmentsClose(p); return ((rc==SQLITE_OK && bReturnDone) ? SQLITE_DONE : rc); } #endif |
Added test/fts3prefix.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > || # 2011 May 04 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS3 module. # set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts3prefix # This proc tests that the prefixes index appears to represent the same content # as the terms index. 
# proc fts3_terms_and_prefixes {db tbl} { $db eval "CREATE VIRTUAL TABLE fts3check1 USING fts4term($tbl);" $db eval "CREATE VIRTUAL TABLE fts3check2 USING fts4prefix($tbl);" $db eval { CREATE TEMP TABLE terms AS SELECT * FROM fts3check1; CREATE TEMP TABLE prefixes AS SELECT * FROM fts3check2; CREATE INDEX temp.idx ON prefixes(term); DROP TABLE fts3check1; DROP TABLE fts3check2; } $db eval { SELECT term, docid, col, pos FROM temp.terms } a { set nMax [expr [string length $a(term)] - 1] if {$nMax>8} {set nMax 8} for {set n 0} {$n < $nMax} {incr n} { set t [string range $a(term) 0 $n] set r [$db one { SELECT count(*) FROM temp.prefixes WHERE term = $t AND docid = $a(docid) AND col = $a(col) AND pos = $a(pos) }] if {$r != 1} { error "$t, $a(docid), $a(col), $a(pos)" } } } execsql { DROP TABLE temp.prefixes } execsql { DROP TABLE temp.terms } set terms_layout [$db eval " SELECT level, idx FROM ${tbl}_segdir WHERE level < 1024 ORDER by 1, 2 "] set prefixes_layout [$db eval " SELECT level-1024, idx FROM ${tbl}_segdir WHERE level >= 1024 ORDER by 1, 2 "] if {$terms_layout != $prefixes_layout} { puts "TERMS LAYOUT: $terms_layout" puts "PREFIX LAYOUT: $prefixes_layout" error "Terms and prefixes are comprised of different b-trees" } return "" } proc fts3_tap_test {tn db tbl} { uplevel [list do_test $tn [list fts3_terms_and_prefixes $db $tbl] ""] } #------------------------------------------------------------------------- # Test cases 1.* are a sanity check. They test that the prefixes index is # being constructed correctly for the simplest possible case. 
# do_execsql_test 1.1 { CREATE VIRTUAL TABLE t1 USING fts4(prefix=1); CREATE VIRTUAL TABLE prefixes USING fts4prefix(t1); CREATE VIRTUAL TABLE terms USING fts4term(t1); } do_execsql_test 1.2 { INSERT INTO t1 VALUES('sqlite mysql firebird'); } do_execsql_test 1.3 { SELECT term FROM prefixes; } {f fi fir fire fireb firebi firebir firebird m my mys mysq mysql s sq sql sqli sqlit sqlite} do_execsql_test 1.4 { SELECT term FROM terms; } {firebird mysql sqlite} fts3_tap_test 1.5 db t1 #------------------------------------------------------------------------- # A slightly more complicated dataset. This test also verifies that DELETE # operations do not corrupt the prefixes index. # do_execsql_test 2.1 { INSERT INTO t1 VALUES('FTS3 and FTS4 are an SQLite virtual table modules'); INSERT INTO t1 VALUES('that allows users to perform full-text searches on'); INSERT INTO t1 VALUES('a set of documents. The most common (and'); INSERT INTO t1 VALUES('effective) way to describe full-text searches is'); INSERT INTO t1 VALUES('"what Google, Yahoo and Altavista do with'); INSERT INTO t1 VALUES('documents placed on the World Wide Web". Users'); INSERT INTO t1 VALUES('input a term, or series of terms, perhaps'); INSERT INTO t1 VALUES('connected by a binary operator or grouped together'); INSERT INTO t1 VALUES('into a phrase, and the full-text query system'); INSERT INTO t1 VALUES('finds the set of documents that best matches those'); INSERT INTO t1 VALUES('terms considering the operators and groupings the'); INSERT INTO t1 VALUES('user has specified. This article describes the'); INSERT INTO t1 VALUES('deployment and usage of FTS3 and FTS4.'); INSERT INTO t1 VALUES('FTS1 and FTS2 are obsolete full-text search'); INSERT INTO t1 VALUES('modules for SQLite. There are known issues with'); INSERT INTO t1 VALUES('these older modules and their use should be'); INSERT INTO t1 VALUES('avoided. 
Portions of the original FTS3 code were'); INSERT INTO t1 VALUES('contributed to the SQLite project by Scott Hess of'); INSERT INTO t1 VALUES('Google. It is now developed and maintained as part'); INSERT INTO t1 VALUES('of SQLite. '); } fts3_tap_test 2.2 db t1 do_execsql_test 2.3 { DELETE FROM t1 WHERE docid%2; } fts3_tap_test 2.4 db t1 do_execsql_test 2.5 { INSERT INTO t1(t1) VALUES('optimize') } fts3_tap_test 2.6 db t1 do_execsql_test 3.1 { CREATE VIRTUAL TABLE t2 USING fts4(prefix=1); INSERT INTO t2 VALUES('On 12 September the wind direction turned and'); INSERT INTO t2 VALUES('William''s fleet sailed. A storm blew up and the'); INSERT INTO t2 VALUES('fleet was forced to take shelter at'); INSERT INTO t2 VALUES('Saint-Valery-sur-Somme and again wait for the wind'); INSERT INTO t2 VALUES('to change. On 27 September the Norman fleet'); INSERT INTO t2 VALUES('finally set sail, landing in England at Pevensey'); INSERT INTO t2 VALUES('Bay (Sussex) on 28 September. William then moved'); INSERT INTO t2 VALUES('to Hastings, a few miles to the east, where he'); INSERT INTO t2 VALUES('built a prefabricated wooden castle for a base of'); INSERT INTO t2 VALUES('operations. From there, he ravaged the hinterland'); INSERT INTO t2 VALUES('and waited for Harold''s return from the north.'); INSERT INTO t2 VALUES('On 12 September the wind direction turned and'); INSERT INTO t2 VALUES('William''s fleet sailed. A storm blew up and the'); INSERT INTO t2 VALUES('fleet was forced to take shelter at'); INSERT INTO t2 VALUES('Saint-Valery-sur-Somme and again wait for the wind'); INSERT INTO t2 VALUES('to change. On 27 September the Norman fleet'); INSERT INTO t2 VALUES('finally set sail, landing in England at Pevensey'); INSERT INTO t2 VALUES('Bay (Sussex) on 28 September. William then moved'); INSERT INTO t2 VALUES('to Hastings, a few miles to the east, where he'); INSERT INTO t2 VALUES('built a prefabricated wooden castle for a base of'); INSERT INTO t2 VALUES('operations. 
From there, he ravaged the hinterland'); INSERT INTO t2 VALUES('and waited for Harold''s return from the north.'); } fts3_tap_test 3.2 db t2 do_execsql_test 3.3 { SELECT optimize(t2) FROM t2 LIMIT 1 } {{Index optimized}} fts3_tap_test 3.4 db t2 #------------------------------------------------------------------------- # Simple tests for reading the prefix-index. # do_execsql_test 4.1 { CREATE VIRTUAL TABLE t3 USING fts4(prefix=1); INSERT INTO t3 VALUES('one two three'); INSERT INTO t3 VALUES('four five six'); INSERT INTO t3 VALUES('seven eight nine'); } do_execsql_test 4.2 { SELECT * FROM t3 WHERE t3 MATCH 'f*' } {{four five six}} do_execsql_test 4.3 { SELECT * FROM t3 WHERE t3 MATCH 'four*' } {{four five six}} do_execsql_test 4.4 { SELECT * FROM t3 WHERE t3 MATCH 's*' } {{four five six} {seven eight nine}} finish_test |