Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add the "offsets=0" option to fts5, to create a smaller index without term offset information. A few things are currently broken on this branch. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5-offsets |
Files: | files | file ages | folders |
SHA1: |
40b5bbf02a824ca73b33aa4ae1c7d5f6 |
User & Date: | dan 2015-12-17 20:36:13.853 |
Context
2015-12-18
| ||
19:07 | Fix a problem with prefix queries on fts5 offsets=0 tables. (check-in: ad0987d83c user: dan tags: fts5-offsets) | |
2015-12-17
| ||
20:36 | Add the "offsets=0" option to fts5, to create a smaller index without term offset information. A few things are currently broken on this branch. (check-in: 40b5bbf02a user: dan tags: fts5-offsets) | |
14:18 | Fix the spellfix1_scriptcode() function to ignore whitespace and punctuation, and to recognize hebrew and arabic scripts. (check-in: 7adfa4a579 user: drh tags: trunk) | |
Changes
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
147 148 149 150 151 152 153 154 155 156 157 158 159 160 | u8 *abUnindexed; /* True for unindexed columns */ int nPrefix; /* Number of prefix indexes */ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ int eContent; /* An FTS5_CONTENT value */ char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ int bColumnsize; /* "columnsize=" option value (dflt==1) */ char *zContentExprlist; Fts5Tokenizer *pTok; fts5_tokenizer *pTokApi; /* Values loaded from the %_config table */ int iCookie; /* Incremented when %_config is modified */ int pgsz; /* Approximate page size used in %_data */ | > | 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 | u8 *abUnindexed; /* True for unindexed columns */ int nPrefix; /* Number of prefix indexes */ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ int eContent; /* An FTS5_CONTENT value */ char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ int bColumnsize; /* "columnsize=" option value (dflt==1) */ int bOffsets; /* "offsets=" option value (dflt==1) */ char *zContentExprlist; Fts5Tokenizer *pTok; fts5_tokenizer *pTokApi; /* Values loaded from the %_config table */ int iCookie; /* Incremented when %_config is modified */ int pgsz; /* Approximate page size used in %_data */ |
︙ | ︙ | |||
288 289 290 291 292 293 294 295 296 297 298 299 300 301 | /* Malloc utility */ void *sqlite3Fts5MallocZero(int *pRc, int nByte); char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn); /* Character set tests (like isspace(), isalpha() etc.) */ int sqlite3Fts5IsBareword(char t); /* ** End of interface to code in fts5_buffer.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_index.c. fts5_index.c contains contains code ** to access the data stored in the %_data table. | > > > > > > > | 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 | /* Malloc utility */ void *sqlite3Fts5MallocZero(int *pRc, int nByte); char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn); /* Character set tests (like isspace(), isalpha() etc.) */ int sqlite3Fts5IsBareword(char t); /* Bucket of terms object used by the integrity-check in offsets=0 mode. */ typedef struct Fts5Termset Fts5Termset; int sqlite3Fts5TermsetNew(Fts5Termset**); int sqlite3Fts5TermsetAdd(Fts5Termset*, const char*, int, int *pbPresent); void sqlite3Fts5TermsetFree(Fts5Termset*); /* ** End of interface to code in fts5_buffer.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_index.c. fts5_index.c contains contains code ** to access the data stored in the %_data table. |
︙ | ︙ | |||
488 489 490 491 492 493 494 | ** Interface to code in fts5_hash.c. */ typedef struct Fts5Hash Fts5Hash; /* ** Create a hash table, free a hash table. */ | | | 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 | ** Interface to code in fts5_hash.c. */ typedef struct Fts5Hash Fts5Hash; /* ** Create a hash table, free a hash table. */ int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize); void sqlite3Fts5HashFree(Fts5Hash*); int sqlite3Fts5HashWrite( Fts5Hash*, i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ |
︙ | ︙ |
Changes to ext/fts5/fts5_buffer.c.
︙ | ︙ | |||
286 287 288 289 290 291 292 293 294 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 .. 0x5F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 .. 0x6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70 .. 0x7F */ }; return (t & 0x80) || aBareword[(int)t]; } | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 .. 0x5F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 .. 0x6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70 .. 0x7F */ }; return (t & 0x80) || aBareword[(int)t]; } /************************************************************************* */ typedef struct Fts5TermsetEntry Fts5TermsetEntry; struct Fts5TermsetEntry { char *pTerm; int nTerm; Fts5TermsetEntry *pNext; }; struct Fts5Termset { Fts5TermsetEntry *apHash[512]; }; int sqlite3Fts5TermsetNew(Fts5Termset **pp){ int rc = SQLITE_OK; *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset)); return rc; } int sqlite3Fts5TermsetAdd( Fts5Termset *p, const char *pTerm, int nTerm, int *pbPresent ){ int rc = SQLITE_OK; int i; int hash = 13; Fts5TermsetEntry *pEntry; /* Calculate a hash value for this term */ for(i=0; i<nTerm; i++){ hash += (hash << 3) + (int)pTerm[i]; } hash = hash % ArraySize(p->apHash); *pbPresent = 0; for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ if( pEntry->nTerm==nTerm && memcmp(pEntry->pTerm, pTerm, nTerm)==0 ){ *pbPresent = 1; break; } } if( pEntry==0 ){ pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); if( pEntry ){ pEntry->pTerm = (char*)&pEntry[1]; pEntry->nTerm = nTerm; memcpy(pEntry->pTerm, pTerm, nTerm); pEntry->pNext = p->apHash[hash]; p->apHash[hash] = pEntry; } } return rc; } void sqlite3Fts5TermsetFree(Fts5Termset *p){ if( p ){ int i; for(i=0; i<ArraySize(p->apHash); i++){ Fts5TermsetEntry *pEntry = p->apHash[i]; while( pEntry ){ Fts5TermsetEntry *pDel = pEntry; pEntry = pEntry->pNext; sqlite3_free(pDel); } } sqlite3_free(p); } } |
Changes to ext/fts5/fts5_config.c.
︙ | ︙ | |||
10 11 12 13 14 15 16 | ** ****************************************************************************** ** ** This is an SQLite module implementing full-text search. */ | < | 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | ** ****************************************************************************** ** ** This is an SQLite module implementing full-text search. */ #include "fts5Int.h" #define FTS5_DEFAULT_PAGE_SIZE 4050 #define FTS5_DEFAULT_AUTOMERGE 4 #define FTS5_DEFAULT_CRISISMERGE 16 #define FTS5_DEFAULT_HASHSIZE (1024*1024) |
︙ | ︙ | |||
340 341 342 343 344 345 346 347 348 349 350 351 352 353 | *pzErr = sqlite3_mprintf("malformed columnsize=... directive"); rc = SQLITE_ERROR; }else{ pConfig->bColumnsize = (zArg[0]=='1'); } return rc; } *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); return SQLITE_ERROR; } /* ** Allocate an instance of the default tokenizer ("simple") at | > > > > > > > > > > | 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 | *pzErr = sqlite3_mprintf("malformed columnsize=... directive"); rc = SQLITE_ERROR; }else{ pConfig->bColumnsize = (zArg[0]=='1'); } return rc; } if( sqlite3_strnicmp("offsets", zCmd, nCmd)==0 ){ if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ *pzErr = sqlite3_mprintf("malformed offsets=... directive"); rc = SQLITE_ERROR; }else{ pConfig->bOffsets = (zArg[0]=='1'); } return rc; } *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); return SQLITE_ERROR; } /* ** Allocate an instance of the default tokenizer ("simple") at |
︙ | ︙ | |||
496 497 498 499 500 501 502 503 504 505 506 507 508 509 | nByte = nArg * (sizeof(char*) + sizeof(u8)); pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); pRet->bColumnsize = 1; #ifdef SQLITE_DEBUG pRet->bPrefixIndex = 1; #endif if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); rc = SQLITE_ERROR; } | > | 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 | nByte = nArg * (sizeof(char*) + sizeof(u8)); pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); pRet->bColumnsize = 1; pRet->bOffsets = 1; #ifdef SQLITE_DEBUG pRet->bPrefixIndex = 1; #endif if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); rc = SQLITE_ERROR; } |
︙ | ︙ |
Changes to ext/fts5/fts5_hash.c.
︙ | ︙ | |||
22 23 24 25 26 27 28 29 30 31 32 33 34 35 | ** This file contains the implementation of an in-memory hash table used ** to accumuluate "term -> doclist" content before it is flused to a level-0 ** segment. */ struct Fts5Hash { int *pnByte; /* Pointer to bytes counter */ int nEntry; /* Number of entries currently in hash */ int nSlot; /* Size of aSlot[] array */ Fts5HashEntry *pScan; /* Current ordered scan item */ Fts5HashEntry **aSlot; /* Array of hash slots */ }; | > | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | ** This file contains the implementation of an in-memory hash table used ** to accumuluate "term -> doclist" content before it is flused to a level-0 ** segment. */ struct Fts5Hash { int bOffsets; /* Copy of Fts5Config.bOffsets */ int *pnByte; /* Pointer to bytes counter */ int nEntry; /* Number of entries currently in hash */ int nSlot; /* Size of aSlot[] array */ Fts5HashEntry *pScan; /* Current ordered scan item */ Fts5HashEntry **aSlot; /* Array of hash slots */ }; |
︙ | ︙ | |||
75 76 77 78 79 80 81 | #define FTS5_HASHENTRYSIZE (sizeof(Fts5HashEntry)-8) /* ** Allocate a new hash table. */ | | > | 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | #define FTS5_HASHENTRYSIZE (sizeof(Fts5HashEntry)-8) /* ** Allocate a new hash table. */ int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){ int rc = SQLITE_OK; Fts5Hash *pNew; *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash)); if( pNew==0 ){ rc = SQLITE_NOMEM; }else{ int nByte; memset(pNew, 0, sizeof(Fts5Hash)); pNew->pnByte = pnByte; pNew->bOffsets = pConfig->bOffsets; pNew->nSlot = 1024; nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte); if( pNew->aSlot==0 ){ sqlite3_free(pNew); *ppNew = 0; |
︙ | ︙ | |||
210 211 212 213 214 215 216 217 218 219 220 221 222 223 | char bByte, /* First byte of token */ const char *pToken, int nToken /* Token to add or remove to or from index */ ){ unsigned int iHash; Fts5HashEntry *p; u8 *pPtr; int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ /* Attempt to locate an existing hash entry */ iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ if( p->zKey[0]==bByte && memcmp(&p->zKey[1], pToken, nToken)==0 && p->zKey[nToken+1]==0 | > | 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 | char bByte, /* First byte of token */ const char *pToken, int nToken /* Token to add or remove to or from index */ ){ unsigned int iHash; Fts5HashEntry *p; u8 *pPtr; int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ int bNew = pHash->bOffsets; /* If non-delete entry should be written */ /* Attempt to locate an existing hash entry */ iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ if( p->zKey[0]==bByte && memcmp(&p->zKey[1], pToken, nToken)==0 && p->zKey[nToken+1]==0 |
︙ | ︙ | |||
246 247 248 249 250 251 252 253 254 255 256 257 258 259 | assert( iHash==fts5HashKey(pHash->nSlot, (u8*)p->zKey, nToken+1) ); p->zKey[nToken+1] = '\0'; p->nData = nToken+1 + 1 + FTS5_HASHENTRYSIZE; p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); p->iSzPoslist = p->nData; p->nData += 1; p->iRowid = iRowid; p->pHashNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; pHash->nEntry++; nIncr += p->nData; } /* Check there is enough space to append a new entry. Worst case scenario | > | 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 | assert( iHash==fts5HashKey(pHash->nSlot, (u8*)p->zKey, nToken+1) ); p->zKey[nToken+1] = '\0'; p->nData = nToken+1 + 1 + FTS5_HASHENTRYSIZE; p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); p->iSzPoslist = p->nData; p->nData += 1; p->iRowid = iRowid; p->iCol = (pHash->bOffsets-1); p->pHashNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; pHash->nEntry++; nIncr += p->nData; } /* Check there is enough space to append a new entry. Worst case scenario |
︙ | ︙ | |||
282 283 284 285 286 287 288 | /* If this is a new rowid, append the 4-byte size field for the previous ** entry, and the new rowid for this entry. */ if( iRowid!=p->iRowid ){ fts5HashAddPoslistSize(p); p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid); p->iSzPoslist = p->nData; p->nData += 1; | | > > > > > | | | | | | > | > | | > | 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 | /* If this is a new rowid, append the 4-byte size field for the previous ** entry, and the new rowid for this entry. */ if( iRowid!=p->iRowid ){ fts5HashAddPoslistSize(p); p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid); p->iSzPoslist = p->nData; p->nData += 1; p->iCol = (pHash->bOffsets-1); p->iPos = 0; p->iRowid = iRowid; bNew = 1; } if( iCol>=0 ){ /* Append a new column value, if necessary */ assert( iCol>=p->iCol ); if( iCol!=p->iCol ){ if( pHash->bOffsets==0 ){ bNew = 1; p->iCol = iPos = iCol; }else{ pPtr[p->nData++] = 0x01; p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol); p->iCol = iCol; p->iPos = 0; } } /* Append the new position offset, if necessary */ if( bNew ){ p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); p->iPos = iPos; } }else{ /* This is a delete. Set the delete flag. */ p->bDel = 1; } nIncr += p->nData; *pHash->pnByte += nIncr; |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 | typedef struct PoslistCallbackCtx PoslistCallbackCtx; struct PoslistCallbackCtx { Fts5Buffer *pBuf; /* Append to this buffer */ Fts5Colset *pColset; /* Restrict matches to this column */ int eState; /* See above */ }; /* ** TODO: Make this more efficient! */ static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ int i; for(i=0; i<pColset->nCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } static void fts5PoslistFilterCallback( Fts5Index *p, void *pContext, const u8 *pChunk, int nChunk ){ PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 | typedef struct PoslistCallbackCtx PoslistCallbackCtx; struct PoslistCallbackCtx { Fts5Buffer *pBuf; /* Append to this buffer */ Fts5Colset *pColset; /* Restrict matches to this column */ int eState; /* See above */ }; typedef struct PoslistOffsetsCtx PoslistOffsetsCtx; struct PoslistOffsetsCtx { Fts5Buffer *pBuf; /* Append to this buffer */ Fts5Colset *pColset; /* Restrict matches to this column */ int iRead; int iWrite; }; /* ** TODO: Make this more efficient! */ static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ int i; for(i=0; i<pColset->nCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } static void fts5PoslistOffsetsCallback( Fts5Index *p, void *pContext, const u8 *pChunk, int nChunk ){ PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext; assert_nc( nChunk>=0 ); if( nChunk>0 ){ int i = 0; while( i<nChunk ){ int iVal; i += fts5GetVarint32(&pChunk[i], iVal); iVal += pCtx->iRead - 2; pCtx->iRead = iVal; if( fts5IndexColsetTest(pCtx->pColset, iVal) ){ fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite); pCtx->iWrite = iVal; } } } } static void fts5PoslistFilterCallback( Fts5Index *p, void *pContext, const u8 *pChunk, int nChunk ){ PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; |
︙ | ︙ | |||
4075 4076 4077 4078 4079 4080 4081 | Fts5Colset *pColset, Fts5Buffer *pBuf ){ if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){ if( pColset==0 ){ fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); }else{ | > > > > > > > | | | < | > | > | 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 | Fts5Colset *pColset, Fts5Buffer *pBuf ){ if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){ if( pColset==0 ){ fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); }else{ if( p->pConfig->bOffsets==0 ){ PoslistOffsetsCtx sCtx; memset(&sCtx, 0, sizeof(sCtx)); sCtx.pBuf = pBuf; sCtx.pColset = pColset; fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback); }else{ PoslistCallbackCtx sCtx; sCtx.pBuf = pBuf; sCtx.pColset = pColset; assert( sCtx.eState==0 || sCtx.eState==1 ); sCtx.eState = fts5IndexColsetTest(pColset, 0); fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); } } } } /* ** IN/OUT parameter (*pa) points to a position list n bytes in size. If ** the position list contains entries for column iCol, then (*pa) is set |
︙ | ︙ | |||
4442 4443 4444 4445 4446 4447 4448 | ** to the document with rowid iRowid. */ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ assert( p->rc==SQLITE_OK ); /* Allocate the hash table if it has not already been allocated */ if( p->pHash==0 ){ | | | 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 | ** to the document with rowid iRowid. */ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ assert( p->rc==SQLITE_OK ); /* Allocate the hash table if it has not already been allocated */ if( p->pHash==0 ){ p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData); } /* Flush the hash table to disk if required */ if( iRowid<p->iWriteRowid || (iRowid==p->iWriteRowid && p->bDelete==0) || (p->nPendingData > p->pConfig->nHashSize) ){ |
︙ | ︙ | |||
4800 4801 4802 4803 4804 4805 4806 | const u8 **pp, /* OUT: Pointer to position-list data */ int *pn, /* OUT: Size of position-list in bytes */ i64 *piRowid /* OUT: Current rowid */ ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; assert( pIter->pIndex->rc==SQLITE_OK ); *piRowid = pSeg->iRowid; | > | > | 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 | const u8 **pp, /* OUT: Pointer to position-list data */ int *pn, /* OUT: Size of position-list in bytes */ i64 *piRowid /* OUT: Current rowid */ ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; assert( pIter->pIndex->rc==SQLITE_OK ); *piRowid = pSeg->iRowid; if( pIter->pIndex->pConfig->bOffsets && pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset]; if( pColset==0 || pIter->bFiltered ){ *pn = pSeg->nPos; *pp = pPos; }else if( pColset->nCol==1 ){ *pp = pPos; *pn = fts5IndexExtractCol(pp, pSeg->nPos, pColset->aiCol[0]); |
︙ | ︙ |
Changes to ext/fts5/fts5_storage.c.
︙ | ︙ | |||
821 822 823 824 825 826 827 828 829 830 831 832 833 834 | */ typedef struct Fts5IntegrityCtx Fts5IntegrityCtx; struct Fts5IntegrityCtx { i64 iRowid; int iCol; int szCol; u64 cksum; Fts5Config *pConfig; }; /* ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( | > | > > > > > > | > > > > > | | > | | 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 | */ typedef struct Fts5IntegrityCtx Fts5IntegrityCtx; struct Fts5IntegrityCtx { i64 iRowid; int iCol; int szCol; u64 cksum; Fts5Termset *pTermset; Fts5Config *pConfig; }; /* ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( void *pContext, /* Pointer to Fts5IntegrityCtx object */ int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ int rc = SQLITE_OK; Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } if( pCtx->pTermset ){ int bPresent = 0; rc = sqlite3Fts5TermsetAdd(pCtx->pTermset, pToken, nToken, &bPresent); if( rc==SQLITE_OK && bPresent==0 ){ pCtx->cksum ^= sqlite3Fts5IndexCksum( pCtx->pConfig, pCtx->iRowid, 0, pCtx->iCol, pToken, nToken ); } }else{ pCtx->cksum ^= sqlite3Fts5IndexCksum( pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken ); } return rc; } /* ** Check that the contents of the FTS index match that of the %_content ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return ** some other SQLite error code if an error occurs while attempting to ** determine this. |
︙ | ︙ | |||
882 883 884 885 886 887 888 | if( pConfig->bColumnsize ){ rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); } for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i] ) continue; ctx.iCol = i; ctx.szCol = 0; | > > > > | | | | | | | > | > > | 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 | if( pConfig->bColumnsize ){ rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); } for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i] ) continue; ctx.iCol = i; ctx.szCol = 0; if( pConfig->bOffsets==0 ){ rc = sqlite3Fts5TermsetNew(&ctx.pTermset); } if( rc==SQLITE_OK ){ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pScan, i+1), sqlite3_column_bytes(pScan, i+1), (void*)&ctx, fts5StorageIntegrityCallback ); } if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){ rc = FTS5_CORRUPT; } aTotalSize[i] += ctx.szCol; sqlite3Fts5TermsetFree(ctx.pTermset); ctx.pTermset = 0; } if( rc!=SQLITE_OK ) break; } rc2 = sqlite3_reset(pScan); if( rc==SQLITE_OK ) rc = rc2; } |
︙ | ︙ |
Added ext/fts5/test/fts5offsets.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | # 2015 December 18 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5offsets # If SQLITE_ENABLE_FTS5 is not defined, omit this file. ifcapable !fts5 { finish_test return } do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, c, offsets=0); INSERT INTO t1 VALUES('h d g', 'j b b g b', 'i e i d h g g'); -- 1 INSERT INTO t1 VALUES('h j d', 'j h d a h', 'f d d g g f b'); -- 2 INSERT INTO t1 VALUES('j c i', 'f f h e f', 'c j i j c h f'); -- 3 INSERT INTO t1 VALUES('e g g', 'g e d h i', 'e d b e g d c'); -- 4 INSERT INTO t1 VALUES('b c c', 'd i h a f', 'd i j f a b c'); -- 5 INSERT INTO t1 VALUES('e d e', 'b c j g d', 'a i f d h b d'); -- 6 INSERT INTO t1 VALUES('g h e', 'b c d i d', 'e f c i f i c'); -- 7 INSERT INTO t1 VALUES('c f j', 'j j i e a', 'h a c f d h e'); -- 8 INSERT INTO t1 VALUES('a h i', 'c i a f a', 'c f d h g d g'); -- 9 INSERT INTO t1 VALUES('j g g', 'e f e f f', 'h j b i c g e'); -- 10 } do_execsql_test 1.1 { INSERT INTO t1(t1) VALUES('integrity-check'); } foreach {tn match res} { 1 "a:a" {9} 2 "b:g" {1 4 6} 3 "c:h" {1 3 6 8 9 10} } { do_execsql_test 1.2.$tn.1 { SELECT rowid FROM t1($match); } $res do_execsql_test 1.2.$tn.2 { SELECT rowid FROM t1($match || '*'); } $res } finish_test |