Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Instead of the 4-byte fields, use regular varints for the poslist-size field in fts5_hash.c. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: | 7eb022d7e5fdb180af823c82c47c938e |
User & Date: | dan 2015-02-25 19:24:37.378 |
Context
2015-02-26
| ||
14:54 | Fix an fts5 bug in large incremental merges. (check-in: 208e3cb6b6 user: dan tags: fts5) | |
2015-02-25
| ||
19:24 | Instead of the 4-byte fields, use regular varints for the poslist-size field in fts5_hash.c. (check-in: 7eb022d7e5 user: dan tags: fts5) | |
2015-02-02
| ||
11:58 | Ensure generated header file fts5parse.h is included in sqlite3.c. (check-in: bc7be2fcfd user: dan tags: fts5) | |
Changes
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
347 348 349 350 351 352 353 354 355 356 357 358 359 360 | int sqlite3Fts5IndexReinit(Fts5Index *p); int sqlite3Fts5IndexOptimize(Fts5Index *p); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) /* ** End of interface to code in fts5_index.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_hash.c. | > > | 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 | int sqlite3Fts5IndexReinit(Fts5Index *p); int sqlite3Fts5IndexOptimize(Fts5Index *p); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) int sqlite3Fts5GetVarintLen(u32 iVal); /* ** End of interface to code in fts5_index.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_hash.c. |
︙ | ︙ |
Changes to ext/fts5/fts5_hash.c.
︙ | ︙ | |||
179 180 181 182 183 184 185 186 187 188 189 190 191 192 | } sqlite3_free(apOld); pHash->nSlot = nNew; pHash->aSlot = apNew; return SQLITE_OK; } int sqlite3Fts5HashWrite( Fts5Hash *pHash, i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ | > > > > > > > > > > > > > > > > > | 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 | } sqlite3_free(apOld); pHash->nSlot = nNew; pHash->aSlot = apNew; return SQLITE_OK; } static void fts5HashAddPoslistSize(Fts5HashEntry *p){ if( p->iSzPoslist ){ u8 *pPtr = (u8*)p; int nSz = p->nData - p->iSzPoslist - 1; if( nSz<=127 ){ pPtr[p->iSzPoslist] = nSz; }else{ int nByte = sqlite3Fts5GetVarintLen((u32)nSz); memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz); p->nData += (nByte-1); } p->iSzPoslist = 0; } } int sqlite3Fts5HashWrite( Fts5Hash *pHash, i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ |
︙ | ︙ | |||
217 218 219 220 221 222 223 | memset(p, 0, sizeof(Fts5HashEntry)); p->nAlloc = nByte; memcpy(p->zKey, pToken, nToken); p->zKey[nToken] = '\0'; p->nData = nToken + 1 + sizeof(Fts5HashEntry); p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid); p->iSzPoslist = p->nData; | | | | 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 | memset(p, 0, sizeof(Fts5HashEntry)); p->nAlloc = nByte; memcpy(p->zKey, pToken, nToken); p->zKey[nToken] = '\0'; p->nData = nToken + 1 + sizeof(Fts5HashEntry); p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid); p->iSzPoslist = p->nData; p->nData += 1; p->iRowid = iRowid; p->pHashNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; pHash->nEntry++; nIncr += p->nData; } /* Check there is enough space to append a new entry. Worst case scenario ** is: ** ** + 9 bytes for a new rowid, ** + 4 byte reserved for the "poslist size" varint. ** + 1 byte for a "new column" byte, ** + 3 bytes for a new column number (16-bit max) as a varint, ** + 5 bytes for the new position offset (32-bit max). */ if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ int nNew = p->nAlloc * 2; Fts5HashEntry *pNew; |
︙ | ︙ | |||
251 252 253 254 255 256 257 | } pPtr = (u8*)p; nIncr -= p->nData; /* If this is a new rowid, append the 4-byte size field for the previous ** entry, and the new rowid for this entry. */ if( iRowid!=p->iRowid ){ | | < | | 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 | } pPtr = (u8*)p; nIncr -= p->nData; /* If this is a new rowid, append the 4-byte size field for the previous ** entry, and the new rowid for this entry. */ if( iRowid!=p->iRowid ){ fts5HashAddPoslistSize(p); p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid - p->iRowid); p->iSzPoslist = p->nData; p->nData += 1; p->iCol = 0; p->iPos = 0; p->iRowid = iRowid; } if( iCol>=0 ){ /* Append a new column value, if necessary */ |
︙ | ︙ | |||
389 390 391 392 393 394 395 | rc = fts5HashEntrySort(pHash, 0, 0, &pList); if( rc==SQLITE_OK ){ memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot); while( pList ){ Fts5HashEntry *pNext = pList->pScanNext; if( rc==SQLITE_OK ){ | < | | < | | | 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 | rc = fts5HashEntrySort(pHash, 0, 0, &pList); if( rc==SQLITE_OK ){ memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot); while( pList ){ Fts5HashEntry *pNext = pList->pScanNext; if( rc==SQLITE_OK ){ const int nKey = strlen(pList->zKey); i64 iRowid = 0; u8 *pPtr = (u8*)pList; int iOff = sizeof(Fts5HashEntry) + nKey + 1; /* Fill in the final poslist size field */ fts5HashAddPoslistSize(pList); /* Issue the new-term callback */ rc = xTerm(pCtx, pList->zKey, nKey); /* Issue the xEntry callbacks */ while( rc==SQLITE_OK && iOff<pList->nData ){ i64 iDelta; /* Rowid delta value */ int nPoslist; /* Size of position list in bytes */ int nVarint; iOff += getVarint(&pPtr[iOff], (u64*)&iDelta); iRowid += iDelta; nVarint = fts5GetVarint32(&pPtr[iOff], nPoslist); rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist+nVarint); iOff += nVarint+nPoslist; } /* Issue the term-done callback */ if( rc==SQLITE_OK ) rc = xTermDone(pCtx); } sqlite3_free(pList); pList = pNext; |
︙ | ︙ | |||
441 442 443 444 445 446 447 | Fts5HashEntry *p; for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break; } if( p ){ | < | | 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 | Fts5HashEntry *p; for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break; } if( p ){ fts5HashAddPoslistSize(p); *ppDoclist = &p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ *ppDoclist = 0; *pnDoclist = 0; } |
︙ | ︙ | |||
478 479 480 481 482 483 484 | Fts5Hash *pHash, const char **pzTerm, /* OUT: term (nul-terminated) */ const char **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ){ Fts5HashEntry *p; if( (p = pHash->pScan) ){ | < | | 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 | Fts5Hash *pHash, const char **pzTerm, /* OUT: term (nul-terminated) */ const char **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ){ Fts5HashEntry *p; if( (p = pHash->pScan) ){ int nTerm = strlen(p->zKey); fts5HashAddPoslistSize(p); *pzTerm = p->zKey; *ppDoclist = &p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ *pzTerm = 0; *ppDoclist = 0; *pnDoclist = 0; } } |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
367 368 369 370 371 372 373 | /* ** An object of type Fts5SegWriter is used to write to segments. */ struct Fts5PageWriter { int pgno; /* Page number for this page */ Fts5Buffer buf; /* Buffer containing page data */ | | | 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 | /* ** An object of type Fts5SegWriter is used to write to segments. */ struct Fts5PageWriter { int pgno; /* Page number for this page */ Fts5Buffer buf; /* Buffer containing page data */ Fts5Buffer term; /* Buffer containing previous term on page */ }; struct Fts5SegWriter { int iIdx; /* Index to write to */ int iSegid; /* Segid to write to */ int nWriter; /* Number of entries in aWriter */ Fts5PageWriter *aWriter; /* Array of PageWriter objects */ i64 iPrevRowid; /* Previous docid written to current leaf */ |
︙ | ︙ | |||
662 663 664 665 666 667 668 669 670 671 672 673 674 675 | p -= 2; n = sqlite3GetVarint(p, &v64); *v = (u32)v64; assert( n>3 && n<=9 ); return n; } } /* ** Allocate and return a buffer at least nByte bytes in size. ** ** If an OOM error is encountered, return NULL and set the error code in ** the Fts5Index handle passed as the first argument. */ | > > > > > > > > | 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 | p -= 2; n = sqlite3GetVarint(p, &v64); *v = (u32)v64; assert( n>3 && n<=9 ); return n; } } int sqlite3Fts5GetVarintLen(u32 iVal){ if( iVal<(1 << 7 ) ) return 1; if( iVal<(1 << 14) ) return 2; if( iVal<(1 << 21) ) return 3; if( iVal<(1 << 28) ) return 4; return 5; } /* ** Allocate and return a buffer at least nByte bytes in size. ** ** If an OOM error is encountered, return NULL and set the error code in ** the Fts5Index handle passed as the first argument. */ |
︙ | ︙ |
Changes to ext/fts5/tool/loadfts5.tcl.
︙ | ︙ | |||
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | proc usage {} { puts stderr "Usage: $::argv0 ?SWITCHES? DATABASE PATH" puts stderr "" puts stderr "Switches are:" puts stderr " -fts4 (use fts4 instead of fts5)" puts stderr " -fts5 (use fts5)" puts stderr " -porter (use porter tokenizer)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" puts stderr " -crisismerge N (set the crisismerge parameter to N)" exit 1 } set O(vtab) fts5 set O(tok) "" set O(limit) 0 set O(automerge) -1 set O(crisismerge) -1 if {[llength $argv]<2} usage set nOpt [expr {[llength $argv]-2}] for {set i 0} {$i < $nOpt} {incr i} { set arg [lindex $argv $i] switch -- [lindex $argv $i] { -fts4 { set O(vtab) fts4 } -fts5 { set O(vtab) fts5 } -porter { set O(tok) ", tokenize=porter" } -limit { if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] } -automerge { | > > > > > > | 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | proc usage {} { puts stderr "Usage: $::argv0 ?SWITCHES? 
DATABASE PATH" puts stderr "" puts stderr "Switches are:" puts stderr " -fts4 (use fts4 instead of fts5)" puts stderr " -fts5 (use fts5)" puts stderr " -porter (use porter tokenizer)" puts stderr " -delete (delete the database file before starting)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" puts stderr " -crisismerge N (set the crisismerge parameter to N)" exit 1 } set O(vtab) fts5 set O(tok) "" set O(limit) 0 set O(delete) 0 set O(automerge) -1 set O(crisismerge) -1 if {[llength $argv]<2} usage set nOpt [expr {[llength $argv]-2}] for {set i 0} {$i < $nOpt} {incr i} { set arg [lindex $argv $i] switch -- [lindex $argv $i] { -fts4 { set O(vtab) fts4 } -fts5 { set O(vtab) fts5 } -porter { set O(tok) ", tokenize=porter" } -delete { set O(delete) 1 } -limit { if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] } -automerge { |
︙ | ︙ | |||
82 83 84 85 86 87 88 | default { usage } } } | > > | | 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 | default { usage } } } set dbfile [lindex $argv end-1] if {$O(delete)} { file delete -force $dbfile } sqlite3 db $dbfile db func loadfile loadfile db transaction { catch { db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok))" } if {$O(automerge)>=0} { |
︙ | ︙ |