Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | Instead of the 4-byte fields, use regular varints for the poslist-size field in fts5_hash.c. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: | 7eb022d7e5fdb180af823c82c47c938e |
User & Date: | dan 2015-02-25 19:24:37 |
Context
2015-02-26
| ||
14:54 | Fix an fts5 bug in large incremental merges. check-in: 208e3cb6 user: dan tags: fts5 | |
2015-02-25
| ||
19:24 | Instead of the 4-byte fields, use regular varints for the poslist-size field in fts5_hash.c. check-in: 7eb022d7 user: dan tags: fts5 | |
2015-02-02
| ||
11:58 | Ensure generated header file fts5parse.h is included in sqlite3.c. check-in: bc7be2fc user: dan tags: fts5 | |
Changes
Changes to ext/fts5/fts5Int.h.
347 348 349 350 351 352 353 354 355 356 357 358 359 360 |
int sqlite3Fts5IndexReinit(Fts5Index *p); int sqlite3Fts5IndexOptimize(Fts5Index *p); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) /* ** End of interface to code in fts5_index.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_hash.c. |
> > |
347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 |
int sqlite3Fts5IndexReinit(Fts5Index *p);
int sqlite3Fts5IndexOptimize(Fts5Index *p);
int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
/*
** Read a varint from buffer p and store its value, truncated to 32 bits,
** in *v.
** NOTE(review): presumably returns the number of bytes consumed -- confirm
** against the definition in fts5_index.c.
*/
int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
/*
** Convenience wrapper around sqlite3Fts5GetVarint32(). The address of b is
** cast to (u32*), so b must be an lvalue that occupies 32 bits (e.g. an int).
*/
#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b)
/*
** Return the number of bytes (1 to 5) required to encode iVal as a varint.
*/
int sqlite3Fts5GetVarintLen(u32 iVal);
/*
** End of interface to code in fts5_index.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_hash.c.
|
Changes to ext/fts5/fts5_hash.c.
179 180 181 182 183 184 185 186 187 188 189 190 191 192 ... 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 ... 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ... 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 ... 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 ... 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 |
} sqlite3_free(apOld); pHash->nSlot = nNew; pHash->aSlot = apNew; return SQLITE_OK; } int sqlite3Fts5HashWrite( Fts5Hash *pHash, i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ ................................................................................ memset(p, 0, sizeof(Fts5HashEntry)); p->nAlloc = nByte; memcpy(p->zKey, pToken, nToken); p->zKey[nToken] = '\0'; p->nData = nToken + 1 + sizeof(Fts5HashEntry); p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid); p->iSzPoslist = p->nData; p->nData += 4; p->iRowid = iRowid; p->pHashNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; pHash->nEntry++; nIncr += p->nData; } /* Check there is enough space to append a new entry. Worst case scenario ** is: ** ** + 9 bytes for a new rowid, ** + 4 bytes reserved for the "poslist size" varint. ** + 1 byte for a "new column" byte, ** + 3 bytes for a new column number (16-bit max) as a varint, ** + 5 bytes for the new position offset (32-bit max). */ if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ int nNew = p->nAlloc * 2; Fts5HashEntry *pNew; ................................................................................ } pPtr = (u8*)p; nIncr -= p->nData; /* If this is a new rowid, append the 4-byte size field for the previous ** entry, and the new rowid for this entry. */ if( iRowid!=p->iRowid ){ assert( p->iSzPoslist>0 ); fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid - p->iRowid); p->iSzPoslist = p->nData; p->nData += 4; p->iCol = 0; p->iPos = 0; p->iRowid = iRowid; } if( iCol>=0 ){ /* Append a new column value, if necessary */ ................................................................................ 
rc = fts5HashEntrySort(pHash, 0, 0, &pList); if( rc==SQLITE_OK ){ memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot); while( pList ){ Fts5HashEntry *pNext = pList->pScanNext; if( rc==SQLITE_OK ){ const int nSz = pList->nData - pList->iSzPoslist - 4; const int nKey = strlen(pList->zKey); i64 iRowid = 0; u8 *pPtr = (u8*)pList; int iOff = sizeof(Fts5HashEntry) + nKey + 1; /* Fill in the final poslist size field */ fts5Put4ByteVarint(&pPtr[pList->iSzPoslist], nSz); /* Issue the new-term callback */ rc = xTerm(pCtx, pList->zKey, nKey); /* Issue the xEntry callbacks */ while( rc==SQLITE_OK && iOff<pList->nData ){ i64 iDelta; /* Rowid delta value */ int nPoslist; /* Size of position list in bytes */ int nVarint; iOff += getVarint(&pPtr[iOff], (u64*)&iDelta); iRowid += iDelta; nPoslist = fts5Get4ByteVarint(&pPtr[iOff], &nVarint); iOff += 4; rc = xEntry(pCtx, iRowid, &pPtr[iOff-nVarint], nPoslist+nVarint); iOff += nPoslist; } /* Issue the term-done callback */ if( rc==SQLITE_OK ) rc = xTermDone(pCtx); } sqlite3_free(pList); pList = pNext; ................................................................................ Fts5HashEntry *p; for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break; } if( p ){ u8 *pPtr = (u8*)p; fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); *ppDoclist = &p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ *ppDoclist = 0; *pnDoclist = 0; } ................................................................................ 
Fts5Hash *pHash, const char **pzTerm, /* OUT: term (nul-terminated) */ const char **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ){ Fts5HashEntry *p; if( (p = pHash->pScan) ){ u8 *pPtr = (u8*)p; int nTerm = strlen(p->zKey); fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); *pzTerm = p->zKey; *ppDoclist = &p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ *pzTerm = 0; *ppDoclist = 0; *pnDoclist = 0; } } |
> > > > > > > > > > > > > > > > > | | | < | < | | < | | | < < < > |
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 ... 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 ... 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 ... 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 ... 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 ... 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 |
} sqlite3_free(apOld); pHash->nSlot = nNew; pHash->aSlot = apNew; return SQLITE_OK; } /* ** If entry p has an open position list (p->iSzPoslist!=0), write the final ** size of that list into the size field at offset p->iSzPoslist. A size of ** 127 bytes or less fits in the single byte already reserved for the field. ** A larger size needs a multi-byte varint, so the poslist data is first ** shifted up with memmove() to make room and p->nData grows by the extra ** bytes. p->iSzPoslist is then cleared, so calling this function again ** before a new position list is started is a no-op. */ static void fts5HashAddPoslistSize(Fts5HashEntry *p){ if( p->iSzPoslist ){ u8 *pPtr = (u8*)p; int nSz = p->nData - p->iSzPoslist - 1; if( nSz<=127 ){ pPtr[p->iSzPoslist] = nSz; }else{ int nByte = sqlite3Fts5GetVarintLen((u32)nSz); memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz); p->nData += (nByte-1); } p->iSzPoslist = 0; } } int sqlite3Fts5HashWrite( Fts5Hash *pHash, i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ ................................................................................ memset(p, 0, sizeof(Fts5HashEntry)); p->nAlloc = nByte; memcpy(p->zKey, pToken, nToken); p->zKey[nToken] = '\0'; p->nData = nToken + 1 + sizeof(Fts5HashEntry); p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid); p->iSzPoslist = p->nData; p->nData += 1; p->iRowid = iRowid; p->pHashNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; pHash->nEntry++; nIncr += p->nData; } /* Check there is enough space to append a new entry. Worst case scenario ** is: ** ** + 9 bytes for a new rowid, ** + 4 bytes reserved for the "poslist size" varint. ** + 1 byte for a "new column" byte, ** + 3 bytes for a new column number (16-bit max) as a varint, ** + 5 bytes for the new position offset (32-bit max). */ if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ int nNew = p->nAlloc * 2; Fts5HashEntry *pNew; ................................................................................ } pPtr = (u8*)p; nIncr -= p->nData; /* If this is a new rowid, fill in the size field of the previous entry's ** position list, then append the new rowid delta and reserve a single ** byte for the new poslist-size field. 
*/ if( iRowid!=p->iRowid ){ fts5HashAddPoslistSize(p); p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid - p->iRowid); p->iSzPoslist = p->nData; p->nData += 1; p->iCol = 0; p->iPos = 0; p->iRowid = iRowid; } if( iCol>=0 ){ /* Append a new column value, if necessary */ ................................................................................ rc = fts5HashEntrySort(pHash, 0, 0, &pList); if( rc==SQLITE_OK ){ memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot); while( pList ){ Fts5HashEntry *pNext = pList->pScanNext; if( rc==SQLITE_OK ){ const int nKey = strlen(pList->zKey); i64 iRowid = 0; u8 *pPtr = (u8*)pList; int iOff = sizeof(Fts5HashEntry) + nKey + 1; /* Fill in the final poslist size field */ fts5HashAddPoslistSize(pList); /* Issue the new-term callback */ rc = xTerm(pCtx, pList->zKey, nKey); /* Issue the xEntry callbacks */ while( rc==SQLITE_OK && iOff<pList->nData ){ i64 iDelta; /* Rowid delta value */ int nPoslist; /* Size of position list in bytes */ int nVarint; iOff += getVarint(&pPtr[iOff], (u64*)&iDelta); iRowid += iDelta; nVarint = fts5GetVarint32(&pPtr[iOff], nPoslist); rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist+nVarint); iOff += nVarint+nPoslist; } /* Issue the term-done callback */ if( rc==SQLITE_OK ) rc = xTermDone(pCtx); } sqlite3_free(pList); pList = pNext; ................................................................................ Fts5HashEntry *p; for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break; } if( p ){ fts5HashAddPoslistSize(p); *ppDoclist = &p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ *ppDoclist = 0; *pnDoclist = 0; } ................................................................................ 
Fts5Hash *pHash, const char **pzTerm, /* OUT: term (nul-terminated) */ const char **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ){ Fts5HashEntry *p; if( (p = pHash->pScan) ){ int nTerm = strlen(p->zKey); fts5HashAddPoslistSize(p); *pzTerm = p->zKey; *ppDoclist = &p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ *pzTerm = 0; *ppDoclist = 0; *pnDoclist = 0; } } |
Changes to ext/fts5/fts5_index.c.
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
...
662
663
664
665
666
667
668
669
670
671
672
673
674
675
|
/*
** An object of type Fts5SegWriter is used to write to segments.
**
** Fts5PageWriter holds the state kept for one page: its page number, a
** buffer containing the page data and a buffer containing the previous
** term on the page. Fts5SegWriter.aWriter points to an array of these
** objects.
*/
struct Fts5PageWriter {
int pgno; /* Page number for this page */
Fts5Buffer buf; /* Buffer containing page data */
Fts5Buffer term; /* Buffer containing previous term on page */
};
struct Fts5SegWriter {
int iIdx; /* Index to write to */
int iSegid; /* Segid to write to */
int nWriter; /* Number of entries in aWriter */
Fts5PageWriter *aWriter; /* Array of PageWriter objects */
i64 iPrevRowid; /* Previous docid written to current leaf */
................................................................................
p -= 2;
n = sqlite3GetVarint(p, &v64);
*v = (u32)v64;
assert( n>3 && n<=9 );
return n;
}
}
/*
** Allocate and return a buffer at least nByte bytes in size.
**
** If an OOM error is encountered, return NULL and set the error code in
** the Fts5Index handle passed as the first argument.
*/
|
|
>
>
>
>
>
>
>
>
|
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
...
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
|
/* ** An object of type Fts5SegWriter is used to write to segments. */ struct Fts5PageWriter { int pgno; /* Page number for this page */ Fts5Buffer buf; /* Buffer containing page data */ Fts5Buffer term; /* Buffer containing previous term on page */ }; struct Fts5SegWriter { int iIdx; /* Index to write to */ int iSegid; /* Segid to write to */ int nWriter; /* Number of entries in aWriter */ Fts5PageWriter *aWriter; /* Array of PageWriter objects */ i64 iPrevRowid; /* Previous docid written to current leaf */ ................................................................................ p -= 2; n = sqlite3GetVarint(p, &v64); *v = (u32)v64; assert( n>3 && n<=9 ); return n; } } /* ** Return the number of bytes needed to store iVal as a varint: 1 byte for ** values below 2^7, 2 bytes below 2^14, 3 bytes below 2^21, 4 bytes below ** 2^28, and 5 bytes otherwise. */ int sqlite3Fts5GetVarintLen(u32 iVal){ if( iVal<(1 << 7 ) ) return 1; if( iVal<(1 << 14) ) return 2; if( iVal<(1 << 21) ) return 3; if( iVal<(1 << 28) ) return 4; return 5; } /* ** Allocate and return a buffer at least nByte bytes in size. ** ** If an OOM error is encountered, return NULL and set the error code in ** the Fts5Index handle passed as the first argument. */ |
Changes to ext/fts5/tool/loadfts5.tcl.
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 .. 60 61 62 63 64 65 66 67 68 69 70 71 72 73 .. 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
proc usage {} { puts stderr "Usage: $::argv0 ?SWITCHES? DATABASE PATH" puts stderr "" puts stderr "Switches are:" puts stderr " -fts4 (use fts4 instead of fts5)" puts stderr " -fts5 (use fts5)" puts stderr " -porter (use porter tokenizer)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" puts stderr " -crisismerge N (set the crisismerge parameter to N)" exit 1 } set O(vtab) fts5 set O(tok) "" set O(limit) 0 set O(automerge) -1 set O(crisismerge) -1 if {[llength $argv]<2} usage set nOpt [expr {[llength $argv]-2}] for {set i 0} {$i < $nOpt} {incr i} { set arg [lindex $argv $i] ................................................................................ -fts5 { set O(vtab) fts5 } -porter { set O(tok) ", tokenize=porter" } -limit { if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] } -automerge { ................................................................................ default { usage } } } sqlite3 db [lindex $argv end-1] db func loadfile loadfile db transaction { catch { db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok))" } if {$O(automerge)>=0} { |
> > > > > > | > > |
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 .. 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 .. 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
proc usage {} { puts stderr "Usage: $::argv0 ?SWITCHES? DATABASE PATH" puts stderr "" puts stderr "Switches are:" puts stderr " -fts4 (use fts4 instead of fts5)" puts stderr " -fts5 (use fts5)" puts stderr " -porter (use porter tokenizer)" puts stderr " -delete (delete the database file before starting)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" puts stderr " -crisismerge N (set the crisismerge parameter to N)" exit 1 } set O(vtab) fts5 set O(tok) "" set O(limit) 0 set O(delete) 0 set O(automerge) -1 set O(crisismerge) -1 if {[llength $argv]<2} usage set nOpt [expr {[llength $argv]-2}] for {set i 0} {$i < $nOpt} {incr i} { set arg [lindex $argv $i] ................................................................................ -fts5 { set O(vtab) fts5 } -porter { set O(tok) ", tokenize=porter" } -delete { set O(delete) 1 } -limit { if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] } -automerge { ................................................................................ default { usage } } } set dbfile [lindex $argv end-1] if {$O(delete)} { file delete -force $dbfile } sqlite3 db $dbfile db func loadfile loadfile db transaction { catch { db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok))" } if {$O(automerge)>=0} { |