Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch fts5-incompatible Excluding Merge-Ins
This is equivalent to a diff from 47a46a9f to 99de5e36
2015-09-10
| ||
17:23 | Modify the fts5 leaf page format to permit faster seek operations. This is a file-format change. Any existing databases can be upgraded by running the fts5 'rebuild' command. (check-in: 0c0c4ae9 user: dan tags: trunk) | |
17:20 | Create separate "path" and "root" columns in the json_each() and json_tree() virtual tables. "Root" is the 2nd parameter and is fixed. "Path" varies as json_tree() walks the hierarchy. (check-in: 127cce3e user: drh tags: trunk) | |
16:39 | Increment the fts5 version value to indicate that the on-disk format has changed. (Closed-Leaf check-in: 99de5e36 user: dan tags: fts5-incompatible) | |
16:19 | Fix a segfault in fts5 that could occur if the database contents were corrupt. (check-in: 4931e37d user: dan tags: fts5-incompatible) | |
15:52 | Merge latest changes from trunk. Including fts5_expr.c fixes. (check-in: 716e7e74 user: dan tags: fts5-incompatible) | |
15:24 | Make the sqlite3ext.h header file responsive to -DSQLITE_OMIT_LOAD_EXTENSION. (check-in: 47a46a9f user: drh tags: trunk) | |
15:22 | Disable tests for json_each() and json_tree() on builds where virtual tables are not supported (check-in: bb8ee3b1 user: drh tags: trunk) | |
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
113 114 115 116 117 118 119 120 121 122 123 124 125 126 | ** This exists in order to allow the fts5_index.c module to return a ** decent error message if it encounters a file-format version it does ** not understand. ** ** bColumnsize: ** True if the %_docsize table is created. ** */ struct Fts5Config { sqlite3 *db; /* Database handle */ char *zDb; /* Database holding FTS index (e.g. "main") */ char *zName; /* Name of FTS index */ int nCol; /* Number of columns */ char **azCol; /* Column names */ | > > > > > > | 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | ** This exists in order to allow the fts5_index.c module to return a ** decent error message if it encounters a file-format version it does ** not understand. ** ** bColumnsize: ** True if the %_docsize table is created. ** ** bPrefixIndex: ** This is only used for debugging. If set to false, any prefix indexes ** are ignored. This value is configured using: ** ** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex); ** */ struct Fts5Config { sqlite3 *db; /* Database handle */ char *zDb; /* Database holding FTS index (e.g. "main") */ char *zName; /* Name of FTS index */ int nCol; /* Number of columns */ char **azCol; /* Column names */ |
︙ | ︙ | |||
141 142 143 144 145 146 147 148 149 150 | int nAutomerge; /* 'automerge' setting */ int nCrisisMerge; /* Maximum allowed segments per level */ char *zRank; /* Name of rank function */ char *zRankArgs; /* Arguments to rank function */ /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ char **pzErrmsg; }; /* Current expected value of %_config table 'version' field */ | > > > > | | 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | int nAutomerge; /* 'automerge' setting */ int nCrisisMerge; /* Maximum allowed segments per level */ char *zRank; /* Name of rank function */ char *zRankArgs; /* Arguments to rank function */ /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ char **pzErrmsg; #ifdef SQLITE_DEBUG int bPrefixIndex; /* True to use prefix-indexes */ #endif }; /* Current expected value of %_config table 'version' field */ #define FTS5_CURRENT_VERSION 4 #define FTS5_CONTENT_NORMAL 0 #define FTS5_CONTENT_NONE 1 #define FTS5_CONTENT_EXTERNAL 2 |
︙ | ︙ |
Changes to ext/fts5/fts5_buffer.c.
︙ | ︙ | |||
12 13 14 15 16 17 18 | */ #include "fts5Int.h" int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){ | < < > > > > | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | */ #include "fts5Int.h" int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){ if( (pBuf->n + nByte) > pBuf->nSpace ){ u8 *pNew; int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64; /* A no-op if an error has already occurred */ if( *pRc ) return 1; while( nNew<(pBuf->n + nByte) ){ nNew = nNew * 2; } pNew = sqlite3_realloc(pBuf->p, nNew); if( pNew==0 ){ *pRc = SQLITE_NOMEM; return 1; |
︙ | ︙ |
Changes to ext/fts5/fts5_config.c.
︙ | ︙ | |||
476 477 478 479 480 481 482 483 484 485 486 487 488 489 | nByte = nArg * (sizeof(char*) + sizeof(u8)); pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); pRet->bColumnsize = 1; if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); rc = SQLITE_ERROR; } for(i=3; rc==SQLITE_OK && i<nArg; i++){ const char *zOrig = azArg[i]; | > > > | 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 | nByte = nArg * (sizeof(char*) + sizeof(u8)); pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); pRet->bColumnsize = 1; #ifdef SQLITE_DEBUG pRet->bPrefixIndex = 1; #endif if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); rc = SQLITE_ERROR; } for(i=3; rc==SQLITE_OK && i<nArg; i++){ const char *zOrig = azArg[i]; |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
83 84 85 86 87 88 89 | ** ** Then, for each level from 0 to nMax: ** ** + number of input segments in ongoing merge. ** + total number of segments in level. ** + for each segment from oldest to newest: ** + segment id (always > 0) | < | | | > < | > > > > > > > > > > > > > > > > > | | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 | ** ** Then, for each level from 0 to nMax: ** ** + number of input segments in ongoing merge. ** + total number of segments in level. ** + for each segment from oldest to newest: ** + segment id (always > 0) ** + first leaf page number (often 1, always greater than 0) ** + final leaf page number ** ** 2. The Averages Record: ** ** A single record within the %_data table. The data is a list of varints. ** The first value is the number of rows in the index. Then, for each column ** from left to right, the total number of tokens in the column for all ** rows of the table. ** ** 3. Segment leaves: ** ** TERM/DOCLIST FORMAT: ** ** Most of each segment leaf is taken up by term/doclist data. The ** general format of term/doclist, starting with the first term ** on the leaf page, is: ** ** varint : size of first term ** blob: first term data ** doclist: first doclist ** zero-or-more { ** varint: number of bytes in common with previous term ** varint: number of bytes of new term data (nNew) ** blob: nNew bytes of new term data ** doclist: next doclist ** } ** ** doclist format: ** ** varint: first rowid ** poslist: first poslist ** zero-or-more { ** varint: rowid delta (always > 0) ** poslist: next poslist ** } ** ** poslist format: ** ** varint: size of poslist in bytes multiplied by 2, not including ** this field. Plus 1 if this entry carries the "delete" flag. ** collist: collist for column 0 ** zero-or-more { ** 0x01 byte ** varint: column number (I) ** collist: collist for column I ** } ** ** collist format: ** ** varint: first offset + 2 ** zero-or-more { ** varint: offset delta + 2 ** } ** ** PAGE FORMAT ** ** Each leaf page begins with a 4-byte header containing 2 16-bit ** unsigned integer fields in big-endian format. They are: ** ** * The byte offset of the first rowid on the page, if it exists ** and occurs before the first term (otherwise 0). ** ** * The byte offset of the start of the page footer. If the page ** footer is 0 bytes in size, then this field is the same as the ** size of the leaf page in bytes. ** ** The page footer consists of a single varint for each term located ** on the page. Each varint is the byte offset of the current term ** within the page, delta-compressed against the previous value. In ** other words, the first varint in the footer is the byte offset of ** the first term, the second is the byte offset of the second less that ** of the first, and so on. ** ** The term/doclist format described above is accurate if the entire ** term/doclist data fits on a single leaf page. If this is not the case, ** the format is changed in two ways: ** ** + if the first rowid on a page occurs before the first term, it ** is stored as a literal value: ** ** varint: first rowid ** ** + the first term on each page is stored in the same way as the ** very first term of the segment: ** ** varint : size of first term ** blob: first term data ** ** 5. Segment doclist indexes: ** ** Doclist indexes are themselves b-trees, however they usually consist of ** a single leaf record only. The format of each doclist index leaf page ** is: ** ** * Flags byte. Bits are: |
︙ | ︙ | |||
233 234 235 236 237 238 239 | /* ** Rowids for the averages and structure records in the %_data table. */ #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ #define FTS5_STRUCTURE_ROWID 10 /* The structure record */ /* | | < | | < < | < < < < < | < | < > | | | | 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 | /* ** Rowids for the averages and structure records in the %_data table. */ #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ #define FTS5_STRUCTURE_ROWID 10 /* The structure record */ /* ** Macros determining the rowids used by segment leaves and dlidx leaves ** and nodes. All nodes and leaves are stored in the %_data table with large ** positive rowids. ** ** Each segment has a unique non-zero 16-bit id. ** ** The rowid for each segment leaf is found by passing the segment id and ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered ** sequentially starting from 1. */ #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */ #define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */ #define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */ #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */ #define fts5_dri(segid, dlidx, height, pgno) ( \ ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \ ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ ((i64)(height) << (FTS5_DATA_PAGE_B)) + \ ((i64)(pgno)) \ ) #define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno) #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) /* ** Maximum segments permitted in a single index */ #define FTS5_MAX_SEGMENT 2000 #ifdef SQLITE_DEBUG |
︙ | ︙ | |||
299 300 301 302 303 304 305 | typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ | | > | 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 | typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ int nn; /* Size of record in bytes */ int szLeaf; /* Size of leaf without page-index */ }; /* ** One object per %_data table. */ struct Fts5Index { Fts5Config *pConfig; /* Virtual table configuration */ |
︙ | ︙ | |||
351 352 353 354 355 356 357 | /* ** The contents of the "structure" record for each index are represented ** using an Fts5Structure record in memory. Which uses instances of the ** other Fts5StructureXXX types as components. */ struct Fts5StructureSegment { int iSegid; /* Segment id */ | < | 320 321 322 323 324 325 326 327 328 329 330 331 332 333 | /* ** The contents of the "structure" record for each index are represented ** using an Fts5Structure record in memory. Which uses instances of the ** other Fts5StructureXXX types as components. */ struct Fts5StructureSegment { int iSegid; /* Segment id */ int pgnoFirst; /* First leaf page number in segment */ int pgnoLast; /* Last leaf page number in segment */ }; struct Fts5StructureLevel { int nMerge; /* Number of segments in incr-merge */ int nSeg; /* Total number of segments on level */ Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */ |
︙ | ︙ | |||
373 374 375 376 377 378 379 | }; /* ** An object of type Fts5SegWriter is used to write to segments. */ struct Fts5PageWriter { int pgno; /* Page number for this page */ | > | > > | 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 | }; /* ** An object of type Fts5SegWriter is used to write to segments. */ struct Fts5PageWriter { int pgno; /* Page number for this page */ int iPrevPgidx; /* Previous value written into pgidx */ Fts5Buffer buf; /* Buffer containing leaf data */ Fts5Buffer pgidx; /* Buffer containing page-index */ Fts5Buffer term; /* Buffer containing previous term on page */ }; struct Fts5DlidxWriter { int pgno; /* Page number for this page */ int bPrevValid; /* True if iPrev is valid */ i64 iPrev; /* Previous rowid value written to page */ Fts5Buffer buf; /* Buffer containing page data */ }; struct Fts5SegWriter { int iSegid; /* Segid to write to */ Fts5PageWriter writer; /* PageWriter object */ i64 iPrevRowid; /* Previous rowid written to current leaf */ u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ u8 bFirstRowidInPage; /* True if next rowid is first in page */ /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */ u8 bFirstTermInPage; /* True if next term will be first in leaf */ int nLeafWritten; /* Number of leaf pages written */ int nEmpty; /* Number of contiguous term-less nodes */ int nDlidx; /* Allocated size of aDlidx[] array */ Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */ |
︙ | ︙ | |||
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 | ** ** iRowidOffset/nRowidOffset/aRowidOffset: ** These are used if the FTS5_SEGITER_REVERSE flag is set. ** ** For each rowid on the page corresponding to the current term, the ** corresponding aRowidOffset[] entry is set to the byte offset of the ** start of the "position-list-size" field within the page. */ struct Fts5SegIter { Fts5StructureSegment *pSeg; /* Segment to iterate through */ int flags; /* Mask of configuration flags */ int iLeafPgno; /* Current leaf page number */ Fts5Data *pLeaf; /* Current leaf data */ Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ int iLeafOffset; /* Byte offset within current leaf */ /* The page and offset from which the current term was read. The offset ** is the offset of the first rowid in the current doclist. */ int iTermLeafPgno; int iTermLeafOffset; /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ int iRowidOffset; /* Current entry in aRowidOffset[] */ int nRowidOffset; /* Allocated size of aRowidOffset[] array */ int *aRowidOffset; /* Array of offset to rowid fields */ Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ /* Variables populated based on current entry. */ Fts5Buffer term; /* Current term */ i64 iRowid; /* Current rowid */ int nPos; /* Number of bytes in current position list */ int bDel; /* True if the delete flag is set */ }; #define FTS5_SEGITER_ONETERM 0x01 #define FTS5_SEGITER_REVERSE 0x02 /* ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. ** There is no way to tell if this is populated or not. */ struct Fts5IndexIter { Fts5Index *pIndex; /* Index that owns this iterator */ | > > > > > > > > > > > > > > > > > > > > > > > > > | 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 | ** ** iRowidOffset/nRowidOffset/aRowidOffset: ** These are used if the FTS5_SEGITER_REVERSE flag is set. ** ** For each rowid on the page corresponding to the current term, the ** corresponding aRowidOffset[] entry is set to the byte offset of the ** start of the "position-list-size" field within the page. ** ** iTermIdx: ** Index of current term on iTermLeafPgno. */ struct Fts5SegIter { Fts5StructureSegment *pSeg; /* Segment to iterate through */ int flags; /* Mask of configuration flags */ int iLeafPgno; /* Current leaf page number */ Fts5Data *pLeaf; /* Current leaf data */ Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ int iLeafOffset; /* Byte offset within current leaf */ /* The page and offset from which the current term was read. The offset ** is the offset of the first rowid in the current doclist. */ int iTermLeafPgno; int iTermLeafOffset; int iPgidxOff; /* Next offset in pgidx */ int iEndofDoclist; /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ int iRowidOffset; /* Current entry in aRowidOffset[] */ int nRowidOffset; /* Allocated size of aRowidOffset[] array */ int *aRowidOffset; /* Array of offset to rowid fields */ Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ /* Variables populated based on current entry. */ Fts5Buffer term; /* Current term */ i64 iRowid; /* Current rowid */ int nPos; /* Number of bytes in current position list */ int bDel; /* True if the delete flag is set */ }; /* ** Argument is a pointer to an Fts5Data structure that contains a ** leaf page. */ #define ASSERT_SZLEAF_OK(x) assert( \ (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \ ) #define FTS5_SEGITER_ONETERM 0x01 #define FTS5_SEGITER_REVERSE 0x02 /* ** Argument is a pointer to an Fts5Data structure that contains a leaf ** page. This macro evaluates to true if the leaf contains no terms, or ** false if it contains at least one term. */ #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) /* ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. ** There is no way to tell if this is populated or not. */ struct Fts5IndexIter { Fts5Index *pIndex; /* Index that owns this iterator */ |
︙ | ︙ | |||
614 615 616 617 618 619 620 621 622 623 624 625 626 627 | ){ int nCmp = MIN(nLeft, nRight); int res = memcmp(pLeft, pRight, nCmp); return (res==0 ? (nLeft - nRight) : res); } #endif /* ** Close the read-only blob handle, if it is open. */ static void fts5CloseReader(Fts5Index *p){ if( p->pReader ){ sqlite3_blob *pReader = p->pReader; | > > > > > | 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 | ){ int nCmp = MIN(nLeft, nRight); int res = memcmp(pLeft, pRight, nCmp); return (res==0 ? (nLeft - nRight) : res); } #endif static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ int ret; fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret); return ret; } /* ** Close the read-only blob handle, if it is open. */ static void fts5CloseReader(Fts5Index *p){ if( p->pReader ){ sqlite3_blob *pReader = p->pReader; |
︙ | ︙ | |||
675 676 677 678 679 680 681 | if( rc==SQLITE_OK ){ u8 *aOut = 0; /* Read blob data into this buffer */ int nByte = sqlite3_blob_bytes(p->pReader); int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING; pRet = (Fts5Data*)sqlite3_malloc(nAlloc); if( pRet ){ | | > > > | 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 | if( rc==SQLITE_OK ){ u8 *aOut = 0; /* Read blob data into this buffer */ int nByte = sqlite3_blob_bytes(p->pReader); int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING; pRet = (Fts5Data*)sqlite3_malloc(nAlloc); if( pRet ){ pRet->nn = nByte; aOut = pRet->p = (u8*)&pRet[1]; }else{ rc = SQLITE_NOMEM; } if( rc==SQLITE_OK ){ rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); } if( rc!=SQLITE_OK ){ sqlite3_free(pRet); pRet = 0; }else{ /* TODO1: Fix this */ pRet->szLeaf = fts5GetU16(&pRet->p[2]); } } p->rc = rc; p->nRead++; } assert( (pRet==0)==(p->rc!=SQLITE_OK) ); |
︙ | ︙ | |||
781 782 783 784 785 786 787 | p->rc = sqlite3_reset(p->pDeleter); } /* ** Remove all records associated with segment iSegid. */ static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ | | | | 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 | p->rc = sqlite3_reset(p->pDeleter); } /* ** Remove all records associated with segment iSegid. */ static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0); i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1; fts5DataDelete(p, iFirst, iLast); if( p->pIdxDeleter==0 ){ Fts5Config *pConfig = p->pConfig; fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf( "DELETE FROM '%q'.'%q_idx' WHERE segid=?", pConfig->zDb, pConfig->zName )); |
︙ | ︙ | |||
879 880 881 882 883 884 885 | nTotal * sizeof(Fts5StructureSegment) ); if( rc==SQLITE_OK ){ pLvl->nSeg = nTotal; for(iSeg=0; iSeg<nTotal; iSeg++){ i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid); | < | 883 884 885 886 887 888 889 890 891 892 893 894 895 896 | nTotal * sizeof(Fts5StructureSegment) ); if( rc==SQLITE_OK ){ pLvl->nSeg = nTotal; for(iSeg=0; iSeg<nTotal; iSeg++){ i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); } }else{ fts5StructureRelease(pRet); pRet = 0; } |
︙ | ︙ | |||
970 971 972 973 974 975 976 | Fts5Structure *pRet = 0; /* Object to return */ int iCookie; /* Configuration cookie */ Fts5Data *pData; Fts5Buffer buf = {0, 0, 0}; pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); if( p->rc ) return 0; | > | | | 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 | Fts5Structure *pRet = 0; /* Object to return */ int iCookie; /* Configuration cookie */ Fts5Data *pData; Fts5Buffer buf = {0, 0, 0}; pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); if( p->rc ) return 0; /* TODO: Do we need this if the leaf-index is appended? Probably... */ memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING); p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); } fts5DataRelease(pData); if( p->rc!=SQLITE_OK ){ fts5StructureRelease(pRet); |
︙ | ︙ | |||
1035 1036 1037 1038 1039 1040 1041 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); assert( pLvl->nMerge<=pLvl->nSeg ); for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); | < | 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); assert( pLvl->nMerge<=pLvl->nSeg ); for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); } } fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n); fts5BufferFree(&buf); |
︙ | ︙ | |||
1124 1125 1126 1127 1128 1129 1130 | ){ if( p->rc==SQLITE_OK ){ int iTst; int iPromote = -1; int szPromote = 0; /* Promote anything this size or smaller */ Fts5StructureSegment *pSeg; /* Segment just written */ int szSeg; /* Size of segment just written */ | | > | 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 | ){ if( p->rc==SQLITE_OK ){ int iTst; int iPromote = -1; int szPromote = 0; /* Promote anything this size or smaller */ Fts5StructureSegment *pSeg; /* Segment just written */ int szSeg; /* Size of segment just written */ int nSeg = pStruct->aLevel[iLvl].nSeg; if( nSeg==0 ) return; pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); /* Check for condition (a) */ for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); if( iTst>=0 ){ int i; |
︙ | ︙ | |||
1174 1175 1176 1177 1178 1179 1180 | assert( pLvl->bEof==0 ); pLvl->iOff = 1; pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); pLvl->iFirstOff = pLvl->iOff; }else{ int iOff; | | | | 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 | assert( pLvl->bEof==0 ); pLvl->iOff = 1; pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); pLvl->iFirstOff = pLvl->iOff; }else{ int iOff; for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){ if( pData->p[iOff] ) break; } if( iOff<pData->nn ){ i64 iVal; pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal); pLvl->iRowid += iVal; pLvl->iOff = iOff; }else{ pLvl->bEof = 1; |
︙ | ︙ | |||
1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 | /* ** Load the next leaf page into the segment iterator. */ static void fts5SegIterNextPage( Fts5Index *p, /* FTS5 backend object */ Fts5SegIter *pIter /* Iterator to advance to next page */ ){ Fts5StructureSegment *pSeg = pIter->pSeg; fts5DataRelease(pIter->pLeaf); pIter->iLeafPgno++; if( pIter->pNextLeaf ){ assert( pIter->iLeafPgno<=pSeg->pgnoLast ); pIter->pLeaf = pIter->pNextLeaf; pIter->pNextLeaf = 0; }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ pIter->pLeaf = fts5DataRead(p, | > | > > > > > > > > > > > > | 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 | /* ** Load the next leaf page into the segment iterator. */ static void fts5SegIterNextPage( Fts5Index *p, /* FTS5 backend object */ Fts5SegIter *pIter /* Iterator to advance to next page */ ){ Fts5Data *pLeaf; Fts5StructureSegment *pSeg = pIter->pSeg; fts5DataRelease(pIter->pLeaf); pIter->iLeafPgno++; if( pIter->pNextLeaf ){ assert( pIter->iLeafPgno<=pSeg->pgnoLast ); pIter->pLeaf = pIter->pNextLeaf; pIter->pNextLeaf = 0; }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ pIter->pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno) ); }else{ pIter->pLeaf = 0; } pLeaf = pIter->pLeaf; if( pLeaf ){ pIter->iPgidxOff = pLeaf->szLeaf; if( fts5LeafIsTermless(pLeaf) ){ pIter->iEndofDoclist = pLeaf->nn+1; }else{ pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], pIter->iEndofDoclist ); } } } /* ** Argument p points to a buffer containing a varint to be interpreted as a ** position list size field. Read the varint and return the number of bytes ** read. Before returning, set *pnSz to the number of bytes in the position |
︙ | ︙ | |||
1466 1467 1468 1469 1470 1471 1472 | ** ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the ** position list content (if any). */ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ if( p->rc==SQLITE_OK ){ int iOff = pIter->iLeafOffset; /* Offset to read at */ | > | > | | 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 | ** ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the ** position list content (if any). */ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ if( p->rc==SQLITE_OK ){ int iOff = pIter->iLeafOffset; /* Offset to read at */ ASSERT_SZLEAF_OK(pIter->pLeaf); if( iOff>=pIter->pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ const u8 *a = &pIter->pLeaf->p[iOff]; pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel); } } } static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ int iOff = pIter->iLeafOffset; ASSERT_SZLEAF_OK(pIter->pLeaf); if( iOff>=pIter->pLeaf->szLeaf ){ fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ){ if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; return; } iOff = 4; a = pIter->pLeaf->p; |
︙ | ︙ | |||
1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 | iOff += fts5GetVarint32(&a[iOff], nNew); pIter->term.n = nKeep; fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); iOff += nNew; pIter->iTermLeafOffset = iOff; pIter->iTermLeafPgno = pIter->iLeafPgno; pIter->iLeafOffset = iOff; fts5SegIterLoadRowid(p, pIter); } /* ** Initialize the iterator object pIter to iterate through the entries in ** segment pSeg. The iterator is left pointing to the first entry when | > > > > > > > > | 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 | iOff += fts5GetVarint32(&a[iOff], nNew); pIter->term.n = nKeep; fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); iOff += nNew; pIter->iTermLeafOffset = iOff; pIter->iTermLeafPgno = pIter->iLeafPgno; pIter->iLeafOffset = iOff; if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ pIter->iEndofDoclist = pIter->pLeaf->nn+1; }else{ int nExtra; pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra); pIter->iEndofDoclist += nExtra; } fts5SegIterLoadRowid(p, pIter); } /* ** Initialize the iterator object pIter to iterate through the entries in ** segment pSeg. The iterator is left pointing to the first entry when |
︙ | ︙ | |||
1554 1555 1556 1557 1558 1559 1560 | memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; pIter->iLeafPgno = pSeg->pgnoFirst-1; fts5SegIterNextPage(p, pIter); } if( p->rc==SQLITE_OK ){ | < | > > > | 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 | memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; pIter->iLeafPgno = pSeg->pgnoFirst-1; fts5SegIterNextPage(p, pIter); } if( p->rc==SQLITE_OK ){ pIter->iLeafOffset = 4; assert_nc( pIter->pLeaf->nn>4 ); assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; fts5SegIterLoadTerm(p, pIter, 0); fts5SegIterLoadNPos(p, pIter); } } /* ** This function is only ever called on iterators created by calls to |
︙ | ︙ | |||
1577 1578 1579 1580 1581 1582 1583 | ** This function advances the iterator so that it points to the last ** relevant rowid on the page and, if necessary, initializes the ** aRowidOffset[] and iRowidOffset variables. At this point the iterator ** is in its regular state - Fts5SegIter.iLeafOffset points to the first ** byte of the position list content associated with said rowid. */ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ | | > > > > > < | 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 | ** This function advances the iterator so that it points to the last ** relevant rowid on the page and, if necessary, initializes the ** aRowidOffset[] and iRowidOffset variables. At this point the iterator ** is in its regular state - Fts5SegIter.iLeafOffset points to the first ** byte of the position list content associated with said rowid. */ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ int n = pIter->pLeaf->szLeaf; int i = pIter->iLeafOffset; u8 *a = pIter->pLeaf->p; int iRowidOffset = 0; if( n>pIter->iEndofDoclist ){ n = pIter->iEndofDoclist; } ASSERT_SZLEAF_OK(pIter->pLeaf); while( 1 ){ i64 iDelta = 0; int nPos; int bDummy; i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); i += nPos; if( i>=n ) break; i += fts5GetVarint(&a[i], (u64*)&iDelta); pIter->iRowid += iDelta; if( iRowidOffset>=pIter->nRowidOffset ){ int nNew = pIter->nRowidOffset + 8; int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int)); if( aNew==0 ){ p->rc = SQLITE_NOMEM; |
︙ | ︙ | |||
1625 1626 1627 1628 1629 1630 1631 | fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ Fts5Data *pNew; pIter->iLeafPgno--; pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( | | | | | > | 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 | fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ Fts5Data *pNew; pIter->iLeafPgno--; pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( pIter->pSeg->iSegid, pIter->iLeafPgno )); if( pNew ){ if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ if( pIter->iTermLeafOffset<pNew->szLeaf ){ pIter->pLeaf = pNew; pIter->iLeafOffset = pIter->iTermLeafOffset; } }else{ int iRowidOff; iRowidOff = fts5LeafFirstRowidOff(pNew); if( iRowidOff ){ pIter->pLeaf = pNew; pIter->iLeafOffset = iRowidOff; } } if( pIter->pLeaf ){ u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid); break; }else{ fts5DataRelease(pNew); } } } if( pIter->pLeaf ){ pIter->iEndofDoclist = pIter->pLeaf->nn+1; fts5SegIterReverseInitPage(p, pIter); } } /* ** Return true if the iterator passed as the second argument currently ** points to a delete marker. A delete marker is an entry with a 0 byte |
︙ | ︙ | |||
1708 1709 1710 1711 1712 1713 1714 | Fts5Data *pLeaf = pIter->pLeaf; int iOff; int bNewTerm = 0; int nKeep = 0; /* Search for the end of the position list within the current page. */ u8 *a = pLeaf->p; | | > | < < | | | < < < | > > > > > | > > > | | > > > > > | > > > > > > | > | 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 | Fts5Data *pLeaf = pIter->pLeaf; int iOff; int bNewTerm = 0; int nKeep = 0; /* Search for the end of the position list within the current page. */ u8 *a = pLeaf->p; int n = pLeaf->szLeaf; ASSERT_SZLEAF_OK(pLeaf); iOff = pIter->iLeafOffset + pIter->nPos; if( iOff<n ){ /* The next entry is on the current page. */ assert_nc( iOff<=pIter->iEndofDoclist ); if( iOff>=pIter->iEndofDoclist ){ bNewTerm = 1; if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ iOff += fts5GetVarint32(&a[iOff], nKeep); } }else{ u64 iDelta; iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); pIter->iRowid += iDelta; assert_nc( iDelta>0 ); } pIter->iLeafOffset = iOff; }else if( pIter->pSeg==0 ){ const u8 *pList = 0; const char *zTerm = 0; int nList = 0; if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ sqlite3Fts5HashScanNext(p->pHash); sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); } if( pList==0 ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; }else{ pIter->pLeaf->p = (u8*)pList; pIter->pLeaf->nn = nList; pIter->pLeaf->szLeaf = nList; pIter->iEndofDoclist = nList+1; sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); } }else{ iOff = 0; /* Next entry is not on the current page */ while( iOff==0 ){ fts5SegIterNextPage(p, pIter); pLeaf = pIter->pLeaf; if( pLeaf==0 ) break; ASSERT_SZLEAF_OK(pLeaf); if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){ iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; if( pLeaf->nn>pLeaf->szLeaf ){ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist ); } } else if( pLeaf->nn>pLeaf->szLeaf ){ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( &pLeaf->p[pLeaf->szLeaf], iOff ); pIter->iLeafOffset = iOff; pIter->iEndofDoclist = iOff; bNewTerm = 1; } if( iOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; return; } } } /* Check if the iterator is now at EOF. If so, return early. */ if( pIter->pLeaf ){ if( bNewTerm ){ if( pIter->flags & FTS5_SEGITER_ONETERM ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; }else{ int nExtra; fts5SegIterLoadTerm(p, pIter, nKeep); fts5SegIterLoadNPos(p, pIter); if( pbNewTerm ) *pbNewTerm = 1; } }else{ fts5SegIterLoadNPos(p, pIter); } |
︙ | ︙ | |||
1801 1802 1803 1804 1805 1806 1807 | Fts5DlidxIter *pDlidx = pIter->pDlidx; Fts5Data *pLast = 0; int pgnoLast = 0; if( pDlidx ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = fts5DlidxIterPgno(pDlidx); | | < < < < < < < < < < < < < < < < < < < < | | | | > | < | > > > > > > > | 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 | Fts5DlidxIter *pDlidx = pIter->pDlidx; Fts5Data *pLast = 0; int pgnoLast = 0; if( pDlidx ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = fts5DlidxIterPgno(pDlidx); pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); }else{ int iOff; /* Byte offset within pLeaf */ Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ /* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first ** byte of position-list content for the current rowid. Back it up ** so that it points to the start of the position-list size field. */ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); /* If this condition is true then the largest rowid for the current ** term may not be stored on the current page. So search forward to ** see where said rowid really is. */ if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ int pgno; Fts5StructureSegment *pSeg = pIter->pSeg; /* The last rowid in the doclist may not be on the current page. Search ** forward to find the page containing the last rowid. */ for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno); Fts5Data *pNew = fts5DataRead(p, iAbs); if( pNew ){ int iRowid, bTermless; iRowid = fts5LeafFirstRowidOff(pNew); bTermless = fts5LeafIsTermless(pNew); if( iRowid ){ SWAPVAL(Fts5Data*, pNew, pLast); pgnoLast = pgno; } fts5DataRelease(pNew); if( bTermless==0 ) break; } } } } /* If pLast is NULL at this point, then the last rowid for this doclist ** lies on the page currently indicated by the iterator. In this case ** pIter->iLeafOffset is already set to point to the position-list size ** field associated with the first relevant rowid on the page. ** ** Or, if pLast is non-NULL, then it is the page that contains the last ** rowid. In this case configure the iterator so that it points to the ** first rowid on this page. */ if( pLast ){ int iOff; fts5DataRelease(pIter->pLeaf); pIter->pLeaf = pLast; pIter->iLeafPgno = pgnoLast; iOff = fts5LeafFirstRowidOff(pLast); iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; if( fts5LeafIsTermless(pLast) ){ pIter->iEndofDoclist = pLast->nn+1; }else{ pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); } } fts5SegIterReverseInitPage(p, pIter); } /* ** Iterator pIter currently points to the first rowid of a doclist. |
︙ | ︙ | |||
1897 1898 1899 1900 1901 1902 1903 | assert( pIter->flags & FTS5_SEGITER_ONETERM ); assert( pIter->pDlidx==0 ); /* Check if the current doclist ends on this page. If it does, return ** early without loading the doclist-index (as it belongs to a different ** term. */ | | < | < < < | < < | < < < < | | | 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 | assert( pIter->flags & FTS5_SEGITER_ONETERM ); assert( pIter->pDlidx==0 ); /* Check if the current doclist ends on this page. If it does, return ** early without loading the doclist-index (as it belongs to a different ** term. */ if( pIter->iTermLeafPgno==pIter->iLeafPgno && pIter->iEndofDoclist<pLeaf->szLeaf ){ return; } pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); } #define fts5IndexGetVarint32(a, iOff, nVal) { \ nVal = (a)[iOff++]; \ if( nVal & 0x80 ){ \ iOff--; \ iOff += fts5GetVarint32(&(a)[iOff], nVal); \ } \ } #define fts5IndexSkipVarint(a, iOff) { \ int iEnd = iOff+9; \ while( (a[iOff++] & 0x80) && iOff<iEnd ); \ } |
︙ | ︙ | |||
1951 1952 1953 1954 1955 1956 1957 | Fts5Index *p, /* Leave any error code here */ int bGe, /* True for a >= search */ Fts5SegIter *pIter, /* Iterator to seek */ const u8 *pTerm, int nTerm /* Term to search for */ ){ int iOff; const u8 *a = pIter->pLeaf->p; | > | > > > > < > | | < < | < < < < > > < > | | < | < < < | | < | < < < < < < < < < | > | | | > > > > > > > > > > | 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 | Fts5Index *p, /* Leave any error code here */ int bGe, /* True for a >= search */ Fts5SegIter *pIter, /* Iterator to seek */ const u8 *pTerm, int nTerm /* Term to search for */ ){ int iOff; const u8 *a = pIter->pLeaf->p; int szLeaf = pIter->pLeaf->szLeaf; int n = pIter->pLeaf->nn; int nMatch = 0; int nKeep = 0; int nNew = 0; int iTerm = 0; int iTermOff; int iPgidx; /* Current offset in pgidx */ int bEndOfPage = 0; assert( p->rc==SQLITE_OK ); iPgidx = szLeaf; iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff); iOff = iTermOff; while( 1 ){ /* Figure out how many new bytes are in this term */ fts5IndexGetVarint32(a, iOff, nNew); if( nKeep<nMatch ){ goto search_failed; } assert( nKeep>=nMatch ); if( nKeep==nMatch ){ int nCmp; int i; nCmp = MIN(nNew, nTerm-nMatch); for(i=0; i<nCmp; i++){ if( a[iOff+i]!=pTerm[nMatch+i] ) break; } nMatch += i; if( nTerm==nMatch ){ if( i==nNew ){ goto search_success; }else{ goto search_failed; } }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){ goto search_failed; } } if( iPgidx>=n ){ bEndOfPage = 1; break; } iPgidx += fts5GetVarint32(&a[iPgidx], nKeep); iTermOff += nKeep; iOff = iTermOff; /* Read the nKeep field of the next term. */ fts5IndexGetVarint32(a, iOff, nKeep); } search_failed: if( bGe==0 ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; return; }else if( bEndOfPage ){ do { iTerm = 0; fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ) return; a = pIter->pLeaf->p; if( fts5LeafIsTermless(pIter->pLeaf)==0 ){ fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff); if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ nKeep = 0; iOff += fts5GetVarint32(&a[iOff], nNew); break; } } }while( 1 ); } search_success: pIter->iLeafOffset = iOff + nNew; pIter->iTermLeafOffset = pIter->iLeafOffset; pIter->iTermLeafPgno = pIter->iLeafPgno; fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); if( iPgidx>=n ){ pIter->iEndofDoclist = pIter->pLeaf->nn+1; }else{ int nExtra; iPgidx += fts5GetVarint32(&a[iPgidx], nExtra); pIter->iEndofDoclist = iTermOff + nExtra; } pIter->iPgidxOff = iPgidx; fts5SegIterLoadRowid(p, pIter); fts5SegIterLoadNPos(p, pIter); } /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg. If there is no such term in the index, the iterator is set to EOF. |
︙ | ︙ | |||
2186 2187 2188 2189 2190 2191 2192 | if( pList ){ Fts5Data *pLeaf; sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); if( pLeaf==0 ) return; pLeaf->p = (u8*)pList; | | > | 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 | if( pList ){ Fts5Data *pLeaf; sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); if( pLeaf==0 ) return; pLeaf->p = (u8*)pList; pLeaf->nn = pLeaf->szLeaf = nList; pIter->pLeaf = pLeaf; pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); pIter->iEndofDoclist = pLeaf->nn+1; if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; fts5SegIterReverseInitPage(p, pIter); }else{ fts5SegIterLoadNPos(p, pIter); } |
︙ | ︙ | |||
2379 2380 2381 2382 2383 2384 2385 | pIter->iLeafPgno = iLeafPgno-1; fts5SegIterNextPage(p, pIter); assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); if( p->rc==SQLITE_OK ){ int iOff; u8 *a = pIter->pLeaf->p; | | | | 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 | pIter->iLeafPgno = iLeafPgno-1; fts5SegIterNextPage(p, pIter); assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); if( p->rc==SQLITE_OK ){ int iOff; u8 *a = pIter->pLeaf->p; int n = pIter->pLeaf->szLeaf; iOff = fts5LeafFirstRowidOff(pIter->pLeaf); if( iOff<4 || iOff>=n ){ p->rc = FTS5_CORRUPT; }else{ iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; fts5SegIterLoadNPos(p, pIter); } |
︙ | ︙ | |||
2713 2714 2715 2716 2717 2718 2719 | ){ Fts5IndexIter *pNew; pNew = fts5MultiIterAlloc(p, 2); if( pNew ){ Fts5SegIter *pIter = &pNew->aSeg[1]; pIter->flags = FTS5_SEGITER_ONETERM; | | > | 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 | ){ Fts5IndexIter *pNew; pNew = fts5MultiIterAlloc(p, 2); if( pNew ){ Fts5SegIter *pIter = &pNew->aSeg[1]; pIter->flags = FTS5_SEGITER_ONETERM; if( pData->szLeaf>0 ){ pIter->pLeaf = pData; pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); pIter->iEndofDoclist = pData->nn; pNew->aFirst[1].iFirst = 1; if( bDesc ){ pNew->bRev = 1; pIter->flags |= FTS5_SEGITER_REVERSE; fts5SegIterReverseInitPage(p, pIter); }else{ fts5SegIterLoadNPos(p, pIter); |
︙ | ︙ | |||
2793 2794 2795 2796 2797 2798 2799 | Fts5SegIter *pSeg, /* Poslist of this iterator */ void *pCtx, /* Context pointer for xChunk callback */ void (*xChunk)(Fts5Index*, void*, const u8*, int) ){ int nRem = pSeg->nPos; /* Number of bytes still to come */ Fts5Data *pData = 0; u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; | | | | | 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 | Fts5SegIter *pSeg, /* Poslist of this iterator */ void *pCtx, /* Context pointer for xChunk callback */ void (*xChunk)(Fts5Index*, void*, const u8*, int) ){ int nRem = pSeg->nPos; /* Number of bytes still to come */ Fts5Data *pData = 0; u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); int pgno = pSeg->iLeafPgno; int pgnoSave = 0; if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ pgnoSave = pgno+1; } while( 1 ){ xChunk(p, pCtx, pChunk, nChunk); nRem -= nChunk; fts5DataRelease(pData); if( nRem<=0 ){ break; }else{ pgno++; pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)); if( pData==0 ) break; pChunk = &pData->p[4]; nChunk = MIN(nRem, pData->szLeaf - 4); if( pgno==pgnoSave ){ assert( pSeg->pNextLeaf==0 ); pSeg->pNextLeaf = pData; pData = 0; } } } |
︙ | ︙ | |||
3098 3099 3100 3101 3102 3103 3104 3105 3106 | } static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; Fts5PageWriter *pPage = &pWriter->writer; i64 iRowid; if( pWriter->bFirstTermInPage ){ /* No term was written to this page. */ | > > > > > > | > > > | | > > | 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 | } static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; Fts5PageWriter *pPage = &pWriter->writer; i64 iRowid; assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); /* Set the szLeaf header field. */ assert( 0==fts5GetU16(&pPage->buf.p[2]) ); fts5PutU16(&pPage->buf.p[2], pPage->buf.n); if( pWriter->bFirstTermInPage ){ /* No term was written to this page. */ assert( pPage->pgidx.n==0 ); fts5WriteBtreeNoTerm(p, pWriter); }else{ /* Append the pgidx to the page buffer. Set the szLeaf header field. */ fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p); } /* Write the page out to disk */ iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); /* Initialize the next page. */ fts5BufferZero(&pPage->buf); fts5BufferZero(&pPage->pgidx); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); pPage->iPrevPgidx = 0; pPage->pgno++; /* Increase the leaves written counter */ pWriter->nLeafWritten++; /* The new leaf holds no terms or rowids */ pWriter->bFirstTermInPage = 1; |
︙ | ︙ | |||
3135 3136 3137 3138 3139 3140 3141 3142 | static void fts5WriteAppendTerm( Fts5Index *p, Fts5SegWriter *pWriter, int nTerm, const u8 *pTerm ){ int nPrefix; /* Bytes of prefix compression for term */ Fts5PageWriter *pPage = &pWriter->writer; | > > | > > > > | < < > > | < | > > > > > > > > > | < < < | 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 | static void fts5WriteAppendTerm( Fts5Index *p, Fts5SegWriter *pWriter, int nTerm, const u8 *pTerm ){ int nPrefix; /* Bytes of prefix compression for term */ Fts5PageWriter *pPage = &pWriter->writer; Fts5Buffer *pPgidx = &pWriter->writer.pgidx; if( p->rc ) return; assert( pPage->buf.n>=4 ); assert( pPage->buf.n>4 || pWriter->bFirstTermInPage ); /* If the current leaf page is full, flush it to disk. */ if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ if( pPage->buf.n>4 ){ fts5WriteFlushLeaf(p, pWriter); } fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING); } /* TODO1: Updating pgidx here. */ pPgidx->n += sqlite3Fts5PutVarint( &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx ); pPage->iPrevPgidx = pPage->buf.n; #if 0 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); pPgidx->n += 2; #endif if( pWriter->bFirstTermInPage ){ nPrefix = 0; if( pPage->pgno!=1 ){ /* This is the first term on a leaf that is not the leftmost leaf in ** the segment b-tree. In this case it is necessary to add a term to ** the b-tree hierarchy that is (a) larger than the largest term ** already written to the segment and (b) smaller than or equal to ** this term. In other words, a prefix of (pTerm/nTerm) that is one |
︙ | ︙ | |||
3190 3191 3192 3193 3194 3195 3196 | pWriter->bFirstTermInPage = 0; pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); pWriter->aDlidx[0].pgno = pPage->pgno; | < < < < < > > > > | 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 | pWriter->bFirstTermInPage = 0; pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); pWriter->aDlidx[0].pgno = pPage->pgno; } /* ** Append a rowid and position-list size field to the writers output. */ static void fts5WriteAppendRowid( Fts5Index *p, Fts5SegWriter *pWriter, i64 iRowid, int nPos ){ if( p->rc==SQLITE_OK ){ Fts5PageWriter *pPage = &pWriter->writer; if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); } /* If this is to be the first rowid written to the page, set the ** rowid-pointer in the page-header. Also append a value to the dlidx ** buffer, in case a doclist-index is required. */ if( pWriter->bFirstRowidInPage ){ fts5PutU16(pPage->buf.p, pPage->buf.n); fts5WriteDlidxAppend(p, pWriter, iRowid); |
︙ | ︙ | |||
3229 3230 3231 3232 3233 3234 3235 | fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); } pWriter->iPrevRowid = iRowid; pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); | < < < < | > > | | 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 | fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); } pWriter->iPrevRowid = iRowid; pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); } } static void fts5WriteAppendPoslistData( Fts5Index *p, Fts5SegWriter *pWriter, const u8 *aData, int nData ){ Fts5PageWriter *pPage = &pWriter->writer; const u8 *a = aData; int n = nData; assert( p->pConfig->pgsz>0 ); while( p->rc==SQLITE_OK && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz ){ int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n; int nCopy = 0; while( nCopy<nReq ){ i64 dummy; nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy); } fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a); a += nCopy; |
︙ | ︙ | |||
3275 3276 3277 3278 3279 3280 3281 | /* ** Flush any data cached by the writer object to the database. Free any ** allocations associated with the writer. */ static void fts5WriteFinish( Fts5Index *p, Fts5SegWriter *pWriter, /* Writer object */ | < < < > > > > > > > > > > > > > > | 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 | /* ** Flush any data cached by the writer object to the database. Free any ** allocations associated with the writer. */ static void fts5WriteFinish( Fts5Index *p, Fts5SegWriter *pWriter, /* Writer object */ int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; Fts5PageWriter *pLeaf = &pWriter->writer; if( p->rc==SQLITE_OK ){ if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){ *pnLeaf = 0; }else{ if( pLeaf->buf.n>4 ){ fts5WriteFlushLeaf(p, pWriter); } *pnLeaf = pLeaf->pgno-1; fts5WriteFlushBtree(p, pWriter); } } fts5BufferFree(&pLeaf->term); fts5BufferFree(&pLeaf->buf); fts5BufferFree(&pLeaf->pgidx); fts5BufferFree(&pWriter->btterm); for(i=0; i<pWriter->nDlidx; i++){ sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); } sqlite3_free(pWriter->aDlidx); } static void fts5WriteInit( Fts5Index *p, Fts5SegWriter *pWriter, int iSegid ){ const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING; memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = iSegid; fts5WriteDlidxGrow(p, pWriter, 1); pWriter->writer.pgno = 1; pWriter->bFirstTermInPage = 1; pWriter->iBtPage = 1; /* Grow the two buffers to pgsz + padding bytes in size. */ fts5BufferGrow(&p->rc, &pWriter->writer.pgidx, nBuffer); fts5BufferGrow(&p->rc, &pWriter->writer.buf, nBuffer); if( p->pIdxWriter==0 ){ Fts5Config *pConfig = p->pConfig; fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf( "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", pConfig->zDb, pConfig->zName )); } if( p->rc==SQLITE_OK ){ /* Initialize the 4-byte leaf-page header to 0x00. */ memset(pWriter->writer.buf.p, 0, 4); pWriter->writer.buf.n = 4; /* Bind the current output segment id to the index-writer. This is an ** optimization over binding the same value over and over as rows are ** inserted into %_idx by the current writer. */ sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); } } /* ** Iterator pIter was used to iterate through the input segments of on an ** incremental merge operation. This function is called if the incremental |
︙ | ︙ | |||
3354 3355 3356 3357 3358 3359 3360 | pSeg->pSeg->pgnoLast = 0; pSeg->pSeg->pgnoFirst = 0; }else{ int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ i64 iLeafRowid; Fts5Data *pData; int iId = pSeg->pSeg->iSegid; | | | > > > > > > > > > > > > > > | > > > > | | 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 | pSeg->pSeg->pgnoLast = 0; pSeg->pSeg->pgnoFirst = 0; }else{ int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ i64 iLeafRowid; Fts5Data *pData; int iId = pSeg->pSeg->iSegid; u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00}; iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno); pData = fts5DataRead(p, iLeafRowid); if( pData ){ fts5BufferZero(&buf); fts5BufferGrow(&p->rc, &buf, pData->nn); fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr); fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n); fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p); fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]); if( p->rc==SQLITE_OK ){ /* Set the szLeaf field */ fts5PutU16(&buf.p[2], buf.n); } /* Set up the new page-index array */ fts5BufferAppendVarint(&p->rc, &buf, 4); if( pSeg->iLeafPgno==pSeg->iTermLeafPgno && pSeg->iEndofDoclist<pData->szLeaf ){ int nDiff = pData->szLeaf - pSeg->iEndofDoclist; fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4); fts5BufferAppendBlob(&p->rc, &buf, pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff] ); } fts5DataRelease(pData); pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid); fts5DataWrite(p, iLeafRowid, buf.p, buf.n); } } } fts5BufferFree(&buf); } |
︙ | ︙ | |||
3466 3467 3468 3469 3470 3471 3472 3473 | pTerm = fts5MultiIterTerm(pIter, &nTerm); if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ if( pnRem && writer.nLeafWritten>nRem ){ break; } /* This is a new term. Append a term to the output segment. */ if( bRequireDoclistTerm ){ | > | | | 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 | pTerm = fts5MultiIterTerm(pIter, &nTerm); if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ if( pnRem && writer.nLeafWritten>nRem ){ break; } /* This is a new term. Append a term to the output segment. */ /* TODO2: Doclist 0x00 term */ if( bRequireDoclistTerm ){ /* fts5WriteAppendZerobyte(p, &writer); */ } fts5WriteAppendTerm(p, &writer, nTerm, pTerm); fts5BufferSet(&p->rc, &term, nTerm, pTerm); bRequireDoclistTerm = 1; } /* Append the rowid to the output */ /* WRITEPOSLISTSIZE */ nPos = pSegIter->nPos*2 + pSegIter->bDel; fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos); /* Append the position-list data to the output */ fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); } /* Flush the last leaf page to disk. Set the output segment b-tree height ** and last leaf page number at the same time. */ fts5WriteFinish(p, &writer, &pSeg->pgnoLast); if( fts5MultiIterEof(p, pIter) ){ int i; /* Remove the redundant segments from the %_data table */ for(i=0; i<nInput; i++){ fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid); |
︙ | ︙ | |||
3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 | const int nCrisis = p->pConfig->nCrisisMerge; Fts5Structure *pStruct = *ppStruct; int iLvl = 0; assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ fts5IndexMergeLevel(p, &pStruct, iLvl, 0); fts5StructurePromote(p, iLvl+1, pStruct); iLvl++; } *ppStruct = pStruct; } static int fts5IndexReturn(Fts5Index *p){ | > | 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 | const int nCrisis = p->pConfig->nCrisisMerge; Fts5Structure *pStruct = *ppStruct; int iLvl = 0; assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ fts5IndexMergeLevel(p, &pStruct, iLvl, 0); assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) ); fts5StructurePromote(p, iLvl+1, pStruct); iLvl++; } *ppStruct = pStruct; } static int fts5IndexReturn(Fts5Index *p){ |
︙ | ︙ | |||
3637 3638 3639 3640 3641 3642 3643 | ** in a 32-bit integer. Return the size of the largest prefix of this ** list nMax bytes or less in size. */ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ int ret; u32 dummy; ret = fts5GetVarint32(aBuf, dummy); | > | | | | > | 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 | ** in a 32-bit integer. Return the size of the largest prefix of this ** list nMax bytes or less in size. */ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ int ret; u32 dummy; ret = fts5GetVarint32(aBuf, dummy); if( ret<nMax ){ while( 1 ){ int i = fts5GetVarint32(&aBuf[ret], dummy); if( (ret + i) > nMax ) break; ret += i; } } return ret; } #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \ assert( pBuf->nSpace>=(pBuf->n+nBlob) ); \ memcpy(&pBuf->p[pBuf->n], pBlob, nBlob); \ |
︙ | ︙ | |||
3673 3674 3675 3676 3677 3678 3679 | pStruct = fts5StructureRead(p); iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ const int pgsz = p->pConfig->pgsz; Fts5StructureSegment *pSeg; /* New segment within pStruct */ | < > < < < > | > > > > < < < > < | < < < < < < < < | < < < < < | < < < < < < < < < < < < < < < < | < < < < < | < | | 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 | pStruct = fts5StructureRead(p); iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ const int pgsz = p->pConfig->pgsz; Fts5StructureSegment *pSeg; /* New segment within pStruct */ Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ const u8 *zPrev = 0; Fts5SegWriter writer; fts5WriteInit(p, &writer, iSegid); pBuf = &writer.writer.buf; pPgidx = &writer.writer.pgidx; /* fts5WriteInit() should have initialized the buffers to (most likely) ** the maximum space required. */ assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) ); assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) ); /* Begin scanning through hash table entries. This loop runs once for each ** term/doclist currently stored within the hash table. */ if( p->rc==SQLITE_OK ){ p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); } while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ const char *zTerm; /* Buffer containing term */ const u8 *pDoclist; /* Pointer to doclist for this term */ int nDoclist; /* Size of doclist in bytes */ int nSuffix; /* Size of term suffix */ /* Write the term for this entry to disk. */ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); fts5WriteAppendTerm(p, &writer, strlen(zTerm), zTerm); if( writer.bFirstRowidInPage==0 && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); }else{ i64 iRowid = 0; i64 iDelta = 0; int iOff = 0; /* writer.bFirstRowidInPage = 0; */ /* The entire doclist will not fit on this leaf. The following ** loop iterates through the poslists that make up the current ** doclist. */ while( p->rc==SQLITE_OK && iOff<nDoclist ){ int nPos; int nCopy; |
︙ | ︙ | |||
3773 3774 3775 3776 3777 3778 3779 | writer.bFirstRowidInPage = 0; fts5WriteDlidxAppend(p, &writer, iRowid); }else{ pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); } assert( pBuf->n<=pBuf->nSpace ); | | | | < > | | < | 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 | writer.bFirstRowidInPage = 0; fts5WriteDlidxAppend(p, &writer, iRowid); }else{ pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); } assert( pBuf->n<=pBuf->nSpace ); if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ /* The entire poslist will fit on the current leaf. So copy ** it in one go. */ fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); }else{ /* The entire poslist will not fit on this leaf. So it needs ** to be broken into sections. The only qualification being ** that each varint must be stored contiguously. */ const u8 *pPoslist = &pDoclist[iOff]; int iPos = 0; while( p->rc==SQLITE_OK ){ int nSpace = pgsz - pBuf->n - pPgidx->n; int n = 0; if( (nCopy - iPos)<=nSpace ){ n = nCopy - iPos; }else{ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); } assert( n>0 ); fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); iPos += n; if( (pBuf->n + pPgidx->n)>=pgsz ){ fts5WriteFlushLeaf(p, &writer); } if( iPos>=nCopy ) break; } } iOff += nCopy; } } /* TODO2: Doclist terminator written here. */ /* pBuf->p[pBuf->n++] = '\0'; */ assert( pBuf->n<=pBuf->nSpace ); zPrev = (const u8*)zTerm; sqlite3Fts5HashScanNext(pHash); } sqlite3Fts5HashClear(pHash); fts5WriteFinish(p, &writer, &pgnoLast); /* Update the Fts5Structure. It is written back to the database by the ** fts5StructureRelease() call below. */ if( pStruct->nLevel==0 ){ fts5StructureAddLevel(&p->rc, &pStruct); } fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); if( p->rc==SQLITE_OK ){ pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; pSeg->iSegid = iSegid; pSeg->pgnoFirst = 1; pSeg->pgnoLast = pgnoLast; pStruct->nSegment++; } fts5StructurePromote(p, 0, pStruct); } |
︙ | ︙ | |||
3924 3925 3926 3927 3928 3929 3930 | } static void fts5PoslistCallback( Fts5Index *p, void *pCtx, const u8 *pChunk, int nChunk ){ | > > | > | 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 | } static void fts5PoslistCallback( Fts5Index *p, void *pCtx, const u8 *pChunk, int nChunk ){ assert_nc( nChunk>=0 ); if( nChunk>0 ){ fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk); } } /* ** Iterator pIter currently points to a valid entry (not EOF). This ** function appends the position list data for the current entry to ** buffer pBuf. It does not make a copy of the position-list size ** field. |
︙ | ︙ | |||
4159 4160 4161 4162 4163 4164 4165 | fts5BufferFree(&aBuf[i]); } fts5MultiIterFree(p, p1); pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n); if( pData ){ pData->p = (u8*)&pData[1]; | | | 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 | fts5BufferFree(&aBuf[i]); } fts5MultiIterFree(p, p1); pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n); if( pData ){ pData->p = (u8*)&pData[1]; pData->nn = pData->szLeaf = doclist.n; memcpy(pData->p, doclist.p, doclist.n); fts5MultiIterNew2(p, pData, bDesc, ppIter); } fts5BufferFree(&doclist); } fts5StructureRelease(pStruct); |
︙ | ︙ | |||
4389 4390 4391 4392 4393 4394 4395 | || (flags & FTS5INDEX_QUERY_SCAN)==FTS5INDEX_QUERY_SCAN ); if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){ memcpy(&buf.p[1], pToken, nToken); #ifdef SQLITE_DEBUG | > > > > > | | 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 | || (flags & FTS5INDEX_QUERY_SCAN)==FTS5INDEX_QUERY_SCAN ); if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){ memcpy(&buf.p[1], pToken, nToken); #ifdef SQLITE_DEBUG /* If the QUERY_TEST_NOIDX flag was specified, then this must be a ** prefix-query. Instead of using a prefix-index (if one exists), ** evaluate the prefix query using the main FTS index. This is used ** for internal sanity checking by the integrity-check in debug ** mode only. */ if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ assert( flags & FTS5INDEX_QUERY_PREFIX ); iIdx = 1+pConfig->nPrefix; }else #endif if( flags & FTS5INDEX_QUERY_PREFIX ){ int nChar = fts5IndexCharlen(pToken, nToken); for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ |
︙ | ︙ | |||
4509 4510 4511 4512 4513 4514 4515 | int *pn, /* OUT: Size of position-list in bytes */ i64 *piRowid /* OUT: Current rowid */ ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; assert( pIter->pIndex->rc==SQLITE_OK ); *piRowid = pSeg->iRowid; *pn = pSeg->nPos; | | | 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 | int *pn, /* OUT: Size of position-list in bytes */ i64 *piRowid /* OUT: Current rowid */ ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; assert( pIter->pIndex->rc==SQLITE_OK ); *piRowid = pSeg->iRowid; *pn = pSeg->nPos; if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){ *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset]; }else{ fts5BufferZero(&pIter->poslist); fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist); *pp = pIter->poslist.p; } return fts5IndexReturn(pIter->pIndex); |
︙ | ︙ | |||
4557 4558 4559 4560 4561 4562 4563 | int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ int nCol = p->pConfig->nCol; Fts5Data *pData; *pnRow = 0; memset(anSize, 0, sizeof(i64) * nCol); pData = fts5DataRead(p, FTS5_AVERAGES_ROWID); | | | | 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 | int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ int nCol = p->pConfig->nCol; Fts5Data *pData; *pnRow = 0; memset(anSize, 0, sizeof(i64) * nCol); pData = fts5DataRead(p, FTS5_AVERAGES_ROWID); if( p->rc==SQLITE_OK && pData->nn ){ int i = 0; int iCol; i += fts5GetVarint(&pData->p[i], (u64*)pnRow); for(iCol=0; i<pData->nn && iCol<nCol; iCol++){ i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]); } } fts5DataRelease(pData); return fts5IndexReturn(p); } |
︙ | ︙ | |||
4766 4767 4768 4769 4770 4771 4772 | if( rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_DESC; rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); } if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; /* If this is a prefix query, check that the results returned if the | | > > > > > > | | | | | | | | | | | > | 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 | if( rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_DESC; rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); } if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; /* If this is a prefix query, check that the results returned if the ** the index is disabled are the same. In both ASC and DESC order. ** ** This check may only be performed if the hash table is empty. This ** is because the hash table only supports a single scan query at ** a time, and the multi-iter loop from which this function is called ** is already performing such a scan. */ if( p->nPendingData==0 ){ if( iIdx>0 && rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; ck2 = 0; rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; } if( iIdx>0 && rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; ck2 = 0; rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; } } cksum3 ^= ck1; fts5BufferSet(&rc, pPrev, n, (const u8*)z); if( rc==SQLITE_OK && cksum3!=expected ){ rc = FTS5_CORRUPT; |
︙ | ︙ | |||
4816 4817 4818 4819 4820 4821 4822 | int iLast ){ int i; /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){ | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 | int iLast ){ int i; /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){ Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i)); if( pLeaf ){ if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT; if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT; } fts5DataRelease(pLeaf); if( p->rc ) break; } } static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2; int iTermOff = 0; int ii; Fts5Buffer buf1 = {0,0,0}; Fts5Buffer buf2 = {0,0,0}; ii = pLeaf->szLeaf; while( ii<pLeaf->nn && p->rc==SQLITE_OK ){ int res; int iOff; int nIncr; ii += fts5GetVarint32(&pLeaf->p[ii], nIncr); iTermOff += nIncr; iOff = iTermOff; if( iOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else if( iTermOff==nIncr ){ int nByte; iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); if( (iOff+nByte)>pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); } }else{ int nKeep, nByte; iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep); iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ buf1.n = nKeep; fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); } if( p->rc==SQLITE_OK ){ res = fts5BufferCompare(&buf1, &buf2); if( res<=0 ) p->rc = FTS5_CORRUPT; } } fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p); } fts5BufferFree(&buf1); fts5BufferFree(&buf2); } static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ Fts5StructureSegment *pSeg /* Segment to check internal consistency */ ){ Fts5Config *pConfig = p->pConfig; sqlite3_stmt *pStmt = 0; |
︙ | ︙ | |||
4847 4848 4849 4850 4851 4852 4853 | pConfig->zDb, pConfig->zName, pSeg->iSegid )); /* Iterate through the b-tree hierarchy. */ while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ i64 iRow; /* Rowid for this leaf */ Fts5Data *pLeaf; /* Data for this leaf */ | < | | < > | > | > > | 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 | pConfig->zDb, pConfig->zName, pSeg->iSegid )); /* Iterate through the b-tree hierarchy. */ while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ i64 iRow; /* Rowid for this leaf */ Fts5Data *pLeaf; /* Data for this leaf */ int nIdxTerm = sqlite3_column_bytes(pStmt, 1); const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1); int iIdxLeaf = sqlite3_column_int(pStmt, 2); int bIdxDlidx = sqlite3_column_int(pStmt, 3); /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ if( iIdxLeaf<pSeg->pgnoFirst ) continue; iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf); pLeaf = fts5DataRead(p, iRow); if( pLeaf==0 ) break; /* Check that the leaf contains at least one term, and that it is equal ** to or larger than the split-key in zIdxTerm. Also check that if there ** is also a rowid pointer within the leaf page header, it points to a ** location before the term. */ if( pLeaf->nn<=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ int iOff; /* Offset of first term on leaf */ int iRowidOff; /* Offset of first rowid on leaf */ int nTerm; /* Size of term on leaf in bytes */ int res; /* Comparison of term and split-key */ iOff = fts5LeafFirstTermOff(pLeaf); iRowidOff = fts5LeafFirstRowidOff(pLeaf); if( iRowidOff>=iOff ){ p->rc = FTS5_CORRUPT; }else{ iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm)); if( res==0 ) res = nTerm - nIdxTerm; if( res<0 ) p->rc = FTS5_CORRUPT; } fts5IntegrityCheckPgidx(p, pLeaf); } fts5DataRelease(pLeaf); if( p->rc ) break; /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ |
︙ | ︙ | |||
4909 4910 4911 4912 4913 4914 4915 | for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(p, pDlidx) ){ /* Check any rowid-less pages that occur before the current leaf. */ for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){ | | | | | > | | 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 | for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(p, pDlidx) ){ /* Check any rowid-less pages that occur before the current leaf. */ for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){ iKey = FTS5_SEGMENT_ROWID(iSegid, iPg); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } } iPrevLeaf = fts5DlidxIterPgno(pDlidx); /* Check that the leaf page indicated by the iterator really does ** contain the rowid suggested by the same. */ iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ i64 iRowid; int iRowidOff = fts5LeafFirstRowidOff(pLeaf); ASSERT_SZLEAF_OK(pLeaf); if( iRowidOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; } fts5DataRelease(pLeaf); } |
︙ | ︙ | |||
5126 5127 5128 5129 5130 5131 5132 | for(iLvl=0; iLvl<p->nLevel; iLvl++){ Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg ); for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | | < | | 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 | for(iLvl=0; iLvl<p->nLevel; iLvl++){ Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg ); for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}", pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast ); } sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); } } /* |
︙ | ︙ | |||
5189 5190 5191 5192 5193 5194 5195 | ** ** The return value is the number of bytes read from the input buffer. */ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ i64 iDocid; int iOff = 0; | > | | > | | 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 | ** ** The return value is the number of bytes read from the input buffer. */ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ i64 iDocid; int iOff = 0; if( n>0 ){ iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid); sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); } while( iOff<n ){ int nPos; int bDummy; iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)); if( iOff<n ){ i64 iDelta; iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta); if( iDelta==0 ) return iOff; iDocid += iDelta; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); } } return iOff; } /* |
︙ | ︙ | |||
5227 5228 5229 5230 5231 5232 5233 5234 5235 | Fts5Buffer s; /* Build up text to return here */ int rc = SQLITE_OK; /* Return code */ int nSpace = 0; assert( nArg==2 ); memset(&s, 0, sizeof(Fts5Buffer)); iRowid = sqlite3_value_int64(apVal[0]); n = sqlite3_value_bytes(apVal[1]); aBlob = sqlite3_value_blob(apVal[1]); | > > > > < > > | | > > > | | < < < > > > > > | | > > | | | | > | | | > > > > | > > > | | > | > > > > > | | > > | | < < | | | 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 | Fts5Buffer s; /* Build up text to return here */ int rc = SQLITE_OK; /* Return code */ int nSpace = 0; assert( nArg==2 ); memset(&s, 0, sizeof(Fts5Buffer)); iRowid = sqlite3_value_int64(apVal[0]); /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[] ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents ** buffer overreads even if the record is corrupt. */ n = sqlite3_value_bytes(apVal[1]); aBlob = sqlite3_value_blob(apVal[1]); nSpace = n + FTS5_DATA_ZERO_PADDING; a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); if( a==0 ) goto decode_out; memcpy(a, aBlob, n); fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); fts5DebugRowid(&rc, &s, iRowid); if( bDlidx ){ Fts5Data dlidx; Fts5DlidxLvl lvl; dlidx.p = a; dlidx.nn = n; memset(&lvl, 0, sizeof(Fts5DlidxLvl)); lvl.pData = &dlidx; lvl.iLeafPgno = iPgno; for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ sqlite3Fts5BufferAppendPrintf(&rc, &s, " %d(%lld)", lvl.iLeafPgno, lvl.iRowid ); } }else if( iSegid==0 ){ if( iRowid==FTS5_AVERAGES_ROWID ){ /* todo */ }else{ fts5DecodeStructure(&rc, &s, a, n); } }else{ Fts5Buffer term; /* Current term read from page */ int szLeaf; /* Offset of pgidx in a[] */ int iPgidxOff; int iPgidxPrev = 0; /* Previous value read from pgidx */ int iTermOff = 0; int iRowidOff = 0; int iOff; int nDoclist; memset(&term, 0, sizeof(Fts5Buffer)); if( n<4 ){ sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt"); goto decode_out; }else{ iRowidOff = fts5GetU16(&a[0]); iPgidxOff = szLeaf = fts5GetU16(&a[2]); if( iPgidxOff<n ){ fts5GetVarint32(&a[iPgidxOff], iTermOff); } } /* Decode the position list tail at the start of the page */ if( iRowidOff!=0 ){ iOff = iRowidOff; }else if( iTermOff!=0 ){ iOff = iTermOff; }else{ iOff = szLeaf; } fts5DecodePoslist(&rc, &s, &a[4], iOff-4); /* Decode any more doclist data that appears on the page before the ** first term. */ nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff; fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist); while( iPgidxOff<n ){ int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */ int nByte; /* Bytes of data */ int iEnd; iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte); iPgidxPrev += nByte; iOff = iPgidxPrev; if( iPgidxOff<n ){ fts5GetVarint32(&a[iPgidxOff], nByte); iEnd = iPgidxPrev + nByte; }else{ iEnd = szLeaf; } if( bFirst==0 ){ iOff += fts5GetVarint32(&a[iOff], nByte); term.n = nByte; } iOff += fts5GetVarint32(&a[iOff], nByte); fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); iOff += nByte; sqlite3Fts5BufferAppendPrintf( &rc, &s, " term=%.*s", term.n, (const char*)term.p ); iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff); } fts5BufferFree(&term); } decode_out: sqlite3_free(a); if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT); |
︙ | ︙ | |||
5335 5336 5337 5338 5339 5340 5341 | if( nArg==0 ){ sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); }else{ zArg = (const char*)sqlite3_value_text(apVal[0]); if( 0==sqlite3_stricmp(zArg, "segment") ){ i64 iRowid; int segid, height, pgno; | | | < | | | | < < | 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 | if( nArg==0 ){ sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); }else{ zArg = (const char*)sqlite3_value_text(apVal[0]); if( 0==sqlite3_stricmp(zArg, "segment") ){ i64 iRowid; int segid, height, pgno; if( nArg!=3 ){ sqlite3_result_error(pCtx, "should be: fts5_rowid('segment', segid, pgno))", -1 ); }else{ segid = sqlite3_value_int(apVal[1]); pgno = sqlite3_value_int(apVal[2]); iRowid = FTS5_SEGMENT_ROWID(segid, pgno); sqlite3_result_int64(pCtx, iRowid); } }else{ sqlite3_result_error(pCtx, "first arg to fts5_rowid() must be 'segment'" , -1 ); } } } /* ** This is called as part of registering the FTS5 module with database |
︙ | ︙ |
Changes to ext/fts5/fts5_main.c.
︙ | ︙ | |||
1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 | }else if( 0==sqlite3_stricmp("optimize", z) ){ rc = sqlite3Fts5StorageOptimize(pTab->pStorage); }else if( 0==sqlite3_stricmp("merge", z) ){ int nMerge = sqlite3_value_int(pVal); rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge); }else if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); }else{ rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex); if( rc==SQLITE_OK ){ rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError); } if( rc==SQLITE_OK ){ if( bError ){ | > > > > | 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 | }else if( 0==sqlite3_stricmp("optimize", z) ){ rc = sqlite3Fts5StorageOptimize(pTab->pStorage); }else if( 0==sqlite3_stricmp("merge", z) ){ int nMerge = sqlite3_value_int(pVal); rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge); }else if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); #ifdef SQLITE_DEBUG }else if( 0==sqlite3_stricmp("prefix-index", z) ){ pConfig->bPrefixIndex = sqlite3_value_int(pVal); #endif }else{ rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex); if( rc==SQLITE_OK ){ rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError); } if( rc==SQLITE_OK ){ if( bError ){ |
︙ | ︙ |
Changes to ext/fts5/test/fts5aa.test.
︙ | ︙ | |||
47 48 49 50 51 52 53 | } do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); } do_test 2.2 { execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } | | | 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | } do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); } do_test 2.2 { execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } } {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* leaves=1..1}}}/} foreach w {a b c d e f} { do_execsql_test 2.3.$w.asc { SELECT rowid FROM t1 WHERE t1 MATCH $w; } {1} do_execsql_test 2.3.$w.desc { SELECT rowid FROM t1 WHERE t1 MATCH $w ORDER BY rowid DESC; |
︙ | ︙ | |||
135 136 137 138 139 140 141 | do_execsql_test 5.$i.1 { INSERT INTO t1 VALUES($x, $y) } do_execsql_test 5.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') } if {[set_test_counter errors]} break } #------------------------------------------------------------------------- # | < | 135 136 137 138 139 140 141 142 143 144 145 146 147 148 | do_execsql_test 5.$i.1 { INSERT INTO t1 VALUES($x, $y) } do_execsql_test 5.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') } if {[set_test_counter errors]} break } #------------------------------------------------------------------------- # reset_db do_execsql_test 6.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } do_execsql_test 6.1 { |
︙ | ︙ | |||
197 198 199 200 201 202 203 204 205 206 207 208 209 210 | set y [doc] set z [doc] set rowid [expr int(rand() * 100)] execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} } #------------------------------------------------------------------------- # reset_db do_execsql_test 8.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3"); | > | 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 | set y [doc] set z [doc] set rowid [expr int(rand() * 100)] execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} if {[set_test_counter errors]} break } #------------------------------------------------------------------------- # reset_db do_execsql_test 8.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3"); |
︙ | ︙ |
Changes to ext/fts5/test/fts5ad.test.
︙ | ︙ | |||
201 202 203 204 205 206 207 208 209 210 211 212 213 214 | } } if {$bMatch} { lappend ret $rowid } } return $ret } foreach {bAsc sql} { 1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix} 0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC} } { foreach {tn prefix} { 1 {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} | > > > | 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | } } if {$bMatch} { lappend ret $rowid } } return $ret } do_execsql_test $T.integrity { INSERT INTO t1(t1) VALUES('integrity-check'); } foreach {bAsc sql} { 1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix} 0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC} } { foreach {tn prefix} { 1 {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} |
︙ | ︙ |
Changes to ext/fts5/test/fts5al.test.
︙ | ︙ | |||
22 23 24 25 26 27 28 | finish_test return } do_execsql_test 1.1 { CREATE VIRTUAL TABLE ft1 USING fts5(x); SELECT * FROM ft1_config; | | | | | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | finish_test return } do_execsql_test 1.1 { CREATE VIRTUAL TABLE ft1 USING fts5(x); SELECT * FROM ft1_config; } {version 4} do_execsql_test 1.2 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32); SELECT * FROM ft1_config; } {pgsz 32 version 4} do_execsql_test 1.3 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64); SELECT * FROM ft1_config; } {pgsz 64 version 4} #-------------------------------------------------------------------------- # Test the logic for parsing the rank() function definition. # foreach {tn defn} { 1 "fname()" 2 "fname(1)" |
︙ | ︙ |
Changes to ext/fts5/test/fts5corrupt.test.
︙ | ︙ | |||
39 40 41 42 43 44 45 | db_save do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') } set segid [lindex [fts5_level_segids t1] 0] do_test 1.3 { execsql { | | | | 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | db_save do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') } set segid [lindex [fts5_level_segids t1] 0] do_test 1.3 { execsql { DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 4); } catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {1 {database disk image is malformed}} do_test 1.4 { db_restore_and_reopen execsql { UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE rowid = fts5_rowid('segment', $segid, 4); } catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {1 {database disk image is malformed}} db_restore_and_reopen #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} |
︙ | ︙ |
Changes to ext/fts5/test/fts5corrupt2.test.
︙ | ︙ | |||
205 206 207 208 209 210 211 | if {$res == "1 {database disk image is malformed}"} {incr nCorrupt} set {} 1 } {1} execsql ROLLBACK } | | | | | | | 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 | if {$res == "1 {database disk image is malformed}"} {incr nCorrupt} set {} 1 } {1} execsql ROLLBACK } # do_test 4.$tn.x { expr $nCorrupt>0 } 1 } } set doc [string repeat "A B C " 1000] do_execsql_test 5.0 { CREATE VIRTUAL TABLE x5 USING fts5(tt); INSERT INTO x5(x5, rank) VALUES('pgsz', 32); WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) INSERT INTO x5 SELECT $doc FROM ii; } foreach {tn hdr} { 1 "\x00\x01" } { set tn2 0 set nCorrupt 0 foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] { if {$rowid & $mask} continue incr tn2 do_test 5.$tn.$tn2 { execsql BEGIN set fd [db incrblob main x5_data block $rowid] fconfigure $fd -encoding binary -translation binary puts -nonewline $fd $hdr close $fd catchsql { INSERT INTO x5(x5) VALUES('integrity-check') } set {} {} } {} execsql ROLLBACK } } #-------------------------------------------------------------------- reset_db do_execsql_test 6.1 { CREATE VIRTUAL TABLE x5 USING fts5(tt); INSERT INTO x5 VALUES('a'); INSERT INTO x5 VALUES('a a'); INSERT INTO x5 VALUES('a a a'); INSERT INTO x5 VALUES('a a a a'); UPDATE x5_docsize SET sz = X'' WHERE id=3; } proc colsize {cmd i} { $cmd xColumnSize $i } sqlite3_fts5_create_function db colsize colsize do_catchsql_test 6.2 { SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a' } {1 SQLITE_CORRUPT_VTAB} sqlite3_fts5_may_be_corrupt 0 finish_test |
Changes to ext/fts5/test/fts5rowid.test.
︙ | ︙ | |||
23 24 25 26 27 28 29 | do_catchsql_test 1.1 { SELECT fts5_rowid() } {1 {should be: fts5_rowid(subject, ....)}} do_catchsql_test 1.2 { SELECT fts5_rowid('segment') | | | | | | 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | do_catchsql_test 1.1 { SELECT fts5_rowid() } {1 {should be: fts5_rowid(subject, ....)}} do_catchsql_test 1.2 { SELECT fts5_rowid('segment') } {1 {should be: fts5_rowid('segment', segid, pgno))}} do_execsql_test 1.3 { SELECT fts5_rowid('segment', 1, 1) } {137438953473} do_catchsql_test 1.4 { SELECT fts5_rowid('nosucharg'); } {1 {first arg to fts5_rowid() must be 'segment'}} #------------------------------------------------------------------------- # Tests of the fts5_decode() function. # reset_db do_execsql_test 2.1 { |
︙ | ︙ |
Added ext/fts5/test/fts5simple.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | # 2015 September 05 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5simple # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } if 1 { #------------------------------------------------------------------------- # set doc "x x [string repeat {y } 50]z z" do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); BEGIN; INSERT INTO t1 VALUES($doc); COMMIT; } do_execsql_test 1.1 { INSERT INTO t1(t1) VALUES('integrity-check'); } #------------------------------------------------------------------------- # reset_db do_execsql_test 2.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); INSERT INTO t1 VALUES('a b c'); INSERT INTO t1 VALUES('d e f'); INSERT INTO t1(t1) VALUES('optimize'); } do_execsql_test 2.1 { INSERT INTO t1(t1) VALUES('integrity-check'); } {} #------------------------------------------------------------------------- # reset_db do_execsql_test 3.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, prefix='1,2'); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); BEGIN; INSERT INTO t1 VALUES('one'); SELECT * FROM t1 WHERE t1 MATCH 'o*'; } {one} do_execsql_test 3.1 { INSERT INTO t1(t1) VALUES('integrity-check'); } {} #------------------------------------------------------------------------- reset_db do_execsql_test 4.1 { CREATE VIRTUAL TABLE t11 USING fts5(content); INSERT INTO t11(t11, rank) VALUES('pgsz', 32); INSERT INTO t11 VALUES('another'); INSERT INTO t11 VALUES('string'); INSERT INTO t11 VALUES('of'); INSERT INTO t11 VALUES('text'); } do_test 4.2 { execsql { INSERT INTO t11(t11) VALUES('optimize') } } {} do_execsql_test 4.3 { INSERT INTO t11(t11) VALUES('integrity-check'); } {} #db eval { SELECT fts5_decode(rowid, block) as x FROM t11_data } { puts $x } #------------------------------------------------------------------------- reset_db set doc [string repeat "x y " 5] do_execsql_test 5.1 { CREATE VIRTUAL TABLE yy USING fts5(content); INSERT INTO yy(yy, rank) VALUES('pgsz', 32); BEGIN; INSERT INTO yy VALUES($doc); INSERT INTO yy VALUES($doc); INSERT INTO yy VALUES($doc); INSERT INTO yy VALUES($doc); INSERT INTO yy VALUES($doc); INSERT INTO yy VALUES($doc); INSERT INTO yy VALUES($doc); INSERT INTO yy VALUES($doc); COMMIT; } do_execsql_test 5.2 { SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid ASC } {1 2 3 4 5 6 7 8} do_execsql_test 5.3 { SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC } {8 7 6 5 4 3 2 1} #db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x } #------------------------------------------------------------------------- reset_db do_execsql_test 5.1 { CREATE VIRTUAL TABLE tt USING fts5(content); INSERT INTO tt(tt, rank) VALUES('pgsz', 32); INSERT INTO tt VALUES('aa'); } do_execsql_test 5.2 { SELECT rowid FROM tt WHERE tt MATCH 'a*'; } {1} do_execsql_test 5.3 { DELETE FROM tt; BEGIN; INSERT INTO tt VALUES('aa'); INSERT INTO tt VALUES('ab'); COMMIT; } {} do_execsql_test 5.4 { SELECT rowid FROM tt WHERE tt MATCH 'a*'; } {1 2} } do_execsql_test 5.5 { DELETE FROM tt; BEGIN; INSERT INTO tt VALUES('aa'); INSERT INTO tt VALUES('ab'); INSERT INTO tt VALUES('aa'); INSERT INTO tt VALUES('ab'); INSERT INTO tt VALUES('aa'); INSERT INTO tt VALUES('ab'); INSERT INTO tt VALUES('aa'); INSERT INTO tt VALUES('ab'); COMMIT; SELECT rowid FROM tt WHERE tt MATCH 'a*'; } {1 2 3 4 5 6 7 8} do_execsql_test 5.6 { INSERT INTO tt(tt) VALUES('integrity-check'); } reset_db do_execsql_test 5.7 { CREATE VIRTUAL TABLE tt USING fts5(content); INSERT INTO tt(tt, rank) VALUES('pgsz', 32); INSERT INTO tt VALUES('aa ab ac ad ae af'); } do_execsql_test 5.8 { SELECT rowid FROM tt WHERE tt MATCH 'a*'; } {1} finish_test |
Changes to ext/fts5/test/fts5version.test.
︙ | ︙ | |||
26 27 28 29 30 31 32 | do_execsql_test 1.1 { CREATE VIRTUAL TABLE t1 USING fts5(one); INSERT INTO t1 VALUES('a b c d'); } {} do_execsql_test 1.2 { SELECT * FROM t1_config WHERE k='version' | | | | | | | 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | do_execsql_test 1.1 { CREATE VIRTUAL TABLE t1 USING fts5(one); INSERT INTO t1 VALUES('a b c d'); } {} do_execsql_test 1.2 { SELECT * FROM t1_config WHERE k='version' } {version 4} do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'a'; } {1} do_execsql_test 1.4 { UPDATE t1_config set v=5 WHERE k='version'; } do_test 1.5 { db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } } {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}} do_test 1.6 { db close sqlite3 db test.db catchsql { INSERT INTO t1 VALUES('x y z') } } {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}} do_test 1.7 { execsql { DELETE FROM t1_config WHERE k='version' } db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } } {1 {invalid fts5 file format (found 0, expected 4) - run 'rebuild'}} finish_test |
Changes to ext/fts5/tool/loadfts5.tcl.
︙ | ︙ | |||
14 15 16 17 18 19 20 21 22 23 24 25 26 27 | foreach f [glob -nocomplain -dir $dir *] { if {$::O(limit) && $::nRow>=$::O(limit)} break if {[file isdir $f]} { load_hierachy $f } else { db eval { INSERT INTO t1 VALUES($f, loadfile($f)) } incr ::nRow if {($::nRow % $::nRowPerDot)==0} { puts -nonewline . if {($::nRow % (65*$::nRowPerDot))==0} { puts "" } flush stdout } | > > > > > > | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | foreach f [glob -nocomplain -dir $dir *] { if {$::O(limit) && $::nRow>=$::O(limit)} break if {[file isdir $f]} { load_hierachy $f } else { db eval { INSERT INTO t1 VALUES($f, loadfile($f)) } incr ::nRow if {$::O(trans) && ($::nRow % $::O(trans))==0} { db eval { COMMIT } db eval { INSERT INTO t1(t1) VALUES('integrity-check') } db eval { BEGIN } } if {($::nRow % $::nRowPerDot)==0} { puts -nonewline . if {($::nRow % (65*$::nRowPerDot))==0} { puts "" } flush stdout } |
︙ | ︙ | |||
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | puts stderr " -fts5 (use fts5)" puts stderr " -porter (use porter tokenizer)" puts stderr " -delete (delete the database file before starting)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" puts stderr " -crisismerge N (set the crisismerge parameter to N)" puts stderr " -prefix PREFIX (comma separated prefix= argument)" exit 1 } set O(vtab) fts5 set O(tok) "" set O(limit) 0 set O(delete) 0 set O(automerge) -1 set O(crisismerge) -1 set O(prefix) "" if {[llength $argv]<2} usage set nOpt [expr {[llength $argv]-2}] for {set i 0} {$i < $nOpt} {incr i} { set arg [lindex $argv $i] switch -- [lindex $argv $i] { -fts4 { | > > | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | puts stderr " -fts5 (use fts5)" puts stderr " -porter (use porter tokenizer)" puts stderr " -delete (delete the database file before starting)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" puts stderr " -crisismerge N (set the crisismerge parameter to N)" puts stderr " -prefix PREFIX (comma separated prefix= argument)" puts stderr " -trans N (commit after N inserts - 0 == never)" exit 1 } set O(vtab) fts5 set O(tok) "" set O(limit) 0 set O(delete) 0 set O(automerge) -1 set O(crisismerge) -1 set O(prefix) "" set O(trans) 0 if {[llength $argv]<2} usage set nOpt [expr {[llength $argv]-2}] for {set i 0} {$i < $nOpt} {incr i} { set arg [lindex $argv $i] switch -- [lindex $argv $i] { -fts4 { |
︙ | ︙ | |||
73 74 75 76 77 78 79 80 81 82 83 84 85 86 | set O(delete) 1 } -limit { if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] } -automerge { if { [incr i]>=$nOpt } usage set O(automerge) [lindex $argv $i] } -crisismerge { | > > > > > | 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 | set O(delete) 1 } -limit { if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] } -trans { if { [incr i]>=$nOpt } usage set O(trans) [lindex $argv $i] } -automerge { if { [incr i]>=$nOpt } usage set O(automerge) [lindex $argv $i] } -crisismerge { |
︙ | ︙ | |||
100 101 102 103 104 105 106 107 | } set dbfile [lindex $argv end-1] if {$O(delete)} { file delete -force $dbfile } sqlite3 db $dbfile catch { load_static_extension db fts5 } db func loadfile loadfile | > | < > | 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | } set dbfile [lindex $argv end-1] if {$O(delete)} { file delete -force $dbfile } sqlite3 db $dbfile catch { load_static_extension db fts5 } db func loadfile loadfile db eval "PRAGMA page_size=4096" db eval BEGIN set pref "" if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" } catch { db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)" db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);" } if {$O(automerge)>=0} { if {$O(vtab) == "fts5"} { db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) } } else { db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) } } } if {$O(crisismerge)>=0} { if {$O(vtab) == "fts5"} { db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))} } else { } } load_hierachy [lindex $argv end] db eval COMMIT |
Changes to main.mk.
︙ | ︙ | |||
328 329 330 331 332 333 334 | $(TOP)/ext/misc/series.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/totype.c \ $(TOP)/ext/misc/wholenumber.c \ $(TOP)/ext/misc/vfslog.c \ $(TOP)/ext/fts5/fts5_tcl.c \ $(TOP)/ext/fts5/fts5_test_mi.c \ | > > > | > | 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | $(TOP)/ext/misc/series.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/totype.c \ $(TOP)/ext/misc/wholenumber.c \ $(TOP)/ext/misc/vfslog.c \ $(TOP)/ext/fts5/fts5_tcl.c \ $(TOP)/ext/fts5/fts5_test_mi.c \ $(FTS5_SRC) # fts5.c #TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c #TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c TESTSRC2 = \ $(TOP)/src/attach.c \ |
︙ | ︙ |
Changes to test/permutations.test.
︙ | ︙ | |||
254 255 256 257 258 259 260 | } -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test] test_suite "fts5-light" -prefix "" -description { All FTS5 tests. } -files [ test_set \ [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \ | | | 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 | } -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test] test_suite "fts5-light" -prefix "" -description { All FTS5 tests. } -files [ test_set \ [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \ -exclude *corrupt* ] test_suite "nofaultsim" -prefix "" -description { "Very" quick test suite. Runs in less than 5 minutes on a workstation. This test suite is the same as the "quick" tests, except that some files that test malloc and IO errors are omitted. } -files [ |
︙ | ︙ |