Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add some missing comments and fix some other issues in fts3 code. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts3-refactor |
Files: | files | file ages | folders |
SHA1: |
2fe579e778b75fbf503c02e01e5424c1 |
User & Date: | dan 2009-11-18 15:35:59.000 |
Context
2009-11-19
| ||
00:15 | Fix problems introduced into fts3 as part of the refactoring. (check-in: fa0998e19d user: dan tags: fts3-refactor) | |
2009-11-18
| ||
15:35 | Add some missing comments and fix some other issues in fts3 code. (check-in: 2fe579e778 user: dan tags: fts3-refactor) | |
2009-11-17
| ||
12:52 | Improvements to the way fts3 reads the full-text index. (check-in: 45c051e786 user: dan tags: fts3-refactor) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
891 892 893 894 895 896 897 | } } } return rc; } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 891 892 893 894 895 896 897 898 899 900 901 902 903 904 | } } } return rc; } /* ** The buffer pointed to by argument zNode (size nNode bytes) contains the ** root node of a b-tree segment. The segment is guaranteed to be at least ** one level high (i.e. the root node is not also a leaf). If successful, ** this function locates the leaf node of the segment that may contain the ** term specified by arguments zTerm and nTerm and writes its block number ** to *piLeaf. |
︙ | ︙ | |||
1009 1010 1011 1012 1013 1014 1015 | */ if( iHeight==1 ){ *piLeaf = iChild; break; } /* Descend to interior node iChild. */ | | | 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 | */ if( iHeight==1 ){ *piLeaf = iChild; break; } /* Descend to interior node iChild. */ rc = sqlite3Fts3ReadBlock(p, iChild, &zCsr, &nBlock); if( rc!=SQLITE_OK ) break; zEnd = &zCsr[nBlock]; } sqlite3_free(zBuffer); return rc; } |
︙ | ︙ | |||
1520 1521 1522 1523 1524 1525 1526 | ** searches, this is always a single leaf. For prefix searches, this ** may be a contiguous block of leaves. ** ** The code in this loop does not actually load any leaves into memory ** (unless the root node happens to be a leaf). It simply examines the ** b-tree structure to determine which leaves need to be inspected. */ | | | 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 | ** searches, this is always a single leaf. For prefix searches, this ** may be a contiguous block of leaves. ** ** The code in this loop does not actually load any leaves into memory ** (unless the root node happens to be a leaf). It simply examines the ** b-tree structure to determine which leaves need to be inspected. */ rc = sqlite3Fts3AllSegdirs(p, &pStmt); while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ Fts3SegReader *pNew = 0; int nRoot = sqlite3_column_bytes(pStmt, 4); char const *zRoot = sqlite3_column_blob(pStmt, 4); if( sqlite3_column_int64(pStmt, 1)==0 ){ /* The entire segment is stored on the root node (which must be a ** leaf). Do not bother inspecting any data in this case, just |
︙ | ︙ |
Changes to ext/fts3/fts3Int.h.
︙ | ︙ | |||
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 | sqlite3_vtab **, char **); /* fts3_write.c */ int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*); int sqlite3Fts3PendingTermsFlush(Fts3Table *); void sqlite3Fts3PendingTermsClear(Fts3Table *); int sqlite3Fts3Optimize(Fts3Table *); /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 #define FTS3_SEGMENT_PREFIX 0x00000008 struct Fts3SegFilter { const char *zTerm; int nTerm; int iCol; int flags; }; | > > > > > > > > > > < < < < < < < < < < < < < < | 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | sqlite3_vtab **, char **); /* fts3_write.c */ int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*); int sqlite3Fts3PendingTermsFlush(Fts3Table *); void sqlite3Fts3PendingTermsClear(Fts3Table *); int sqlite3Fts3Optimize(Fts3Table *); int sqlite3Fts3SegReaderNew(Fts3Table *,int, sqlite3_int64, sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); void sqlite3Fts3SegReaderFree(Fts3SegReader *); int sqlite3Fts3SegReaderIterate( Fts3Table *, Fts3SegReader **, int, Fts3SegFilter *, int (*)(Fts3Table *, void *, char *, int, char *, int), void * ); int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char const**, int*); int sqlite3Fts3AllSegdirs(Fts3Table*, sqlite3_stmt **); /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 #define FTS3_SEGMENT_PREFIX 0x00000008 /* Type passed as 4th argument to SegmentReaderIterate() */ struct Fts3SegFilter { const char *zTerm; int nTerm; int iCol; int flags; }; /* fts3.c */ int sqlite3Fts3PutVarint(char *, sqlite3_int64); int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); /* fts3_tokenizer.c */ const char *sqlite3Fts3NextToken(const char *, int *); int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, sqlite3_tokenizer **, const char **, char ** ); |
︙ | ︙ |
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | #define INTERIOR_MAX 2048 /* Soft limit for segment node size */ #define LEAF_MAX 2048 /* Soft limit for segment leaf size */ typedef struct PendingList PendingList; typedef struct SegmentNode SegmentNode; typedef struct SegmentWriter SegmentWriter; struct PendingList { int nData; char *aData; int nSpace; sqlite3_int64 iLastDocid; sqlite3_int64 iLastCol; sqlite3_int64 iLastPos; }; /* | > > > > > > > > > > > | | | | 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | #define INTERIOR_MAX 2048 /* Soft limit for segment node size */ #define LEAF_MAX 2048 /* Soft limit for segment leaf size */ typedef struct PendingList PendingList; typedef struct SegmentNode SegmentNode; typedef struct SegmentWriter SegmentWriter; /* ** Data structure used while accumulating terms in the pending-terms hash ** table. The hash table entry maps from term (a string) to a malloced ** instance of this structure. */ struct PendingList { int nData; char *aData; int nSpace; sqlite3_int64 iLastDocid; sqlite3_int64 iLastCol; sqlite3_int64 iLastPos; }; /* ** An instance of this structure is used to iterate through the terms on ** a contiguous set of segment b-tree leaf nodes. Although the details of ** this structure are only manipulated by code in this file, opaque handles ** of type Fts3SegReader* are also used by code in fts3.c to iterate through ** terms when querying the full-text index. See functions: ** ** sqlite3Fts3SegReaderNew() ** sqlite3Fts3SegReaderFree() ** sqlite3Fts3SegReaderIterate() */ struct Fts3SegReader { int iIdx; /* Index within level */ sqlite3_int64 iStartBlock; sqlite3_int64 iEndBlock; sqlite3_stmt *pStmt; /* SQL Statement to access leaf nodes */ char *aNode; /* Pointer to node data (or NULL) */ |
︙ | ︙ | |||
64 65 66 67 68 69 70 | /* The following variables are used to iterate through the current doclist */ char *pOffsetList; sqlite3_int64 iDocid; }; /* | > > > | | > | | | | < < < < < < < < < < < < < < < < < > | 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | /* The following variables are used to iterate through the current doclist */ char *pOffsetList; sqlite3_int64 iDocid; }; /* ** An instance of this structure is used to create a segment b-tree in the ** database. The internal details of this type are only accessed by the ** following functions: ** ** fts3SegWriterAdd() ** fts3SegWriterFlush() ** fts3SegWriterFree() */ struct SegmentWriter { SegmentNode *pTree; /* Pointer to interior tree structure */ sqlite3_int64 iFirst; /* First slot in %_segments written */ sqlite3_int64 iFree; /* Next free slot in %_segments */ char *zTerm; /* Pointer to previous term buffer */ int nTerm; /* Number of bytes in zTerm */ int nMalloc; /* Size of malloc'd buffer at zMalloc */ char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ int nSize; /* Size of allocation at aData */ int nData; /* Bytes of data in aData */ char *aData; /* Pointer to block from malloc() */ }; /* ** Type SegmentNode is used by the following three functions to create ** the interior part of the segment b+-tree structures (everything except ** the leaf nodes). These functions and type are only ever used by code ** within the fts3SegWriterXXX() family of functions described above. ** ** fts3NodeAddTerm() ** fts3NodeWrite() ** fts3NodeFree() */ struct SegmentNode { SegmentNode *pParent; /* Parent node (or NULL for root node) */ SegmentNode *pRight; /* Pointer to right-sibling */ SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */ int nEntry; /* Number of terms written to node so far */ char *zTerm; /* Pointer to previous term buffer */ int nTerm; /* Number of bytes in zTerm */ int nMalloc; /* Size of malloc'd buffer at zMalloc */ char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ int nData; /* Bytes of valid data so far */ char *aData; /* Node data */ }; /* ** Valid values for the second argument to fts3SqlStmt(). */ #define SQL_DELETE_CONTENT 0 #define SQL_IS_EMPTY 1 #define SQL_DELETE_ALL_CONTENT 2 #define SQL_DELETE_ALL_SEGMENTS 3 #define SQL_DELETE_ALL_SEGDIR 4 #define SQL_SELECT_CONTENT_BY_ROWID 5 #define SQL_NEXT_SEGMENT_INDEX 6 #define SQL_INSERT_SEGMENTS 7 #define SQL_NEXT_SEGMENTS_ID 8 #define SQL_INSERT_SEGDIR 9 #define SQL_SELECT_LEVEL 10 #define SQL_SELECT_ALL_LEVEL 11 #define SQL_SELECT_LEVEL_COUNT 12 #define SQL_SELECT_SEGDIR_COUNT_MAX 13 #define SQL_DELETE_SEGDIR_BY_LEVEL 14 #define SQL_DELETE_SEGMENTS_RANGE 15 #define SQL_CONTENT_INSERT 16 #define SQL_GET_BLOCK 17 static int fts3SqlStmt( Fts3Table *p, int eStmt, sqlite3_stmt **pp, sqlite3_value **apVal ){ |
︙ | ︙ | |||
203 204 205 206 207 208 209 | rc = sqlite3_bind_value(pStmt, i+1, apVal[i]); } } *pp = pStmt; return rc; } | > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > | 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 | rc = sqlite3_bind_value(pStmt, i+1, apVal[i]); } } *pp = pStmt; return rc; } /* ** Read a single block from the %_segments table. If the specified block ** does not exist, return SQLITE_CORRUPT. If some other error (malloc, IO ** etc.) occurs, return the appropriate SQLite error code. ** ** Otherwise, if successful, set *pzBlock to point to a buffer containing ** the block read from the database, and *pnBlock to the size of the read ** block in bytes. ** ** WARNING: The returned buffer is only valid until the next call to ** sqlite3Fts3ReadBlock(). */ int sqlite3Fts3ReadBlock( Fts3Table *p, sqlite3_int64 iBlock, char const **pzBlock, int *pnBlock ){ sqlite3_stmt *pStmt; int rc = fts3SqlStmt(p, SQL_GET_BLOCK, &pStmt, 0); if( rc!=SQLITE_OK ) return rc; sqlite3_reset(pStmt); sqlite3_bind_int64(pStmt, 1, iBlock); rc = sqlite3_step(pStmt); if( rc!=SQLITE_ROW ){ return SQLITE_CORRUPT; } *pnBlock = sqlite3_column_bytes(pStmt, 0); *pzBlock = (char *)sqlite3_column_blob(pStmt, 0); if( !*pzBlock ){ return SQLITE_NOMEM; } return SQLITE_OK; } /* ** Set *ppStmt to a statement handle that may be used to iterate through ** all rows in the %_segdir table, from oldest to newest. If successful, ** return SQLITE_OK. If an error occurs while preparing the statement, ** return an SQLite error code. ** ** There is only ever one instance of this SQL statement compiled for ** each FTS3 table. */ int sqlite3Fts3AllSegdirs(Fts3Table *p, sqlite3_stmt **ppStmt){ return fts3SqlStmt(p, SQL_SELECT_ALL_LEVEL, ppStmt, 0); } static int fts3SqlExec(Fts3Table *p, int eStmt, sqlite3_value **apVal){ sqlite3_stmt *pStmt; int rc = fts3SqlStmt(p, eStmt, &pStmt, apVal); if( rc==SQLITE_OK ){ sqlite3_step(pStmt); rc = sqlite3_reset(pStmt); |
︙ | ︙ | |||
1187 1188 1189 1190 1191 1192 1193 | sqlite3_free(p->zMalloc); sqlite3_free(p); p = pRight; } } } | | | 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 | sqlite3_free(p->zMalloc); sqlite3_free(p); p = pRight; } } } static int fts3SegWriterAdd( Fts3Table *p, /* Virtual table handle */ SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */ int isCopyTerm, /* True if buffer zTerm must be copied */ const char *zTerm, /* Pointer to buffer containing term */ int nTerm, /* Size of term in bytes */ const char *aDoclist, /* Pointer to buffer containing doclist */ int nDoclist /* Size of doclist in bytes */ |
︙ | ︙ | |||
1319 1320 1321 1322 1323 1324 1325 | pWriter->zTerm = (char *)zTerm; } pWriter->nTerm = nTerm; return SQLITE_OK; } | | | 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 | pWriter->zTerm = (char *)zTerm; } pWriter->nTerm = nTerm; return SQLITE_OK; } static int fts3SegWriterFlush( Fts3Table *p, SegmentWriter *pWriter, int iLevel, int iIdx ){ int rc; if( pWriter->pTree ){ |
︙ | ︙ | |||
1348 1349 1350 1351 1352 1353 1354 | /* The entire tree fits on the root node. Write it to the segdir table. */ rc = fts3WriteSegdir( p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData); } return rc; } | | | 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 | /* The entire tree fits on the root node. Write it to the segdir table. */ rc = fts3WriteSegdir( p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData); } return rc; } static void fts3SegWriterFree(SegmentWriter *pWriter){ if( pWriter ){ sqlite3_free(pWriter->aData); sqlite3_free(pWriter->zMalloc); fts3NodeFree(pWriter->pTree); sqlite3_free(pWriter); } } |
︙ | ︙ | |||
1484 1485 1486 1487 1488 1489 1490 | void *pContext, char *zTerm, int nTerm, char *aDoclist, int nDoclist ){ SegmentWriter **ppW = (SegmentWriter **)pContext; | | | 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 | void *pContext, char *zTerm, int nTerm, char *aDoclist, int nDoclist ){ SegmentWriter **ppW = (SegmentWriter **)pContext; return fts3SegWriterAdd(p, ppW, 1, zTerm, nTerm, aDoclist, nDoclist); } int sqlite3Fts3SegReaderIterate( Fts3Table *p, /* Virtual table handle */ Fts3SegReader **apSegment, /* Array of Fts3SegReader objects */ int nSegment, /* Size of apSegment array */ Fts3SegFilter *pFilter, /* Restrictions on range of iteration */ |
︙ | ︙ | |||
1513 1514 1515 1516 1517 1518 1519 | ** for, then advance each segment iterator until it points to a term of ** equal or greater value than the specified term. This prevents many ** unnecessary merge/sort operations for the case where single segment ** b-tree leaf nodes contain more than one term. */ if( pFilter->zTerm ){ int nTerm = pFilter->nTerm; | | | 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 | ** for, then advance each segment iterator until it points to a term of ** equal or greater value than the specified term. This prevents many ** unnecessary merge/sort operations for the case where single segment ** b-tree leaf nodes contain more than one term. */ if( pFilter->zTerm ){ int nTerm = pFilter->nTerm; const char *zTerm = pFilter->zTerm; for(i=0; i<nSegment; i++){ Fts3SegReader *pSeg = apSegment[i]; while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ){ rc = fts3SegReaderNext(pSeg); if( rc!=SQLITE_OK ) goto finished; } } |
︙ | ︙ | |||
1722 1723 1724 1725 1726 1727 1728 | rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter, fts3MergeCallback, (void *)&pWriter ); if( rc!=SQLITE_OK ) goto finished; rc = fts3DeleteSegdir(p, iLevel, apSegment, nSegment); if( rc==SQLITE_OK ){ | | | > > > > > > > > > > > > > > > > > > > > | 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 | rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter, fts3MergeCallback, (void *)&pWriter ); if( rc!=SQLITE_OK ) goto finished; rc = fts3DeleteSegdir(p, iLevel, apSegment, nSegment); if( rc==SQLITE_OK ){ rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); } finished: fts3SegWriterFree(pWriter); if( apSegment ){ for(i=0; i<nSegment; i++){ sqlite3Fts3SegReaderFree(apSegment[i]); } sqlite3_free(apSegment); } sqlite3_reset(pStmt); return rc; } /* ** This is a comparison function used as a qsort() callback when sorting ** an array of pending terms by term. This occurs as part of flushing ** the contents of the pending-terms hash table to the database. */ static int qsortCompare(const void *lhs, const void *rhs){ char *z1 = fts3HashKey(*(Fts3HashElem **)lhs); char *z2 = fts3HashKey(*(Fts3HashElem **)rhs); int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs); int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs); int n = (n1<n2 ? n1 : n2); int c = memcmp(z1, z2, n); if( c==0 ){ c = n1 - n2; } return c; } /* ** Flush the contents of pendingTerms to a level 0 segment. */ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ Fts3HashElem *pElem; int idx, rc, i; |
︙ | ︙ | |||
1787 1788 1789 1790 1791 1792 1793 | /* Write the segment tree into the database. */ for(i=0; rc==SQLITE_OK && i<nElem; i++){ const char *z = fts3HashKey(apElem[i]); int n = fts3HashKeysize(apElem[i]); PendingList *pList = fts3HashData(apElem[i]); | | | | | 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 | /* Write the segment tree into the database. */ for(i=0; rc==SQLITE_OK && i<nElem; i++){ const char *z = fts3HashKey(apElem[i]); int n = fts3HashKeysize(apElem[i]); PendingList *pList = fts3HashData(apElem[i]); rc = fts3SegWriterAdd(p, &pWriter, 0, z, n, pList->aData, pList->nData+1); } if( rc==SQLITE_OK ){ rc = fts3SegWriterFlush(p, pWriter, 0, idx); } /* Free all allocated resources before returning */ fts3SegWriterFree(pWriter); sqlite3_free(apElem); sqlite3Fts3PendingTermsClear(p); return rc; } /* ** This function does the work for the xUpdate method of FTS3 virtual |
︙ | ︙ |