Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Replace the hash table borrowed from fts3. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
617e2fac1c128212254f71b1a8fddaf0 |
User & Date: | dan 2014-08-11 19:44:52.686 |
Context
2014-08-11
| ||
20:26 | Simplify the way position lists are copied when merging data. (check-in: 9f8d678a0e user: dan tags: fts5) | |
19:44 | Replace the hash table borrowed from fts3. (check-in: 617e2fac1c user: dan tags: fts5) | |
2014-08-09
| ||
18:22 | Fix an uninitialized variable causing a problem during fts5 table initialization. (check-in: a14fa876f0 user: dan tags: fts5) | |
Changes
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
278 279 280 281 282 283 284 285 286 287 288 289 290 291 | ** this connection since it was created. */ int sqlite3Fts5IndexReads(Fts5Index *p); /* ** End of interface to code in fts5_index.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_storage.c. fts5_storage.c contains contains ** code to access the data stored in the %_content and %_docsize tables. */ #define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 | ** this connection since it was created. */ int sqlite3Fts5IndexReads(Fts5Index *p); /* ** End of interface to code in fts5_index.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_hash.c. */ typedef struct Fts5Hash Fts5Hash; /* ** Create a hash table, free a hash table. */ int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize); void sqlite3Fts5HashFree(Fts5Hash*); int sqlite3Fts5HashWrite( Fts5Hash*, i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ ); /* ** Empty (but do not delete) a hash table. */ void sqlite3Fts5HashClear(Fts5Hash*); /* ** Iterate through the contents of the hash table. */ int sqlite3Fts5HashIterate( Fts5Hash*, void *pCtx, int (*xTerm)(void*, const char*, int), int (*xEntry)(void*, i64, const u8*, int), int (*xTermDone)(void*) ); /* ** End of interface to code in fts5_hash.c. 
**************************************************************************/ /************************************************************************** ** Interface to code in fts5_storage.c. fts5_storage.c contains contains ** code to access the data stored in the %_content and %_docsize tables. */ #define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ |
︙ | ︙ |
Added ext/fts5/fts5_hash.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 | /* ** 2014 August 11 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** */ #include "fts5Int.h" typedef struct Fts5HashEntry Fts5HashEntry; /* ** This file contains the implementation of an in-memory hash table used ** to accumuluate "term -> doclist" content before it is flused to a level-0 ** segment. */ struct Fts5Hash { int *pnByte; /* Pointer to bytes counter */ int nEntry; /* Number of entries currently in hash */ int nSlot; /* Size of aSlot[] array */ Fts5HashEntry **aSlot; /* Array of hash slots */ }; /* ** Each entry in the hash table is represented by an object of the ** following type. Each object, its key (zKey[]) and its current data ** are stored in a single memory allocation. The position list data ** immediately follows the key data in memory. ** ** The data that follows the key is in a similar, but not identical format ** to the doclist data stored in the database. It is: ** ** * Rowid, as a varint ** * Position list, without 0x00 terminator. ** * Size of previous position list and rowid, as a 4 byte ** big-endian integer. ** ** iRowidOff: ** Offset of last rowid written to data area. Relative to first byte of ** structure. ** ** nData: ** Bytes of data written since iRowidOff. */ struct Fts5HashEntry { Fts5HashEntry *pNext; /* Next hash entry with same hash-key */ int nAlloc; /* Total size of allocation */ int iRowidOff; /* Offset of last rowid written */ int nData; /* Total bytes of data (incl. 
structure) */ int iCol; /* Column of last value written */ int iPos; /* Position of last value written */ i64 iRowid; /* Rowid of last value written */ char zKey[0]; /* Nul-terminated entry key */ }; /* ** Allocate a new hash table. */ int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){ int rc = SQLITE_OK; Fts5Hash *pNew; *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash)); if( pNew==0 ){ rc = SQLITE_NOMEM; }else{ int nByte; memset(pNew, 0, sizeof(Fts5Hash)); pNew->pnByte = pnByte; pNew->nSlot = 1024; nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte); if( pNew->aSlot==0 ){ sqlite3_free(pNew); *ppNew = 0; rc = SQLITE_NOMEM; }else{ memset(pNew->aSlot, 0, nByte); } } return rc; } /* ** Free a hash table object. */ void sqlite3Fts5HashFree(Fts5Hash *pHash){ if( pHash ){ sqlite3Fts5HashClear(pHash); sqlite3_free(pHash->aSlot); sqlite3_free(pHash); } } /* ** Empty (but do not delete) a hash table. */ void sqlite3Fts5HashClear(Fts5Hash *pHash){ int i; for(i=0; i<pHash->nSlot; i++){ if( pHash->aSlot[i] ){ sqlite3_free(pHash->aSlot[i]); pHash->aSlot[i] = 0; } } } static unsigned int fts5HashKey(Fts5Hash *pHash, const char *p, int n){ int i; unsigned int h = 13; for(i=n-1; i>=0; i--){ h = (h << 3) ^ h ^ p[i]; } return (h % pHash->nSlot); } /* ** Store the 32-bit integer passed as the second argument in buffer p. */ static int fts5PutNativeInt(u8 *p, int i){ assert( sizeof(i)==4 ); memcpy(p, &i, sizeof(i)); return sizeof(i); } /* ** Read and return the 32-bit integer stored in buffer p. 
*/ static int fts5GetNativeU32(u8 *p){ int i; assert( sizeof(i)==4 ); memcpy(&i, p, sizeof(i)); return i; } int sqlite3Fts5HashWrite( Fts5Hash *pHash, i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ ){ unsigned int iHash = fts5HashKey(pHash, pToken, nToken); Fts5HashEntry *p; u8 *pPtr; int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ /* Attempt to locate an existing hash object */ for(p=pHash->aSlot[iHash]; p; p=p->pNext){ if( memcmp(p->zKey, pToken, nToken)==0 && p->zKey[nToken]==0 ) break; } /* If an existing hash entry cannot be found, create a new one. */ if( p==0 ){ int nByte = sizeof(Fts5HashEntry) + nToken + 1 + 64; if( nByte<128 ) nByte = 128; p = (Fts5HashEntry*)sqlite3_malloc(nByte); if( !p ) return SQLITE_NOMEM; memset(p, 0, sizeof(Fts5HashEntry)); p->nAlloc = nByte; memcpy(p->zKey, pToken, nToken); p->zKey[nToken] = '\0'; p->iRowidOff = p->nData = nToken + 1 + sizeof(Fts5HashEntry); p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid); p->iRowid = iRowid; p->pNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; nIncr += p->nData; } /* Check there is enough space to append a new entry. Worst case scenario ** is: ** ** + 4 bytes for the previous entry size field, ** + 9 bytes for a new rowid, ** + 1 byte for a "new column" byte, ** + 3 bytes for a new column number (16-bit max) as a varint, ** + 5 bytes for the new position offset (32-bit max). 
*/ if( (p->nAlloc - p->nData) < (4 + 9 + 1 + 3 + 5) ){ int nNew = p->nAlloc * 2; Fts5HashEntry *pNew; Fts5HashEntry **pp; pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew); if( pNew==0 ) return SQLITE_NOMEM; pNew->nAlloc = nNew; for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pNext); *pp = pNew; p = pNew; } pPtr = (u8*)p; nIncr -= p->nData; /* If this is a new rowid, append the 4-byte size field for the previous ** entry, and the new rowid for this entry. */ if( iRowid!=p->iRowid ){ p->nData += fts5PutNativeInt(&pPtr[p->nData], p->nData - p->iRowidOff); p->iRowidOff = p->nData; p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid); p->iCol = 0; p->iPos = 0; p->iRowid = iRowid; } if( iCol>=0 ){ /* Append a new column value, if necessary */ assert( iCol>=p->iCol ); if( iCol!=p->iCol ){ pPtr[p->nData++] = 0x01; p->nData += sqlite3PutVarint(&pPtr[p->nData], iCol); p->iCol = iCol; p->iPos = 0; } /* Append the new position offset */ p->nData += sqlite3PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); p->iPos = iPos; } nIncr += p->nData; *pHash->pnByte += nIncr; return SQLITE_OK; } /* ** Arguments pLeft and pRight point to linked-lists of hash-entry objects, ** each sorted in key order. This function merges the two lists into a ** single list and returns a pointer to its first element. */ static Fts5HashEntry *fts5HashEntryMerge( Fts5HashEntry *pLeft, Fts5HashEntry *pRight ){ Fts5HashEntry *p1 = pLeft; Fts5HashEntry *p2 = pRight; Fts5HashEntry *pRet = 0; Fts5HashEntry **ppOut = &pRet; while( p1 || p2 ){ if( p1==0 ){ *ppOut = p2; p2 = 0; }else if( p2==0 ){ *ppOut = p1; p1 = 0; }else{ int i = 0; while( p1->zKey[i]==p2->zKey[i] ) i++; if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){ /* p2 is smaller */ *ppOut = p2; ppOut = &p2->pNext; p2 = p2->pNext; }else{ /* p1 is smaller */ *ppOut = p1; ppOut = &p1->pNext; p1 = p1->pNext; } *ppOut = 0; } } return pRet; } /* ** Extract all tokens from hash table iHash and link them into a list ** in sorted order. 
The hash table is cleared before returning. It is ** the responsibility of the caller to free the elements of the returned ** list. */ static int fts5HashEntrySort(Fts5Hash *pHash, Fts5HashEntry **ppSorted){ const int nMergeSlot = 32; Fts5HashEntry **ap; Fts5HashEntry *pList; int iSlot; int i; *ppSorted = 0; ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot); if( !ap ) return SQLITE_NOMEM; memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); for(iSlot=0; iSlot<pHash->nSlot; iSlot++){ while( pHash->aSlot[iSlot] ){ Fts5HashEntry *pEntry = pHash->aSlot[iSlot]; pHash->aSlot[iSlot] = pEntry->pNext; pEntry->pNext = 0; for(i=0; ap[i]; i++){ pEntry = fts5HashEntryMerge(pEntry, ap[i]); ap[i] = 0; } ap[i] = pEntry; } } pList = 0; for(i=0; i<nMergeSlot; i++){ pList = fts5HashEntryMerge(pList, ap[i]); } sqlite3_free(ap); *ppSorted = pList; return SQLITE_OK; } int sqlite3Fts5HashIterate( Fts5Hash *pHash, void *pCtx, int (*xTerm)(void*, const char*, int), int (*xEntry)(void*, i64, const u8*, int), int (*xTermDone)(void*) ){ Fts5HashEntry *pList; int rc; rc = fts5HashEntrySort(pHash, &pList); if( rc==SQLITE_OK ){ while( pList ){ Fts5HashEntry *pNext = pList->pNext; if( rc==SQLITE_OK ){ u8 *pPtr = (u8*)pList; int nKey = strlen(pList->zKey); int iOff = pList->iRowidOff; int iEnd = sizeof(Fts5HashEntry) + nKey + 1; int nByte = pList->nData - pList->iRowidOff; rc = xTerm(pCtx, pList->zKey, nKey); while( rc==SQLITE_OK && iOff ){ int nVarint; i64 iRowid; nVarint = getVarint(&pPtr[iOff], (u64*)&iRowid); rc = xEntry(pCtx, iRowid, &pPtr[iOff+nVarint], nByte-nVarint); if( iOff==iEnd ){ iOff = 0; }else{ nByte = fts5GetNativeU32(&pPtr[iOff-sizeof(int)]); iOff = iOff - sizeof(int) - nByte; } } if( rc==SQLITE_OK ){ rc = xTermDone(pCtx); } } sqlite3_free(pList); pList = pNext; } } return rc; } |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
13 14 15 16 17 18 19 | ** Low level access to the FTS index stored in the database file. The ** routines in this file implement all read and write access to the ** %_data table. Other parts of the system access this functionality via ** the interface defined in fts5Int.h. */ #include "fts5Int.h" | < | 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | ** Low level access to the FTS index stored in the database file. The ** routines in this file implement all read and write access to the ** %_data table. Other parts of the system access this functionality via ** the interface defined in fts5Int.h. */ #include "fts5Int.h" /* ** Overview: ** ** The %_data table contains all the FTS indexes for an FTS5 virtual table. ** As well as the main term index, there may be up to 31 prefix indexes. ** The format is similar to FTS3/4, except that: |
︙ | ︙ | |||
272 273 274 275 276 277 278 | typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5ChunkIter Fts5ChunkIter; typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; typedef struct Fts5PageWriter Fts5PageWriter; | < < | 271 272 273 274 275 276 277 278 279 280 281 282 283 284 | typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5ChunkIter Fts5ChunkIter; typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; typedef struct Fts5PageWriter Fts5PageWriter; typedef struct Fts5PosIter Fts5PosIter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; |
︙ | ︙ | |||
296 297 298 299 300 301 302 | int nMinMerge; /* Minimum input segments in a merge */ int nWorkUnit; /* Leaf pages in a "unit" of work */ /* ** Variables related to the accumulation of tokens and doclists within the ** in-memory hash tables before they are flushed to disk. */ | | | 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 | int nMinMerge; /* Minimum input segments in a merge */ int nWorkUnit; /* Leaf pages in a "unit" of work */ /* ** Variables related to the accumulation of tokens and doclists within the ** in-memory hash tables before they are flushed to disk. */ Fts5Hash **apHash; /* Array of hash tables */ int nMaxPendingData; /* Max pending data before flush to disk */ int nPendingData; /* Current bytes of pending data */ i64 iWriteRowid; /* Rowid for current doc being written */ /* Error state. */ int rc; /* Current error code */ |
︙ | ︙ | |||
343 344 345 346 347 348 349 | */ struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ int n; /* Size of record in bytes */ int nRef; /* Ref count */ }; | < < < < < < < < < < < < < < < < < < < < | 340 341 342 343 344 345 346 347 348 349 350 351 352 353 | */ struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ int n; /* Size of record in bytes */ int nRef; /* Ref count */ }; /* ** The contents of the "structure" record for each index are represented ** using an Fts5Structure record in memory. Which uses instances of the ** other Fts5StructureXXX types as components. */ struct Fts5StructureSegment { int iSegid; /* Segment id */ |
︙ | ︙ | |||
2454 2455 2456 2457 2458 2459 2460 | ** Return true if the position iterator passed as the second argument is ** at EOF. Or if an error has already occurred. Otherwise, return false. */ static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){ return (p->rc || pIter->chunk.pLeaf==0); } | < < < < < < < < < < < < < < < < < < < | | < < < < | < < < < < < < < < < < < < < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 | ** Return true if the position iterator passed as the second argument is ** at EOF. Or if an error has already occurred. Otherwise, return false. */ static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){ return (p->rc || pIter->chunk.pLeaf==0); } /* ** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken) ** in hash table for index iIdx. If iIdx is zero, this is the main terms ** index. Values of 1 and greater for iIdx are prefix indexes. ** ** If an OOM error is encountered, set the Fts5Index.rc error code ** accordingly. */ static void fts5AddTermToHash( Fts5Index *p, /* Index object to write to */ int iIdx, /* Entry in p->aHash[] to update */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ ){ if( p->rc==SQLITE_OK ){ p->rc = sqlite3Fts5HashWrite( p->apHash[iIdx], p->iWriteRowid, iCol, iPos, pToken, nToken ); } } /* ** Insert or remove data to or from the index. Each time a document is ** added to or removed from the index, this function is called one or more ** times. ** |
︙ | ︙ | |||
2578 2579 2580 2581 2582 2583 2584 | int i; /* Used to iterate through indexes */ Fts5Config *pConfig = p->pConfig; /* If an error has already occured this call is a no-op. */ if( p->rc!=SQLITE_OK ) return; /* Allocate hash tables if they have not already been allocated */ | | | < < < | | < | 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 | int i; /* Used to iterate through indexes */ Fts5Config *pConfig = p->pConfig; /* If an error has already occured this call is a no-op. */ if( p->rc!=SQLITE_OK ) return; /* Allocate hash tables if they have not already been allocated */ if( p->apHash==0 ){ int nHash = pConfig->nPrefix + 1; p->apHash = (Fts5Hash**)fts5IdxMalloc(p, sizeof(Fts5Hash*) * nHash); for(i=0; p->rc==SQLITE_OK && i<nHash; i++){ p->rc = sqlite3Fts5HashNew(&p->apHash[i], &p->nPendingData); } } /* Add the new token to the main terms hash table. And to each of the ** prefix hash tables that it is large enough for. */ fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken); for(i=0; i<pConfig->nPrefix; i++){ |
︙ | ︙ | |||
2630 2631 2632 2633 2634 2635 2636 2637 | } if( iSegid ) return iSegid; } p->rc = SQLITE_ERROR; return 0; } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | | 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 | } if( iSegid ) return iSegid; } p->rc = SQLITE_ERROR; return 0; } /* ** Discard all data currently cached in the hash-tables. */ static void fts5IndexDiscardData(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; int i; for(i=0; i<=pConfig->nPrefix; i++){ sqlite3Fts5HashClear(p->apHash[i]); } p->nPendingData = 0; } /* ** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares ** with buffer (nOld/pOld). |
︙ | ︙ | |||
3008 3009 3010 3011 3012 3013 3014 | } } static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){ fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0); } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 | } } static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){ fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0); } /* ** Flush any data cached by the writer object to the database. Free any ** allocations associated with the writer. */ static void fts5WriteFinish( Fts5Index *p, Fts5SegWriter *pWriter, /* Writer object */ |
︙ | ︙ | |||
3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 | fts5StructureExtendLevel(&p->rc, pStruct, iBestLvl+1, 1, 0); fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem); fts5StructurePromote(p, iBestLvl+1, pStruct); assert( nRem==0 || p->rc==SQLITE_OK ); *ppStruct = pStruct; } } /* ** Flush the contents of in-memory hash table iHash to a new level-0 ** segment on disk. Also update the corresponding structure record. ** ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op. */ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ Fts5Structure *pStruct; int iSegid; int pgnoLast = 0; /* Last leaf page number in segment */ /* Obtain a reference to the index structure and allocate a new segment-id ** for the new level-0 segment. */ pStruct = fts5StructureRead(p, iHash); iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < < < < > > < < | | < < < < | > > > > | | 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 | fts5StructureExtendLevel(&p->rc, pStruct, iBestLvl+1, 1, 0); fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem); fts5StructurePromote(p, iBestLvl+1, pStruct); assert( nRem==0 || p->rc==SQLITE_OK ); *ppStruct = pStruct; } } typedef struct Fts5FlushCtx Fts5FlushCtx; struct Fts5FlushCtx { Fts5Index *pIdx; Fts5SegWriter writer; }; static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){ 
Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; int rc = SQLITE_OK; fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm); return rc; } static int fts5FlushTermDone(void *pCtx){ Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; int rc = SQLITE_OK; /* Write the doclist terminator */ fts5WriteAppendZerobyte(p->pIdx, &p->writer); return rc; } static int fts5FlushNewEntry( void *pCtx, i64 iRowid, const u8 *aPoslist, int nPoslist ){ Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; int rc = SQLITE_OK; int i = 0; /* Append the rowid itself */ fts5WriteAppendRowid(p->pIdx, &p->writer, iRowid); /* Append the size of the position list in bytes */ fts5WriteAppendPoslistInt(p->pIdx, &p->writer, nPoslist); /* Copy the position list to the output segment */ while( i<nPoslist ){ int iVal; i += getVarint32(&aPoslist[i], iVal); fts5WriteAppendPoslistInt(p->pIdx, &p->writer, iVal); } return rc; } /* ** Flush the contents of in-memory hash table iHash to a new level-0 ** segment on disk. Also update the corresponding structure record. ** ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op. */ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ Fts5Structure *pStruct; int iSegid; int pgnoLast = 0; /* Last leaf page number in segment */ /* Obtain a reference to the index structure and allocate a new segment-id ** for the new level-0 segment. */ pStruct = fts5StructureRead(p, iHash); iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ Fts5StructureSegment *pSeg; /* New segment within pStruct */ int nHeight; /* Height of new segment b-tree */ int rc; Fts5FlushCtx ctx; fts5WriteInit(p, &ctx.writer, iHash, iSegid); ctx.pIdx = p; rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone ); if( p->rc==SQLITE_OK ) p->rc = rc; fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast); /* Edit the Fts5Structure and write it back to the database. 
*/ if( pStruct->nLevel==0 ){ fts5StructureAddLevel(&p->rc, &pStruct); } fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); if( p->rc==SQLITE_OK ){ |
︙ | ︙ | |||
3448 3449 3450 3451 3452 3453 3454 | static void fts5IndexFlush(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; int i; /* Used to iterate through indexes */ int nLeaf = 0; /* Number of leaves written */ /* If an error has already occured this call is a no-op. */ if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return; | | | 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 | static void fts5IndexFlush(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; int i; /* Used to iterate through indexes */ int nLeaf = 0; /* Number of leaves written */ /* If an error has already occured this call is a no-op. */ if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return; assert( p->apHash ); /* Flush the terms and each prefix index to disk */ for(i=0; i<=pConfig->nPrefix; i++){ fts5FlushOneHash(p, i, &nLeaf); } p->nPendingData = 0; } |
︙ | ︙ | |||
3551 3552 3553 3554 3555 3556 3557 | int rc = SQLITE_OK; if( bDestroy ){ rc = sqlite3Fts5DropTable(p->pConfig, "data"); } assert( p->pReader==0 ); sqlite3_finalize(p->pWriter); sqlite3_finalize(p->pDeleter); | > > > > > | > | 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 | int rc = SQLITE_OK; if( bDestroy ){ rc = sqlite3Fts5DropTable(p->pConfig, "data"); } assert( p->pReader==0 ); sqlite3_finalize(p->pWriter); sqlite3_finalize(p->pDeleter); if( p->apHash ){ int i; for(i=0; i<=p->pConfig->nPrefix; i++){ sqlite3Fts5HashFree(p->apHash[i]); } sqlite3_free(p->apHash); } sqlite3_free(p->zDataTbl); sqlite3_free(p); return rc; } /* ** Return a simple checksum value based on the arguments. |
︙ | ︙ | |||
4311 4312 4313 4314 4315 4316 4317 | aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p, 0); if( aBuf && pStruct ){ Fts5DoclistIter *pDoclist; int i; | | | 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 | aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p, 0); if( aBuf && pStruct ){ Fts5DoclistIter *pDoclist; int i; i64 iLastRowid = 0; Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */ Fts5Buffer doclist; memset(&doclist, 0, sizeof(doclist)); for(fts5MultiIterNew(p, pStruct, 0, 1, pToken, nToken, -1, 0, &p1); fts5MultiIterEof(p, p1)==0; fts5MultiIterNext(p, p1, 0, 0) |
︙ | ︙ |
Changes to main.mk.
︙ | ︙ | |||
73 74 75 76 77 78 79 80 81 82 83 84 85 86 | vdbetrace.o wal.o walker.o where.o utf.o vtab.o LIBOBJ += fts5.o LIBOBJ += fts5_aux.o LIBOBJ += fts5_buffer.o LIBOBJ += fts5_config.o LIBOBJ += fts5_expr.o LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o LIBOBJ += fts5parse.o # All of the source code files. | > | 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | vdbetrace.o wal.o walker.o where.o utf.o vtab.o LIBOBJ += fts5.o LIBOBJ += fts5_aux.o LIBOBJ += fts5_buffer.o LIBOBJ += fts5_config.o LIBOBJ += fts5_expr.o LIBOBJ += fts5_hash.o LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o LIBOBJ += fts5parse.o # All of the source code files. |
︙ | ︙ | |||
228 229 230 231 232 233 234 235 236 237 238 239 240 241 | $(TOP)/ext/fts5/fts5.h \ $(TOP)/ext/fts5/fts5Int.h \ $(TOP)/ext/fts5/fts5_aux.c \ $(TOP)/ext/fts5/fts5_buffer.c \ $(TOP)/ext/fts5/fts5.c \ $(TOP)/ext/fts5/fts5_config.c \ $(TOP)/ext/fts5/fts5_expr.c \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c \ $(TOP)/ext/fts5/fts5_storage.c # Generated source code files # | > | 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 | $(TOP)/ext/fts5/fts5.h \ $(TOP)/ext/fts5/fts5Int.h \ $(TOP)/ext/fts5/fts5_aux.c \ $(TOP)/ext/fts5/fts5_buffer.c \ $(TOP)/ext/fts5/fts5.c \ $(TOP)/ext/fts5/fts5_config.c \ $(TOP)/ext/fts5/fts5_expr.c \ $(TOP)/ext/fts5/fts5_hash.c \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c \ $(TOP)/ext/fts5/fts5_storage.c # Generated source code files # |
︙ | ︙ | |||
594 595 596 597 598 599 600 601 602 603 604 605 606 607 | $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_buffer.c fts5_config.o: $(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_config.c fts5_expr.o: $(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_index.c fts5_storage.o: $(TOP)/ext/fts5/fts5_storage.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_storage.c | > > > | 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 | $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_buffer.c fts5_config.o: $(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_config.c fts5_expr.o: $(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c fts5_hash.o: $(TOP)/ext/fts5/fts5_hash.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_hash.c fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_index.c fts5_storage.o: $(TOP)/ext/fts5/fts5_storage.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_storage.c |
︙ | ︙ |
Changes to tool/loadfts.c.
︙ | ︙ | |||
65 66 67 68 69 70 71 72 73 74 75 76 77 78 | " the contents of each file into the fts table. All files are assumed to\n" " contain UTF-8 text.\n" "\n" "Switches are:\n" " -fts [345] FTS version to use (default=5)\n" " -idx [01] Create a mapping from filename to rowid (default=0)\n" " -dir <path> Root of directory tree to load data from (default=.)\n" , zArgv0 ); exit(1); } /* ** Exit with a message based on the argument and the current value of errno. | > | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | " the contents of each file into the fts table. All files are assumed to\n" " contain UTF-8 text.\n" "\n" "Switches are:\n" " -fts [345] FTS version to use (default=5)\n" " -idx [01] Create a mapping from filename to rowid (default=0)\n" " -dir <path> Root of directory tree to load data from (default=.)\n" " -trans <integer> Number of inserts per transaction (default=1)\n" , zArgv0 ); exit(1); } /* ** Exit with a message based on the argument and the current value of errno. |
︙ | ︙ | |||
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | } /* ** Context object for visit_file(). */ typedef struct VisitContext VisitContext; struct VisitContext { sqlite3 *db; /* Database handle */ sqlite3_stmt *pInsert; /* INSERT INTO fts VALUES(readtext(:1)) */ }; /* ** Callback used with traverse(). The first argument points to an object ** of type VisitContext. This function inserts the contents of the text ** file zPath into the FTS table. */ void visit_file(void *pCtx, const char *zPath){ int rc; VisitContext *p = (VisitContext*)pCtx; /* printf("%s\n", zPath); */ sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC); sqlite3_step(p->pInsert); rc = sqlite3_reset(p->pInsert); | > > | > > > > > | 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | } /* ** Context object for visit_file(). */ typedef struct VisitContext VisitContext; struct VisitContext { int nRowPerTrans; sqlite3 *db; /* Database handle */ sqlite3_stmt *pInsert; /* INSERT INTO fts VALUES(readtext(:1)) */ }; /* ** Callback used with traverse(). The first argument points to an object ** of type VisitContext. This function inserts the contents of the text ** file zPath into the FTS table. */ void visit_file(void *pCtx, const char *zPath){ int rc; VisitContext *p = (VisitContext*)pCtx; /* printf("%s\n", zPath); */ sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC); sqlite3_step(p->pInsert); rc = sqlite3_reset(p->pInsert); if( rc!=SQLITE_OK ){ sqlite_error_out("insert", p->db); }else if( p->nRowPerTrans>0 && (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0 ){ sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0); } } /* ** Recursively traverse directory zDir. For each file that is not a ** directory, invoke the supplied callback with its path. */ static void traverse( |
︙ | ︙ | |||
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 | int main(int argc, char **argv){ int iFts = 5; /* Value of -fts option */ int bMap = 0; /* True to create mapping table */ const char *zDir = "."; /* Directory to scan */ int i; int rc; sqlite3 *db; char *zSql; VisitContext sCtx; if( argc % 2 ) showHelp(argv[0]); for(i=1; i<(argc-1); i+=2){ char *zOpt = argv[i]; char *zArg = argv[i+1]; if( strcmp(zOpt, "-fts")==0 ){ iFts = atoi(zArg); if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]); } else if( strcmp(zOpt, "-idx")==0 ){ bMap = atoi(zArg); if( bMap!=0 && bMap!=1 ) showHelp(argv[0]); } else if( strcmp(zOpt, "-dir")==0 ){ zDir = zArg; | > > > > | 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 | int main(int argc, char **argv){ int iFts = 5; /* Value of -fts option */ int bMap = 0; /* True to create mapping table */ const char *zDir = "."; /* Directory to scan */ int i; int rc; int nRowPerTrans = 0; sqlite3 *db; char *zSql; VisitContext sCtx; if( argc % 2 ) showHelp(argv[0]); for(i=1; i<(argc-1); i+=2){ char *zOpt = argv[i]; char *zArg = argv[i+1]; if( strcmp(zOpt, "-fts")==0 ){ iFts = atoi(zArg); if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]); } if( strcmp(zOpt, "-trans")==0 ){ nRowPerTrans = atoi(zArg); } else if( strcmp(zOpt, "-idx")==0 ){ bMap = atoi(zArg); if( bMap!=0 && bMap!=1 ) showHelp(argv[0]); } else if( strcmp(zOpt, "-dir")==0 ){ zDir = zArg; |
︙ | ︙ | |||
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 | rc = sqlite3_exec(db, zSql, 0, 0, 0); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); sqlite3_free(zSql); /* Compile the INSERT statement to write data to the FTS table. */ memset(&sCtx, 0, sizeof(VisitContext)); sCtx.db = db; rc = sqlite3_prepare_v2(db, "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0 ); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db); /* Load all files in the directory hierarchy into the FTS table. */ traverse(zDir, (void*)&sCtx, visit_file); /* Clean up and exit. */ sqlite3_finalize(sCtx.pInsert); sqlite3_close(db); return 0; } | > > > | 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 | rc = sqlite3_exec(db, zSql, 0, 0, 0); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); sqlite3_free(zSql); /* Compile the INSERT statement to write data to the FTS table. */ memset(&sCtx, 0, sizeof(VisitContext)); sCtx.db = db; sCtx.nRowPerTrans = nRowPerTrans; rc = sqlite3_prepare_v2(db, "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0 ); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db); /* Load all files in the directory hierarchy into the FTS table. */ if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0); traverse(zDir, (void*)&sCtx, visit_file); if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0); /* Clean up and exit. */ sqlite3_finalize(sCtx.pInsert); sqlite3_close(db); return 0; } |