Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Updates to snippet() and offsets() functions of FTS3 so that they work sanely following an OOM fault. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
b939a37a8ce296785a300e79ab9d3d87 |
User & Date: | drh 2009-11-28 21:33:21.000 |
Context
2009-11-30
| ||
08:55 | Add test cases for examples recently added to documentation file fts3.html. (check-in: 498922cc35 user: dan tags: trunk) | |
2009-11-28
| ||
21:33 | Updates to snippet() and offsets() functions of FTS3 so that they work sanely following an OOM fault. (check-in: b939a37a8c user: drh tags: trunk) | |
17:23 | Change FTS3 to detect when the RHS of the MATCH operator encounters an OOM during string format conversion and report back an SQLITE_NOMEM error. (check-in: 31eed4f8f9 user: drh tags: trunk) | |
Changes
Changes to ext/fts3/fts3_snippet.c.
︙ | ︙ | |||
42 43 44 45 46 47 48 | }; /* It is not safe to call isspace(), tolower(), or isalnum() on ** hi-bit-set characters. This is the same solution used in the ** tokenizer. */ | < < < < < | < < | < < < < < < < < < < < < | | < < < < | < < < < | < < | < < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | > > < < < | > > > | > | > < < | | > > | | | < < > > > | > > > > > > | < < > > | | < < < > > | | | > | > | | | > | | > | | | | > | > | > | | > > | 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 | }; /* It is not safe to call isspace(), tolower(), or isalnum() on ** hi-bit-set characters. This is the same solution used in the ** tokenizer. */ static int fts3snippetIsspace(char c){ return (c&0x80)==0 ? isspace(c) : 0; } /* ** A StringBuffer object holds a zero-terminated string that grows ** arbitrarily by appending. Space to hold the string is obtained ** from sqlite3_malloc(). After any memory allocation failure, ** StringBuffer.z is set to NULL and no further allocation is attempted. */ typedef struct StringBuffer { char *z; /* Text of the string. Space from malloc. */ int nUsed; /* Number bytes of z[] used, not counting \000 terminator */ int nAlloc; /* Bytes allocated for z[] */ } StringBuffer; /* ** Initialize a new StringBuffer. */ static void fts3SnippetSbInit(StringBuffer *p){ p->nAlloc = 100; p->nUsed = 0; p->z = sqlite3_malloc( p->nAlloc ); } /* ** Append text to the string buffer. 
*/ static void fts3SnippetAppend(StringBuffer *p, const char *zNew, int nNew){ if( p->z==0 ) return; if( nNew<0 ) nNew = strlen(zNew); if( p->nUsed + nNew >= p->nAlloc ){ int nAlloc; char *zNew; nAlloc = p->nUsed + nNew + p->nAlloc; zNew = sqlite3_realloc(p->z, nAlloc); if( zNew==0 ){ sqlite3_free(p->z); p->z = 0; return; } p->z = zNew; p->nAlloc = nAlloc; } memcpy(&p->z[p->nUsed], zNew, nNew); p->nUsed += nNew; p->z[p->nUsed] = 0; } /* If the StringBuffer ends in something other than white space, add a ** single space character to the end. */ static void fts3SnippetAppendWhiteSpace(StringBuffer *p){ if( p->z && p->nUsed && !fts3snippetIsspace(p->z[p->nUsed-1]) ){ fts3SnippetAppend(p, " ", 1); } } /* Remove white space from the end of the StringBuffer */ static void fts3SnippetTrimWhiteSpace(StringBuffer *p){ if( p->z ){ while( p->nUsed && fts3snippetIsspace(p->z[p->nUsed-1]) ){ p->nUsed--; } p->z[p->nUsed] = 0; } } /* ** Release all memory associated with the Snippet structure passed as ** an argument. */ static void fts3SnippetFree(Snippet *p){ if( p ){ sqlite3_free(p->aMatch); sqlite3_free(p->zOffset); sqlite3_free(p->zSnippet); sqlite3_free(p); } } /* ** Append a single entry to the p->aMatch[] log. 
*/ static int snippetAppendMatch( Snippet *p, /* Append the entry to this snippet */ int iCol, int iTerm, /* The column and query term */ int iToken, /* Matching token in document */ int iStart, int nByte /* Offset and size of the match */ ){ int i; struct snippetMatch *pMatch; if( p->nMatch+1>=p->nAlloc ){ struct snippetMatch *pNew; p->nAlloc = p->nAlloc*2 + 10; pNew = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) ); if( pNew==0 ){ p->aMatch = 0; p->nMatch = 0; p->nAlloc = 0; return SQLITE_NOMEM; } p->aMatch = pNew; } i = p->nMatch++; pMatch = &p->aMatch[i]; pMatch->iCol = iCol; pMatch->iTerm = iTerm; pMatch->iToken = iToken; pMatch->iStart = iStart; pMatch->nByte = nByte; return SQLITE_OK; } /* ** Sizing information for the circular buffer used in snippetOffsetsOfColumn() */ #define FTS3_ROTOR_SZ (32) #define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1) |
︙ | ︙ | |||
276 277 278 279 280 281 282 | return 0; } /* ** Add entries to pSnippet->aMatch[] for every match that occurs against ** document zDoc[0..nDoc-1] which is stored in column iColumn. */ | | | 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 | return 0; } /* ** Add entries to pSnippet->aMatch[] for every match that occurs against ** document zDoc[0..nDoc-1] which is stored in column iColumn. */ static int snippetOffsetsOfColumn( Fts3Cursor *pCur, /* The fulltest search cursor */ Snippet *pSnippet, /* The Snippet object to be filled in */ int iColumn, /* Index of fulltext table column */ const char *zDoc, /* Text of the fulltext table column */ int nDoc /* Length of zDoc in bytes */ ){ const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */ |
︙ | ︙ | |||
306 307 308 309 310 311 312 | int iRotorLen[FTS3_ROTOR_SZ]; /* Length of token */ pVtab = (Fts3Table *)pCur->base.pVtab; nColumn = pVtab->nColumn; pTokenizer = pVtab->pTokenizer; pTModule = pTokenizer->pModule; rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor); | | | > | 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 | int iRotorLen[FTS3_ROTOR_SZ]; /* Length of token */ pVtab = (Fts3Table *)pCur->base.pVtab; nColumn = pVtab->nColumn; pTokenizer = pVtab->pTokenizer; pTModule = pTokenizer->pModule; rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor); if( rc ) return rc; pTCursor->pTokenizer = pTokenizer; prevMatch = 0; while( (rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos))==SQLITE_OK ){ Fts3Expr *pIter = pCur->pExpr; int iIter = -1; iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin; iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin; match = 0; for(i=0; i<(FTS3_ROTOR_SZ-1) && fts3NextExprToken(&pIter, &iIter); i++){ int nPhrase; /* Number of tokens in current phrase */ |
︙ | ︙ | |||
335 336 337 338 339 340 341 | assert( pToken->n<=nToken ); if( memcmp(pToken->z, zToken, pToken->n) ) continue; if( iIter>0 && (prevMatch & (1<<i))==0 ) continue; match |= 1<<i; if( i==(FTS3_ROTOR_SZ-2) || nPhrase==iIter+1 ){ for(j=nPhrase-1; j>=0; j--){ int k = (iRotor-j) & FTS3_ROTOR_MASK; | | | > > > | 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 | assert( pToken->n<=nToken ); if( memcmp(pToken->z, zToken, pToken->n) ) continue; if( iIter>0 && (prevMatch & (1<<i))==0 ) continue; match |= 1<<i; if( i==(FTS3_ROTOR_SZ-2) || nPhrase==iIter+1 ){ for(j=nPhrase-1; j>=0; j--){ int k = (iRotor-j) & FTS3_ROTOR_MASK; rc = snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j, iRotorBegin[k], iRotorLen[k]); if( rc ) goto end_offsets_of_column; } } } prevMatch = match<<1; iRotor++; } end_offsets_of_column: pTModule->xClose(pTCursor); return rc==SQLITE_DONE ? SQLITE_OK : rc; } /* ** Remove entries from the pSnippet structure to account for the NEAR ** operator. When this is called, pSnippet contains the list of token ** offsets produced by treating all NEAR operators as AND operators. ** This function removes any entries that should not be present after |
︙ | ︙ | |||
485 486 487 488 489 490 491 492 493 494 495 496 497 498 | static int snippetAllOffsets(Fts3Cursor *pCsr, Snippet **ppSnippet){ Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; int nColumn; int iColumn, i; int iFirst, iLast; int iTerm = 0; Snippet *pSnippet; if( pCsr->pExpr==0 ){ return SQLITE_OK; } pSnippet = (Snippet *)sqlite3_malloc(sizeof(Snippet)); *ppSnippet = pSnippet; | > | 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 | static int snippetAllOffsets(Fts3Cursor *pCsr, Snippet **ppSnippet){ Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; int nColumn; int iColumn, i; int iFirst, iLast; int iTerm = 0; Snippet *pSnippet; int rc = SQLITE_OK; if( pCsr->pExpr==0 ){ return SQLITE_OK; } pSnippet = (Snippet *)sqlite3_malloc(sizeof(Snippet)); *ppSnippet = pSnippet; |
︙ | ︙ | |||
508 509 510 511 512 513 514 | iFirst = 0; iLast = nColumn-1; }else{ /* Look for matches in the iColumn-th column of the index only */ iFirst = iColumn; iLast = iColumn; } | | > > > | > | | | | | | 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 | iFirst = 0; iLast = nColumn-1; }else{ /* Look for matches in the iColumn-th column of the index only */ iFirst = iColumn; iLast = iColumn; } for(i=iFirst; rc==SQLITE_OK && i<=iLast; i++){ const char *zDoc; int nDoc; zDoc = (const char*)sqlite3_column_text(pCsr->pStmt, i+1); nDoc = sqlite3_column_bytes(pCsr->pStmt, i+1); if( zDoc==0 && sqlite3_column_type(pCsr->pStmt, i+1)!=SQLITE_NULL ){ rc = SQLITE_NOMEM; }else{ rc = snippetOffsetsOfColumn(pCsr, pSnippet, i, zDoc, nDoc); } } while( trimSnippetOffsets(pCsr->pExpr, pSnippet, &iTerm) ){ iTerm = 0; } return rc; } /* ** Convert the information in the aMatch[] array of the snippet ** into the string zOffset[0..nOffset-1]. This string is used as ** the return of the SQL offsets() function. */ static void snippetOffsetText(Snippet *p){ int i; int cnt = 0; StringBuffer sb; char zBuf[200]; if( p->zOffset ) return; fts3SnippetSbInit(&sb); for(i=0; i<p->nMatch; i++){ struct snippetMatch *pMatch = &p->aMatch[i]; if( pMatch->iTerm>=0 ){ /* If snippetMatch.iTerm is less than 0, then the match was ** discarded as part of processing the NEAR operator (see the ** trimSnippetOffsetsForNear() function for details). Ignore ** it in this case */ zBuf[0] = ' '; sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d", pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte); fts3SnippetAppend(&sb, zBuf, -1); cnt++; } } p->zOffset = sb.z; p->nOffset = sb.z ? sb.nUsed : 0; } /* ** zDoc[0..nDoc-1] is phrase of text. 
aMatch[0..nMatch-1] are a set ** of matching words some of which might be in zDoc. zDoc is column ** number iCol. ** |
︙ | ︙ | |||
589 590 591 592 593 594 595 | return aMatch[i].iStart; } if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){ return aMatch[i-1].iStart; } } for(i=1; i<=10; i++){ | | | | 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 | return aMatch[i].iStart; } if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){ return aMatch[i-1].iStart; } } for(i=1; i<=10; i++){ if( fts3snippetIsspace(zDoc[iBreak-i]) ){ return iBreak - i + 1; } if( fts3snippetIsspace(zDoc[iBreak+i]) ){ return iBreak + i + 1; } } return iBreak; } |
︙ | ︙ | |||
636 637 638 639 640 641 642 | int iMatch; sqlite3_free(pSnippet->zSnippet); pSnippet->zSnippet = 0; aMatch = pSnippet->aMatch; nMatch = pSnippet->nMatch; | | | 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 | int iMatch; sqlite3_free(pSnippet->zSnippet); pSnippet->zSnippet = 0; aMatch = pSnippet->aMatch; nMatch = pSnippet->nMatch; fts3SnippetSbInit(&sb); for(i=0; i<nMatch; i++){ aMatch[i].snStatus = SNIPPET_IGNORE; } nDesired = 0; for(i=0; i<FTS3_ROTOR_SZ; i++){ for(j=0; j<nMatch; j++){ |
︙ | ︙ | |||
670 671 672 673 674 675 676 | if( iStart<=10 ){ iStart = 0; } if( iCol==tailCol && iStart<=tailOffset+20 ){ iStart = tailOffset; } if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){ | | | | | | | | | | | | | | | > | > | > > > > > > > | > | > > > > > > | 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 | if( iStart<=10 ){ iStart = 0; } if( iCol==tailCol && iStart<=tailOffset+20 ){ iStart = tailOffset; } if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){ fts3SnippetTrimWhiteSpace(&sb); fts3SnippetAppendWhiteSpace(&sb); fts3SnippetAppend(&sb, zEllipsis, -1); fts3SnippetAppendWhiteSpace(&sb); } iEnd = aMatch[i].iStart + aMatch[i].nByte + 40; iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol); if( iEnd>=nDoc-10 ){ iEnd = nDoc; tailEllipsis = 0; }else{ tailEllipsis = 1; } while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; } while( iStart<iEnd ){ while( iMatch<nMatch && aMatch[iMatch].iStart<iStart && aMatch[iMatch].iCol<=iCol ){ iMatch++; } if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd && aMatch[iMatch].iCol==iCol ){ fts3SnippetAppend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart); iStart = aMatch[iMatch].iStart; fts3SnippetAppend(&sb, zStartMark, -1); fts3SnippetAppend(&sb, &zDoc[iStart], aMatch[iMatch].nByte); fts3SnippetAppend(&sb, zEndMark, -1); iStart += aMatch[iMatch].nByte; for(j=iMatch+1; j<nMatch; j++){ if( aMatch[j].iTerm==aMatch[iMatch].iTerm && aMatch[j].snStatus==SNIPPET_DESIRED ){ nDesired--; aMatch[j].snStatus = SNIPPET_IGNORE; } } }else{ fts3SnippetAppend(&sb, &zDoc[iStart], iEnd - iStart); iStart = iEnd; } } tailCol = iCol; tailOffset = iEnd; } 
fts3SnippetTrimWhiteSpace(&sb); if( tailEllipsis ){ fts3SnippetAppendWhiteSpace(&sb); fts3SnippetAppend(&sb, zEllipsis, -1); } pSnippet->zSnippet = sb.z; pSnippet->nSnippet = sb.z ? sb.nUsed : 0; } void sqlite3Fts3Offsets( sqlite3_context *pCtx, /* SQLite function call context */ Fts3Cursor *pCsr /* Cursor object */ ){ Snippet *p; /* Snippet structure */ int rc = snippetAllOffsets(pCsr, &p); if( rc==SQLITE_OK ){ snippetOffsetText(p); if( p->zOffset ){ sqlite3_result_text(pCtx, p->zOffset, p->nOffset, SQLITE_TRANSIENT); }else{ sqlite3_result_error_nomem(pCtx); } }else{ sqlite3_result_error_nomem(pCtx); } fts3SnippetFree(p); } void sqlite3Fts3Snippet( sqlite3_context *pCtx, /* SQLite function call context */ Fts3Cursor *pCsr, /* Cursor object */ const char *zStart, /* Snippet start text - "<b>" */ const char *zEnd, /* Snippet end text - "</b>" */ const char *zEllipsis /* Snippet ellipsis text - "<b>...</b>" */ ){ Snippet *p; /* Snippet structure */ int rc = snippetAllOffsets(pCsr, &p); if( rc==SQLITE_OK ){ snippetText(pCsr, p, zStart, zEnd, zEllipsis); if( p->zSnippet ){ sqlite3_result_text(pCtx, p->zSnippet, p->nSnippet, SQLITE_TRANSIENT); }else{ sqlite3_result_error_nomem(pCtx); } }else{ sqlite3_result_error_nomem(pCtx); } fts3SnippetFree(p); } #endif |
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
642 643 644 645 646 647 648 | rc = fts3PendingTermsAdd(p, zText, -1); if( rc!=SQLITE_OK ){ sqlite3_reset(pSelect); return rc; } } } | > | > | | | 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 | rc = fts3PendingTermsAdd(p, zText, -1); if( rc!=SQLITE_OK ){ sqlite3_reset(pSelect); return rc; } } } rc = sqlite3_reset(pSelect); }else{ sqlite3_reset(pSelect); } return rc; } /* ** Forward declaration to account for the circular dependency between ** functions fts3SegmentMerge() and fts3AllocateSegdirIdx(). */ static int fts3SegmentMerge(Fts3Table *, int); |
︙ | ︙ |