Index: ext/fts5/fts5_expr.c ================================================================== --- ext/fts5/fts5_expr.c +++ ext/fts5/fts5_expr.c @@ -1465,10 +1465,11 @@ } typedef struct TokenCtx TokenCtx; struct TokenCtx { Fts5ExprPhrase *pPhrase; + int rc; }; /* ** Callback for tokenizing terms used by ParseTerm(). */ @@ -1483,40 +1484,53 @@ int rc = SQLITE_OK; const int SZALLOC = 8; TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; + /* If an error has already occurred, this is a no-op */ + if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; + assert( pPhrase==0 || pPhrase->nTerm>0 ); if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){ Fts5ExprTerm *pSyn; int nByte = sizeof(Fts5ExprTerm) + nToken+1; pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); - if( pSyn==0 ) return SQLITE_NOMEM; - memset(pSyn, 0, nByte); - pSyn->zTerm = (char*)&pSyn[1]; - memcpy(pSyn->zTerm, pToken, nToken); - pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; - pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; + if( pSyn==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pSyn, 0, nByte); + pSyn->zTerm = (char*)&pSyn[1]; + memcpy(pSyn->zTerm, pToken, nToken); + pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; + pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; + } }else{ Fts5ExprTerm *pTerm; if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ Fts5ExprPhrase *pNew; int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew ); - if( pNew==0 ) return SQLITE_NOMEM; - if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); - pCtx->pPhrase = pPhrase = pNew; - pNew->nTerm = nNew - SZALLOC; - } - - pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; - memset(pTerm, 0, sizeof(Fts5ExprTerm)); - pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); - } + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); + pCtx->pPhrase = pPhrase = pNew; + pNew->nTerm = nNew - SZALLOC; + } + } + + if( rc==SQLITE_OK ){ + pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; + memset(pTerm, 0, sizeof(Fts5ExprTerm)); + pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); + } + } + + pCtx->rc = rc; return rc; } /* @@ -1571,11 +1585,11 @@ sqlite3Fts5Dequote(z); n = strlen(z); rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize); } sqlite3_free(z); - if( rc ){ + if( rc || (rc = sCtx.rc) ){ pParse->rc = rc; fts5ExprPhraseFree(sCtx.pPhrase); sCtx.pPhrase = 0; }else if( sCtx.pPhrase ){ @@ -1620,11 +1634,11 @@ Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ Fts5ExprPhrase **apPhrase; /* pNew->apPhrase */ Fts5ExprNode *pNode; /* pNew->pRoot */ Fts5ExprNearset *pNear; /* pNew->pRoot->pNear */ - TokenCtx sCtx = {0}; /* Context object for fts5ParseTokenize */ + TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */ pOrig = pExpr->apExprPhrase[iPhrase]; pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); Index: ext/fts5/fts5_test_mi.c ================================================================== --- ext/fts5/fts5_test_mi.c +++ ext/fts5/fts5_test_mi.c @@ -350,11 +350,11 @@ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ const char *zArg; Fts5MatchinfoCtx *p; - int rc; + int rc = SQLITE_OK; if( nVal>0 ){ zArg = (const char*)sqlite3_value_text(apVal[0]); }else{ zArg = "pcx"; @@ -361,15 +361,20 @@ } p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0); if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){ p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg); - pApi->xSetAuxdata(pFts, p, sqlite3_free); - if( p==0 ) return; + if( p==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); + } } - rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb); + if( rc==SQLITE_OK ){ + rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb); + } if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); }else{ /* No errors has occured, so return a copy of the array of integers. */ int nByte = p->nRet * sizeof(u32); Index: ext/fts5/test/fts5_common.tcl ================================================================== --- ext/fts5/test/fts5_common.tcl +++ ext/fts5/test/fts5_common.tcl @@ -292,6 +292,39 @@ } proc NOT {a b} { if {[llength $b]>0} { return [list] } return $a } + +#------------------------------------------------------------------------- +# This command is similar to [split], except that it also provides the +# start and end offsets of each token. For example: +# +# [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8} +# + +proc gobble_whitespace {textvar} { + upvar $textvar t + regexp {([ ]*)(.*)} $t -> space t + return [string length $space] +} + +proc gobble_text {textvar wordvar} { + upvar $textvar t + upvar $wordvar w + regexp {([^ ]*)(.*)} $t -> w t + return [string length $w] +} + +proc fts5_tokenize_split {text} { + set token "" + set ret [list] + set iOff [gobble_whitespace text] + while {[set nToken [gobble_text text word]]} { + lappend ret $word $iOff [expr $iOff+$nToken] + incr iOff $nToken + incr iOff [gobble_whitespace text] + } + + set ret +} Index: ext/fts5/test/fts5fault6.test ================================================================== --- ext/fts5/test/fts5fault6.test +++ ext/fts5/test/fts5fault6.test @@ -145,8 +145,74 @@ CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc"); } } -test { faultsim_test_result {0 {}} } + +#------------------------------------------------------------------------- +# OOM while running a query that includes synonyms and matchinfo(). +# +proc mit {blob} { + set scan(littleEndian) i* + set scan(bigEndian) I* + binary scan $blob $scan($::tcl_platform(byteOrder)) r + return $r +} +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {$tflags=="query" && [string length $w]==1} { + for {set i 2} {$i < 7} {incr i} { + sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd + } + } + } +} +proc tcl_create {args} { return "tcl_tokenize" } +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +db func mit mit +sqlite3_fts5_register_matchinfo db +do_test 5.0 { + execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl) } + foreach {rowid text} { + 1 {aaaa cc b aaaaa cc aa} + 2 {aa aa bb a bbb} + 3 {bb aaaaa aaaaa b aaaa aaaaa} + 4 {aa a b aaaa aa} + 5 {aa b ccc aaaaa cc} + 6 {aa aaaaa bbbb cc aaa} + 7 {aaaaa aa aa ccccc bb} + 8 {ccc bbbbb ccccc bbb c} + 9 {cccccc bbbb a aaa cccc c} + } { + execsql { INSERT INTO t1(rowid, a) VALUES($rowid, $text) } + } +} {} + +set res [list {*}{ + 1 {3 24 8 2 12 6} + 5 {2 24 8 2 12 6} + 6 {3 24 8 1 12 6} + 7 {3 24 8 1 12 6} + 9 {2 24 8 3 12 6} +}] +do_execsql_test 5.1 { + SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c' +} $res + +faultsim_save_and_close +do_faultsim_test 5.2 -faults oom* -prep { + faultsim_restore_and_reopen + sqlite3_fts5_create_tokenizer db tcl tcl_create + sqlite3_fts5_register_matchinfo db + db func mit mit +} -body { + db eval { + SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c' + } +} -test { + faultsim_test_result [list 0 $::res] +} finish_test Index: ext/fts5/test/fts5synonym.test ================================================================== --- ext/fts5/test/fts5synonym.test +++ ext/fts5/test/fts5synonym.test @@ -38,38 +38,12 @@ foreach x $S {if {$x!=$s} {lappend o $x}} set ::syn($s) $o } } -proc gobble_whitespace {textvar} { - upvar $textvar t - regexp {([ ]*)(.*)} $t -> space t - return [string length $space] -} - -proc gobble_text {textvar wordvar} { - upvar $textvar t - upvar $wordvar w - regexp {([^ ]*)(.*)} $t -> w t - return [string length $w] -} - -proc do_tokenize_split {text} { - set token "" - set ret [list] - set iOff [gobble_whitespace text] - while {[set nToken [gobble_text text word]]} { - lappend ret $word $iOff [expr $iOff+$nToken] - incr iOff $nToken - incr iOff [gobble_whitespace text] - } - - set ret -} - proc tcl_tokenize {tflags text} { - foreach {w iStart iEnd} [do_tokenize_split $text] { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd } } proc tcl_create {args} { @@ -93,11 +67,11 @@ # Test a tokenizer that supports synonyms by adding extra entries to the # FTS index. # proc tcl_tokenize {tflags text} { - foreach {w iStart iEnd} [do_tokenize_split $text] { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd if {$tflags=="document" && [info exists ::syn($w)]} { foreach s $::syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd } @@ -135,11 +109,11 @@ # reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create proc tcl_tokenize {tflags text} { set bColo 1 - foreach {w iStart iEnd} [do_tokenize_split $text] { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { if {$bColo} { sqlite3_fts5_token -colo $w $iStart $iEnd set bColo 0 } { sqlite3_fts5_token $w $iStart $iEnd @@ -158,11 +132,11 @@ do_execsql_test 3.1.1 { INSERT INTO ft(ft) VALUES('integrity-check'); } {} proc tcl_tokenize {tflags text} { - foreach {w iStart iEnd} [do_tokenize_split $text] { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd } } do_execsql_test 3.1.2 { @@ -170,11 +144,11 @@ } {1} reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create proc tcl_tokenize {tflags text} { - foreach {w iStart iEnd} [do_tokenize_split $text] { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd sqlite3_fts5_token -colo $w $iStart $iEnd } } do_execsql_test 3.2.0 { @@ -205,11 +179,11 @@ # Check that expressions with synonyms can be parsed and executed. # reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create proc tcl_tokenize {tflags text} { - foreach {w iStart iEnd} [do_tokenize_split $text] { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd if {$tflags=="query" && [info exists ::syn($w)]} { foreach s $::syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd } @@ -328,11 +302,11 @@ # Test terms with more than 4 synonyms. # reset_db sqlite3_fts5_create_tokenizer db tcl tcl_create proc tcl_tokenize {tflags text} { - foreach {w iStart iEnd} [do_tokenize_split $text] { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { sqlite3_fts5_token $w $iStart $iEnd if {$tflags=="query" && [string length $w]==1} { for {set i 2} {$i<=10} {incr i} { sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd }