Index: ext/fts5/extract_api_docs.tcl ================================================================== --- ext/fts5/extract_api_docs.tcl +++ ext/fts5/extract_api_docs.tcl @@ -106,17 +106,19 @@ regexp {[*][*](.*)} $line -> line if {[regexp {^ ?x.*:} $line]} { append res "
$line

\n" continue } + if {[regexp {SYNONYM SUPPORT} $line]} { + set line "

Synonym Support

" + } if {[string trim $line] == ""} { append res "

\n" } else { append res "$line\n" } } - append res "\n" set res } proc get_api_docs {data} { @@ -206,10 +208,14 @@ output [get_fts5_struct $data "typedef struct fts5_api" "^\};"] } fts5_tokenizer { output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"] + output [get_fts5_struct $data \ + "Flags that may be passed as the third argument to xTokenize()" \ + "#define FTS5_TOKEN_COLOCATED" + ] } fts5_extension { output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"] } Index: ext/fts5/fts5.h ================================================================== --- ext/fts5/fts5.h +++ ext/fts5/fts5.h @@ -215,11 +215,11 @@ int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ - int (*xToken)(void*, const char*, int, int, int) /* Callback */ + int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ ); int (*xPhraseCount)(Fts5Context*); int (*xPhraseSize)(Fts5Context*, int iPhrase); @@ -276,21 +276,49 @@ ** allocated using xCreate(). Fts5 guarantees that this function will ** be invoked exactly once for each successful call to xCreate(). ** ** xTokenize: ** This function is expected to tokenize the nText byte string indicated -** by argument pText. pText may not be nul-terminated. The first argument -** passed to this function is a pointer to an Fts5Tokenizer object returned -** by an earlier call to xCreate(). +** by argument pText. pText may or may not be nul-terminated. The first +** argument passed to this function is a pointer to an Fts5Tokenizer object +** returned by an earlier call to xCreate(). +** +** The second argument indicates the reason that FTS5 is requesting +** tokenization of the supplied text. This is always one of the following +** four values: +** +**

** ** For each token in the input string, the supplied callback xToken() must ** be invoked. The first argument to it should be a copy of the pointer -** passed as the second argument to xTokenize(). The next two arguments -** are a pointer to a buffer containing the token text, and the size of -** the token in bytes. The 4th and 5th arguments are the byte offsets of -** the first byte of and first byte immediately following the text from +** passed as the second argument to xTokenize(). The third and fourth +** arguments are a pointer to a buffer containing the token text, and the +** size of the token in bytes. The 4th and 5th arguments are the byte offsets +** of the first byte of and first byte immediately following the text from ** which the token is derived within the input. +** +** The second argument passed to the xToken() callback ("tflags") should +** normally be set to 0. The exception is if the tokenizer supports +** synonyms. In this case see the discussion below for details. ** ** FTS5 assumes the xToken() callback is invoked for each token in the ** order that they occur within the input text. ** ** If an xToken() callback returns any value other than SQLITE_OK, then @@ -299,39 +327,157 @@ ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, ** if an error occurs with the xTokenize() implementation itself, it ** may abandon the tokenization and return any error code other than ** SQLITE_OK or SQLITE_DONE. ** +** SYNONYM SUPPORT +** +** Custom tokenizers may also support synonyms. Consider a case in which a +** user wishes to query for a phrase such as "first place". Using the +** built-in tokenizers, the FTS5 query 'first + place' will match instances +** of "first place" within the document set, but not alternative forms +** such as "1st place". In some applications, it would be better to match +** all instances of "first place" or "1st place" regardless of which form +** the user specified in the MATCH query text. +** +** There are several ways to approach this in FTS5: +** +**
  1. By mapping all synonyms to a single token. In this case, the +** In the above example, this means that the tokenizer returns the +** same token for inputs "first" and "1st". Say that token is in +** fact "first", so that when the user inserts the document "I won +** 1st place" entries are added to the index for tokens "i", "won", +** "first" and "place". If the user then queries for '1st + place', +** the tokenizer substitutes "first" for "1st" and the query works +** as expected. +** +**
  2. By adding multiple synonyms for a single term to the FTS index. +** In this case, when tokenizing query text, the tokenizer may +** provide multiple synonyms for a single term within the document. +** FTS5 then queries the index for each synonym individually. For +** example, faced with the query: +** +** +** ... MATCH 'first place' +** +** the tokenizer offers both "1st" and "first" as synonyms for the +** first token in the MATCH query and FTS5 effectively runs a query +** similar to: +** +** +** ... MATCH '(first OR 1st) place' +** +** except that, for the purposes of auxiliary functions, the query +** still appears to contain just two phrases - "(first OR 1st)" +** being treated as a single phrase. +** +**
  3. By adding multiple synonyms for a single term to the FTS index. +** Using this method, when tokenizing document text, the tokenizer +** provides multiple synonyms for each token. So that when a +** document such as "I won first place" is tokenized, entries are +** added to the FTS index for "i", "won", "first", "1st" and +** "place". +** +** This way, even if the tokenizer does not provide synonyms +** when tokenizing query text (it should not - to do would be +** inefficient), it doesn't matter if the user queries for +** 'first + place' or '1st + place', as there are entires in the +** FTS index corresponding to both forms of the first token. +**
+** +** Whether is is parsing document or query text, any call to xToken that +** specifies a tflags argument with the FTS5_TOKEN_COLOCATED bit +** is considered to supply a synonym for the previous token. For example, +** when parsing the document "I won first place", a tokenizer that supports +** synonyms would call xToken() 5 times, as follows: +** +** +** xToken(pCtx, 0, "i", 1, 0, 1); +** xToken(pCtx, 0, "won", 3, 2, 5); +** xToken(pCtx, 0, "first", 5, 6, 11); +** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11); +** xToken(pCtx, 0, "place", 5, 12, 17); +** +** +** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time +** xToken() is called. Multiple synonyms may be specified for a single token +** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. +** There is no limit to the number of synonyms that may be provided for a +** single token. +** +** In many cases, method (1) above is the best approach. It does not add +** extra data to the FTS index or require FTS5 to query for multiple terms, +** so it is efficient in terms of disk space and query speed. However, it +** does not support prefix queries very well. If, as suggested above, the +** token "first" is subsituted for "1st" by the tokenizer, then the query: +** +** +** ... MATCH '1s*' +** +** will not match documents that contain the token "1st" (as the tokenizer +** will probably not map "1s" to any prefix of "first"). +** +** For full prefix support, method (3) may be preferred. In this case, +** because the index contains entries for both "first" and "1st", prefix +** queries such as 'fi*' or '1s*' will match correctly. However, because +** extra entries are added to the FTS index, this method uses more space +** within the database. +** +** Method (2) offers a midpoint between (1) and (3). Using this method, +** a query such as '1s*' will match documents that contain the literal +** token "1st", but not "first" (assuming the tokenizer is not able to +** provide synonyms for prefixes). However, a non-prefix query like '1st' +** will match against "1st" and "first". This method does not require +** extra disk space, as no extra entries are added to the FTS index. +** On the other hand, it may require more CPU cycles to run MATCH queries, +** as separate queries of the FTS index are required for each synonym. +** +** When using methods (2) or (3), it is important that the tokenizer only +** provide synonyms when tokenizing document text (method (2)) or query +** text (method (3)), not both. Doing so will not cause any errors, but is +** inefficient. */ typedef struct Fts5Tokenizer Fts5Tokenizer; typedef struct fts5_tokenizer fts5_tokenizer; struct fts5_tokenizer { int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); void (*xDelete)(Fts5Tokenizer*); int (*xTokenize)(Fts5Tokenizer*, void *pCtx, + int flags, /* Mask of FTS5_TOKENIZE_* flags */ const char *pText, int nText, int (*xToken)( void *pCtx, /* Copy of 2nd argument to xTokenize() */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ int iEnd /* Byte offset of end of token within input text */ ) ); }; +/* Flags that may be passed as the third argument to xTokenize() */ +#define FTS5_TOKENIZE_QUERY 0x0001 +#define FTS5_TOKENIZE_PREFIX 0x0002 +#define FTS5_TOKENIZE_DOCUMENT 0x0004 +#define FTS5_TOKENIZE_AUX 0x0008 + +/* Flags that may be passed by the tokenizer implementation back to FTS5 +** as the third argument to the supplied xToken callback. */ +#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */ + /* ** END OF CUSTOM TOKENIZERS *************************************************************************/ /************************************************************************* ** FTS5 EXTENSION REGISTRATION API */ typedef struct fts5_api fts5_api; struct fts5_api { - int iVersion; /* Currently always set to 1 */ + int iVersion; /* Currently always set to 2 */ /* Create a new tokenizer */ int (*xCreateTokenizer)( fts5_api *pApi, const char *zName, Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -115,10 +115,16 @@ ** not understand. ** ** bColumnsize: ** True if the %_docsize table is created. ** +** bPrefixIndex: +** This is only used for debugging. If set to false, any prefix indexes +** are ignored. This value is configured using: +** +** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex); +** */ struct Fts5Config { sqlite3 *db; /* Database handle */ char *zDb; /* Database holding FTS index (e.g. "main") */ char *zName; /* Name of FTS index */ @@ -143,14 +149,18 @@ char *zRank; /* Name of rank function */ char *zRankArgs; /* Arguments to rank function */ /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ char **pzErrmsg; + +#ifdef SQLITE_DEBUG + int bPrefixIndex; /* True to use prefix-indexes */ +#endif }; /* Current expected value of %_config table 'version' field */ -#define FTS5_CURRENT_VERSION 3 +#define FTS5_CURRENT_VERSION 4 #define FTS5_CONTENT_NORMAL 0 #define FTS5_CONTENT_NONE 1 #define FTS5_CONTENT_EXTERNAL 2 @@ -164,13 +174,14 @@ int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ + int flags, /* FTS5_TOKENIZE_* flags */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ - int (*xToken)(void*, const char*, int, int, int) /* Callback */ + int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ ); void sqlite3Fts5Dequote(char *z); /* Load the contents of the %_config table */ @@ -232,12 +243,14 @@ int iCol; /* If (iCol>=0), this column only */ const u8 *a; /* Position list to iterate through */ int n; /* Size of buffer at a[] in bytes */ int i; /* Current offset in a[] */ + u8 bFlag; /* For client use (any custom purpose) */ + /* Output variables */ - int bEof; /* Set to true at EOF */ + u8 bEof; /* Set to true at EOF */ i64 iPos; /* (iCol<<32) + iPos */ }; int sqlite3Fts5PoslistReaderInit( int iCol, /* If (iCol>=0), this column only */ const u8 *a, int n, /* Poslist buffer to iterate through */ @@ -373,19 +386,13 @@ ** records must be invalidated. */ int sqlite3Fts5IndexRollback(Fts5Index *p); /* -** Retrieve and clear the current error code, respectively. -*/ -int sqlite3Fts5IndexErrcode(Fts5Index*); -void sqlite3Fts5IndexReset(Fts5Index*); - -/* -** Get or set the "averages" record. -*/ -int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf); +** Get or set the "averages" values. +*/ +int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize); int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); /* ** Functions called by the storage module as part of integrity-check. */ @@ -594,11 +601,11 @@ int sqlite3Fts5ExprPhraseCount(Fts5Expr*); int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); -int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**); +int sqlite3Fts5ExprClonePhrase(Fts5Config*, Fts5Expr*, int, Fts5Expr**); /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written ** C code in this module. The interfaces below this point are called by ** the parser code in fts5parse.y. */ @@ -661,21 +668,10 @@ */ int sqlite3Fts5TokenizerInit(fts5_api*); /* ** End of interface to code in fts5_tokenizer.c. -**************************************************************************/ - -/************************************************************************** -** Interface to code in fts5_sorter.c. -*/ -typedef struct Fts5Sorter Fts5Sorter; - -int sqlite3Fts5SorterNew(Fts5Expr *pExpr, Fts5Sorter **pp); - -/* -** End of interface to code in fts5_sorter.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_vocab.c. */ Index: ext/fts5/fts5_aux.c ================================================================== --- ext/fts5/fts5_aux.c +++ ext/fts5/fts5_aux.c @@ -146,18 +146,22 @@ /* ** Tokenizer callback used by implementation of highlight() function. */ static int fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start offset of token */ int iEndOff /* End offset of token */ ){ HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; - int iPos = p->iPos++; + int iPos; + + if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK; + iPos = p->iPos++; if( p->iRangeEnd>0 ){ if( iPosiRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; } Index: ext/fts5/fts5_buffer.c ================================================================== --- ext/fts5/fts5_buffer.c +++ ext/fts5/fts5_buffer.c @@ -14,16 +14,18 @@ #include "fts5Int.h" int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){ - /* A no-op if an error has already occurred */ - if( *pRc ) return 1; if( (pBuf->n + nByte) > pBuf->nSpace ){ u8 *pNew; int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64; + + /* A no-op if an error has already occurred */ + if( *pRc ) return 1; + while( nNew<(pBuf->n + nByte) ){ nNew = nNew * 2; } pNew = sqlite3_realloc(pBuf->p, nNew); if( pNew==0 ){ Index: ext/fts5/fts5_config.c ================================================================== --- ext/fts5/fts5_config.c +++ ext/fts5/fts5_config.c @@ -478,10 +478,13 @@ pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); pRet->bColumnsize = 1; +#ifdef SQLITE_DEBUG + pRet->bPrefixIndex = 1; +#endif if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); rc = SQLITE_ERROR; } @@ -643,16 +646,19 @@ ** because the callback returned another non-zero value, it is assumed ** to be an SQLite error code and returned to the caller. */ int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ + int flags, /* FTS5_TOKENIZE_* flags */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ - int (*xToken)(void*, const char*, int, int, int) /* Callback */ + int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ ){ if( pText==0 ) return SQLITE_OK; - return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken); + return pConfig->pTokApi->xTokenize( + pConfig->pTok, pCtx, flags, pText, nText, xToken + ); } /* ** Argument pIn points to the first character in what is expected to be ** a comma-separated list of SQL literals followed by a ')' character. Index: ext/fts5/fts5_expr.c ================================================================== --- ext/fts5/fts5_expr.c +++ ext/fts5/fts5_expr.c @@ -20,10 +20,12 @@ /* ** All token types in the generated fts5parse.h file are greater than 0. */ #define FTS5_EOF 0 +#define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) + typedef struct Fts5ExprTerm Fts5ExprTerm; /* ** Functions generated by lemon from fts5parse.y. */ @@ -71,10 +73,11 @@ */ struct Fts5ExprTerm { int bPrefix; /* True for a prefix term */ char *zTerm; /* nul-terminated term */ Fts5IndexIter *pIter; /* Iterator for this term */ + Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ }; /* ** A phrase. One or more terms that must appear in a contiguous sequence ** within a document for it to match. @@ -179,10 +182,14 @@ break; } default: { const char *z2; + if( sqlite3Fts5IsBareword(z[0])==0 ){ + sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z); + return FTS5_EOF; + } tok = FTS5_STRING; for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); pToken->n = (z2 - z); if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; @@ -242,83 +249,10 @@ sqlite3_free(sParse.apPhrase); *pzErr = sParse.zErr; return sParse.rc; } -/* -** Create a new FTS5 expression by cloning phrase iPhrase of the -** expression passed as the second argument. -*/ -int sqlite3Fts5ExprPhraseExpr( - Fts5Config *pConfig, - Fts5Expr *pExpr, - int iPhrase, - Fts5Expr **ppNew -){ - int rc = SQLITE_OK; /* Return code */ - Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ - Fts5ExprPhrase *pCopy; /* Copy of pOrig */ - Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ - - pOrig = pExpr->apExprPhrase[iPhrase]; - pCopy = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(&rc, - sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm - ); - if( pCopy ){ - int i; /* Used to iterate through phrase terms */ - Fts5ExprPhrase **apPhrase; - Fts5ExprNode *pNode; - Fts5ExprNearset *pNear; - - pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); - apPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, - sizeof(Fts5ExprPhrase*) - ); - pNode = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode)); - pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, - sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*) - ); - - for(i=0; inTerm; i++){ - pCopy->aTerm[i].zTerm = sqlite3Fts5Strndup(&rc, pOrig->aTerm[i].zTerm,-1); - pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; - } - - if( rc==SQLITE_OK ){ - /* All the allocations succeeded. Put the expression object together. */ - pNew->pIndex = pExpr->pIndex; - pNew->pRoot = pNode; - pNew->nPhrase = 1; - pNew->apExprPhrase = apPhrase; - pNew->apExprPhrase[0] = pCopy; - - pNode->eType = (pOrig->nTerm==1 ? FTS5_TERM : FTS5_STRING); - pNode->pNear = pNear; - - pNear->nPhrase = 1; - pNear->apPhrase[0] = pCopy; - - pCopy->nTerm = pOrig->nTerm; - pCopy->pNode = pNode; - }else{ - /* At least one allocation failed. Free them all. */ - for(i=0; inTerm; i++){ - sqlite3_free(pCopy->aTerm[i].zTerm); - } - sqlite3_free(pCopy); - sqlite3_free(pNear); - sqlite3_free(pNode); - sqlite3_free(apPhrase); - sqlite3_free(pNew); - pNew = 0; - } - } - - *ppNew = pNew; - return rc; -} - /* ** Free the expression node object passed as the only argument. */ void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ if( p ){ @@ -347,10 +281,118 @@ for(i=0; inCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } + +/* +** Argument pTerm must be a synonym iterator. Return the current rowid +** that it points to. +*/ +static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){ + i64 iRet = 0; + int bRetValid = 0; + Fts5ExprTerm *p; + + assert( pTerm->pSynonym ); + assert( bDesc==0 || bDesc==1 ); + for(p=pTerm; p; p=p->pSynonym){ + if( 0==sqlite3Fts5IterEof(p->pIter) ){ + i64 iRowid = sqlite3Fts5IterRowid(p->pIter); + if( bRetValid==0 || (bDesc!=(iRowidpSynonym ); + for(p=pTerm; p; p=p->pSynonym){ + Fts5IndexIter *pIter = p->pIter; + if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){ + const u8 *a; + int n; + i64 dummy; + rc = sqlite3Fts5IterPoslist(pIter, &a, &n, &dummy); + if( rc!=SQLITE_OK ) goto synonym_poslist_out; + if( nIter==nAlloc ){ + int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; + Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte); + if( aNew==0 ){ + rc = SQLITE_NOMEM; + goto synonym_poslist_out; + } + memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); + nAlloc = nAlloc*2; + if( aIter!=aStatic ) sqlite3_free(aIter); + aIter = aNew; + } + sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[nIter]); + assert( aIter[nIter].bEof==0 ); + nIter++; + } + } + + assert( *pbDel==0 ); + if( nIter==1 ){ + *pa = (u8*)aIter[0].a; + *pn = aIter[0].n; + }else{ + Fts5PoslistWriter writer = {0}; + Fts5Buffer buf = {0,0,0}; + i64 iPrev = -1; + while( 1 ){ + int i; + i64 iMin = FTS5_LARGEST_INT64; + for(i=0; inTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( !aIter ) return SQLITE_NOMEM; } + memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm); /* Initialize a term iterator for each term in the phrase */ for(i=0; inTerm; i++){ + Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; i64 dummy; - int n; - const u8 *a; - rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy); - if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){ - goto ismatch_out; + int n = 0; + int bFlag = 0; + const u8 *a = 0; + if( pTerm->pSynonym ){ + rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, &bFlag, (u8**)&a, &n); + }else{ + rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy); } + if( rc!=SQLITE_OK ) goto ismatch_out; + sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]); + aIter[i].bFlag = bFlag; + if( aIter[i].bEof ) goto ismatch_out; } while( 1 ){ int bMatch; i64 iPos = aIter[0].iPos; @@ -429,10 +479,13 @@ } } ismatch_out: *pbMatch = (pPhrase->poslist.n>0); + for(i=0; inTerm; i++){ + if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a); + } if( aIter!=aStatic ) sqlite3_free(aIter); return rc; } typedef struct Fts5LookaheadReader Fts5LookaheadReader; @@ -596,21 +649,59 @@ Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ int bFromValid, i64 iFrom ){ - Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; - int rc; + Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; + int rc = SQLITE_OK; - assert( Fts5NodeIsString(pNode) ); - if( bFromValid ){ - rc = sqlite3Fts5IterNextFrom(pIter, iFrom); + if( pTerm->pSynonym ){ + int bEof = 1; + Fts5ExprTerm *p; + + /* Find the firstest rowid any synonym points to. */ + i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); + + /* Advance each iterator that currently points to iRowid. Or, if iFrom + ** is valid - each iterator that points to a rowid before iFrom. */ + for(p=pTerm; p; p=p->pSynonym){ + if( sqlite3Fts5IterEof(p->pIter)==0 ){ + i64 ii = sqlite3Fts5IterRowid(p->pIter); + if( ii==iRowid + || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) + ){ + if( bFromValid ){ + rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); + }else{ + rc = sqlite3Fts5IterNext(p->pIter); + } + if( rc!=SQLITE_OK ) break; + if( sqlite3Fts5IterEof(p->pIter)==0 ){ + bEof = 0; + } + }else{ + bEof = 0; + } + } + } + + /* Set the EOF flag if either all synonym iterators are at EOF or an + ** error has occurred. */ + pNode->bEof = (rc || bEof); }else{ - rc = sqlite3Fts5IterNext(pIter); + Fts5IndexIter *pIter = pTerm->pIter; + + assert( Fts5NodeIsString(pNode) ); + if( bFromValid ){ + rc = sqlite3Fts5IterNextFrom(pIter, iFrom); + }else{ + rc = sqlite3Fts5IterNext(pIter); + } + + pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); } - pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); return rc; } /* ** Advance iterator pIter until it points to a value equal to or laster @@ -644,10 +735,39 @@ } *piLast = iRowid; return 0; } + +static int fts5ExprSynonymAdvanceto( + Fts5ExprTerm *pTerm, /* Term iterator to advance */ + int bDesc, /* True if iterator is "rowid DESC" */ + i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ + int *pRc /* OUT: Error code */ +){ + int rc = SQLITE_OK; + i64 iLast = *piLast; + Fts5ExprTerm *p; + int bEof = 0; + + for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){ + if( sqlite3Fts5IterEof(p->pIter)==0 ){ + i64 iRowid = sqlite3Fts5IterRowid(p->pIter); + if( (bDesc==0 && iLast>iRowid) || (bDesc && iLastpIter, iLast); + } + } + } + + if( rc!=SQLITE_OK ){ + *pRc = rc; + bEof = 1; + }else{ + *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof); + } + return bEof; +} /* ** IN/OUT parameter (*pa) points to a position list n bytes in size. If ** the position list contains entries for column iCol, then (*pa) is set ** to point to the sub-position-list for that column and the number of @@ -715,13 +835,13 @@ /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && inPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - if( pPhrase->nTerm>1 || pNear->pColset ){ + if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){ int bMatch = 0; - rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch); + rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ rc = sqlite3Fts5IterPoslistBuffer( pPhrase->aTerm[0].pIter, &pPhrase->poslist ); @@ -753,10 +873,11 @@ int nPos; int rc; assert( pNode->eType==FTS5_TERM ); assert( pNear->nPhrase==1 && pPhrase->nTerm==1 ); + assert( pPhrase->aTerm[0].pSynonym==0 ); rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); /* If the term may match any column, then this must be a match. ** Return immediately in this case. Otherwise, try to find the @@ -799,73 +920,101 @@ Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; int rc = SQLITE_OK; i64 iLast; /* Lastest rowid any iterator points to */ int i, j; /* Phrase and token index, respectively */ int bMatch; /* True if all terms are at the same rowid */ + const int bDesc = pExpr->bDesc; - assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 ); + /* Check that this node should not be FTS5_TERM */ + assert( pNear->nPhrase>1 + || pNear->apPhrase[0]->nTerm>1 + || pNear->apPhrase[0]->aTerm[0].pSynonym + ); /* Initialize iLast, the "lastest" rowid any iterator points to. If the ** iterator skips through rowids in the default ascending order, this means ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it ** means the minimum rowid. */ - iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter); + if( pLeft->aTerm[0].pSynonym ){ + iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0); + }else{ + iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter); + } do { bMatch = 1; for(i=0; inPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; for(j=0; jnTerm; j++){ - Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; - i64 iRowid = sqlite3Fts5IterRowid(pIter); - if( iRowid!=iLast ) bMatch = 0; - if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){ - return rc; + Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; + if( pTerm->pSynonym ){ + i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0); + if( iRowid==iLast ) continue; + bMatch = 0; + if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){ + pNode->bEof = 1; + return rc; + } + }else{ + Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; + i64 iRowid = sqlite3Fts5IterRowid(pIter); + if( iRowid==iLast ) continue; + bMatch = 0; + if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ + return rc; + } } } } }while( bMatch==0 ); - pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode)); pNode->iRowid = iLast; + pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode)); return rc; } /* ** Initialize all term iterators in the pNear object. If any term is found -** to match no documents at all, set *pbEof to true and return immediately, -** without initializing any further iterators. +** to match no documents at all, return immediately without initializing any +** further iterators. */ static int fts5ExprNearInitAll( Fts5Expr *pExpr, Fts5ExprNode *pNode ){ Fts5ExprNearset *pNear = pNode->pNear; - Fts5ExprTerm *pTerm; - Fts5ExprPhrase *pPhrase; int i, j; int rc = SQLITE_OK; for(i=0; rc==SQLITE_OK && inPhrase; i++){ - pPhrase = pNear->apPhrase[i]; + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; for(j=0; jnTerm; j++){ - pTerm = &pPhrase->aTerm[j]; - if( pTerm->pIter ){ - sqlite3Fts5IterClose(pTerm->pIter); - pTerm->pIter = 0; - } - rc = sqlite3Fts5IndexQuery( - pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), - (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | - (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), - &pTerm->pIter - ); - assert( rc==SQLITE_OK || pTerm->pIter==0 ); - if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){ + Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; + Fts5ExprTerm *p; + int bEof = 1; + + for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){ + if( p->pIter ){ + sqlite3Fts5IterClose(p->pIter); + p->pIter = 0; + } + rc = sqlite3Fts5IndexQuery( + pExpr->pIndex, p->zTerm, strlen(p->zTerm), + (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | + (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), + &p->pIter + ); + assert( rc==SQLITE_OK || p->pIter==0 ); + if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){ + bEof = 0; + } + } + + if( bEof ){ pNode->bEof = 1; - break; + return rc; } } } return rc; @@ -1027,14 +1176,21 @@ rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom); break; }; case FTS5_TERM: { - rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom); - if( pNode->bEof==0 ){ + Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; + if( bFromValid ){ + rc = sqlite3Fts5IterNextFrom(pIter, iFrom); + }else{ + rc = sqlite3Fts5IterNext(pIter); + } + if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){ assert( rc==SQLITE_OK ); rc = fts5ExprTokenTest(pExpr, pNode); + }else{ + pNode->bEof = 1; } return rc; }; case FTS5_AND: { @@ -1264,14 +1420,20 @@ */ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ if( pPhrase ){ int i; for(i=0; inTerm; i++){ + Fts5ExprTerm *pSyn; + Fts5ExprTerm *pNext; Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); - if( pTerm->pIter ){ - sqlite3Fts5IterClose(pTerm->pIter); + sqlite3Fts5IterClose(pTerm->pIter); + + for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ + pNext = pSyn->pSynonym; + sqlite3Fts5IterClose(pSyn->pIter); + sqlite3_free(pSyn); } } if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); } @@ -1329,45 +1491,72 @@ } typedef struct TokenCtx TokenCtx; struct TokenCtx { Fts5ExprPhrase *pPhrase; + int rc; }; /* ** Callback for tokenizing terms used by ParseTerm(). */ static int fts5ParseTokenize( void *pContext, /* Pointer to Fts5InsertCtx object */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ - int iStart, /* Start offset of token */ - int iEnd /* End offset of token */ + int iUnused1, /* Start offset of token */ + int iUnused2 /* End offset of token */ ){ int rc = SQLITE_OK; const int SZALLOC = 8; TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; - Fts5ExprTerm *pTerm; - - if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ - Fts5ExprPhrase *pNew; - int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); - - pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, - sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew - ); - if( pNew==0 ) return SQLITE_NOMEM; - if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); - pCtx->pPhrase = pPhrase = pNew; - pNew->nTerm = nNew - SZALLOC; - } - - pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; - memset(pTerm, 0, sizeof(Fts5ExprTerm)); - pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); - + + /* If an error has already occurred, this is a no-op */ + if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; + + assert( pPhrase==0 || pPhrase->nTerm>0 ); + if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){ + Fts5ExprTerm *pSyn; + int nByte = sizeof(Fts5ExprTerm) + nToken+1; + pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); + if( pSyn==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pSyn, 0, nByte); + pSyn->zTerm = (char*)&pSyn[1]; + memcpy(pSyn->zTerm, pToken, nToken); + pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; + pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; + } + }else{ + Fts5ExprTerm *pTerm; + if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ + Fts5ExprPhrase *pNew; + int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); + + pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, + sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew + ); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); + pCtx->pPhrase = pPhrase = pNew; + pNew->nTerm = nNew - SZALLOC; + } + } + + if( rc==SQLITE_OK ){ + pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; + memset(pTerm, 0, sizeof(Fts5ExprTerm)); + pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); + } + } + + pCtx->rc = rc; return rc; } /* @@ -1415,15 +1604,18 @@ memset(&sCtx, 0, sizeof(TokenCtx)); sCtx.pPhrase = pAppend; rc = fts5ParseStringFromToken(pToken, &z); if( rc==SQLITE_OK ){ + int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0); + int n; sqlite3Fts5Dequote(z); - rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize); + n = strlen(z); + rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize); } sqlite3_free(z); - if( rc ){ + if( rc || (rc = sCtx.rc) ){ pParse->rc = rc; fts5ExprPhraseFree(sCtx.pPhrase); sCtx.pPhrase = 0; }else if( sCtx.pPhrase ){ @@ -1447,10 +1639,83 @@ sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; } return sCtx.pPhrase; } + +/* +** Create a new FTS5 expression by cloning phrase iPhrase of the +** expression passed as the second argument. +*/ +int sqlite3Fts5ExprClonePhrase( + Fts5Config *pConfig, + Fts5Expr *pExpr, + int iPhrase, + Fts5Expr **ppNew +){ + int rc = SQLITE_OK; /* Return code */ + Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ + int i; /* Used to iterate through phrase terms */ + + Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ + + TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */ + + + pOrig = pExpr->apExprPhrase[iPhrase]; + + pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); + if( rc==SQLITE_OK ){ + pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, + sizeof(Fts5ExprPhrase*)); + } + if( rc==SQLITE_OK ){ + pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, + sizeof(Fts5ExprNode)); + } + if( rc==SQLITE_OK ){ + pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, + sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)); + } + + for(i=0; rc==SQLITE_OK && inTerm; i++){ + int tflags = 0; + Fts5ExprTerm *p; + for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ + const char *zTerm = p->zTerm; + rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, strlen(zTerm), 0, 0); + tflags = FTS5_TOKEN_COLOCATED; + } + if( rc==SQLITE_OK ){ + sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; + } + } + + if( rc==SQLITE_OK ){ + /* All the allocations succeeded. Put the expression object together. */ + pNew->pIndex = pExpr->pIndex; + pNew->nPhrase = 1; + pNew->apExprPhrase[0] = sCtx.pPhrase; + pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; + pNew->pRoot->pNear->nPhrase = 1; + sCtx.pPhrase->pNode = pNew->pRoot; + + if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){ + pNew->pRoot->eType = FTS5_TERM; + }else{ + pNew->pRoot->eType = FTS5_STRING; + } + }else{ + sqlite3Fts5ExprFree(pNew); + fts5ExprPhraseFree(sCtx.pPhrase); + pNew = 0; + } + + *ppNew = pNew; + return rc; +} + /* ** Token pTok has appeared in a MATCH expression where the NEAR operator ** is expected. If token pTok does not contain "NEAR", store an error ** in the pParse object. @@ -1628,11 +1893,14 @@ if( eType==FTS5_STRING ){ int iPhrase; for(iPhrase=0; iPhrasenPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; } - if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ + if( pNear->nPhrase==1 + && pNear->apPhrase[0]->nTerm==1 + && pNear->apPhrase[0]->aTerm[0].pSynonym==0 + ){ pRet->eType = FTS5_TERM; } }else{ fts5ExprAddChildren(pRet, pLeft); fts5ExprAddChildren(pRet, pRight); @@ -1648,20 +1916,32 @@ } return pRet; } static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ - char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2); + int nByte = 0; + Fts5ExprTerm *p; + char *zQuoted; + + /* Determine the maximum amount of space required. */ + for(p=pTerm; p; p=p->pSynonym){ + nByte += strlen(pTerm->zTerm) * 2 + 3 + 2; + } + zQuoted = sqlite3_malloc(nByte); + if( zQuoted ){ int i = 0; - char *zIn = pTerm->zTerm; - zQuoted[i++] = '"'; - while( *zIn ){ - if( *zIn=='"' ) zQuoted[i++] = '"'; - zQuoted[i++] = *zIn++; + for(p=pTerm; p; p=p->pSynonym){ + char *zIn = p->zTerm; + zQuoted[i++] = '"'; + while( *zIn ){ + if( *zIn=='"' ) zQuoted[i++] = '"'; + zQuoted[i++] = *zIn++; + } + zQuoted[i++] = '"'; + if( p->pSynonym ) zQuoted[i++] = '|'; } - zQuoted[i++] = '"'; if( pTerm->bPrefix ){ zQuoted[i++] = ' '; zQuoted[i++] = '*'; } zQuoted[i++] = '\0'; Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -85,27 +85,27 @@ ** ** + number of input segments in ongoing merge. ** + total number of segments in level. ** + for each segment from oldest to newest: ** + segment id (always > 0) -** + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.) ** + first leaf page number (often 1, always greater than 0) ** + final leaf page number ** ** 2. The Averages Record: ** ** A single record within the %_data table. The data is a list of varints. ** The first value is the number of rows in the index. Then, for each column -** from left to right, the total number of tokens in the column for all +** from left to right, the total number of tokens in the column for all ** rows of the table. ** ** 3. Segment leaves: ** -** TERM DOCLIST FORMAT: +** TERM/DOCLIST FORMAT: ** ** Most of each segment leaf is taken up by term/doclist data. The -** general format of the term/doclist data is: +** general format of term/doclist, starting with the first term +** on the leaf page, is: ** ** varint : size of first term ** blob: first term data ** doclist: first doclist ** zero-or-more { @@ -121,11 +121,10 @@ ** poslist: first poslist ** zero-or-more { ** varint: rowid delta (always > 0) ** poslist: next poslist ** } -** 0x00 byte ** ** poslist format: ** ** varint: size of poslist in bytes multiplied by 2, not including ** this field. Plus 1 if this entry carries the "delete" flag. @@ -141,15 +140,32 @@ ** varint: first offset + 2 ** zero-or-more { ** varint: offset delta + 2 ** } ** -** PAGINATION +** PAGE FORMAT ** -** The format described above is only accurate if the entire term/doclist -** data fits on a single leaf page. If this is not the case, the format -** is changed in two ways: +** Each leaf page begins with a 4-byte header containing 2 16-bit +** unsigned integer fields in big-endian format. They are: +** +** * The byte offset of the first rowid on the page, if it exists +** and occurs before the first term (otherwise 0). +** +** * The byte offset of the start of the page footer. If the page +** footer is 0 bytes in size, then this field is the same as the +** size of the leaf page in bytes. +** +** The page footer consists of a single varint for each term located +** on the page. Each varint is the byte offset of the current term +** within the page, delta-compressed against the previous value. In +** other words, the first varint in the footer is the byte offset of +** the first term, the second is the byte offset of the second less that +** of the first, and so on. +** +** The term/doclist format described above is accurate if the entire +** term/doclist data fits on a single leaf page. If this is not the case, +** the format is changed in two ways: ** ** + if the first rowid on a page occurs before the first term, it ** is stored as a literal value: ** ** varint: first rowid @@ -158,49 +174,10 @@ ** very first term of the segment: ** ** varint : size of first term ** blob: first term data ** -** Each leaf page begins with: -** -** + 2-byte unsigned containing offset to first rowid (or 0). -** + 2-byte unsigned containing offset to first term (or 0). -** -** Followed by term/doclist data. -** -** 4. Segment interior nodes: -** -** The interior nodes turn the list of leaves into a b+tree. -** -** Each interior node begins with a varint - the page number of the left -** most child node. Following this, for each leaf page except the first, -** the interior nodes contain: -** -** a) If the leaf page contains at least one term, then a term-prefix that -** is greater than all previous terms, and less than or equal to the -** first term on the leaf page. -** -** b) If the leaf page no terms, a record indicating how many consecutive -** leaves contain no terms, and whether or not there is an associated -** by-rowid index record. -** -** By definition, there is never more than one type (b) record in a row. -** Type (b) records only ever appear on height=1 pages - immediate parents -** of leaves. Only type (a) records are pushed to higher levels. -** -** Term format: -** -** * Number of bytes in common with previous term plus 2, as a varint. -** * Number of bytes of new term data, as a varint. -** * new term data. -** -** No-term format: -** -** * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there -** is an associated index-by-rowid record. -** * the number of zero-term leaves as a varint. -** ** 5. Segment doclist indexes: ** ** Doclist indexes are themselves b-trees, however they usually consist of ** a single leaf record only. The format of each doclist index leaf page ** is: @@ -235,43 +212,34 @@ */ #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ #define FTS5_STRUCTURE_ROWID 10 /* The structure record */ /* -** Macros determining the rowids used by segment nodes. All nodes in all -** segments for all indexes (the regular FTS index and any prefix indexes) -** are stored in the %_data table with large positive rowids. -** -** The %_data table may contain up to (1<szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \ +) #define FTS5_SEGITER_ONETERM 0x01 #define FTS5_SEGITER_REVERSE 0x02 +/* +** Argument is a pointer to an Fts5Data structure that contains a leaf +** page. This macro evaluates to true if the leaf contains no terms, or +** false if it contains at least one term. +*/ +#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) + +#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) + +#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) + /* ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. ** There is no way to tell if this is populated or not. */ @@ -524,28 +519,10 @@ Fts5CResult *aFirst; /* Current merge state (see above) */ Fts5SegIter aSeg[1]; /* Array of segment iterators */ }; -/* -** Object for iterating through the conents of a single internal node in -** memory. -*/ -struct Fts5NodeIter { - /* Internal. Set and managed by fts5NodeIterXXX() functions. Except, - ** the EOF test for the iterator is (Fts5NodeIter.aData==0). */ - const u8 *aData; - int nData; - int iOff; - - /* Output variables */ - Fts5Buffer term; - int nEmpty; - int iChild; - int bDlidx; -}; - /* ** An instance of the following type is used to iterate through the contents ** of a doclist-index record. ** ** pData: @@ -571,27 +548,10 @@ int nLvl; int iSegid; Fts5DlidxLvl aLvl[1]; }; - - -/* -** The first argument passed to this macro is a pointer to an Fts5Buffer -** object. -*/ -#define fts5BufferSize(pBuf,n) { \ - if( pBuf->nSpacep, n); \ - if( pNew==0 ){ \ - sqlite3_free(pBuf->p); \ - } \ - pBuf->nSpace = n; \ - pBuf->p = pNew; \ - } \ -} - static void fts5PutU16(u8 *aOut, u16 iVal){ aOut[0] = (iVal>>8); aOut[1] = (iVal&0xFF); } @@ -615,19 +575,20 @@ ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or ** +ve if pRight is smaller than pLeft. In other words: ** ** res = *pLeft - *pRight */ +#ifdef SQLITE_DEBUG static int fts5BufferCompareBlob( Fts5Buffer *pLeft, /* Left hand side of comparison */ const u8 *pRight, int nRight /* Right hand side of comparison */ ){ int nCmp = MIN(pLeft->n, nRight); int res = memcmp(pLeft->p, pRight, nCmp); return (res==0 ? (pLeft->n - nRight) : res); } - +#endif /* ** Compare the contents of the two buffers using memcmp(). If one buffer ** is a prefix of the other, it is considered the lesser. ** @@ -651,10 +612,15 @@ int res = memcmp(pLeft, pRight, nCmp); return (res==0 ? (nLeft - nRight) : res); } #endif +static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ + int ret; + fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret); + return ret; +} /* ** Close the read-only blob handle, if it is open. */ static void fts5CloseReader(Fts5Index *p){ @@ -663,15 +629,18 @@ p->pReader = 0; sqlite3_blob_close(pReader); } } -static Fts5Data *fts5DataReadOrBuffer( - Fts5Index *p, - Fts5Buffer *pBuf, - i64 iRowid -){ + +/* +** Retrieve a record from the %_data table. +** +** If an error occurs, NULL is returned and an error left in the +** Fts5Index object. +*/ +static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ Fts5Data *pRet = 0; if( p->rc==SQLITE_OK ){ int rc = SQLITE_OK; if( p->pReader ){ @@ -687,12 +656,12 @@ fts5CloseReader(p); } if( rc==SQLITE_ABORT ) rc = SQLITE_OK; } - /* If the blob handle is not yet open, open and seek it. Otherwise, use - ** the blob_reopen() API to reseek the existing blob handle. */ + /* If the blob handle is not open at this point, open it and seek + ** to the requested entry. */ if( p->pReader==0 && rc==SQLITE_OK ){ Fts5Config *pConfig = p->pConfig; rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader ); @@ -706,67 +675,38 @@ if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT; if( rc==SQLITE_OK ){ u8 *aOut = 0; /* Read blob data into this buffer */ int nByte = sqlite3_blob_bytes(p->pReader); - if( pBuf ){ - fts5BufferSize(pBuf, MAX(nByte, p->pConfig->pgsz) + 20); - pBuf->n = nByte; - aOut = pBuf->p; - if( aOut==0 ){ - rc = SQLITE_NOMEM; - } - }else{ - int nSpace = nByte + FTS5_DATA_PADDING; - pRet = (Fts5Data*)sqlite3_malloc(nSpace+sizeof(Fts5Data)); - if( pRet ){ - pRet->n = nByte; - aOut = pRet->p = (u8*)&pRet[1]; - }else{ - rc = SQLITE_NOMEM; - } + int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING; + pRet = (Fts5Data*)sqlite3_malloc(nAlloc); + if( pRet ){ + pRet->nn = nByte; + aOut = pRet->p = (u8*)&pRet[1]; + }else{ + rc = SQLITE_NOMEM; } if( rc==SQLITE_OK ){ rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); } if( rc!=SQLITE_OK ){ sqlite3_free(pRet); pRet = 0; + }else{ + /* TODO1: Fix this */ + pRet->szLeaf = fts5GetU16(&pRet->p[2]); } } p->rc = rc; p->nRead++; } - return pRet; -} - -/* -** Retrieve a record from the %_data table. -** -** If an error occurs, NULL is returned and an error left in the -** Fts5Index object. -*/ -static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ - Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid); assert( (pRet==0)==(p->rc!=SQLITE_OK) ); return pRet; } -/* -** Read a record from the %_data table into the buffer supplied as the -** second argument. -** -** If an error occurs, an error is left in the Fts5Index object. If an -** error has already occurred when this function is called, it is a -** no-op. -*/ -static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){ - (void)fts5DataReadOrBuffer(p, pBuf, iRowid); -} - /* ** Release a reference to data record returned by an earlier call to ** fts5DataRead(). */ static void fts5DataRelease(Fts5Data *pData){ @@ -795,11 +735,10 @@ */ static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ if( p->rc!=SQLITE_OK ) return; if( p->pWriter==0 ){ - int rc = SQLITE_OK; Fts5Config *pConfig = p->pConfig; fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf( "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)", pConfig->zDb, pConfig->zName )); @@ -847,12 +786,12 @@ /* ** Remove all records associated with segment iSegid. */ static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ - i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0, 0); - i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0, 0)-1; + i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0); + i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1; fts5DataDelete(p, iFirst, iLast); if( p->pIdxDeleter==0 ){ Fts5Config *pConfig = p->pConfig; fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf( "DELETE FROM '%q'.'%q_idx' WHERE segid=?", @@ -945,11 +884,10 @@ if( rc==SQLITE_OK ){ pLvl->nSeg = nTotal; for(iSeg=0; iSegaSeg[iSeg].iSegid); - i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); } }else{ fts5StructureRelease(pRet); @@ -1031,23 +969,22 @@ */ static Fts5Structure *fts5StructureRead(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; Fts5Structure *pRet = 0; /* Object to return */ int iCookie; /* Configuration cookie */ - Fts5Buffer buf = {0, 0, 0}; - - fts5DataBuffer(p, &buf, FTS5_STRUCTURE_ROWID); - if( buf.p==0 ) return 0; - assert( buf.nSpace>=(buf.n + FTS5_DATA_ZERO_PADDING) ); - memset(&buf.p[buf.n], 0, FTS5_DATA_ZERO_PADDING); - p->rc = fts5StructureDecode(buf.p, buf.n, &iCookie, &pRet); - + Fts5Data *pData; + + pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); + if( p->rc ) return 0; + /* TODO: Do we need this if the leaf-index is appended? Probably... */ + memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING); + p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); } - fts5BufferFree(&buf); + fts5DataRelease(pData); if( p->rc!=SQLITE_OK ){ fts5StructureRelease(pRet); pRet = 0; } return pRet; @@ -1102,11 +1039,10 @@ fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); assert( pLvl->nMerge<=pLvl->nSeg ); for(iSeg=0; iSegnSeg; iSeg++){ fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); - fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); } } @@ -1191,12 +1127,13 @@ int iTst; int iPromote = -1; int szPromote = 0; /* Promote anything this size or smaller */ Fts5StructureSegment *pSeg; /* Segment just written */ int szSeg; /* Size of segment just written */ + int nSeg = pStruct->aLevel[iLvl].nSeg; - + if( nSeg==0 ) return; pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); /* Check for condition (a) */ for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); @@ -1226,66 +1163,10 @@ fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); } } -/* -** If the pIter->iOff offset currently points to an entry indicating one -** or more term-less nodes, advance past it and set pIter->nEmpty to -** the number of empty child nodes. -*/ -static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ - if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ - pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01; - pIter->iOff++; - pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); - }else{ - pIter->nEmpty = 0; - pIter->bDlidx = 0; - } -} - -/* -** Advance to the next entry within the node. -*/ -static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){ - if( pIter->iOff>=pIter->nData ){ - pIter->aData = 0; - pIter->iChild += pIter->nEmpty; - }else{ - int nPre, nNew; - pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nPre); - pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nNew); - pIter->term.n = nPre-2; - fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff); - pIter->iOff += nNew; - pIter->iChild += (1 + pIter->nEmpty); - fts5NodeIterGobbleNEmpty(pIter); - if( *pRc ) pIter->aData = 0; - } -} - - -/* -** Initialize the iterator object pIter to iterate through the internal -** segment node in pData. -*/ -static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){ - memset(pIter, 0, sizeof(*pIter)); - pIter->aData = aData; - pIter->nData = nData; - pIter->iOff = fts5GetVarint32(aData, pIter->iChild); - fts5NodeIterGobbleNEmpty(pIter); -} - -/* -** Free any memory allocated by the iterator object. -*/ -static void fts5NodeIterFree(Fts5NodeIter *pIter){ - fts5BufferFree(&pIter->term); -} - /* ** Advance the iterator passed as the only argument. If the end of the ** doclist-index page is reached, return non-zero. */ static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ @@ -1297,15 +1178,15 @@ pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); pLvl->iFirstOff = pLvl->iOff; }else{ int iOff; - for(iOff=pLvl->iOff; iOffn; iOff++){ + for(iOff=pLvl->iOff; iOffnn; iOff++){ if( pData->p[iOff] ) break; } - if( iOffn ){ + if( iOffnn ){ i64 iVal; pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal); pLvl->iRowid += iVal; pLvl->iOff = iOff; @@ -1532,35 +1413,42 @@ } static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ return pIter->aLvl[0].iLeafPgno; } -static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){ - *piRowid = (int)fts5GetU16(&pLeaf->p[0]); - *piTerm = (int)fts5GetU16(&pLeaf->p[2]); -} - /* ** Load the next leaf page into the segment iterator. */ static void fts5SegIterNextPage( Fts5Index *p, /* FTS5 backend object */ Fts5SegIter *pIter /* Iterator to advance to next page */ ){ + Fts5Data *pLeaf; Fts5StructureSegment *pSeg = pIter->pSeg; fts5DataRelease(pIter->pLeaf); pIter->iLeafPgno++; if( pIter->pNextLeaf ){ - assert( pIter->iLeafPgno<=pSeg->pgnoLast ); pIter->pLeaf = pIter->pNextLeaf; pIter->pNextLeaf = 0; }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ pIter->pLeaf = fts5DataRead(p, - FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno) + FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno) ); }else{ pIter->pLeaf = 0; + } + pLeaf = pIter->pLeaf; + + if( pLeaf ){ + pIter->iPgidxOff = pLeaf->szLeaf; + if( fts5LeafIsTermless(pLeaf) ){ + pIter->iEndofDoclist = pLeaf->nn+1; + }else{ + pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], + pIter->iEndofDoclist + ); + } } } /* ** Argument p points to a buffer containing a varint to be interpreted as a @@ -1589,11 +1477,12 @@ ** position list content (if any). */ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ if( p->rc==SQLITE_OK ){ int iOff = pIter->iLeafOffset; /* Offset to read at */ - if( iOff>=pIter->pLeaf->n ){ + ASSERT_SZLEAF_OK(pIter->pLeaf); + if( iOff>=pIter->pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ const u8 *a = &pIter->pLeaf->p[iOff]; pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel); } @@ -1602,11 +1491,12 @@ static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ int iOff = pIter->iLeafOffset; - if( iOff>=pIter->pLeaf->n ){ + ASSERT_SZLEAF_OK(pIter->pLeaf); + if( iOff>=pIter->pLeaf->szLeaf ){ fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ){ if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; return; } @@ -1642,10 +1532,18 @@ fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); iOff += nNew; pIter->iTermLeafOffset = iOff; pIter->iTermLeafPgno = pIter->iLeafPgno; pIter->iLeafOffset = iOff; + + if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ + pIter->iEndofDoclist = pIter->pLeaf->nn+1; + }else{ + int nExtra; + pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra); + pIter->iEndofDoclist += nExtra; + } fts5SegIterLoadRowid(p, pIter); } /* @@ -1677,12 +1575,14 @@ pIter->iLeafPgno = pSeg->pgnoFirst-1; fts5SegIterNextPage(p, pIter); } if( p->rc==SQLITE_OK ){ - u8 *a = pIter->pLeaf->p; - pIter->iLeafOffset = fts5GetU16(&a[2]); + pIter->iLeafOffset = 4; + assert_nc( pIter->pLeaf->nn>4 ); + assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); + pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; fts5SegIterLoadTerm(p, pIter, 0); fts5SegIterLoadNPos(p, pIter); } } @@ -1700,25 +1600,29 @@ ** aRowidOffset[] and iRowidOffset variables. At this point the iterator ** is in its regular state - Fts5SegIter.iLeafOffset points to the first ** byte of the position list content associated with said rowid. */ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ - int n = pIter->pLeaf->n; + int n = pIter->pLeaf->szLeaf; int i = pIter->iLeafOffset; u8 *a = pIter->pLeaf->p; int iRowidOffset = 0; + if( n>pIter->iEndofDoclist ){ + n = pIter->iEndofDoclist; + } + + ASSERT_SZLEAF_OK(pIter->pLeaf); while( 1 ){ i64 iDelta = 0; int nPos; int bDummy; i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); i += nPos; if( i>=n ) break; i += fts5GetVarint(&a[i], (u64*)&iDelta); - if( iDelta==0 ) break; pIter->iRowid += iDelta; if( iRowidOffset>=pIter->nRowidOffset ){ int nNew = pIter->nRowidOffset + 8; int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int)); @@ -1748,21 +1652,24 @@ pIter->pLeaf = 0; while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ Fts5Data *pNew; pIter->iLeafPgno--; pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( - pIter->pSeg->iSegid, 0, pIter->iLeafPgno + pIter->pSeg->iSegid, pIter->iLeafPgno )); if( pNew ){ - if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ - if( pIter->iTermLeafOffsetn ){ - pIter->pLeaf = pNew; - pIter->iLeafOffset = pIter->iTermLeafOffset; - } + /* iTermLeafOffset may be equal to szLeaf if the term is the last + ** thing on the page - i.e. the first rowid is on the following page. + ** In this case leaf pIter->pLeaf==0, this iterator is at EOF. */ + if( pIter->iLeafPgno==pIter->iTermLeafPgno + && pIter->iTermLeafOffsetszLeaf + ){ + pIter->pLeaf = pNew; + pIter->iLeafOffset = pIter->iTermLeafOffset; }else{ - int iRowidOff, dummy; - fts5LeafHeader(pNew, &iRowidOff, &dummy); + int iRowidOff; + iRowidOff = fts5LeafFirstRowidOff(pNew); if( iRowidOff ){ pIter->pLeaf = pNew; pIter->iLeafOffset = iRowidOff; } } @@ -1776,10 +1683,11 @@ } } } if( pIter->pLeaf ){ + pIter->iEndofDoclist = pIter->pLeaf->nn+1; fts5SegIterReverseInitPage(p, pIter); } } /* @@ -1831,30 +1739,31 @@ int bNewTerm = 0; int nKeep = 0; /* Search for the end of the position list within the current page. */ u8 *a = pLeaf->p; - int n = pLeaf->n; + int n = pLeaf->szLeaf; + ASSERT_SZLEAF_OK(pLeaf); iOff = pIter->iLeafOffset + pIter->nPos; if( iOffiLeafOffset = iOff; - if( iDelta==0 ){ + /* The next entry is on the current page. */ + assert_nc( iOff<=pIter->iEndofDoclist ); + if( iOff>=pIter->iEndofDoclist ){ bNewTerm = 1; - if( iOff>=n ){ - fts5SegIterNextPage(p, pIter); - pIter->iLeafOffset = 4; - }else if( iOff!=fts5GetU16(&a[2]) ){ - pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep); + if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ + iOff += fts5GetVarint32(&a[iOff], nKeep); } }else{ + u64 iDelta; + iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); pIter->iRowid += iDelta; + assert_nc( iDelta>0 ); } + pIter->iLeafOffset = iOff; + }else if( pIter->pSeg==0 ){ const u8 *pList = 0; const char *zTerm = 0; int nList = 0; if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ @@ -1864,11 +1773,13 @@ if( pList==0 ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; }else{ pIter->pLeaf->p = (u8*)pList; - pIter->pLeaf->n = nList; + pIter->pLeaf->nn = nList; + pIter->pLeaf->szLeaf = nList; + pIter->iEndofDoclist = nList+1; sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); } }else{ iOff = 0; @@ -1875,19 +1786,31 @@ /* Next entry is not on the current page */ while( iOff==0 ){ fts5SegIterNextPage(p, pIter); pLeaf = pIter->pLeaf; if( pLeaf==0 ) break; - if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOffn ){ + ASSERT_SZLEAF_OK(pLeaf); + if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOffszLeaf ){ iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; + + if( pLeaf->nn>pLeaf->szLeaf ){ + pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( + &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist + ); + } + } - else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){ + else if( pLeaf->nn>pLeaf->szLeaf ){ + pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( + &pLeaf->p[pLeaf->szLeaf], iOff + ); pIter->iLeafOffset = iOff; + pIter->iEndofDoclist = iOff; bNewTerm = 1; } - if( iOff>=pLeaf->n ){ + if( iOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; return; } } } @@ -1924,61 +1847,41 @@ int pgnoLast = 0; if( pDlidx ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = fts5DlidxIterPgno(pDlidx); - pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast)); + pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); }else{ - int iOff; /* Byte offset within pLeaf */ Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ - /* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first - ** byte of position-list content for the current rowid. Back it up - ** so that it points to the start of the position-list size field. */ + /* Currently, Fts5SegIter.iLeafOffset points to the first byte of + ** position-list content for the current rowid. Back it up so that it + ** points to the start of the position-list size field. */ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); - iOff = pIter->iLeafOffset; - assert( iOff>=4 ); - - /* Search for a new term within the current leaf. If one can be found, - ** then this page contains the largest rowid for the current term. */ - while( iOffn ){ - int nPos; - i64 iDelta; - int bDummy; - - /* Read the position-list size field */ - iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); - iOff += nPos; - if( iOff>=pLeaf->n ) break; - - /* Rowid delta. Or, if 0x00, the end of doclist marker. */ - nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); - if( iDelta==0 ) break; - iOff += nPos; - } /* If this condition is true then the largest rowid for the current ** term may not be stored on the current page. So search forward to ** see where said rowid really is. */ - if( iOff>=pLeaf->n ){ + if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ int pgno; Fts5StructureSegment *pSeg = pIter->pSeg; /* The last rowid in the doclist may not be on the current page. Search ** forward to find the page containing the last rowid. */ for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ - i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pgno); + i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno); Fts5Data *pNew = fts5DataRead(p, iAbs); if( pNew ){ - int iRowid, iTerm; - fts5LeafHeader(pNew, &iRowid, &iTerm); + int iRowid, bTermless; + iRowid = fts5LeafFirstRowidOff(pNew); + bTermless = fts5LeafIsTermless(pNew); if( iRowid ){ SWAPVAL(Fts5Data*, pNew, pLast); pgnoLast = pgno; } fts5DataRelease(pNew); - if( iTerm ) break; + if( bTermless==0 ) break; } } } } @@ -1990,18 +1893,24 @@ ** Or, if pLast is non-NULL, then it is the page that contains the last ** rowid. In this case configure the iterator so that it points to the ** first rowid on this page. */ if( pLast ){ - int dummy; int iOff; fts5DataRelease(pIter->pLeaf); pIter->pLeaf = pLast; pIter->iLeafPgno = pgnoLast; - fts5LeafHeader(pLast, &iOff, &dummy); + iOff = fts5LeafFirstRowidOff(pLast); iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; + + if( fts5LeafIsTermless(pLast) ){ + pIter->iEndofDoclist = pLast->nn+1; + }else{ + pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); + } + } fts5SegIterReverseInitPage(p, pIter); } @@ -2020,147 +1929,24 @@ assert( pIter->pDlidx==0 ); /* Check if the current doclist ends on this page. If it does, return ** early without loading the doclist-index (as it belongs to a different ** term. */ - if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ - int iOff = pIter->iLeafOffset + pIter->nPos; - while( iOffn ){ - int bDummy; - int nPos; - i64 iDelta; - - /* iOff is currently the offset of the start of position list data */ - iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); - if( iDelta==0 ) return; - assert_nc( iOffn ); - iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); - iOff += nPos; - } + if( pIter->iTermLeafPgno==pIter->iLeafPgno + && pIter->iEndofDoclistszLeaf + ){ + return; } pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); } -#ifdef SQLITE_DEBUG -static void fts5AssertNodeSeekOk( - Fts5Buffer *pNode, - const u8 *pTerm, int nTerm, /* Term to search for */ - int iExpectPg, - int bExpectDlidx -){ - int bDlidx; - int iPg; - int rc = SQLITE_OK; - Fts5NodeIter node; - - fts5NodeIterInit(pNode->p, pNode->n, &node); - assert( node.term.n==0 ); - iPg = node.iChild; - bDlidx = node.bDlidx; - for(fts5NodeIterNext(&rc, &node); - node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0; - fts5NodeIterNext(&rc, &node) - ){ - iPg = node.iChild; - bDlidx = node.bDlidx; - } - fts5NodeIterFree(&node); - - assert( rc!=SQLITE_OK || iPg==iExpectPg ); - assert( rc!=SQLITE_OK || bDlidx==bExpectDlidx ); -} -#else -#define fts5AssertNodeSeekOk(v,w,x,y,z) -#endif - -/* -** Argument pNode is an internal b-tree node. This function searches -** within the node for the largest term that is smaller than or equal -** to (pTerm/nTerm). -** -** It returns the associated page number. Or, if (pTerm/nTerm) is smaller -** than all terms within the node, the leftmost child page number. -** -** Before returning, (*pbDlidx) is set to true if the last term on the -** returned child page number has a doclist-index. Or left as is otherwise. -*/ -static int fts5NodeSeek( - Fts5Buffer *pNode, /* Node to search */ - const u8 *pTerm, int nTerm, /* Term to search for */ - int *pbDlidx /* OUT: True if dlidx flag is set */ -){ - int iPg; - u8 *pPtr = pNode->p; - u8 *pEnd = &pPtr[pNode->n]; - int nMatch = 0; /* Number of bytes of pTerm already matched */ - - assert( *pbDlidx==0 ); - - pPtr += fts5GetVarint32(pPtr, iPg); - while( pPtr=pEnd ) break; - } - - /* Read the next "term" pointer. Set nKeep to the number of bytes to - ** keep from the previous term, and nNew to the number of bytes of - ** new data that will be appended to it. */ - nKeep = (int)*pPtr++; - nNew = (int)*pPtr++; - if( (nKeep | nNew) & 0x0080 ){ - pPtr -= 2; - pPtr += fts5GetVarint32(pPtr, nKeep); - pPtr += fts5GetVarint32(pPtr, nNew); - } - nKeep -= 2; - - /* Compare (pTerm/nTerm) to the current term on the node (the one described - ** by nKeep/nNew). If the node term is larger, break out of the while() - ** loop. - ** - ** Otherwise, if (pTerm/nTerm) is larger or the two terms are equal, - ** leave variable nMatch set to the size of the largest prefix common to - ** both terms in bytes. */ - if( nKeep==nMatch ){ - int nTst = MIN(nNew, nTerm-nMatch); - int i; - for(i=0; i pTerm[nMatch]) ) break; - }else if( nKeeppLeaf->p; - int n = pIter->pLeaf->n; + int szLeaf = pIter->pLeaf->szLeaf; + int n = pIter->pLeaf->nn; int nMatch = 0; int nKeep = 0; int nNew = 0; + int iTermOff; + int iPgidx; /* Current offset in pgidx */ + int bEndOfPage = 0; assert( p->rc==SQLITE_OK ); - assert( pIter->pLeaf ); - iOff = fts5GetU16(&a[2]); - if( iOff<4 || iOff>=n ){ - p->rc = FTS5_CORRUPT; - return; - } + iPgidx = szLeaf; + iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff); + iOff = iTermOff; while( 1 ){ - int i; - int nCmp; /* Figure out how many new bytes are in this term */ fts5IndexGetVarint32(a, iOff, nNew); - if( nKeep=nMatch ); if( nKeep==nMatch ){ + int nCmp; + int i; nCmp = MIN(nNew, nTerm-nMatch); for(i=0; ipTerm[nMatch] ){ goto search_failed; } } - iOff += nNew; - - /* Skip past the doclist. If the end of the page is reached, bail out. */ - while( 1 ){ - int nPos; - - /* Skip past rowid delta */ - fts5IndexSkipVarint(a, iOff); - - /* Skip past position list */ - fts5IndexGetVarint32(a, iOff, nPos); - iOff += (nPos >> 1); - if( iOff>=(n-1) ){ - iOff = n; - goto search_failed; - } - - /* If this is the end of the doclist, break out of the loop */ - if( a[iOff]==0x00 ){ - iOff++; - break; - } - }; + + if( iPgidx>=n ){ + bEndOfPage = 1; + break; + } + + iPgidx += fts5GetVarint32(&a[iPgidx], nKeep); + iTermOff += nKeep; + iOff = iTermOff; /* Read the nKeep field of the next term. */ fts5IndexGetVarint32(a, iOff, nKeep); } @@ -2264,18 +2036,18 @@ search_failed: if( bGe==0 ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; return; - }else if( iOff>=n ){ + }else if( bEndOfPage ){ do { fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ) return; a = pIter->pLeaf->p; - iOff = fts5GetU16(&a[2]); - if( iOff ){ - if( iOff<4 || iOff>=n ){ + if( fts5LeafIsTermless(pIter->pLeaf)==0 ){ + fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff); + if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ nKeep = 0; iOff += fts5GetVarint32(&a[iOff], nNew); break; @@ -2283,17 +2055,27 @@ } }while( 1 ); } search_success: + pIter->iLeafOffset = iOff + nNew; pIter->iTermLeafOffset = pIter->iLeafOffset; pIter->iTermLeafPgno = pIter->iLeafPgno; fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); + if( iPgidx>=n ){ + pIter->iEndofDoclist = pIter->pLeaf->nn+1; + }else{ + int nExtra; + iPgidx += fts5GetVarint32(&a[iPgidx], nExtra); + pIter->iEndofDoclist = iTermOff + nExtra; + } + pIter->iPgidxOff = iPgidx; + fts5SegIterLoadRowid(p, pIter); fts5SegIterLoadNPos(p, pIter); } /* @@ -2422,13 +2204,14 @@ Fts5Data *pLeaf; sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); if( pLeaf==0 ) return; pLeaf->p = (u8*)pList; - pLeaf->n = nList; + pLeaf->nn = pLeaf->szLeaf = nList; pIter->pLeaf = pLeaf; pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); + pIter->iEndofDoclist = pLeaf->nn+1; if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; fts5SegIterReverseInitPage(p, pIter); }else{ @@ -2615,13 +2398,13 @@ assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); if( p->rc==SQLITE_OK ){ int iOff; u8 *a = pIter->pLeaf->p; - int n = pIter->pLeaf->n; + int n = pIter->pLeaf->szLeaf; - iOff = fts5GetU16(&a[0]); + iOff = fts5LeafFirstRowidOff(pIter->pLeaf); if( iOff<4 || iOff>=n ){ p->rc = FTS5_CORRUPT; }else{ iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; @@ -2675,17 +2458,17 @@ fts5SegIterReverseNewPage(p, pIter); bMove = 0; } } - while( p->rc==SQLITE_OK ){ + do{ if( bMove ) fts5SegIterNext(p, pIter, 0); if( pIter->pLeaf==0 ) break; if( bRev==0 && pIter->iRowid>=iMatch ) break; if( bRev!=0 && pIter->iRowid<=iMatch ) break; bMove = 1; - } + }while( p->rc==SQLITE_OK ); } /* ** Free the iterator object passed as the second argument. @@ -2949,13 +2732,14 @@ pNew = fts5MultiIterAlloc(p, 2); if( pNew ){ Fts5SegIter *pIter = &pNew->aSeg[1]; pIter->flags = FTS5_SEGITER_ONETERM; - if( pData->n>0 ){ + if( pData->szLeaf>0 ){ pIter->pLeaf = pData; pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); + pIter->iEndofDoclist = pData->nn; pNew->aFirst[1].iFirst = 1; if( bDesc ){ pNew->bRev = 1; pIter->flags |= FTS5_SEGITER_REVERSE; fts5SegIterReverseInitPage(p, pIter); @@ -3029,11 +2813,11 @@ void (*xChunk)(Fts5Index*, void*, const u8*, int) ){ int nRem = pSeg->nPos; /* Number of bytes still to come */ Fts5Data *pData = 0; u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; - int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset); + int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); int pgno = pSeg->iLeafPgno; int pgnoSave = 0; if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ pgnoSave = pgno+1; @@ -3045,14 +2829,14 @@ fts5DataRelease(pData); if( nRem<=0 ){ break; }else{ pgno++; - pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno)); + pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)); if( pData==0 ) break; pChunk = &pData->p[4]; - nChunk = MIN(nRem, pData->n - 4); + nChunk = MIN(nRem, pData->szLeaf - 4); if( pgno==pgnoSave ){ assert( pSeg->pNextLeaf==0 ); pSeg->pNextLeaf = pData; pData = 0; } @@ -3334,23 +3118,34 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; Fts5PageWriter *pPage = &pWriter->writer; i64 iRowid; + assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); + + /* Set the szLeaf header field. */ + assert( 0==fts5GetU16(&pPage->buf.p[2]) ); + fts5PutU16(&pPage->buf.p[2], pPage->buf.n); + if( pWriter->bFirstTermInPage ){ /* No term was written to this page. */ - assert( 0==fts5GetU16(&pPage->buf.p[2]) ); + assert( pPage->pgidx.n==0 ); fts5WriteBtreeNoTerm(p, pWriter); + }else{ + /* Append the pgidx to the page buffer. Set the szLeaf header field. */ + fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p); } - /* Write the current page to the db. */ - iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno); + /* Write the page out to disk */ + iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); /* Initialize the next page. */ fts5BufferZero(&pPage->buf); + fts5BufferZero(&pPage->pgidx); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); + pPage->iPrevPgidx = 0; pPage->pgno++; /* Increase the leaves written counter */ pWriter->nLeafWritten++; @@ -3371,24 +3166,35 @@ Fts5SegWriter *pWriter, int nTerm, const u8 *pTerm ){ int nPrefix; /* Bytes of prefix compression for term */ Fts5PageWriter *pPage = &pWriter->writer; - - assert( pPage->buf.n==0 || pPage->buf.n>4 ); - if( pPage->buf.n==0 ){ - /* Zero the first term and first rowid fields */ - static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; - fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); - assert( pWriter->bFirstTermInPage ); - } - if( p->rc ) return; - - if( pWriter->bFirstTermInPage ){ - /* Update the "first term" field of the page header. */ - assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 ); - fts5PutU16(&pPage->buf.p[2], pPage->buf.n); + Fts5Buffer *pPgidx = &pWriter->writer.pgidx; + + assert( p->rc==SQLITE_OK ); + assert( pPage->buf.n>=4 ); + assert( pPage->buf.n>4 || pWriter->bFirstTermInPage ); + + /* If the current leaf page is full, flush it to disk. */ + if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ + if( pPage->buf.n>4 ){ + fts5WriteFlushLeaf(p, pWriter); + } + fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING); + } + + /* TODO1: Updating pgidx here. */ + pPgidx->n += sqlite3Fts5PutVarint( + &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx + ); + pPage->iPrevPgidx = pPage->buf.n; +#if 0 + fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); + pPgidx->n += 2; +#endif + + if( pWriter->bFirstTermInPage ){ nPrefix = 0; if( pPage->pgno!=1 ){ /* This is the first term on a leaf that is not the leftmost leaf in ** the segment b-tree. In this case it is necessary to add a term to ** the b-tree hierarchy that is (a) larger than the largest term @@ -3426,15 +3232,10 @@ pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); pWriter->aDlidx[0].pgno = pPage->pgno; - - /* If the current leaf page is full, flush it to disk. */ - if( pPage->buf.n>=p->pConfig->pgsz ){ - fts5WriteFlushLeaf(p, pWriter); - } } /* ** Append a rowid and position-list size field to the writers output. */ @@ -3444,10 +3245,14 @@ i64 iRowid, int nPos ){ if( p->rc==SQLITE_OK ){ Fts5PageWriter *pPage = &pWriter->writer; + + if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ + fts5WriteFlushLeaf(p, pWriter); + } /* If this is to be the first rowid written to the page, set the ** rowid-pointer in the page-header. Also append a value to the dlidx ** buffer, in case a doclist-index is required. */ if( pWriter->bFirstRowidInPage ){ @@ -3465,14 +3270,10 @@ pWriter->iPrevRowid = iRowid; pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); - - if( pPage->buf.n>=p->pConfig->pgsz ){ - fts5WriteFlushLeaf(p, pWriter); - } } } static void fts5WriteAppendPoslistData( Fts5Index *p, @@ -3483,12 +3284,14 @@ Fts5PageWriter *pPage = &pWriter->writer; const u8 *a = aData; int n = nData; assert( p->pConfig->pgsz>0 ); - while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){ - int nReq = p->pConfig->pgsz - pPage->buf.n; + while( p->rc==SQLITE_OK + && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz + ){ + int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n; int nCopy = 0; while( nCopy0 ){ fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); } } -static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){ - fts5BufferAppendVarint(&p->rc, &pWriter->writer.buf, 0); -} - /* ** Flush any data cached by the writer object to the database. Free any ** allocations associated with the writer. */ static void fts5WriteFinish( Fts5Index *p, Fts5SegWriter *pWriter, /* Writer object */ - int *pnHeight, /* OUT: Height of the b-tree */ int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; Fts5PageWriter *pLeaf = &pWriter->writer; if( p->rc==SQLITE_OK ){ - if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){ - *pnLeaf = 0; - *pnHeight = 0; - }else{ - if( pLeaf->buf.n>4 ){ - fts5WriteFlushLeaf(p, pWriter); - } - *pnLeaf = pLeaf->pgno-1; - - fts5WriteFlushBtree(p, pWriter); - *pnHeight = 0; - } + assert( pLeaf->pgno>=1 ); + if( pLeaf->buf.n>4 ){ + fts5WriteFlushLeaf(p, pWriter); + } + *pnLeaf = pLeaf->pgno-1; + fts5WriteFlushBtree(p, pWriter); } fts5BufferFree(&pLeaf->term); fts5BufferFree(&pLeaf->buf); + fts5BufferFree(&pLeaf->pgidx); fts5BufferFree(&pWriter->btterm); for(i=0; inDlidx; i++){ sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); } @@ -3545,17 +3338,23 @@ static void fts5WriteInit( Fts5Index *p, Fts5SegWriter *pWriter, int iSegid ){ + const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING; + memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = iSegid; fts5WriteDlidxGrow(p, pWriter, 1); pWriter->writer.pgno = 1; pWriter->bFirstTermInPage = 1; pWriter->iBtPage = 1; + + /* Grow the two buffers to pgsz + padding bytes in size. */ + fts5BufferGrow(&p->rc, &pWriter->writer.pgidx, nBuffer); + fts5BufferGrow(&p->rc, &pWriter->writer.buf, nBuffer); if( p->pIdxWriter==0 ){ Fts5Config *pConfig = p->pConfig; fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf( "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", @@ -3562,10 +3361,17 @@ pConfig->zDb, pConfig->zName )); } if( p->rc==SQLITE_OK ){ + /* Initialize the 4-byte leaf-page header to 0x00. */ + memset(pWriter->writer.buf.p, 0, 4); + pWriter->writer.buf.n = 4; + + /* Bind the current output segment id to the index-writer. This is an + ** optimization over binding the same value over and over as rows are + ** inserted into %_idx by the current writer. */ sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); } } /* @@ -3590,23 +3396,41 @@ }else{ int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ i64 iLeafRowid; Fts5Data *pData; int iId = pSeg->pSeg->iSegid; - u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04}; + u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00}; - iLeafRowid = FTS5_SEGMENT_ROWID(iId, 0, pSeg->iTermLeafPgno); + iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno); pData = fts5DataRead(p, iLeafRowid); if( pData ){ fts5BufferZero(&buf); + fts5BufferGrow(&p->rc, &buf, pData->nn); fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr); fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n); fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p); - fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]); + fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]); + if( p->rc==SQLITE_OK ){ + /* Set the szLeaf field */ + fts5PutU16(&buf.p[2], buf.n); + } + + /* Set up the new page-index array */ + fts5BufferAppendVarint(&p->rc, &buf, 4); + if( pSeg->iLeafPgno==pSeg->iTermLeafPgno + && pSeg->iEndofDoclistszLeaf + ){ + int nDiff = pData->szLeaf - pSeg->iEndofDoclist; + fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4); + fts5BufferAppendBlob(&p->rc, &buf, + pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff] + ); + } + fts5DataRelease(pData); pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; - fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid); + fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid); fts5DataWrite(p, iLeafRowid, buf.p, buf.n); } } } fts5BufferFree(&buf); @@ -3637,11 +3461,10 @@ int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; - int bRequireDoclistTerm = 0; /* Doclist terminator (0x00) required */ int bOldest; /* True if the output segment is the oldest */ assert( iLvlnLevel ); assert( pLvl->nMerge<=pLvl->nSeg ); @@ -3702,16 +3525,12 @@ if( pnRem && writer.nLeafWritten>nRem ){ break; } /* This is a new term. Append a term to the output segment. */ - if( bRequireDoclistTerm ){ - fts5WriteAppendZerobyte(p, &writer); - } fts5WriteAppendTerm(p, &writer, nTerm, pTerm); fts5BufferSet(&p->rc, &term, nTerm, pTerm); - bRequireDoclistTerm = 1; } /* Append the rowid to the output */ /* WRITEPOSLISTSIZE */ nPos = pSegIter->nPos*2 + pSegIter->bDel; @@ -3721,11 +3540,11 @@ fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); } /* Flush the last leaf page to disk. Set the output segment b-tree height ** and last leaf page number at the same time. */ - fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast); + fts5WriteFinish(p, &writer, &pSeg->pgnoLast); if( fts5MultiIterEof(p, pIter) ){ int i; /* Remove the redundant segments from the %_data table */ @@ -3846,10 +3665,11 @@ int iLvl = 0; assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ fts5IndexMergeLevel(p, &pStruct, iLvl, 0); + assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) ); fts5StructurePromote(p, iLvl+1, pStruct); iLvl++; } *ppStruct = pStruct; } @@ -3873,14 +3693,16 @@ */ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ int ret; u32 dummy; ret = fts5GetVarint32(aBuf, dummy); - while( 1 ){ - int i = fts5GetVarint32(&aBuf[ret], dummy); - if( (ret + i) > nMax ) break; - ret += i; + if( ret nMax ) break; + ret += i; + } } return ret; } #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \ @@ -3909,88 +3731,47 @@ if( iSegid ){ const int pgsz = p->pConfig->pgsz; Fts5StructureSegment *pSeg; /* New segment within pStruct */ - int nHeight; /* Height of new segment b-tree */ Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ - const u8 *zPrev = 0; + Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ Fts5SegWriter writer; fts5WriteInit(p, &writer, iSegid); - /* Pre-allocate the buffer used to assemble leaf pages to the target - ** page size. */ - assert( pgsz>0 ); pBuf = &writer.writer.buf; - fts5BufferGrow(&p->rc, pBuf, pgsz + 20); + pPgidx = &writer.writer.pgidx; + + /* fts5WriteInit() should have initialized the buffers to (most likely) + ** the maximum space required. */ + assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) ); + assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) ); /* Begin scanning through hash table entries. This loop runs once for each ** term/doclist currently stored within the hash table. */ if( p->rc==SQLITE_OK ){ - memset(pBuf->p, 0, 4); - pBuf->n = 4; p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); } while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ const char *zTerm; /* Buffer containing term */ - int nTerm; /* Size of zTerm in bytes */ const u8 *pDoclist; /* Pointer to doclist for this term */ int nDoclist; /* Size of doclist in bytes */ - int nSuffix; /* Size of term suffix */ + /* Write the term for this entry to disk. */ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); - nTerm = strlen(zTerm); - - /* Decide if the term will fit on the current leaf. If it will not, - ** flush the leaf to disk here. */ - if( pBuf->n>4 && (pBuf->n + nTerm + 2) > pgsz ){ - fts5WriteFlushLeaf(p, &writer); - pBuf = &writer.writer.buf; - if( (nTerm + 32) > pBuf->nSpace ){ - fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); - if( p->rc ) break; - } - } - - /* Write the term to the leaf. And if it is the first on the leaf, and - ** the leaf is not page number 1, push it up into the b-tree hierarchy - ** as well. */ - if( writer.bFirstTermInPage==0 ){ - int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); - pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre); - nSuffix = nTerm - nPre; - }else{ - fts5PutU16(&pBuf->p[2], pBuf->n); - writer.bFirstTermInPage = 0; - if( writer.writer.pgno!=1 ){ - int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); - fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm); - pBuf = &writer.writer.buf; - assert( nPren += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nSuffix); - fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix); - - /* We just wrote a term into page writer.aWriter[0].pgno. If a - ** doclist-index is to be generated for this doclist, it will be - ** associated with this page. */ - assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 ); - writer.aDlidx[0].pgno = writer.writer.pgno; - - if( pgsz>=(pBuf->n + nDoclist + 1) ){ + fts5WriteAppendTerm(p, &writer, strlen(zTerm), (const u8*)zTerm); + + assert( writer.bFirstRowidInPage==0 ); + if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); }else{ i64 iRowid = 0; i64 iDelta = 0; int iOff = 0; - writer.bFirstRowidInPage = 0; - /* The entire doclist will not fit on this leaf. The following ** loop iterates through the poslists that make up the current ** doclist. */ while( p->rc==SQLITE_OK && iOffn += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); } assert( pBuf->n<=pBuf->nSpace ); - if( (pBuf->n + nCopy) <= pgsz ){ + if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ /* The entire poslist will fit on the current leaf. So copy ** it in one go. */ fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); }else{ /* The entire poslist will not fit on this leaf. So it needs @@ -4020,38 +3801,37 @@ ** to be broken into sections. The only qualification being ** that each varint must be stored contiguously. */ const u8 *pPoslist = &pDoclist[iOff]; int iPos = 0; while( p->rc==SQLITE_OK ){ - int nSpace = pgsz - pBuf->n; + int nSpace = pgsz - pBuf->n - pPgidx->n; int n = 0; if( (nCopy - iPos)<=nSpace ){ n = nCopy - iPos; }else{ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); } assert( n>0 ); fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); iPos += n; - if( pBuf->n>=pgsz ){ + if( (pBuf->n + pPgidx->n)>=pgsz ){ fts5WriteFlushLeaf(p, &writer); - pBuf = &writer.writer.buf; } if( iPos>=nCopy ) break; } } iOff += nCopy; } } - pBuf->p[pBuf->n++] = '\0'; + /* TODO2: Doclist terminator written here. */ + /* pBuf->p[pBuf->n++] = '\0'; */ assert( pBuf->n<=pBuf->nSpace ); - zPrev = (const u8*)zTerm; sqlite3Fts5HashScanNext(pHash); } sqlite3Fts5HashClear(pHash); - fts5WriteFinish(p, &writer, &nHeight, &pgnoLast); + fts5WriteFinish(p, &writer, &pgnoLast); /* Update the Fts5Structure. It is written back to the database by the ** fts5StructureRelease() call below. */ if( pStruct->nLevel==0 ){ fts5StructureAddLevel(&p->rc, &pStruct); @@ -4058,11 +3838,10 @@ } fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); if( p->rc==SQLITE_OK ){ pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; pSeg->iSegid = iSegid; - pSeg->nHeight = nHeight; pSeg->pgnoFirst = 1; pSeg->pgnoLast = pgnoLast; pStruct->nSegment++; } fts5StructurePromote(p, 0, pStruct); @@ -4160,11 +3939,14 @@ static void fts5PoslistCallback( Fts5Index *p, void *pCtx, const u8 *pChunk, int nChunk ){ - fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk); + assert_nc( nChunk>=0 ); + if( nChunk>0 ){ + fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk); + } } /* ** Iterator pIter currently points to a valid entry (not EOF). This ** function appends the position list data for the current entry to @@ -4366,11 +4148,11 @@ fts5MultiIterNext(p, p1, 0, 0) ){ i64 iRowid = fts5MultiIterRowid(p1); int nTerm; const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm); - assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); + assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); if( nTerm0 && iRowid<=iLastRowid ){ for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ assert( ip = (u8*)&pData[1]; - pData->n = doclist.n; + pData->nn = pData->szLeaf = doclist.n; memcpy(pData->p, doclist.p, doclist.n); fts5MultiIterNew2(p, pData, bDesc, ppIter); } fts5BufferFree(&doclist); } @@ -4457,17 +4239,13 @@ ** function populates it with the initial structure objects for each index, ** and the initial version of the "averages" record (a zero-byte blob). */ int sqlite3Fts5IndexReinit(Fts5Index *p){ Fts5Structure s; - - assert( p->rc==SQLITE_OK ); - p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); - memset(&s, 0, sizeof(Fts5Structure)); + fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0); fts5StructureWrite(p, &s); - return fts5IndexReturn(p); } /* ** Open a new Fts5Index handle. If the bCreate argument is true, create @@ -4629,11 +4407,16 @@ if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){ memcpy(&buf.p[1], pToken, nToken); #ifdef SQLITE_DEBUG - if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ + /* If the QUERY_TEST_NOIDX flag was specified, then this must be a + ** prefix-query. Instead of using a prefix-index (if one exists), + ** evaluate the prefix query using the main FTS index. This is used + ** for internal sanity checking by the integrity-check in debug + ** mode only. */ + if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ assert( flags & FTS5INDEX_QUERY_PREFIX ); iIdx = 1+pConfig->nPrefix; }else #endif if( flags & FTS5INDEX_QUERY_PREFIX ){ @@ -4749,11 +4532,11 @@ ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; assert( pIter->pIndex->rc==SQLITE_OK ); *piRowid = pSeg->iRowid; *pn = pSeg->nPos; - if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){ + if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){ *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset]; }else{ fts5BufferZero(&pIter->poslist); fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist); *pp = pIter->poslist.p; @@ -4785,17 +4568,32 @@ fts5CloseReader(pIndex); } } /* -** Read the "averages" record into the buffer supplied as the second -** argument. Return SQLITE_OK if successful, or an SQLite error code -** if an error occurs. +** Read and decode the "averages" record from the database. +** +** Parameter anSize must point to an array of size nCol, where nCol is +** the number of user defined columns in the FTS table. */ -int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){ - assert( p->rc==SQLITE_OK ); - fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID); +int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ + int nCol = p->pConfig->nCol; + Fts5Data *pData; + + *pnRow = 0; + memset(anSize, 0, sizeof(i64) * nCol); + pData = fts5DataRead(p, FTS5_AVERAGES_ROWID); + if( p->rc==SQLITE_OK && pData->nn ){ + int i = 0; + int iCol; + i += fts5GetVarint(&pData->p[i], (u64*)pnRow); + for(iCol=0; inn && iColp[i], (u64*)&anSize[iCol]); + } + } + + fts5DataRelease(pData); return fts5IndexReturn(p); } /* ** Replace the current "averages" record with the contents of the buffer @@ -4991,22 +4789,29 @@ rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); } if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; /* If this is a prefix query, check that the results returned if the - ** the index is disabled are the same. In both ASC and DESC order. */ - if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; - ck2 = 0; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; - } - if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; - ck2 = 0; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + ** the index is disabled are the same. In both ASC and DESC order. + ** + ** This check may only be performed if the hash table is empty. This + ** is because the hash table only supports a single scan query at + ** a time, and the multi-iter loop from which this function is called + ** is already performing such a scan. */ + if( p->nPendingData==0 ){ + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; + ck2 = 0; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; + ck2 = 0; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } } cksum3 ^= ck1; fts5BufferSet(&rc, pPrev, n, (const u8*)z); @@ -5041,18 +4846,67 @@ int i; /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){ - Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, i)); + Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i)); if( pLeaf ){ - if( 0!=fts5GetU16(&pLeaf->p[2]) ) p->rc = FTS5_CORRUPT; - if( i>=iNoRowid && 0!=fts5GetU16(&pLeaf->p[0]) ) p->rc = FTS5_CORRUPT; + if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT; + if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT; } fts5DataRelease(pLeaf); - if( p->rc ) break; + } +} + +static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ + int iTermOff = 0; + int ii; + + Fts5Buffer buf1 = {0,0,0}; + Fts5Buffer buf2 = {0,0,0}; + + ii = pLeaf->szLeaf; + while( iinn && p->rc==SQLITE_OK ){ + int res; + int iOff; + int nIncr; + + ii += fts5GetVarint32(&pLeaf->p[ii], nIncr); + iTermOff += nIncr; + iOff = iTermOff; + + if( iOff>=pLeaf->szLeaf ){ + p->rc = FTS5_CORRUPT; + }else if( iTermOff==nIncr ){ + int nByte; + iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); + if( (iOff+nByte)>pLeaf->szLeaf ){ + p->rc = FTS5_CORRUPT; + }else{ + fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); + } + }else{ + int nKeep, nByte; + iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep); + iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); + if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){ + p->rc = FTS5_CORRUPT; + }else{ + buf1.n = nKeep; + fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); + } + + if( p->rc==SQLITE_OK ){ + res = fts5BufferCompare(&buf1, &buf2); + if( res<=0 ) p->rc = FTS5_CORRUPT; + } + } + fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p); } + + fts5BufferFree(&buf1); + fts5BufferFree(&buf2); } static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ Fts5StructureSegment *pSeg /* Segment to check internal consistency */ @@ -5064,53 +4918,55 @@ int iDlidxPrevLeaf = pSeg->pgnoLast; if( pSeg->pgnoFirst==0 ) return; fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf( - "SELECT segid, term, (pgno>>1), (pgno & 1) FROM '%q'.'%q_idx' WHERE segid=%d", + "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d", pConfig->zDb, pConfig->zName, pSeg->iSegid )); /* Iterate through the b-tree hierarchy. */ while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ i64 iRow; /* Rowid for this leaf */ Fts5Data *pLeaf; /* Data for this leaf */ - int iOff; /* Offset of first term on leaf */ int nIdxTerm = sqlite3_column_bytes(pStmt, 1); const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1); int iIdxLeaf = sqlite3_column_int(pStmt, 2); int bIdxDlidx = sqlite3_column_int(pStmt, 3); /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ if( iIdxLeafpgnoFirst ) continue; - iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iIdxLeaf); + iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf); pLeaf = fts5DataRead(p, iRow); if( pLeaf==0 ) break; /* Check that the leaf contains at least one term, and that it is equal ** to or larger than the split-key in zIdxTerm. Also check that if there ** is also a rowid pointer within the leaf page header, it points to a ** location before the term. */ - iOff = fts5GetU16(&pLeaf->p[2]); - if( iOff==0 ){ + if( pLeaf->nn<=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ - int iRowidOff; + int iOff; /* Offset of first term on leaf */ + int iRowidOff; /* Offset of first rowid on leaf */ int nTerm; /* Size of term on leaf in bytes */ int res; /* Comparison of term and split-key */ - iRowidOff = fts5GetU16(&pLeaf->p[0]); + iOff = fts5LeafFirstTermOff(pLeaf); + iRowidOff = fts5LeafFirstRowidOff(pLeaf); if( iRowidOff>=iOff ){ p->rc = FTS5_CORRUPT; }else{ iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm)); if( res==0 ) res = nTerm - nIdxTerm; if( res<0 ) p->rc = FTS5_CORRUPT; } + + fts5IntegrityCheckPgidx(p, pLeaf); } fts5DataRelease(pLeaf); if( p->rc ) break; @@ -5134,27 +4990,28 @@ fts5DlidxIterNext(p, pDlidx) ){ /* Check any rowid-less pages that occur before the current leaf. */ for(iPg=iPrevLeaf+1; iPgp[0])!=0 ) p->rc = FTS5_CORRUPT; + if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } } iPrevLeaf = fts5DlidxIterPgno(pDlidx); /* Check that the leaf page indicated by the iterator really does ** contain the rowid suggested by the same. */ - iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf); + iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ i64 iRowid; - int iRowidOff = fts5GetU16(&pLeaf->p[0]); - if( iRowidOff>=pLeaf->n ){ + int iRowidOff = fts5LeafFirstRowidOff(pLeaf); + ASSERT_SZLEAF_OK(pLeaf); + if( iRowidOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; } @@ -5325,17 +5182,17 @@ int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); if( iSegid==0 ){ if( iKey==FTS5_AVERAGES_ROWID ){ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} "); }else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)"); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}"); } } else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)", + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}", bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno ); } } @@ -5351,13 +5208,12 @@ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg ); for(iSeg=0; iSegnSeg; iSeg++){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, - " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, - pSeg->pgnoFirst, pSeg->pgnoLast + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}", + pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast ); } sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); } } @@ -5411,26 +5267,27 @@ ** pBuf. ** ** The return value is the number of bytes read from the input buffer. */ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ - i64 iDocid; + i64 iDocid = 0; int iOff = 0; - iOff = sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDocid); - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid); + if( n>0 ){ + iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); + } while( iOff=4 ){ - iRowidOff = fts5GetU16(&a[0]); - iTermOff = fts5GetU16(&a[2]); - }else{ - sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt"); - goto decode_out; - } - - if( iRowidOff ){ - iOff = iRowidOff; - }else if( iTermOff ){ - iOff = iTermOff; - }else{ - iOff = n; - } - fts5DecodePoslist(&rc, &s, &a[4], iOff-4); - - assert( iRowidOff==0 || iOff==iRowidOff ); - if( iRowidOff ){ - iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); - } - - assert( iTermOff==0 || iOff==iTermOff ); - while( iOffpStorage, nMerge); }else if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); +#ifdef SQLITE_DEBUG + }else if( 0==sqlite3_stricmp("prefix-index", z) ){ + pConfig->bPrefixIndex = sqlite3_value_int(pVal); +#endif }else{ rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex); if( rc==SQLITE_OK ){ rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError); } @@ -1496,15 +1500,17 @@ static int fts5ApiTokenize( Fts5Context *pCtx, const char *pText, int nText, void *pUserData, - int (*xToken)(void*, const char*, int, int, int) + int (*xToken)(void*, int, const char*, int, int, int) ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); - return sqlite3Fts5Tokenize(pTab->pConfig, pText, nText, pUserData, xToken); + return sqlite3Fts5Tokenize( + pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken + ); } static int fts5ApiPhraseCount(Fts5Context *pCtx){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); @@ -1653,17 +1659,20 @@ return rc; } static int fts5ColumnSizeCb( void *pContext, /* Pointer to int */ + int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ int *pCnt = (int*)pContext; - *pCnt = *pCnt + 1; + if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){ + (*pCnt)++; + } return SQLITE_OK; } static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; @@ -1689,11 +1698,13 @@ const char *z; int n; void *p = (void*)(&pCsr->aColumnSize[i]); pCsr->aColumnSize[i] = 0; rc = fts5ApiColumnText(pCtx, i, &z, &n); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5Tokenize(pConfig, z, n, p, fts5ColumnSizeCb); + rc = sqlite3Fts5Tokenize( + pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb + ); } } } } CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE); @@ -1851,11 +1862,11 @@ Fts5Config *pConf = pTab->pConfig; pNew->ePlan = FTS5_PLAN_MATCH; pNew->iFirstRowid = SMALLEST_INT64; pNew->iLastRowid = LARGEST_INT64; pNew->base.pVtab = (sqlite3_vtab*)pTab; - rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr); + rc = sqlite3Fts5ExprClonePhrase(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr); } if( rc==SQLITE_OK ){ for(rc = fts5CursorFirst(pTab, pNew, 0); rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0; @@ -2342,11 +2353,11 @@ rc = SQLITE_NOMEM; }else{ void *p = (void*)pGlobal; memset(pGlobal, 0, sizeof(Fts5Global)); pGlobal->db = db; - pGlobal->api.iVersion = 1; + pGlobal->api.iVersion = 2; pGlobal->api.xCreateFunction = fts5CreateAux; pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; pGlobal->api.xFindTokenizer = fts5FindTokenizer; rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); Index: ext/fts5/fts5_storage.c ================================================================== --- ext/fts5/fts5_storage.c +++ ext/fts5/fts5_storage.c @@ -357,19 +357,22 @@ /* ** Tokenization callback used when inserting tokens into the FTS index. */ static int fts5StorageInsertCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ + int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; - int iPos = pCtx->szCol++; - return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken); + if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ + pCtx->szCol++; + } + return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); } /* ** If a row with rowid iDel is present in the %_content table, add the ** delete-markers to the FTS index necessary to delete it. Do not actually @@ -392,10 +395,11 @@ rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol-1] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pSeek, iCol), sqlite3_column_bytes(pSeek, iCol), (void*)&ctx, fts5StorageInsertCallback ); @@ -449,26 +453,11 @@ ** occurs. */ static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ int rc = SQLITE_OK; if( p->bTotalsValid==0 ){ - int nCol = p->pConfig->nCol; - Fts5Buffer buf; - memset(&buf, 0, sizeof(buf)); - - memset(p->aTotalSize, 0, sizeof(i64) * nCol); - p->nTotalRow = 0; - rc = sqlite3Fts5IndexGetAverages(p->pIndex, &buf); - if( rc==SQLITE_OK && buf.n ){ - int i = 0; - int iCol; - i += fts5GetVarint(&buf.p[i], (u64*)&p->nTotalRow); - for(iCol=0; iaTotalSize[iCol]); - } - } - sqlite3_free(buf.p); + rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize); p->bTotalsValid = bCache; } return rc; } @@ -563,10 +552,11 @@ rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iColnCol; iCol++){ if( pConfig->abUnindexed[iCol] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_value_text(apVal[iCol]), sqlite3_value_bytes(apVal[iCol]), (void*)&ctx, fts5StorageInsertCallback ); @@ -652,10 +642,11 @@ rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid); for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pScan, ctx.iCol+1), sqlite3_column_bytes(pScan, ctx.iCol+1), (void*)&ctx, fts5StorageInsertCallback ); @@ -769,10 +760,11 @@ } for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), sqlite3_value_bytes(apVal[ctx.iCol+2]), (void*)&ctx, fts5StorageInsertCallback ); @@ -836,19 +828,22 @@ /* ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ + int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; - int iPos = pCtx->szCol++; + if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ + pCtx->szCol++; + } pCtx->cksum ^= sqlite3Fts5IndexCksum( - pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken + pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken ); return SQLITE_OK; } /* @@ -879,23 +874,27 @@ int rc2; while( SQLITE_ROW==sqlite3_step(pScan) ){ int i; ctx.iRowid = sqlite3_column_int64(pScan, 0); ctx.szCol = 0; - rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); + if( pConfig->bColumnsize ){ + rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); + } for(i=0; rc==SQLITE_OK && inCol; i++){ if( pConfig->abUnindexed[i] ) continue; ctx.iCol = i; ctx.szCol = 0; - rc = sqlite3Fts5Tokenize( - pConfig, + rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, (const char*)sqlite3_column_text(pScan, i+1), sqlite3_column_bytes(pScan, i+1), (void*)&ctx, fts5StorageIntegrityCallback ); - if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT; + if( pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){ + rc = FTS5_CORRUPT; + } aTotalSize[i] += ctx.szCol; } if( rc!=SQLITE_OK ) break; } rc2 = sqlite3_reset(pScan); @@ -916,11 +915,11 @@ if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ i64 nRow; rc = fts5StorageCount(p, "content", &nRow); if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } - if( rc==SQLITE_OK ){ + if( rc==SQLITE_OK && pConfig->bColumnsize ){ i64 nRow; rc = fts5StorageCount(p, "docsize", &nRow); if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } @@ -1000,13 +999,16 @@ ** ** An SQLite error code is returned if an error occurs, or SQLITE_OK ** otherwise. */ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ - int nCol = p->pConfig->nCol; - sqlite3_stmt *pLookup = 0; - int rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0); + int nCol = p->pConfig->nCol; /* Number of user columns in table */ + sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */ + int rc; /* Return Code */ + + assert( p->pConfig->bColumnsize ); + rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0); if( rc==SQLITE_OK ){ int bCorrupt = 1; sqlite3_bind_int64(pLookup, 1, iRowid); if( SQLITE_ROW==sqlite3_step(pLookup) ){ const u8 *aBlob = sqlite3_column_blob(pLookup, 0); Index: ext/fts5/fts5_tcl.c ================================================================== --- ext/fts5/fts5_tcl.c +++ ext/fts5/fts5_tcl.c @@ -139,10 +139,11 @@ Tcl_Obj *pObj; }; static int xTokenizeCb( void *pCtx, + int tflags, const char *zToken, int nToken, int iStart, int iEnd ){ F5tFunction *p = (F5tFunction*)pCtx; Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript); @@ -582,10 +583,11 @@ const char *zInput; }; static int xTokenizeCb2( void *pCtx, + int tflags, const char *zToken, int nToken, int iStart, int iEnd ){ F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx; if( p->bSubst ){ @@ -664,11 +666,13 @@ pRet = Tcl_NewObj(); Tcl_IncrRefCount(pRet); ctx.bSubst = (objc==5); ctx.pRet = pRet; ctx.zInput = zText; - rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2); + rc = tokenizer.xTokenize( + pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, zText, nText, xTokenizeCb2 + ); tokenizer.xDelete(pTok); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0); Tcl_DecrRefCount(pRet); return TCL_ERROR; @@ -686,19 +690,25 @@ */ typedef struct F5tTokenizerContext F5tTokenizerContext; typedef struct F5tTokenizerCb F5tTokenizerCb; typedef struct F5tTokenizerModule F5tTokenizerModule; -typedef struct F5tTokenizerModule F5tTokenizerInstance; +typedef struct F5tTokenizerInstance F5tTokenizerInstance; struct F5tTokenizerContext { void *pCtx; - int (*xToken)(void*, const char*, int, int, int); + int (*xToken)(void*, int, const char*, int, int, int); }; struct F5tTokenizerModule { Tcl_Interp *interp; + Tcl_Obj *pScript; + F5tTokenizerContext *pContext; +}; + +struct F5tTokenizerInstance { + Tcl_Interp *interp; Tcl_Obj *pScript; F5tTokenizerContext *pContext; }; static int f5tTokenizerCreate( @@ -746,39 +756,66 @@ } static int f5tTokenizerTokenize( Fts5Tokenizer *p, void *pCtx, + int flags, const char *pText, int nText, - int (*xToken)(void*, const char*, int, int, int) + int (*xToken)(void*, int, const char*, int, int, int) ){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; void *pOldCtx; - int (*xOldToken)(void*, const char*, int, int, int); + int (*xOldToken)(void*, int, const char*, int, int, int); Tcl_Obj *pEval; int rc; + const char *zFlags; pOldCtx = pInst->pContext->pCtx; xOldToken = pInst->pContext->xToken; + pInst->pContext->pCtx = pCtx; + pInst->pContext->xToken = xToken; + + assert( + flags==FTS5_TOKENIZE_DOCUMENT + || flags==FTS5_TOKENIZE_AUX + || flags==FTS5_TOKENIZE_QUERY + || flags==(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX) + ); pEval = Tcl_DuplicateObj(pInst->pScript); Tcl_IncrRefCount(pEval); - rc = Tcl_ListObjAppendElement( - pInst->interp, pEval, Tcl_NewStringObj(pText, nText) - ); - if( rc==TCL_OK ){ - rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY); + switch( flags ){ + case FTS5_TOKENIZE_DOCUMENT: + zFlags = "document"; + break; + case FTS5_TOKENIZE_AUX: + zFlags = "aux"; + break; + case FTS5_TOKENIZE_QUERY: + zFlags = "query"; + break; + case (FTS5_TOKENIZE_PREFIX | FTS5_TOKENIZE_QUERY): + zFlags = "prefixquery"; + break; + default: + assert( 0 ); + zFlags = "invalid"; + break; } + + Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(zFlags, -1)); + Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(pText,nText)); + rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY); Tcl_DecrRefCount(pEval); pInst->pContext->pCtx = pOldCtx; pInst->pContext->xToken = xOldToken; return rc; } /* -** sqlite3_fts5_token TEXT START END POS +** sqlite3_fts5_token ?-colocated? TEXT START END */ static int f5tTokenizerReturn( void * clientData, Tcl_Interp *interp, int objc, @@ -786,35 +823,47 @@ ){ F5tTokenizerContext *p = (F5tTokenizerContext*)clientData; int iStart; int iEnd; int nToken; + int tflags = 0; char *zToken; int rc; - assert( p ); - if( objc!=4 ){ - Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END"); + if( objc==5 ){ + int nArg; + char *zArg = Tcl_GetStringFromObj(objv[1], &nArg); + if( nArg<=10 && nArg>=2 && memcmp("-colocated", zArg, nArg)==0 ){ + tflags |= FTS5_TOKEN_COLOCATED; + }else{ + goto usage; + } + }else if( objc!=4 ){ + goto usage; + } + + zToken = Tcl_GetStringFromObj(objv[objc-3], &nToken); + if( Tcl_GetIntFromObj(interp, objv[objc-2], &iStart) + || Tcl_GetIntFromObj(interp, objv[objc-1], &iEnd) + ){ return TCL_ERROR; } + if( p->xToken==0 ){ Tcl_AppendResult(interp, "sqlite3_fts5_token may only be used by tokenizer callback", 0 ); return TCL_ERROR; } - zToken = Tcl_GetStringFromObj(objv[1], &nToken); - if( Tcl_GetIntFromObj(interp, objv[2], &iStart) - || Tcl_GetIntFromObj(interp, objv[3], &iEnd) - ){ - return TCL_ERROR; - } - - rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd); + rc = p->xToken(p->pCtx, tflags, zToken, nToken, iStart, iEnd); Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE); return TCL_OK; + + usage: + Tcl_WrongNumArgs(interp, 1, objv, "?-colocated? TEXT START END"); + return TCL_ERROR; } static void f5tDelTokenizer(void *pCtx){ F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx; Tcl_DecrRefCount(pMod->pScript); Index: ext/fts5/fts5_test_mi.c ================================================================== --- ext/fts5/fts5_test_mi.c +++ ext/fts5/fts5_test_mi.c @@ -350,11 +350,11 @@ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ const char *zArg; Fts5MatchinfoCtx *p; - int rc; + int rc = SQLITE_OK; if( nVal>0 ){ zArg = (const char*)sqlite3_value_text(apVal[0]); }else{ zArg = "pcx"; @@ -361,15 +361,20 @@ } p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0); if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){ p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg); - pApi->xSetAuxdata(pFts, p, sqlite3_free); - if( p==0 ) return; + if( p==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); + } } - rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb); + if( rc==SQLITE_OK ){ + rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb); + } if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); }else{ /* No errors has occured, so return a copy of the array of integers. */ int nByte = p->nRet * sizeof(u32); Index: ext/fts5/fts5_tokenize.c ================================================================== --- ext/fts5/fts5_tokenize.c +++ ext/fts5/fts5_tokenize.c @@ -114,12 +114,13 @@ ** Tokenize some text using the ascii tokenizer. */ static int fts5AsciiTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, + int flags, const char *pText, int nText, - int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) + int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) ){ AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer; int rc = SQLITE_OK; int ie; int is = 0; @@ -156,11 +157,11 @@ nFold = nByte*2; } asciiFold(pFold, &pText[is], nByte); /* Invoke the token callback */ - rc = xToken(pCtx, pFold, nByte, is, ie); + rc = xToken(pCtx, 0, pFold, nByte, is, ie); is = ie+1; } if( pFold!=aFold ) sqlite3_free(pFold); if( rc==SQLITE_DONE ) rc = SQLITE_OK; @@ -383,12 +384,13 @@ } static int fts5UnicodeTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, + int flags, const char *pText, int nText, - int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) + int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) ){ Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; int rc = SQLITE_OK; unsigned char *a = p->aTokenChar; @@ -473,11 +475,11 @@ } ie = zCsr - (unsigned char*)pText; } /* Invoke the token callback */ - rc = xToken(pCtx, aFold, zOut-aFold, is, ie); + rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie); } tokenize_done: if( rc==SQLITE_DONE ) rc = SQLITE_OK; return rc; @@ -551,11 +553,11 @@ } typedef struct PorterContext PorterContext; struct PorterContext { void *pCtx; - int (*xToken)(void*, const char*, int, int, int); + int (*xToken)(void*, int, const char*, int, int, int); char *aBuf; }; typedef struct PorterRule PorterRule; struct PorterRule { @@ -1116,10 +1118,11 @@ } } static int fts5PorterCb( void *pCtx, + int tflags, const char *pToken, int nToken, int iStart, int iEnd ){ @@ -1173,32 +1176,33 @@ && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) ){ nBuf--; } - return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd); + return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd); pass_through: - return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd); + return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd); } /* ** Tokenize using the porter tokenizer. */ static int fts5PorterTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, + int flags, const char *pText, int nText, - int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) + int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) ){ PorterTokenizer *p = (PorterTokenizer*)pTokenizer; PorterContext sCtx; sCtx.xToken = xToken; sCtx.pCtx = pCtx; sCtx.aBuf = p->aBuf; return p->tokenizer.xTokenize( - p->pTokenizer, (void*)&sCtx, pText, nText, fts5PorterCb + p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb ); } /* ** Register all built-in tokenizers with FTS5. @@ -1223,9 +1227,9 @@ &aBuiltin[i].x, 0 ); } - return SQLITE_OK; + return rc; } Index: ext/fts5/test/fts5_common.tcl ================================================================== --- ext/fts5/test/fts5_common.tcl +++ ext/fts5/test/fts5_common.tcl @@ -292,6 +292,39 @@ } proc NOT {a b} { if {[llength $b]>0} { return [list] } return $a } + +#------------------------------------------------------------------------- +# This command is similar to [split], except that it also provides the +# start and end offsets of each token. For example: +# +# [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8} +# + +proc gobble_whitespace {textvar} { + upvar $textvar t + regexp {([ ]*)(.*)} $t -> space t + return [string length $space] +} + +proc gobble_text {textvar wordvar} { + upvar $textvar t + upvar $wordvar w + regexp {([^ ]*)(.*)} $t -> w t + return [string length $w] +} + +proc fts5_tokenize_split {text} { + set token "" + set ret [list] + set iOff [gobble_whitespace text] + while {[set nToken [gobble_text text word]]} { + lappend ret $word $iOff [expr $iOff+$nToken] + incr iOff $nToken + incr iOff [gobble_whitespace text] + } + + set ret +} Index: ext/fts5/test/fts5aa.test ================================================================== --- ext/fts5/test/fts5aa.test +++ ext/fts5/test/fts5aa.test @@ -49,11 +49,11 @@ INSERT INTO t1 VALUES('a b c', 'd e f'); } do_test 2.2 { execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } -} {/{\(structure\) {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/} +} {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* leaves=1..1}}}/} foreach w {a b c d e f} { do_execsql_test 2.3.$w.asc { SELECT rowid FROM t1 WHERE t1 MATCH $w; } {1} @@ -137,11 +137,10 @@ if {[set_test_counter errors]} break } #------------------------------------------------------------------------- # -breakpoint reset_db do_execsql_test 6.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } @@ -199,10 +198,11 @@ set rowid [expr int(rand() * 100)] execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} + if {[set_test_counter errors]} break } #------------------------------------------------------------------------- # reset_db @@ -341,11 +341,11 @@ do_execsql_test 13.5 { SELECT rowid FROM t1 WHERE t1 MATCH 'o'; } {1} do_execsql_test 13.6 { - SELECT rowid FROM t1 WHERE t1 MATCH '.'; + SELECT rowid FROM t1 WHERE t1 MATCH '""'; } {} #------------------------------------------------------------------------- # reset_db @@ -503,9 +503,39 @@ SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c } {1 1} do_execsql_test 18.3 { SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c } {1 1} + +#-------------------------------------------------------------------- +# fts5 table in the temp schema. +# +reset_db +do_execsql_test 19.0 { + CREATE VIRTUAL TABLE temp.t1 USING fts5(x); + INSERT INTO t1 VALUES('x y z'); + INSERT INTO t1 VALUES('w x 1'); + SELECT rowid FROM t1 WHERE t1 MATCH 'x'; +} {1 2} + +#-------------------------------------------------------------------- +# Test that 6 and 7 byte varints can be read. +# +reset_db +do_execsql_test 20.0 { + CREATE VIRTUAL TABLE temp.tmp USING fts5(x); +} +set ::ids [list \ + 0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43] +] +do_test 20.1 { + foreach id $::ids { + execsql { INSERT INTO tmp(rowid, x) VALUES($id, 'x y z') } + } + execsql { SELECT rowid FROM tmp WHERE tmp MATCH 'y' } +} $::ids + + finish_test Index: ext/fts5/test/fts5ad.test ================================================================== --- ext/fts5/test/fts5ad.test +++ ext/fts5/test/fts5ad.test @@ -203,10 +203,13 @@ if {$bMatch} { lappend ret $rowid } } return $ret } + do_execsql_test $T.integrity { + INSERT INTO t1(t1) VALUES('integrity-check'); + } foreach {bAsc sql} { 1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix} 0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC} } { Index: ext/fts5/test/fts5ah.test ================================================================== --- ext/fts5/test/fts5ah.test +++ ext/fts5/test/fts5ah.test @@ -88,17 +88,17 @@ 4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' } [list $Y] " { do_test 1.6.$tn.1 { set n [execsql_reads $q] - puts -nonewline "(n=$n nReadX=$nReadX)" + #puts -nonewline "(n=$n nReadX=$nReadX)" expr {$n < ($nReadX / 8)} } {1} do_test 1.6.$tn.2 { set n [execsql_reads "$q ORDER BY rowid DESC"] - puts -nonewline "(n=$n nReadX=$nReadX)" + #puts -nonewline "(n=$n nReadX=$nReadX)" expr {$n < ($nReadX / 8)} } {1} do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res] do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid DESC" [lsort -int -decr $res] Index: ext/fts5/test/fts5al.test ================================================================== --- ext/fts5/test/fts5al.test +++ ext/fts5/test/fts5al.test @@ -24,21 +24,21 @@ } do_execsql_test 1.1 { CREATE VIRTUAL TABLE ft1 USING fts5(x); SELECT * FROM ft1_config; -} {version 3} +} {version 4} do_execsql_test 1.2 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32); SELECT * FROM ft1_config; -} {pgsz 32 version 3} +} {pgsz 32 version 4} do_execsql_test 1.3 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64); SELECT * FROM ft1_config; -} {pgsz 64 version 3} +} {pgsz 64 version 4} #-------------------------------------------------------------------------- # Test the logic for parsing the rank() function definition. # foreach {tn defn} { Index: ext/fts5/test/fts5columnsize.test ================================================================== --- ext/fts5/test/fts5columnsize.test +++ ext/fts5/test/fts5columnsize.test @@ -132,7 +132,20 @@ SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a' } { 1 {-1 0 -1} 2 {-1 0 -1} } +#------------------------------------------------------------------------- +# Test the integrity-check +# +do_execsql_test 4.1.1 { + CREATE VIRTUAL TABLE t5 USING fts5(x, columnsize=0); + INSERT INTO t5 VALUES('1 2 3 4'); + INSERT INTO t5 VALUES('2 4 6 8'); +} + +breakpoint +do_execsql_test 4.1.2 { + INSERT INTO t5(t5) VALUES('integrity-check'); +} finish_test Index: ext/fts5/test/fts5corrupt.test ================================================================== --- ext/fts5/test/fts5corrupt.test +++ ext/fts5/test/fts5corrupt.test @@ -41,20 +41,20 @@ do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') } set segid [lindex [fts5_level_segids t1] 0] do_test 1.3 { execsql { - DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 0, 4); + DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 4); } catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {1 {database disk image is malformed}} do_test 1.4 { db_restore_and_reopen execsql { UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE - rowid = fts5_rowid('segment', $segid, 0, 4); + rowid = fts5_rowid('segment', $segid, 4); } catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {1 {database disk image is malformed}} db_restore_and_reopen Index: ext/fts5/test/fts5corrupt2.test ================================================================== --- ext/fts5/test/fts5corrupt2.test +++ ext/fts5/test/fts5corrupt2.test @@ -207,17 +207,17 @@ } {1} execsql ROLLBACK } - do_test 4.$tn.x { expr $nCorrupt>0 } 1 + # do_test 4.$tn.x { expr $nCorrupt>0 } 1 } } set doc [string repeat "A B C " 1000] -do_execsql_test 4.0 { +do_execsql_test 5.0 { CREATE VIRTUAL TABLE x5 USING fts5(tt); INSERT INTO x5(x5, rank) VALUES('pgsz', 32); WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) INSERT INTO x5 SELECT $doc FROM ii; } @@ -228,11 +228,11 @@ set tn2 0 set nCorrupt 0 foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] { if {$rowid & $mask} continue incr tn2 - do_test 4.$tn.$tn2 { + do_test 5.$tn.$tn2 { execsql BEGIN set fd [db incrblob main x5_data block $rowid] fconfigure $fd -encoding binary -translation binary puts -nonewline $fd $hdr @@ -246,11 +246,11 @@ } } #-------------------------------------------------------------------- reset_db -do_execsql_test 5.1 { +do_execsql_test 6.1 { CREATE VIRTUAL TABLE x5 USING fts5(tt); INSERT INTO x5 VALUES('a'); INSERT INTO x5 VALUES('a a'); INSERT INTO x5 VALUES('a a a'); INSERT INTO x5 VALUES('a a a a'); @@ -260,13 +260,13 @@ proc colsize {cmd i} { $cmd xColumnSize $i } sqlite3_fts5_create_function db colsize colsize -do_catchsql_test 5.2 { +do_catchsql_test 6.2 { SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a' } {1 SQLITE_CORRUPT_VTAB} sqlite3_fts5_may_be_corrupt 0 finish_test Index: ext/fts5/test/fts5corrupt3.test ================================================================== --- ext/fts5/test/fts5corrupt3.test +++ ext/fts5/test/fts5corrupt3.test @@ -21,23 +21,28 @@ ifcapable !fts5 { finish_test return } sqlite3_fts5_may_be_corrupt 1 + +proc create_t1 {} { + expr srand(0) + db func rnddoc fts5_rnddoc + db eval { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 64); + WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) + INSERT INTO t1 SELECT rnddoc(10) FROM ii; + } +} + +if 1 { # Create a simple FTS5 table containing 100 documents. Each document # contains 10 terms, each of which start with the character "x". # -expr srand(0) -db func rnddoc fts5_rnddoc -do_execsql_test 1.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x); - INSERT INTO t1(t1, rank) VALUES('pgsz', 64); - WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) - INSERT INTO t1 SELECT rnddoc(10) FROM ii; -} -set mask [expr 31 << 31] +do_test 1.0 { create_t1 } {} do_test 1.1 { # Pick out the rowid of the right-most b-tree leaf in the new segment. set rowid [db one { SELECT max(rowid) FROM t1_data WHERE ((rowid>>31) & 0x0F)==1 @@ -73,8 +78,263 @@ do_execsql_test 2.2 { INSERT INTO t2 VALUES(rnddoc(10)); SELECT length(block) FROM t2_data WHERE id=1; } {2} + +#------------------------------------------------------------------------- +# Test that missing leaf pages are recognized as corruption. +# +reset_db +do_test 3.0 { create_t1 } {} + +do_execsql_test 3.1 { + SELECT count(*) FROM t1_data; +} {105} + +proc do_3_test {tn} { + set i 0 + foreach ::rowid [db eval "SELECT rowid FROM t1_data WHERE rowid>100"] { + incr i + do_test $tn.$i { + db eval BEGIN + db eval {DELETE FROM t1_data WHERE rowid = $::rowid} + list [ + catch { db eval {SELECT rowid FROM t1 WHERE t1 MATCH 'x*'} } msg + ] $msg + } {1 {database disk image is malformed}} + catch { db eval ROLLBACK } + } +} + +do_3_test 3.2 + +do_execsql_test 3.3 { + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + INSERT INTO t1 SELECT x FROM t1; + INSERT INTO t1(t1) VALUES('optimize'); +} {} + +do_3_test 3.4 + +do_test 3.5 { + execsql { + DELETE FROM t1; + INSERT INTO t1(t1, rank) VALUES('pgsz', 40); + } + for {set i 0} {$i < 1000} {incr i} { + set rnd [expr int(rand() * 1000)] + set doc [string repeat "x$rnd " [expr int(rand() * 3) + 1]] + execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) } + } +} {} + +do_3_test 3.6 + +do_test 3.7 { + execsql { + INSERT INTO t1(t1, rank) VALUES('pgsz', 40); + INSERT INTO t1 SELECT x FROM t1; + INSERT INTO t1(t1) VALUES('optimize'); + } +} {} + +do_3_test 3.8 + +do_test 3.9 { + execsql { + DELETE FROM t1; + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + } + for {set i 0} {$i < 100} {incr i} { + set rnd [expr int(rand() * 100)] + set doc "x[string repeat $rnd 20]" + execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) } + } +} {} + +do_3_test 3.10 + +#------------------------------------------------------------------------- +# Test that segments that end unexpectedly are identified as corruption. +# +reset_db +do_test 4.0 { + execsql { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + } + for {set i 0} {$i < 100} {incr i} { + set rnd [expr int(rand() * 100)] + set doc "x[string repeat $rnd 20]" + execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) } + } + execsql { INSERT INTO t1(t1) VALUES('optimize') } +} {} + +set nErr 0 +for {set i 1} {1} {incr i} { + set struct [db one {SELECT block FROM t1_data WHERE id=10}] + binary scan $struct c* var + set end [lindex $var end] + if {$end<=$i} break + lset var end [expr $end - $i] + set struct [binary format c* $var] + db eval { + BEGIN; + UPDATE t1_data SET block = $struct WHERE id=10; + } + do_test 4.1.$i { + incr nErr [catch { db eval { SELECT rowid FROM t1 WHERE t1 MATCH 'x*' } }] + set {} {} + } {} + catch { db eval ROLLBACK } +} +do_test 4.1.x { expr $nErr>45 } 1 + +#------------------------------------------------------------------------- +# + +# The first argument passed to this command must be a binary blob +# containing an FTS5 leaf page. This command returns a copy of this +# blob, with the pgidx of the leaf page replaced by a single varint +# containing value $iVal. +# +proc rewrite_pgidx {blob iVal} { + binary scan $blob SS off1 szLeaf + if {$iVal<0 || $iVal>=128} { + error "$iVal out of range!" + } else { + set pgidx [binary format c $iVal] + } + + binary format a${szLeaf}a* $blob $pgidx +} + +reset_db +do_execsql_test 5.1 { + CREATE VIRTUAL TABLE x1 USING fts5(x); + INSERT INTO x1(x1, rank) VALUES('pgsz', 40); + BEGIN; + INSERT INTO x1 VALUES('xaaa xabb xccc xcdd xeee xeff xggg xghh xiii xijj'); + INSERT INTO x1 SELECT x FROM x1; + INSERT INTO x1 SELECT x FROM x1; + INSERT INTO x1 SELECT x FROM x1; + INSERT INTO x1 SELECT x FROM x1; + INSERT INTO x1(x1) VALUES('optimize'); + COMMIT; +} + +#db eval { SELECT fts5_decode(id, block) b from x1_data } { puts $b } +# +db func rewrite_pgidx rewrite_pgidx +set i 0 +foreach rowid [db eval {SELECT rowid FROM x1_data WHERE rowid>100}] { + foreach val {2 100} { + do_test 5.2.$val.[incr i] { + catchsql { + BEGIN; + UPDATE x1_data SET block=rewrite_pgidx(block, $val) WHERE id=$rowid; + SELECT rowid FROM x1 WHERE x1 MATCH 'xa*'; + SELECT rowid FROM x1 WHERE x1 MATCH 'xb*'; + SELECT rowid FROM x1 WHERE x1 MATCH 'xc*'; + SELECT rowid FROM x1 WHERE x1 MATCH 'xd*'; + SELECT rowid FROM x1 WHERE x1 MATCH 'xe*'; + SELECT rowid FROM x1 WHERE x1 MATCH 'xf*'; + SELECT rowid FROM x1 WHERE x1 MATCH 'xg*'; + SELECT rowid FROM x1 WHERE x1 MATCH 'xh*'; + SELECT rowid FROM x1 WHERE x1 MATCH 'xi*'; + } + set {} {} + } {} + catch { db eval ROLLBACK } + } +} + +} + +#------------------------------------------------------------------------ +# +reset_db +do_execsql_test 6.1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a); + INSERT INTO t1 VALUES('bbbbb ccccc'); + SELECT quote(block) FROM t1_data WHERE rowid>100; +} {X'000000180630626262626201020201056363636363010203040A'} +do_execsql_test 6.1.1 { + UPDATE t1_data SET block = + X'000000180630626262626201020201056161616161010203040A' + WHERE rowid>100; +} +do_catchsql_test 6.1.2 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {1 {database disk image is malformed}} + +#------- +reset_db +do_execsql_test 6.2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + INSERT INTO t1 VALUES('aa bb cc dd ee'); + SELECT pgno, quote(term) FROM t1_idx; +} {2 X'' 4 X'3064'} +do_execsql_test 6.2.1 { + UPDATE t1_idx SET term = X'3065' WHERE pgno=4; +} +do_catchsql_test 6.2.2 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {1 {database disk image is malformed}} + +#------- +reset_db +do_execsql_test 6.3.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a); + INSERT INTO t1 VALUES('abc abcdef abcdefghi'); + SELECT quote(block) FROM t1_data WHERE id>100; +} {X'0000001C043061626301020204036465660102030703676869010204040808'} +do_execsql_test 6.3.1 { + BEGIN; + UPDATE t1_data SET block = + X'0000001C043061626301020204036465660102035003676869010204040808' + ------------------------------------------^^--------------------- + WHERE id>100; +} +do_catchsql_test 6.3.2 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {1 {database disk image is malformed}} +do_execsql_test 6.3.3 { + ROLLBACK; + BEGIN; + UPDATE t1_data SET block = + X'0000001C043061626301020204036465660102030750676869010204040808' + --------------------------------------------^^------------------- + WHERE id>100; +} +do_catchsql_test 6.3.3 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {1 {database disk image is malformed}} +do_execsql_test 6.3.4 { + ROLLBACK; + BEGIN; + UPDATE t1_data SET block = + X'0000001C043061626301020204036465660102030707676869010204040850' + --------------------------------------------------------------^^- + WHERE id>100; +} +do_catchsql_test 6.3.5 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {1 {database disk image is malformed}} +do_execsql_test 6.3.6 { + ROLLBACK; + BEGIN; + UPDATE t1_data SET block = + X'0000001C503061626301020204036465660102030707676869010204040808' + ----------^^----------------------------------------------------- + WHERE id>100; +} +do_catchsql_test 6.3.5 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {1 {database disk image is malformed}} + sqlite3_fts5_may_be_corrupt 0 finish_test Index: ext/fts5/test/fts5dlidx.test ================================================================== --- ext/fts5/test/fts5dlidx.test +++ ext/fts5/test/fts5dlidx.test @@ -23,10 +23,12 @@ if { $tcl_platform(wordSize)<8 } { finish_test return } + +if 1 { proc do_fb_test {tn sql res} { set res2 [lsort -integer -decr $res] uplevel [list do_execsql_test $tn.1 $sql $res] uplevel [list do_execsql_test $tn.2 "$sql ORDER BY rowid DESC" $res2] @@ -125,8 +127,71 @@ SELECT rowid FROM t1 WHERE t1 MATCH 'b AND a' ORDER BY rowid DESC } {1} } do_dlidx_test2 2.1 [expr 20] [expr 1<<57] [expr (1<<57) + 128] + +} + +#-------------------------------------------------------------------- +# +reset_db + +set ::vocab [list \ + IteratorpItercurrentlypointstothefirstrowidofadoclist \ + Thereisadoclistindexassociatedwiththefinaltermonthecurrent \ + pageIfthecurrenttermisthelasttermonthepageloadthe \ + doclistindexfromdiskandinitializeaniteratoratpIterpDlidx \ + IteratorpItercurrentlypointstothefirstrowidofadoclist \ + Thereisadoclistindexassociatedwiththefinaltermonthecurrent \ + pageIfthecurrenttermisthelasttermonthepageloadthe \ + doclistindexfromdiskandinitializeaniteratoratpIterpDlidx \ +] +proc rnddoc {} { + global vocab + set nVocab [llength $vocab] + set ret [list] + for {set i 0} {$i < 64} {incr i} { + lappend ret [lindex $vocab [expr $i % $nVocab]] + } + set ret +} +db func rnddoc rnddoc + +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE abc USING fts5(a); + INSERT INTO abc(abc, rank) VALUES('pgsz', 32); + + INSERT INTO abc VALUES ( rnddoc() ); + INSERT INTO abc VALUES ( rnddoc() ); + INSERT INTO abc VALUES ( rnddoc() ); + INSERT INTO abc VALUES ( rnddoc() ); + + INSERT INTO abc SELECT rnddoc() FROM abc; + INSERT INTO abc SELECT rnddoc() FROM abc; +} + + + +do_execsql_test 3.2 { + SELECT rowid FROM abc WHERE abc + MATCH 'IteratorpItercurrentlypointstothefirstrowidofadoclist' + ORDER BY rowid DESC; +} {16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1} + +do_execsql_test 3.2 { + INSERT INTO abc(abc) VALUES('integrity-check'); + INSERT INTO abc(abc) VALUES('optimize'); + INSERT INTO abc(abc) VALUES('integrity-check'); +} + +set v [lindex $vocab 0] +set i 0 +foreach v $vocab { + do_execsql_test 3.3.[incr i] { + SELECT rowid FROM abc WHERE abc MATCH $v + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16} +} + finish_test Index: ext/fts5/test/fts5ea.test ================================================================== --- ext/fts5/test/fts5ea.test +++ ext/fts5/test/fts5ea.test @@ -85,9 +85,15 @@ # do_execsql_test 4.0 { SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"'); } {{"a" AND """"}} +#------------------------------------------------------------------------- +# Experiment with a tokenizer that considers " to be a token character. +# +do_catchsql_test 5.0 { + SELECT fts5_expr('abc | def'); +} {1 {fts5: syntax error near "|"}} finish_test Index: ext/fts5/test/fts5eb.test ================================================================== --- ext/fts5/test/fts5eb.test +++ ext/fts5/test/fts5eb.test @@ -28,22 +28,22 @@ set ::se_expr $expr do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res] } foreach {tn expr res} { - 1 {abc} {"abc"} - 2 {abc .} {"abc"} - 3 {.} {} - 4 {abc OR .} {"abc"} - 5 {abc NOT .} {"abc"} - 6 {abc AND .} {"abc"} - 7 {. OR abc} {"abc"} - 8 {. NOT abc} {"abc"} - 9 {. AND abc} {"abc"} - 10 {abc + . + def} {"abc" + "def"} - 11 {abc . def} {"abc" AND "def"} - 12 {r+e OR w} {"r" + "e" OR "w"} + 1 {abc} {"abc"} + 2 {abc ""} {"abc"} + 3 {""} {} + 4 {abc OR ""} {"abc"} + 5 {abc NOT ""} {"abc"} + 6 {abc AND ""} {"abc"} + 7 {"" OR abc} {"abc"} + 8 {"" NOT abc} {"abc"} + 9 {"" AND abc} {"abc"} + 10 {abc + "" + def} {"abc" + "def"} + 11 {abc "" def} {"abc" AND "def"} + 12 {r+e OR w} {"r" + "e" OR "w"} } { do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] } do_catchsql_test 2.1 { Index: ext/fts5/test/fts5fault6.test ================================================================== --- ext/fts5/test/fts5fault6.test +++ ext/fts5/test/fts5fault6.test @@ -19,10 +19,11 @@ # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } + #------------------------------------------------------------------------- # OOM while rebuilding an FTS5 table. # do_execsql_test 1.0 { @@ -146,7 +147,151 @@ } } -test { faultsim_test_result {0 {}} } +#------------------------------------------------------------------------- +# +# 5.2.* OOM while running a query that includes synonyms and matchinfo(). +# +# 5.3.* OOM while running a query that returns a row containing instances +# of more than 4 synonyms for a single term. +# +proc mit {blob} { + set scan(littleEndian) i* + set scan(bigEndian) I* + binary scan $blob $scan($::tcl_platform(byteOrder)) r + return $r +} +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {$tflags=="query" && [string length $w]==1} { + for {set i 2} {$i < 7} {incr i} { + sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd + } + } + } +} +proc tcl_create {args} { return "tcl_tokenize" } +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +db func mit mit +sqlite3_fts5_register_matchinfo db +do_test 5.0 { + execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl) } + execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 32) } + foreach {rowid text} { + 1 {aaaa cc b aaaaa cc aa} + 2 {aa aa bb a bbb} + 3 {bb aaaaa aaaaa b aaaa aaaaa} + 4 {aa a b aaaa aa} + 5 {aa b ccc aaaaa cc} + 6 {aa aaaaa bbbb cc aaa} + 7 {aaaaa aa aa ccccc bb} + 8 {ccc bbbbb ccccc bbb c} + 9 {cccccc bbbb a aaa cccc c} + + 20 {ddd f ddd eeeee fff ffff eeee ddd fff eeeee dddddd eeee} + 21 {fffff eee dddd fffff dd ee ee eeeee eee eeeeee ee dd e} + 22 {fffff d eeee dddd fffff dddddd ffff ddddd eeeee ee eee dddd ddddd} + 23 {ddddd fff ddd eeeee ffff eeee ddd ff ff ffffff eeeeee dddd ffffff} + 24 {eee dd ee dddd dddd eeeeee e eee fff ffff} + 25 {ddddd ffffff dddddd fff ddd ddddd ddd f eeee fff dddd f} + 26 {f ffff fff fff eeeeee dddd d dddddd ddddd eee ff eeeee} + 27 {eee fff dddddd eeeee eeeee dddd ddddd ffff f eeeee eee dddddd ddddd d} + 28 {dd ddddd d ddd d fff d dddd ee dddd ee ddd dddddd dddddd} + 29 {eeee dddd ee dddd eeee dddd dd fffff f ddd eeeee ddd ee} + 30 {ff ffffff eeeeee eeeee eee ffffff ff ffff f fffff eeeee} + 31 {fffff eeeeee dddd eeee eeee eeeeee eee fffff d ddddd ffffff ffff dddddd} + 32 {dddddd fffff ee eeeeee eeee ee fff dddd fff eeee ffffff eeeeee ffffff} + 33 {ddddd eeee dd ffff dddddd fff eeee ddddd ffff eeee ddd} + 34 {ee dddd ddddd dddddd eeee eeeeee f dd ee dddddd ffffff} + 35 {ee dddd dd eeeeee ddddd eee d eeeeee dddddd eee dddd fffff} + 36 {eee ffffff ffffff e fffff eeeee ff dddddd dddddd fff} + 37 {eeeee fffff dddddd dddd ffffff fff f dd ee dd dd eeeee} + 38 {eeeeee ee d ff eeeeee eeeeee eee eeeee ee ffffff dddd eeee dddddd ee} + 39 {eeeeee ddd fffff e dddd ee eee eee ffffff ee f d dddd} + 40 {ffffff dddddd eee ee ffffff eee eeee ddddd ee eeeeee f} + 41 {ddd ddd fff fffff ee fffff f fff ddddd fffff} + 42 {dddd ee ff d f ffffff fff ffffff ff dd dddddd f eeee} + 43 {d dd fff fffff d f fff e dddd ee ee} + 44 {ff ffff eee ddd d dd ffff dddd d eeee d eeeeee} + 45 {eeee f eeeee ee e ffff f ddd e fff} + 46 {ffff d ffff eeee ffff eeeee f ffff ddddd eee} + 47 {dd dd dddddd ddddd fffff dddddd ddd ddddd eeeeee ffff eeee eee ee} + 48 {ffff ffff e dddd ffffff dd dd dddd f fffff} + 49 {ffffff d dddddd ffff eeeee f ffff ffff d dd fffff eeeee} + + 50 {x e} + } { + execsql { INSERT INTO t1(rowid, a) VALUES($rowid, $text) } + } +} {} + +set res [list {*}{ + 1 {3 24 8 2 12 6} + 5 {2 24 8 2 12 6} + 6 {3 24 8 1 12 6} + 7 {3 24 8 1 12 6} + 9 {2 24 8 3 12 6} +}] +do_execsql_test 5.1.1 { + SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c' +} $res +do_execsql_test 5.1.2 { + SELECT count(*) FROM t1 WHERE t1 MATCH 'd e f' +} 29 + +faultsim_save_and_close +do_faultsim_test 5.2 -faults oom* -prep { + faultsim_restore_and_reopen + sqlite3_fts5_create_tokenizer db tcl tcl_create + sqlite3_fts5_register_matchinfo db + db func mit mit +} -body { + db eval { + SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c' + } +} -test { + faultsim_test_result [list 0 $::res] +} + +do_faultsim_test 5.3 -faults oom* -prep { + faultsim_restore_and_reopen + sqlite3_fts5_create_tokenizer db tcl tcl_create +} -body { + db eval { + SELECT count(*) FROM t1 WHERE t1 MATCH 'd AND e AND f' + } +} -test { + faultsim_test_result {0 29} +} + +do_faultsim_test 5.4 -faults oom* -prep { + faultsim_restore_and_reopen + sqlite3_fts5_create_tokenizer db tcl tcl_create +} -body { + db eval { + SELECT count(*) FROM t1 WHERE t1 MATCH 'x + e' + } +} -test { + faultsim_test_result {0 1} +} + +#------------------------------------------------------------------------- +catch { db close } +breakpoint +do_faultsim_test 6 -faults oom* -prep { + sqlite_orig db test.db + sqlite3_db_config_lookaside db 0 0 0 +} -body { + load_static_extension db fts5 +} -test { + faultsim_test_result {0 {}} {1 {initialization of fts5 failed: }} + if {$testrc==0} { + db eval { CREATE VIRTUAL TABLE temp.t1 USING fts5(x) } + } + db close +} finish_test ADDED ext/fts5/test/fts5fault7.test Index: ext/fts5/test/fts5fault7.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5fault7.test @@ -0,0 +1,91 @@ +# 2015 September 3 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# This file is focused on OOM errors. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5fault7 + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +if 1 { + +#------------------------------------------------------------------------- +# Test fault-injection on a query that uses xColumnSize() on columnsize=0 +# table. +# +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0); + INSERT INTO t1 VALUES('a b c d e f g'); + INSERT INTO t1 VALUES('a b c d'); + INSERT INTO t1 VALUES('a b c d e f g h i j'); +} + + +fts5_aux_test_functions db +do_faultsim_test 1 -faults oom* -body { + execsql { SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH 'b' } +} -test { + faultsim_test_result {0 {7 4 10}} {1 SQLITE_NOMEM} +} + +} + +#------------------------------------------------------------------------- +# Test fault-injection when a segment is promoted. +# +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t2 USING fts5(a); + INSERT INTO t2(t2, rank) VALUES('automerge', 0); + INSERT INTO t2(t2, rank) VALUES('crisismerge', 4); + INSERT INTO t2(t2, rank) VALUES('pgsz', 40); + + INSERT INTO t2 VALUES('a b c'); + INSERT INTO t2 VALUES('d e f'); + INSERT INTO t2 VALUES('f e d'); + INSERT INTO t2 VALUES('c b a'); + + INSERT INTO t2 VALUES('a b c'); + INSERT INTO t2 VALUES('d e f'); + INSERT INTO t2 VALUES('f e d'); + INSERT INTO t2 VALUES('c b a'); +} {} + +faultsim_save_and_close +do_faultsim_test 1 -faults oom-t* -prep { + faultsim_restore_and_reopen + db eval { + BEGIN; + INSERT INTO t2 VALUES('c d c g g f'); + INSERT INTO t2 VALUES('c d g b f d'); + INSERT INTO t2 VALUES('c c f d e d'); + INSERT INTO t2 VALUES('e a f c e f'); + INSERT INTO t2 VALUES('c g f b b d'); + INSERT INTO t2 VALUES('d a g a b b'); + INSERT INTO t2 VALUES('e f a b c e'); + INSERT INTO t2 VALUES('e c a g c d'); + INSERT INTO t2 VALUES('g b d d e b'); + INSERT INTO t2 VALUES('e a d a e d'); + } +} -body { + db eval COMMIT +} -test { + faultsim_test_result {0 {}} +} + +finish_test + Index: ext/fts5/test/fts5matchinfo.test ================================================================== --- ext/fts5/test/fts5matchinfo.test +++ ext/fts5/test/fts5matchinfo.test @@ -353,14 +353,14 @@ } {1 1 one 2 2 two 3 3 three} #--------------------------------------------------------------------------- # Test the 'y' matchinfo flag # -set sqlite_fts3_enable_parentheses 1 reset_db +sqlite3_fts5_register_matchinfo db do_execsql_test 11.0 { - CREATE VIRTUAL TABLE tt USING fts3(x, y); + CREATE VIRTUAL TABLE tt USING fts5(x, y); INSERT INTO tt VALUES('c d a c d d', 'e a g b d a'); -- 1 INSERT INTO tt VALUES('c c g a e b', 'c g d g e c'); -- 2 INSERT INTO tt VALUES('b e f d e g', 'b a c b c g'); -- 3 INSERT INTO tt VALUES('a c f f g d', 'd b f d e g'); -- 4 INSERT INTO tt VALUES('g a c f c f', 'd g g b c c'); -- 5 @@ -430,28 +430,26 @@ do_execsql_test 11.1.$tn.2 { SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr } $r2 } -set sqlite_fts3_enable_parentheses 0 #--------------------------------------------------------------------------- # Test the 'b' matchinfo flag # -set sqlite_fts3_enable_parentheses 1 reset_db +sqlite3_fts5_register_matchinfo db db func mit mit do_test 12.0 { set cols [list] for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" } - execsql "CREATE VIRTUAL TABLE tt USING fts3([join $cols ,])" + execsql "CREATE VIRTUAL TABLE tt USING fts5([join $cols ,])" } {} do_execsql_test 12.1 { INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc'); SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc'; } [list [list [expr 1<<4] [expr 1<<(45-32)]]] -set sqlite_fts3_enable_parentheses 0 finish_test Index: ext/fts5/test/fts5rowid.test ================================================================== --- ext/fts5/test/fts5rowid.test +++ ext/fts5/test/fts5rowid.test @@ -25,19 +25,19 @@ SELECT fts5_rowid() } {1 {should be: fts5_rowid(subject, ....)}} do_catchsql_test 1.2 { SELECT fts5_rowid('segment') -} {1 {should be: fts5_rowid('segment', segid, height, pgno))}} +} {1 {should be: fts5_rowid('segment', segid, pgno))}} do_execsql_test 1.3 { - SELECT fts5_rowid('segment', 1, 1, 1) -} {139586437121} + SELECT fts5_rowid('segment', 1, 1) +} {137438953473} do_catchsql_test 1.4 { SELECT fts5_rowid('nosucharg'); -} {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}} +} {1 {first arg to fts5_rowid() must be 'segment'}} #------------------------------------------------------------------------- # Tests of the fts5_decode() function. # @@ -88,10 +88,14 @@ # This is really a corruption test... #do_execsql_test 2.7 { # UPDATE x1_data SET block = X''; # SELECT count(fts5_decode(rowid, block)) FROM x1_data; #} $res + +do_execsql_test 2.8 { + SELECT fts5_decode(fts5_rowid('segment', 1000, 1), X'AB') +} {corrupt} #------------------------------------------------------------------------- # Tests with very large tokens. # set strlist [list \ ADDED ext/fts5/test/fts5simple.test Index: ext/fts5/test/fts5simple.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5simple.test @@ -0,0 +1,173 @@ +# 2015 September 05 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5simple + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +if 1 { +#------------------------------------------------------------------------- +# +set doc "x x [string repeat {y } 50]z z" +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + BEGIN; + INSERT INTO t1 VALUES($doc); + COMMIT; +} + +do_execsql_test 1.1 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + INSERT INTO t1 VALUES('a b c'); + INSERT INTO t1 VALUES('d e f'); + INSERT INTO t1(t1) VALUES('optimize'); +} + +do_execsql_test 2.1 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {} + + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x, prefix='1,2'); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + BEGIN; + INSERT INTO t1 VALUES('one'); + SELECT * FROM t1 WHERE t1 MATCH 'o*'; +} {one} + +do_execsql_test 3.1 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 4.1 { + CREATE VIRTUAL TABLE t11 USING fts5(content); + INSERT INTO t11(t11, rank) VALUES('pgsz', 32); + INSERT INTO t11 VALUES('another'); + INSERT INTO t11 VALUES('string'); + INSERT INTO t11 VALUES('of'); + INSERT INTO t11 VALUES('text'); +} +do_test 4.2 { + execsql { INSERT INTO t11(t11) VALUES('optimize') } +} {} +do_execsql_test 4.3 { + INSERT INTO t11(t11) VALUES('integrity-check'); +} {} + +#db eval { SELECT fts5_decode(rowid, block) as x FROM t11_data } { puts $x } + +#------------------------------------------------------------------------- +reset_db +set doc [string repeat "x y " 5] +do_execsql_test 5.1 { + CREATE VIRTUAL TABLE yy USING fts5(content); + INSERT INTO yy(yy, rank) VALUES('pgsz', 32); + BEGIN; + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + COMMIT; +} + +do_execsql_test 5.2 { + SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid ASC +} {1 2 3 4 5 6 7 8} + +do_execsql_test 5.3 { + SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC +} {8 7 6 5 4 3 2 1} + +#db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x } + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 5.1 { + CREATE VIRTUAL TABLE tt USING fts5(content); + INSERT INTO tt(tt, rank) VALUES('pgsz', 32); + INSERT INTO tt VALUES('aa'); +} + +do_execsql_test 5.2 { + SELECT rowid FROM tt WHERE tt MATCH 'a*'; +} {1} + +do_execsql_test 5.3 { + DELETE FROM tt; + BEGIN; + INSERT INTO tt VALUES('aa'); + INSERT INTO tt VALUES('ab'); + COMMIT; +} {} + +do_execsql_test 5.4 { + SELECT rowid FROM tt WHERE tt MATCH 'a*'; +} {1 2} + +} + +do_execsql_test 5.5 { + DELETE FROM tt; + BEGIN; + INSERT INTO tt VALUES('aa'); + INSERT INTO tt VALUES('ab'); + INSERT INTO tt VALUES('aa'); + INSERT INTO tt VALUES('ab'); + INSERT INTO tt VALUES('aa'); + INSERT INTO tt VALUES('ab'); + INSERT INTO tt VALUES('aa'); + INSERT INTO tt VALUES('ab'); + COMMIT; + SELECT rowid FROM tt WHERE tt MATCH 'a*'; +} {1 2 3 4 5 6 7 8} + +do_execsql_test 5.6 { + INSERT INTO tt(tt) VALUES('integrity-check'); +} + +reset_db +do_execsql_test 5.7 { + CREATE VIRTUAL TABLE tt USING fts5(content); + INSERT INTO tt(tt, rank) VALUES('pgsz', 32); + INSERT INTO tt VALUES('aa ab ac ad ae af'); +} + +do_execsql_test 5.8 { + SELECT rowid FROM tt WHERE tt MATCH 'a*'; +} {1} + +finish_test + ADDED ext/fts5/test/fts5synonym.test Index: ext/fts5/test/fts5synonym.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5synonym.test @@ -0,0 +1,460 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on custom tokenizers that support synonyms. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5synonym + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +foreach S { + {zero 0} + {one 1 i} + {two 2 ii} + {three 3 iii} + {four 4 iv} + {five 5 v} + {six 6 vi} + {seven 7 vii} + {eight 8 viii} + {nine 9 ix} +} { + foreach s $S { + set o [list] + foreach x $S {if {$x!=$s} {lappend o $x}} + set ::syn($s) $o + } +} + +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + } +} + +proc tcl_create {args} { + return "tcl_tokenize" +} + +sqlite3_fts5_create_tokenizer db tcl tcl_create + +#------------------------------------------------------------------------- +# Warm body test for the code in fts5_tcl.c. +# +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); + INSERT INTO ft VALUES('abc def ghi'); + INSERT INTO ft VALUES('jkl mno pqr'); + SELECT rowid, x FROM ft WHERE ft MATCH 'def'; + SELECT x, rowid FROM ft WHERE ft MATCH 'pqr'; +} {1 {abc def ghi} {jkl mno pqr} 2} + +#------------------------------------------------------------------------- +# Test a tokenizer that supports synonyms by adding extra entries to the +# FTS index. +# + +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {$tflags=="document" && [info exists ::syn($w)]} { + foreach s $::syn($w) { + sqlite3_fts5_token -colo $s $iStart $iEnd + } + } + } +} +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create + +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); + INSERT INTO ft VALUES('one two three'); + INSERT INTO ft VALUES('four five six'); + INSERT INTO ft VALUES('eight nine ten'); +} {} + +foreach {tn expr res} { + 1 "3" 1 + 2 "eight OR 8 OR 5" {2 3} + 3 "10" {} + 4 "1*" {1} + 5 "1 + 2" {1} +} { + do_execsql_test 2.1.$tn { + SELECT rowid FROM ft WHERE ft MATCH $expr + } $res +} + +#------------------------------------------------------------------------- +# Test some broken tokenizers: +# +# 3.1.*: A tokenizer that declares the very first token to be colocated. +# +# 3.2.*: A tokenizer that reports two identical tokens at the same position. +# This is allowed. +# +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +proc tcl_tokenize {tflags text} { + set bColo 1 + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + if {$bColo} { + sqlite3_fts5_token -colo $w $iStart $iEnd + set bColo 0 + } { + sqlite3_fts5_token $w $iStart $iEnd + } + } +} +do_execsql_test 3.1.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); + INSERT INTO ft VALUES('one two three'); + CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row); + SELECT * FROM vv; +} { + one 1 1 three 1 1 two 1 1 +} + +do_execsql_test 3.1.1 { + INSERT INTO ft(ft) VALUES('integrity-check'); +} {} + +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + } +} + +do_execsql_test 3.1.2 { + SELECT rowid FROM ft WHERE ft MATCH 'one two three' +} {1} + +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + sqlite3_fts5_token -colo $w $iStart $iEnd + } +} +do_execsql_test 3.2.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); + INSERT INTO ft VALUES('one one two three'); + CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row); + SELECT * FROM vv; +} { + one 1 4 three 1 2 two 1 2 +} +do_execsql_test 3.2.1 { + SELECT rowid FROM ft WHERE ft MATCH 'one'; +} {1} +do_execsql_test 3.2.2 { + SELECT rowid FROM ft WHERE ft MATCH 'one two three'; +} {1} +do_execsql_test 3.2.3 { + SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three'; +} {1} +do_execsql_test 3.2.4 { + SELECT rowid FROM ft WHERE ft MATCH 'one two two three'; +} {1} +do_execsql_test 3.2.5 { + SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three'; +} {} + +#------------------------------------------------------------------------- +# Check that expressions with synonyms can be parsed and executed. +# +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {$tflags=="query" && [info exists ::syn($w)]} { + foreach s $::syn($w) { + sqlite3_fts5_token -colo $s $iStart $iEnd + } + } + } +} + +foreach {tn expr res} { + 1 {abc} {"abc"} + 2 {one} {"one"|"i"|"1"} + 3 {3} {"3"|"iii"|"three"} + 4 {3*} {"3"|"iii"|"three" *} +} { + do_execsql_test 4.1.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res] +} + +do_execsql_test 4.2.1 { + CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tcl); + INSERT INTO xx VALUES('one two'); + INSERT INTO xx VALUES('three four'); +} + +do_execsql_test 4.2.2 { + SELECT rowid FROM xx WHERE xx MATCH '2' +} {1} + +do_execsql_test 4.2.3 { + SELECT rowid FROM xx WHERE xx MATCH '3' +} {2} + +do_test 5.0 { + execsql { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tcl) + } + foreach {rowid a b} { + 1 {four v 4 i three} {1 3 five five 4 one} + 2 {5 1 3 4 i} {2 2 v two 4} + 3 {5 i 5 2 four 4 1} {iii ii five two 1} + 4 {ii four 4 one 5 three five} {one 5 1 iii 4 3} + 5 {three i v i four 4 1} {ii five five five iii} + 6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv} + 7 {ii ii two three 2 5} {iii i ii iii iii one one} + 8 {2 ii i two 3 three 2} {two iv v iii 3 five} + 9 {i 2 iv 3 five four v} {iii 4 three i three ii 1} + } { + execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) } + } +} {} + + +foreach {tn q res} { + 1 {one} { + 1 {four v 4 [i] three} {[1] 3 five five 4 [one]} + 2 {5 [1] 3 4 [i]} {2 2 v two 4} + 3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]} + 4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3} + 5 {three [i] v [i] four 4 [1]} {ii five five five iii} + 6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv} + 7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]} + 8 {2 ii [i] two 3 three 2} {two iv v iii 3 five} + 9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]} + } + 2 {five four} { + 1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one} + 2 {[5] 1 3 [4] i} {2 2 [v] two [4]} + 3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1} + 4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3} + 5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii} + 8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]} + 9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1} + } + 3 {one OR two OR iii OR 4 OR v} { + 1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]} + 2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]} + 3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]} + 4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]} + 5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]} + 6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]} + 7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]} + 8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]} + 9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]} + } + + 4 {5 + 1} { + 2 {[5 1] 3 4 i} {2 2 v two 4} + 3 {[5 i] 5 2 four 4 1} {iii ii five two 1} + 4 {ii four 4 one 5 three five} {one [5 1] iii 4 3} + 5 {three i [v i] four 4 1} {ii five five five iii} + } + + 5 {one + two + three} { + 7 {ii ii two three 2 5} {iii [i ii iii] iii one one} + 8 {2 ii [i two 3] three 2} {two iv v iii 3 five} + } + + 6 {"v v"} { + 1 {four v 4 i three} {1 3 [five five] 4 one} + 5 {three i v i four 4 1} {ii [five five five] iii} + } +} { + do_execsql_test 5.1.$tn { + SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']') + FROM t1 WHERE t1 MATCH $q + } $res +} + +# Test that the xQueryPhrase() API works with synonyms. +# +proc mit {blob} { + set scan(littleEndian) i* + set scan(bigEndian) I* + binary scan $blob $scan($::tcl_platform(byteOrder)) r + return $r +} +db func mit mit +sqlite3_fts5_register_matchinfo db + +foreach {tn q res} { + 1 {one} { + 1 {1 11 7 2 12 6} 2 {2 11 7 0 12 6} + 3 {2 11 7 1 12 6} 4 {1 11 7 2 12 6} + 5 {3 11 7 0 12 6} 6 {0 11 7 2 12 6} + 7 {0 11 7 3 12 6} 8 {1 11 7 0 12 6} + 9 {1 11 7 2 12 6} + } +} { + do_execsql_test 5.2.$tn { + SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q + } $res +} + + +#------------------------------------------------------------------------- +# Test terms with more than 4 synonyms. +# +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {$tflags=="query" && [string length $w]==1} { + for {set i 2} {$i<=10} {incr i} { + sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd + } + } + } +} + +do_execsql_test 6.0.1 { + CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl); + INSERT INTO t1 VALUES('yy xx qq'); + INSERT INTO t1 VALUES('yy xx xx'); +} +do_execsql_test 6.0.2 { + SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)'; +} {{yy xx qq}} + +do_test 6.0.3 { + execsql { + CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl) + } + foreach {rowid a b} { + 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa} + 2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq} + 3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj} + 4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii} + 5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n} + 6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq} + 7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu} + 8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss} + 9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg} + } { + execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) } + } +} {} + +foreach {tn q res} { + 1 {a} { + 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]} + 3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj} + 4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii} + 6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq} + } + + 2 {a AND q} { + 1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]} + 6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]} + } + + 3 {o OR (q AND a)} { + 1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]} + 2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq} + 5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n} + 6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]} + 9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg} + } + + 4 {NEAR(q y, 20)} { + 1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa} + 2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]} + } +} { + do_execsql_test 6.1.$tn.asc { + SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']') + FROM t2 WHERE t2 MATCH $q + } $res + + set res2 [list] + foreach {rowid a b} $res { + set res2 [concat [list $rowid $a $b] $res2] + } + + do_execsql_test 6.1.$tn.desc { + SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']') + FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC + } $res2 +} + +do_execsql_test 6.2.1 { + INSERT INTO t2(rowid, a, b) VALUES(13, + 'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy' + ); + SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')') + FROM t2 WHERE t2 MATCH 'x OR y' +} { + 1 { vvvvv qq oo vvvv eee} {ffff uu r qq aaaa} + 2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq} + 4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii} + 13 { } + {(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)} +} + +#------------------------------------------------------------------------- +# Test that the xColumnSize() API is not confused by colocated tokens. +# +reset_db +sqlite3_fts5_create_tokenizer db tcl tcl_create +fts5_aux_test_functions db +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {[string length $w]==1} { + for {set i 2} {$i<=10} {incr i} { + sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd + } + } + } +} + +do_execsql_test 7.0.1 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl); + INSERT INTO t1 VALUES('0 2 3', '4 5 6 7'); + INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0'); + SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0'; +} {{3 4} {2 10}} + +do_execsql_test 7.0.2 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +do_execsql_test 7.1.1 { + CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl); + INSERT INTO t2 VALUES('0 2 3', '4 5 6 7'); + INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0'); + SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0'; +} {{3 4} {2 10}} + +do_execsql_test 7.1.2 { + INSERT INTO t2(t2) VALUES('integrity-check'); +} + +finish_test + Index: ext/fts5/test/fts5version.test ================================================================== --- ext/fts5/test/fts5version.test +++ ext/fts5/test/fts5version.test @@ -28,37 +28,37 @@ INSERT INTO t1 VALUES('a b c d'); } {} do_execsql_test 1.2 { SELECT * FROM t1_config WHERE k='version' -} {version 3} +} {version 4} do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'a'; } {1} do_execsql_test 1.4 { - UPDATE t1_config set v=4 WHERE k='version'; + UPDATE t1_config set v=5 WHERE k='version'; } do_test 1.5 { db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } -} {1 {invalid fts5 file format (found 4, expected 3) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}} do_test 1.6 { db close sqlite3 db test.db catchsql { INSERT INTO t1 VALUES('x y z') } -} {1 {invalid fts5 file format (found 4, expected 3) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}} do_test 1.7 { execsql { DELETE FROM t1_config WHERE k='version' } db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } -} {1 {invalid fts5 file format (found 0, expected 3) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 0, expected 4) - run 'rebuild'}} finish_test Index: ext/fts5/tool/loadfts5.tcl ================================================================== --- ext/fts5/tool/loadfts5.tcl +++ ext/fts5/tool/loadfts5.tcl @@ -16,10 +16,16 @@ if {[file isdir $f]} { load_hierachy $f } else { db eval { INSERT INTO t1 VALUES($f, loadfile($f)) } incr ::nRow + + if {$::O(trans) && ($::nRow % $::O(trans))==0} { + db eval { COMMIT } + db eval { INSERT INTO t1(t1) VALUES('integrity-check') } + db eval { BEGIN } + } if {($::nRow % $::nRowPerDot)==0} { puts -nonewline . if {($::nRow % (65*$::nRowPerDot))==0} { puts "" } flush stdout @@ -39,10 +45,11 @@ puts stderr " -delete (delete the database file before starting)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" puts stderr " -crisismerge N (set the crisismerge parameter to N)" puts stderr " -prefix PREFIX (comma separated prefix= argument)" + puts stderr " -trans N (commit after N inserts - 0 == never)" exit 1 } set O(vtab) fts5 set O(tok) "" @@ -49,10 +56,11 @@ set O(limit) 0 set O(delete) 0 set O(automerge) -1 set O(crisismerge) -1 set O(prefix) "" +set O(trans) 0 if {[llength $argv]<2} usage set nOpt [expr {[llength $argv]-2}] for {set i 0} {$i < $nOpt} {incr i} { set arg [lindex $argv $i] @@ -75,10 +83,15 @@ -limit { if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] } + + -trans { + if { [incr i]>=$nOpt } usage + set O(trans) [lindex $argv $i] + } -automerge { if { [incr i]>=$nOpt } usage set O(automerge) [lindex $argv $i] } @@ -102,12 +115,13 @@ set dbfile [lindex $argv end-1] if {$O(delete)} { file delete -force $dbfile } sqlite3 db $dbfile catch { load_static_extension db fts5 } db func loadfile loadfile +db eval "PRAGMA page_size=4096" -db transaction { +db eval BEGIN set pref "" if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" } catch { db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)" db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);" @@ -124,9 +138,9 @@ db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))} } else { } } load_hierachy [lindex $argv end] -} +db eval COMMIT Index: ext/fts5/tool/showfts5.tcl ================================================================== --- ext/fts5/tool/showfts5.tcl +++ ext/fts5/tool/showfts5.tcl @@ -3,31 +3,79 @@ #------------------------------------------------------------------------- # Process command line arguments. # proc usage {} { - puts stderr "usage: $::argv0 database table" + puts stderr "usage: $::argv0 ?OPTIONS? database table" + puts stderr "" + puts stderr " -nterm (count number of terms in each segment)" puts stderr "" exit 1 } -if {[llength $argv]!=2} usage -set database [lindex $argv 0] -set tbl [lindex $argv 1] + +set O(nterm) 0 + +if {[llength $argv]<2} usage +foreach a [lrange $argv 0 end-2] { + switch -- $a { + -nterm { + set O(nterm) 1 + } + + default { + usage + } + } +} + +set database [lindex $argv end-1] +set tbl [lindex $argv end] + +#------------------------------------------------------------------------- +# Count the number of terms in each segment of fts5 table $tbl. Store the +# counts in the array variable in the parent context named by parameter +# $arrayname, indexed by segment-id. Example: +# +# count_terms fts_tbl A +# foreach {k v} [array get A] { puts "segid=$k nTerm=$v" } +# +proc count_terms {tbl arrayname} { + upvar A $arrayname + array unset A + db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data" { + set desc [lindex $d 0] + if {[regexp {^segid=([0-9]*)} $desc -> id]} { + foreach i [lrange $d 1 end] { + if {[string match {term=*} $i]} { incr A($id) } + } + } + } +} #------------------------------------------------------------------------- # Start of main program. # sqlite3 db $database catch { load_static_extension db fts5 } + +if {$O(nterm)} { count_terms $tbl A } db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data WHERE id=10" { foreach lvl [lrange $d 1 end] { puts [lrange $lvl 0 2] + foreach seg [lrange $lvl 3 end] { - puts " $seg" + if {$::O(nterm)} { + regexp {^id=([0-9]*)} $seg -> id + set nTerm 0 + catch { set nTerm $A($id) } + puts [format " % -28s nTerm=%d" $seg $nTerm] + } else { + puts [format " % -28s" $seg] + } } } } Index: ext/misc/json1.c ================================================================== --- ext/misc/json1.c +++ ext/misc/json1.c @@ -66,10 +66,13 @@ #define JSON_REAL 4 #define JSON_STRING 5 #define JSON_ARRAY 6 #define JSON_OBJECT 7 +/* The "subtype" set for JSON values */ +#define JSON_SUBTYPE 74 /* Ascii for "J" */ + /* ** Names of the various JSON types: */ static const char * const jsonType[] = { "null", "true", "false", "integer", "real", "text", "array", "object" @@ -80,11 +83,11 @@ #define JNODE_RAW 0x01 /* Content is raw, not JSON encoded */ #define JNODE_ESCAPE 0x02 /* Content is text with \ escapes */ #define JNODE_REMOVE 0x04 /* Do not output */ #define JNODE_REPLACE 0x08 /* Replace with JsonNode.iVal */ #define JNODE_APPEND 0x10 /* More ARRAY/OBJECT entries at u.iAppend */ -#define JNODE_JSON 0x20 /* Treat REPLACE as JSON text */ +#define JNODE_LABEL 0x20 /* Is a label of an object */ /* A single node of parsed JSON */ struct JsonNode { @@ -240,12 +243,11 @@ ** Append a function parameter value to the JSON string under ** construction. */ static void jsonAppendValue( JsonString *p, /* Append to this JSON string */ - sqlite3_value *pValue, /* Value to append */ - u8 textIsJson /* Try to treat text values as JSON */ + sqlite3_value *pValue /* Value to append */ ){ switch( sqlite3_value_type(pValue) ){ case SQLITE_NULL: { jsonAppendRaw(p, "null", 4); break; @@ -258,11 +260,11 @@ break; } case SQLITE_TEXT: { const char *z = (const char*)sqlite3_value_text(pValue); u32 n = (u32)sqlite3_value_bytes(pValue); - if( textIsJson ){ + if( sqlite3_value_subtype(pValue)==JSON_SUBTYPE ){ jsonAppendRaw(p, z, n); }else{ jsonAppendString(p, z, n); } break; @@ -362,12 +364,11 @@ for(;;){ while( j<=pNode->n ){ if( pNode[j].jnFlags & (JNODE_REMOVE|JNODE_REPLACE) ){ if( pNode[j].jnFlags & JNODE_REPLACE ){ jsonAppendSeparator(pOut); - jsonAppendValue(pOut, aReplace[pNode[j].iVal], - (pNode[j].jnFlags & JNODE_JSON)!=0); + jsonAppendValue(pOut, aReplace[pNode[j].iVal]); } }else{ jsonAppendSeparator(pOut); jsonRenderNode(&pNode[j], pOut, aReplace); } @@ -388,12 +389,11 @@ if( (pNode[j+1].jnFlags & JNODE_REMOVE)==0 ){ jsonAppendSeparator(pOut); jsonRenderNode(&pNode[j], pOut, aReplace); jsonAppendChar(pOut, ':'); if( pNode[j+1].jnFlags & JNODE_REPLACE ){ - jsonAppendValue(pOut, aReplace[pNode[j+1].iVal], - (pNode[j+1].jnFlags & JNODE_JSON)!=0); + jsonAppendValue(pOut, aReplace[pNode[j+1].iVal]); }else{ jsonRenderNode(&pNode[j+1], pOut, aReplace); } } j += 1 + jsonNodeSize(&pNode[j+1]); @@ -418,10 +418,11 @@ ){ JsonString s; jsonInit(&s, pCtx); jsonRenderNode(pNode, &s, aReplace); jsonResult(&s); + sqlite3_result_subtype(pCtx, JSON_SUBTYPE); } /* ** Make the JsonNode the return value of the function. */ @@ -492,16 +493,16 @@ else if( c>='a' && c<='f' ) v = v*16 + c - 'a' + 10; else break; } if( v==0 ) break; if( v<=0x7f ){ - zOut[j++] = v; + zOut[j++] = (char)v; }else if( v<=0x7ff ){ - zOut[j++] = 0xc0 | (v>>6); + zOut[j++] = (char)(0xc0 | (v>>6)); zOut[j++] = 0x80 | (v&0x3f); }else{ - zOut[j++] = 0xe0 | (v>>12); + zOut[j++] = (char)(0xe0 | (v>>12)); zOut[j++] = 0x80 | ((v>>6)&0x3f); zOut[j++] = 0x80 | (v&0x3f); } }else{ if( c=='b' ){ @@ -581,10 +582,11 @@ static int jsonParseValue(JsonParse *pParse, u32 i){ char c; u32 j; int iThis; int x; + JsonNode *pNode; while( isspace(pParse->zJson[i]) ){ i++; } if( (c = pParse->zJson[i])==0 ) return 0; if( c=='{' ){ /* Parse object */ iThis = jsonParseAddNode(pParse, JSON_OBJECT, 0, 0); @@ -595,11 +597,13 @@ if( x<0 ){ if( x==(-2) && pParse->nNode==(u32)iThis+1 ) return j+1; return -1; } if( pParse->oom ) return -1; - if( pParse->aNode[pParse->nNode-1].eType!=JSON_STRING ) return -1; + pNode = &pParse->aNode[pParse->nNode-1]; + if( pNode->eType!=JSON_STRING ) return -1; + pNode->jnFlags |= JNODE_LABEL; j = x; while( isspace(pParse->zJson[j]) ){ j++; } if( pParse->zJson[j]!=':' ) return -1; j++; x = jsonParseValue(pParse, j); @@ -945,41 +949,26 @@ ** On an error, write an error message into pCtx and increment the ** pParse->nErr counter. ** ** If pApnd!=NULL then try to append missing nodes and set *pApnd = 1 if ** nodes are appended. -** -** If the path starts with $$ then set *pFlags to JNODE_REPLACE|JNODE_JSON -** as a single to the caller that the input text to be inserted should be -** interpreted as JSON rather than as ordinary text. */ static JsonNode *jsonLookup( JsonParse *pParse, /* The JSON to search */ const char *zPath, /* The path to search */ int *pApnd, /* Append nodes to complete path if not NULL */ - sqlite3_context *pCtx, /* Report errors here, if not NULL */ - u8 *pFlags /* Write JNODE_REPLACE or _REPLACE|_JSON here */ + sqlite3_context *pCtx /* Report errors here, if not NULL */ ){ const char *zErr = 0; JsonNode *pNode = 0; - u8 fg = JNODE_REPLACE; if( zPath==0 ) return 0; if( zPath[0]!='$' ){ zErr = zPath; goto lookup_err; } zPath++; - if( zPath[0]=='$' ){ - if( pFlags==0 ){ - zErr = zPath; - goto lookup_err; - } - zPath++; - fg = JNODE_REPLACE|JNODE_JSON; - } - if( pFlags ) *pFlags = fg; pNode = jsonLookupStep(pParse, 0, zPath, pApnd, &zErr); return pNode; lookup_err: pParse->nErr++; @@ -990,11 +979,10 @@ sqlite3_free(z); }else{ sqlite3_result_error_nomem(pCtx); } } - if( pFlags ) *pFlags = fg; return 0; } /* @@ -1034,49 +1022,41 @@ assert( argc==1 ); if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; jsonParseFindParents(&x); jsonInit(&s, ctx); for(i=0; i2 ){ jsonAppendSeparator(&jx); if( pNode ){ jsonRenderNode(pNode, &jx, 0); @@ -1183,10 +1164,11 @@ } } if( argc>2 && i==argc ){ jsonAppendChar(&jx, ']'); jsonResult(&jx); + sqlite3_result_subtype(ctx, JSON_SUBTYPE); } jsonReset(&jx); jsonParseReset(&x); } @@ -1221,14 +1203,15 @@ jsonAppendSeparator(&jx); z = (const char*)sqlite3_value_text(argv[i]); n = (u32)sqlite3_value_bytes(argv[i]); jsonAppendString(&jx, z, n); jsonAppendChar(&jx, ':'); - jsonAppendValue(&jx, argv[i+1], 0); + jsonAppendValue(&jx, argv[i+1]); } jsonAppendChar(&jx, '}'); jsonResult(&jx); + sqlite3_result_subtype(ctx, JSON_SUBTYPE); } /* ** json_remove(JSON, PATH, ...) @@ -1250,11 +1233,11 @@ if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; if( x.nNode ){ for(i=1; i<(u32)argc; i++){ zPath = (const char*)sqlite3_value_text(argv[i]); if( zPath==0 ) goto remove_done; - pNode = jsonLookup(&x, zPath, 0, ctx, 0); + pNode = jsonLookup(&x, zPath, 0, ctx); if( x.nErr ) goto remove_done; if( pNode ) pNode->jnFlags |= JNODE_REMOVE; } if( (x.aNode[0].jnFlags & JNODE_REMOVE)==0 ){ jsonReturnJson(x.aNode, ctx, 0); @@ -1286,18 +1269,16 @@ return; } if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; if( x.nNode ){ for(i=1; i<(u32)argc; i+=2){ - u8 jnFlags = JNODE_REPLACE; zPath = (const char*)sqlite3_value_text(argv[i]); - pNode = jsonLookup(&x, zPath, 0, ctx, &jnFlags); + pNode = jsonLookup(&x, zPath, 0, ctx); if( x.nErr ) goto replace_err; if( pNode ){ - pNode->jnFlags &= ~JNODE_JSON; - pNode->jnFlags |= jnFlags; - pNode->iVal = i+1; + pNode->jnFlags |= (u8)JNODE_REPLACE; + pNode->iVal = (u8)(i+1); } } if( x.aNode[0].jnFlags & JNODE_REPLACE ){ sqlite3_result_value(ctx, argv[x.aNode[0].iVal]); }else{ @@ -1338,23 +1319,21 @@ return; } if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; if( x.nNode ){ for(i=1; i<(u32)argc; i+=2){ - u8 jnFlags = JNODE_REPLACE; zPath = (const char*)sqlite3_value_text(argv[i]); bApnd = 0; - pNode = jsonLookup(&x, zPath, &bApnd, ctx, &jnFlags); + pNode = jsonLookup(&x, zPath, &bApnd, ctx); if( x.oom ){ sqlite3_result_error_nomem(ctx); goto jsonSetDone; }else if( x.nErr ){ goto jsonSetDone; }else if( pNode && (bApnd || bIsSet) ){ - pNode->jnFlags &= ~JNODE_JSON; - pNode->jnFlags |= jnFlags; - pNode->iVal = i+1; + pNode->jnFlags |= (u8)JNODE_REPLACE; + pNode->iVal = (u8)(i+1); } } if( x.aNode[0].jnFlags & JNODE_REPLACE ){ sqlite3_result_value(ctx, argv[x.aNode[0].iVal]); }else{ @@ -1383,11 +1362,11 @@ if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; if( x.nNode ){ JsonNode *pNode; if( argc==2 ){ zPath = (const char*)sqlite3_value_text(argv[1]); - pNode = jsonLookup(&x, zPath, 0, ctx, 0); + pNode = jsonLookup(&x, zPath, 0, ctx); }else{ pNode = x.aNode; } if( pNode ){ sqlite3_result_text(ctx, jsonType[pNode->eType], -1, SQLITE_STATIC); @@ -1408,10 +1387,11 @@ sqlite3_value **argv ){ JsonParse x; /* The parse */ int rc = 0; + UNUSED_PARAM(argc); if( jsonParse(&x, 0, (const char*)sqlite3_value_text(argv[0]))==0 && x.nNode>0 ){ rc = 1; } @@ -1425,16 +1405,17 @@ ****************************************************************************/ typedef struct JsonEachCursor JsonEachCursor; struct JsonEachCursor { sqlite3_vtab_cursor base; /* Base class - must be first */ u32 iRowid; /* The rowid */ + u32 iBegin; /* The first node of the scan */ u32 i; /* Index in sParse.aNode[] of current row */ u32 iEnd; /* EOF when i equals or exceeds this value */ u8 eType; /* Type of top-level element */ u8 bRecursive; /* True for json_tree(). False for json_each() */ char *zJson; /* Input JSON */ - char *zPath; /* Path by which to filter zJson */ + char *zRoot; /* Path by which to filter zJson */ JsonParse sParse; /* Parse of the input JSON */ }; /* Constructor for the json_each virtual table */ static int jsonEachConnect( @@ -1453,20 +1434,21 @@ #define JEACH_TYPE 2 #define JEACH_ATOM 3 #define JEACH_ID 4 #define JEACH_PARENT 5 #define JEACH_FULLKEY 6 -#define JEACH_JSON 7 -#define JEACH_PATH 8 +#define JEACH_PATH 7 +#define JEACH_JSON 8 +#define JEACH_ROOT 9 UNUSED_PARAM(pzErr); UNUSED_PARAM(argv); UNUSED_PARAM(argc); UNUSED_PARAM(pAux); rc = sqlite3_declare_vtab(db, - "CREATE TABLE x(key,value,type,atom,id,parent,fullkey," - "json HIDDEN,path HIDDEN)"); + "CREATE TABLE x(key,value,type,atom,id,parent,fullkey,path," + "json HIDDEN,root HIDDEN)"); if( rc==SQLITE_OK ){ pNew = *ppVtab = sqlite3_malloc( sizeof(*pNew) ); if( pNew==0 ) return SQLITE_NOMEM; memset(pNew, 0, sizeof(*pNew)); } @@ -1503,18 +1485,18 @@ /* Reset a JsonEachCursor back to its original state. Free any memory ** held. */ static void jsonEachCursorReset(JsonEachCursor *p){ sqlite3_free(p->zJson); - sqlite3_free(p->zPath); + sqlite3_free(p->zRoot); jsonParseReset(&p->sParse); p->iRowid = 0; p->i = 0; p->iEnd = 0; p->eType = 0; p->zJson = 0; - p->zPath = 0; + p->zRoot = 0; } /* Destructor for a jsonEachCursor object */ static int jsonEachClose(sqlite3_vtab_cursor *cur){ JsonEachCursor *p = (JsonEachCursor*)cur; @@ -1532,20 +1514,14 @@ /* Advance the cursor to the next element for json_tree() */ static int jsonEachNext(sqlite3_vtab_cursor *cur){ JsonEachCursor *p = (JsonEachCursor*)cur; if( p->bRecursive ){ - if( p->i==0 ){ - p->i = 1; - }else{ - u32 iUp = p->sParse.aUp[p->i]; - JsonNode *pUp = &p->sParse.aNode[iUp]; - p->i++; - if( pUp->eType==JSON_OBJECT && (pUp->n + iUp >= p->i) ) p->i++; - } + if( p->sParse.aNode[p->i].jnFlags & JNODE_LABEL ) p->i++; + p->i++; p->iRowid++; - if( p->isParse.nNode ){ + if( p->iiEnd ){ u32 iUp = p->sParse.aUp[p->i]; JsonNode *pUp = &p->sParse.aNode[iUp]; p->eType = pUp->eType; if( pUp->eType==JSON_ARRAY ){ if( iUp==p->i-1 ){ @@ -1595,12 +1571,13 @@ pUp = &p->sParse.aNode[iUp]; if( pUp->eType==JSON_ARRAY ){ jsonPrintf(30, pStr, "[%d]", pUp->u.iKey); }else{ assert( pUp->eType==JSON_OBJECT ); - if( pNode->eType>=JSON_ARRAY ) pNode--; + if( (pNode->jnFlags & JNODE_LABEL)==0 ) pNode--; assert( pNode->eType==JSON_STRING ); + assert( pNode->jnFlags & JNODE_LABEL ); jsonPrintf(pNode->n+1, pStr, ".%.*s", pNode->n-2, pNode->u.zJContent+1); } } /* Return the value of a column */ @@ -1627,31 +1604,32 @@ sqlite3_result_int64(ctx, (sqlite3_int64)iKey); } break; } case JEACH_VALUE: { - if( p->eType==JSON_OBJECT && p->i>0 ) pThis++; + if( pThis->jnFlags & JNODE_LABEL ) pThis++; jsonReturn(pThis, ctx, 0); break; } case JEACH_TYPE: { - if( p->eType==JSON_OBJECT && p->i>0 ) pThis++; + if( pThis->jnFlags & JNODE_LABEL ) pThis++; sqlite3_result_text(ctx, jsonType[pThis->eType], -1, SQLITE_STATIC); break; } case JEACH_ATOM: { - if( p->eType==JSON_OBJECT && p->i>0 ) pThis++; + if( pThis->jnFlags & JNODE_LABEL ) pThis++; if( pThis->eType>=JSON_ARRAY ) break; jsonReturn(pThis, ctx, 0); break; } case JEACH_ID: { - sqlite3_result_int64(ctx, (sqlite3_int64)p->i + (p->eType==JSON_OBJECT)); + sqlite3_result_int64(ctx, + (sqlite3_int64)p->i + ((pThis->jnFlags & JNODE_LABEL)!=0)); break; } case JEACH_PARENT: { - if( p->i>0 && p->bRecursive ){ + if( p->i>p->iBegin && p->bRecursive ){ sqlite3_result_int64(ctx, (sqlite3_int64)p->sParse.aUp[p->i]); } break; } case JEACH_FULLKEY: { @@ -1658,12 +1636,12 @@ JsonString x; jsonInit(&x, ctx); if( p->bRecursive ){ jsonEachComputePath(p, &x, p->i); }else{ - if( p->zPath ){ - jsonAppendRaw(&x, p->zPath, (int)strlen(p->zPath)); + if( p->zRoot ){ + jsonAppendRaw(&x, p->zRoot, (int)strlen(p->zRoot)); }else{ jsonAppendChar(&x, '$'); } if( p->eType==JSON_ARRAY ){ jsonPrintf(30, &x, "[%d]", p->iRowid); @@ -1673,22 +1651,24 @@ } jsonResult(&x); break; } case JEACH_PATH: { - const char *zPath = p->zPath; - if( zPath==0 ){ - if( p->bRecursive ){ - JsonString x; - jsonInit(&x, ctx); - jsonEachComputePath(p, &x, p->sParse.aUp[p->i]); - jsonResult(&x); - break; - } - zPath = "$"; - } - sqlite3_result_text(ctx, zPath, -1, SQLITE_STATIC); + if( p->bRecursive ){ + JsonString x; + jsonInit(&x, ctx); + jsonEachComputePath(p, &x, p->sParse.aUp[p->i]); + jsonResult(&x); + break; + } + /* For json_each() path and root are the same so fall through + ** into the root case */ + } + case JEACH_ROOT: { + const char *zRoot = p->zRoot; + if( zRoot==0 ) zRoot = "$"; + sqlite3_result_text(ctx, zRoot, -1, SQLITE_STATIC); break; } default: { assert( i==JEACH_JSON ); sqlite3_result_text(ctx, p->sParse.zJson, -1, SQLITE_STATIC); @@ -1705,30 +1685,30 @@ return SQLITE_OK; } /* The query strategy is to look for an equality constraint on the json ** column. Without such a constraint, the table cannot operate. idxNum is -** 1 if the constraint is found, 3 if the constraint and zPath are found, +** 1 if the constraint is found, 3 if the constraint and zRoot are found, ** and 0 otherwise. */ static int jsonEachBestIndex( sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo ){ int i; int jsonIdx = -1; - int pathIdx = -1; + int rootIdx = -1; const struct sqlite3_index_constraint *pConstraint; UNUSED_PARAM(tab); pConstraint = pIdxInfo->aConstraint; for(i=0; inConstraint; i++, pConstraint++){ if( pConstraint->usable==0 ) continue; if( pConstraint->op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue; switch( pConstraint->iColumn ){ case JEACH_JSON: jsonIdx = i; break; - case JEACH_PATH: pathIdx = i; break; + case JEACH_ROOT: rootIdx = i; break; default: /* no-op */ break; } } if( jsonIdx<0 ){ pIdxInfo->idxNum = 0; @@ -1735,15 +1715,15 @@ pIdxInfo->estimatedCost = 1e99; }else{ pIdxInfo->estimatedCost = 1.0; pIdxInfo->aConstraintUsage[jsonIdx].argvIndex = 1; pIdxInfo->aConstraintUsage[jsonIdx].omit = 1; - if( pathIdx<0 ){ + if( rootIdx<0 ){ pIdxInfo->idxNum = 1; }else{ - pIdxInfo->aConstraintUsage[pathIdx].argvIndex = 2; - pIdxInfo->aConstraintUsage[pathIdx].omit = 1; + pIdxInfo->aConstraintUsage[rootIdx].argvIndex = 2; + pIdxInfo->aConstraintUsage[rootIdx].omit = 1; pIdxInfo->idxNum = 3; } } return SQLITE_OK; } @@ -1754,25 +1734,25 @@ int idxNum, const char *idxStr, int argc, sqlite3_value **argv ){ JsonEachCursor *p = (JsonEachCursor*)cur; const char *z; - const char *zPath; + const char *zRoot = 0; sqlite3_int64 n; UNUSED_PARAM(idxStr); UNUSED_PARAM(argc); jsonEachCursorReset(p); if( idxNum==0 ) return SQLITE_OK; z = (const char*)sqlite3_value_text(argv[0]); if( z==0 ) return SQLITE_OK; if( idxNum&2 ){ - zPath = (const char*)sqlite3_value_text(argv[1]); - if( zPath==0 ) return SQLITE_OK; - if( zPath[0]!='$' ){ + zRoot = (const char*)sqlite3_value_text(argv[1]); + if( zRoot==0 ) return SQLITE_OK; + if( zRoot[0]!='$' ){ sqlite3_free(cur->pVtab->zErrMsg); - cur->pVtab->zErrMsg = jsonPathSyntaxError(zPath); + cur->pVtab->zErrMsg = jsonPathSyntaxError(zRoot); return cur->pVtab->zErrMsg ? SQLITE_ERROR : SQLITE_NOMEM; } } n = sqlite3_value_bytes(argv[0]); p->zJson = sqlite3_malloc64( n+1 ); @@ -1792,16 +1772,15 @@ return SQLITE_NOMEM; }else{ JsonNode *pNode; if( idxNum==3 ){ const char *zErr = 0; - p->bRecursive = 0; n = sqlite3_value_bytes(argv[1]); - p->zPath = sqlite3_malloc64( n+1 ); - if( p->zPath==0 ) return SQLITE_NOMEM; - memcpy(p->zPath, zPath, (size_t)n+1); - pNode = jsonLookupStep(&p->sParse, 0, p->zPath+1, 0, &zErr); + p->zRoot = sqlite3_malloc64( n+1 ); + if( p->zRoot==0 ) return SQLITE_NOMEM; + memcpy(p->zRoot, zRoot, (size_t)n+1); + pNode = jsonLookupStep(&p->sParse, 0, p->zRoot+1, 0, &zErr); if( p->sParse.nErr ){ sqlite3_free(cur->pVtab->zErrMsg); cur->pVtab->zErrMsg = jsonPathSyntaxError(zErr); jsonEachCursorReset(p); return cur->pVtab->zErrMsg ? SQLITE_ERROR : SQLITE_NOMEM; @@ -1809,16 +1788,22 @@ return SQLITE_OK; } }else{ pNode = p->sParse.aNode; } - p->i = (int)(pNode - p->sParse.aNode); + p->iBegin = p->i = (int)(pNode - p->sParse.aNode); p->eType = pNode->eType; if( p->eType>=JSON_ARRAY ){ pNode->u.iKey = 0; p->iEnd = p->i + pNode->n + 1; - if( !p->bRecursive ) p->i++; + if( p->bRecursive ){ + if( p->i>0 && (p->sParse.aNode[p->i-1].jnFlags & JNODE_LABEL)!=0 ){ + p->i--; + } + }else{ + p->i++; + } }else{ p->iEnd = p->i+1; } } return p->sParse.oom ? SQLITE_NOMEM : SQLITE_OK; @@ -1899,10 +1884,11 @@ const char *zName; int nArg; int flag; void (*xFunc)(sqlite3_context*,int,sqlite3_value**); } aFunc[] = { + { "json", 1, 0, jsonRemoveFunc }, { "json_array", -1, 0, jsonArrayFunc }, { "json_array_length", 1, 0, jsonArrayLengthFunc }, { "json_array_length", 2, 0, jsonArrayLengthFunc }, { "json_extract", -1, 0, jsonExtractFunc }, { "json_insert", -1, 0, jsonSetFunc }, @@ -1916,11 +1902,10 @@ #if SQLITE_DEBUG /* DEBUG and TESTING functions */ { "json_parse", 1, 0, jsonParseFunc }, { "json_test1", 1, 0, jsonTest1Func }, - { "json_nodecount", 1, 0, jsonNodeCountFunc }, #endif }; #ifndef SQLITE_OMIT_VIRTUALTABLE static const struct { const char *zName; Index: main.mk ================================================================== --- main.mk +++ main.mk @@ -45,10 +45,11 @@ # This is how we compile # TCCX = $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3 TCCX += -I$(TOP)/ext/async -I$(TOP)/ext/userauth +TCCX += -I$(TOP)/ext/fts5 # Object files for the SQLite library. # LIBOBJ+= vdbe.o parse.o \ alter.o analyze.o attach.o auth.o \ @@ -229,10 +230,33 @@ SRC += \ $(TOP)/ext/rbu/sqlite3rbu.c \ $(TOP)/ext/rbu/sqlite3rbu.h + +# FTS5 things +# +FTS5_HDR = \ + $(TOP)/ext/fts5/fts5.h \ + $(TOP)/ext/fts5/fts5Int.h \ + fts5parse.h + +FTS5_SRC = \ + $(TOP)/ext/fts5/fts5_aux.c \ + $(TOP)/ext/fts5/fts5_buffer.c \ + $(TOP)/ext/fts5/fts5_main.c \ + $(TOP)/ext/fts5/fts5_config.c \ + $(TOP)/ext/fts5/fts5_expr.c \ + $(TOP)/ext/fts5/fts5_hash.c \ + $(TOP)/ext/fts5/fts5_index.c \ + fts5parse.c \ + $(TOP)/ext/fts5/fts5_storage.c \ + $(TOP)/ext/fts5/fts5_tokenize.c \ + $(TOP)/ext/fts5/fts5_unicode2.c \ + $(TOP)/ext/fts5/fts5_varint.c \ + $(TOP)/ext/fts5/fts5_vocab.c \ + # Generated source code files # SRC += \ keywordhash.h \ @@ -543,11 +567,11 @@ parse.h: parse.c parse.c: $(TOP)/src/parse.y lemon $(TOP)/addopcodes.awk cp $(TOP)/src/parse.y . rm -f parse.h - ./lemon $(OPTS) parse.y + ./lemon -s $(OPTS) parse.y mv parse.h parse.h.temp $(NAWK) -f $(TOP)/addopcodes.awk parse.h.temp >parse.h sqlite3.h: $(TOP)/src/sqlite.h.in $(TOP)/manifest.uuid $(TOP)/VERSION $(TOP)/ext/rtree/sqlite3rtree.h tclsh $(TOP)/tool/mksqlite3h.tcl $(TOP) >sqlite3.h @@ -621,40 +645,20 @@ $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c -# FTS5 things -# -FTS5_SRC = \ - $(TOP)/ext/fts5/fts5.h \ - $(TOP)/ext/fts5/fts5Int.h \ - $(TOP)/ext/fts5/fts5_aux.c \ - $(TOP)/ext/fts5/fts5_buffer.c \ - $(TOP)/ext/fts5/fts5_main.c \ - $(TOP)/ext/fts5/fts5_config.c \ - $(TOP)/ext/fts5/fts5_expr.c \ - $(TOP)/ext/fts5/fts5_hash.c \ - $(TOP)/ext/fts5/fts5_index.c \ - fts5parse.c fts5parse.h \ - $(TOP)/ext/fts5/fts5_storage.c \ - $(TOP)/ext/fts5/fts5_tokenize.c \ - $(TOP)/ext/fts5/fts5_unicode2.c \ - $(TOP)/ext/fts5/fts5_varint.c \ - $(TOP)/ext/fts5/fts5_vocab.c \ - fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . rm -f fts5parse.h ./lemon $(OPTS) fts5parse.y fts5parse.h: fts5parse.c -fts5.c: $(FTS5_SRC) +fts5.c: $(FTS5_SRC) $(FTS5_HDR) tclsh $(TOP)/ext/fts5/tool/mkfts5c.tcl cp $(TOP)/ext/fts5/fts5.h . - userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c sqlite3rbu.o: $(TOP)/ext/rbu/sqlite3rbu.c $(HDR) $(EXTHDR) Index: src/analyze.c ================================================================== --- src/analyze.c +++ src/analyze.c @@ -1184,10 +1184,11 @@ Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable); int j, k, regKey; regKey = sqlite3GetTempRange(pParse, pPk->nKeyCol); for(j=0; jnKeyCol; j++){ k = sqlite3ColumnOfIndex(pIdx, pPk->aiColumn[j]); + assert( k>=0 && knCol ); sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, k, regKey+j); VdbeComment((v, "%s", pTab->aCol[pPk->aiColumn[j]].zName)); } sqlite3VdbeAddOp3(v, OP_MakeRecord, regKey, pPk->nKeyCol, regRowid); sqlite3ReleaseTempRange(pParse, regKey, pPk->nKeyCol); @@ -1233,16 +1234,14 @@ /* We know that the regSampleRowid row exists because it was read by ** the previous loop. Thus the not-found jump of seekOp will never ** be taken */ VdbeCoverageNeverTaken(v); #ifdef SQLITE_ENABLE_STAT3 - sqlite3ExprCodeGetColumnOfTable(v, pTab, iTabCur, - pIdx->aiColumn[0], regSample); + sqlite3ExprCodeLoadIndexColumn(pParse, pIdx, iTabCur, 0, regSample); #else for(i=0; iaiColumn[i]; - sqlite3ExprCodeGetColumnOfTable(v, pTab, iTabCur, iCol, regCol+i); + sqlite3ExprCodeLoadIndexColumn(pParse, pIdx, iTabCur, i, regCol+i); } sqlite3VdbeAddOp3(v, OP_MakeRecord, regCol, nCol, regSample); #endif sqlite3VdbeAddOp3(v, OP_MakeRecord, regTabname, 6, regTemp); sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur+1, regNewRowid); Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -595,10 +595,53 @@ pCur->apPage[i] = 0; } pCur->iPage = -1; } +/* +** The cursor passed as the only argument must point to a valid entry +** when this function is called (i.e. have eState==CURSOR_VALID). This +** function saves the current cursor key in variables pCur->nKey and +** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error +** code otherwise. +** +** If the cursor is open on an intkey table, then the integer key +** (the rowid) is stored in pCur->nKey and pCur->pKey is left set to +** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is +** set to point to a malloced buffer pCur->nKey bytes in size containing +** the key. +*/ +static int saveCursorKey(BtCursor *pCur){ + int rc; + assert( CURSOR_VALID==pCur->eState ); + assert( 0==pCur->pKey ); + assert( cursorHoldsMutex(pCur) ); + + rc = sqlite3BtreeKeySize(pCur, &pCur->nKey); + assert( rc==SQLITE_OK ); /* KeySize() cannot fail */ + + /* If this is an intKey table, then the above call to BtreeKeySize() + ** stores the integer key in pCur->nKey. In this case this value is + ** all that is required. Otherwise, if pCur is not open on an intKey + ** table, then malloc space for and store the pCur->nKey bytes of key + ** data. */ + if( 0==pCur->curIntKey ){ + void *pKey = sqlite3Malloc( pCur->nKey ); + if( pKey ){ + rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey); + if( rc==SQLITE_OK ){ + pCur->pKey = pKey; + }else{ + sqlite3_free(pKey); + } + }else{ + rc = SQLITE_NOMEM; + } + } + assert( !pCur->curIntKey || !pCur->pKey ); + return rc; +} /* ** Save the current cursor position in the variables BtCursor.nKey ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK. ** @@ -615,34 +658,12 @@ if( pCur->eState==CURSOR_SKIPNEXT ){ pCur->eState = CURSOR_VALID; }else{ pCur->skipNext = 0; } - rc = sqlite3BtreeKeySize(pCur, &pCur->nKey); - assert( rc==SQLITE_OK ); /* KeySize() cannot fail */ - - /* If this is an intKey table, then the above call to BtreeKeySize() - ** stores the integer key in pCur->nKey. In this case this value is - ** all that is required. Otherwise, if pCur is not open on an intKey - ** table, then malloc space for and store the pCur->nKey bytes of key - ** data. - */ - if( 0==pCur->curIntKey ){ - void *pKey = sqlite3Malloc( pCur->nKey ); - if( pKey ){ - rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey); - if( rc==SQLITE_OK ){ - pCur->pKey = pKey; - }else{ - sqlite3_free(pKey); - } - }else{ - rc = SQLITE_NOMEM; - } - } - assert( !pCur->curIntKey || !pCur->pKey ); - + + rc = saveCursorKey(pCur); if( rc==SQLITE_OK ){ btreeReleaseAllCursorPages(pCur); pCur->eState = CURSOR_REQUIRESEEK; } @@ -8051,22 +8072,28 @@ end_insert: return rc; } /* -** Delete the entry that the cursor is pointing to. The cursor -** is left pointing at an arbitrary location. +** Delete the entry that the cursor is pointing to. +** +** If the second parameter is zero, then the cursor is left pointing at an +** arbitrary location after the delete. If it is non-zero, then the cursor +** is left in a state such that the next call to BtreeNext() or BtreePrev() +** moves it to the same row as it would if the call to BtreeDelete() had +** been omitted. */ -int sqlite3BtreeDelete(BtCursor *pCur){ +int sqlite3BtreeDelete(BtCursor *pCur, int bPreserve){ Btree *p = pCur->pBtree; BtShared *pBt = p->pBt; int rc; /* Return code */ MemPage *pPage; /* Page to delete cell from */ unsigned char *pCell; /* Pointer to cell to delete */ int iCellIdx; /* Index of cell to delete */ int iCellDepth; /* Depth of node containing pCell */ u16 szCell; /* Size of the cell being deleted */ + int bSkipnext = 0; /* Leaf cursor in SKIPNEXT state */ assert( cursorHoldsMutex(pCur) ); assert( pBt->inTransaction==TRANS_WRITE ); assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); assert( pCur->curFlags & BTCF_WriteFlag ); @@ -8092,14 +8119,11 @@ rc = sqlite3BtreePrevious(pCur, ¬Used); if( rc ) return rc; } /* Save the positions of any other cursors open on this table before - ** making any modifications. Make the page containing the entry to be - ** deleted writable. Then free any overflow pages associated with the - ** entry and finally remove the cell itself from within the page. - */ + ** making any modifications. */ if( pCur->curFlags & BTCF_Multiple ){ rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); if( rc ) return rc; } @@ -8107,10 +8131,35 @@ ** invalidate any incrblob cursors open on the row being deleted. */ if( pCur->pKeyInfo==0 ){ invalidateIncrblobCursors(p, pCur->info.nKey, 0); } + /* If the bPreserve flag is set to true, then the cursor position must + ** be preserved following this delete operation. If the current delete + ** will cause a b-tree rebalance, then this is done by saving the cursor + ** key and leaving the cursor in CURSOR_REQUIRESEEK state before + ** returning. + ** + ** Or, if the current delete will not cause a rebalance, then the cursor + ** will be left in CURSOR_SKIPNEXT state pointing to the entry immediately + ** before or after the deleted entry. In this case set bSkipnext to true. */ + if( bPreserve ){ + if( !pPage->leaf + || (pPage->nFree+cellSizePtr(pPage,pCell)+2)>(int)(pBt->usableSize*2/3) + ){ + /* A b-tree rebalance will be required after deleting this entry. + ** Save the cursor key. */ + rc = saveCursorKey(pCur); + if( rc ) return rc; + }else{ + bSkipnext = 1; + } + } + + /* Make the page containing the entry to be deleted writable. Then free any + ** overflow pages associated with the entry and finally remove the cell + ** itself from within the page. */ rc = sqlite3PagerWrite(pPage->pDbPage); if( rc ) return rc; rc = clearCell(pPage, pCell, &szCell); dropCell(pPage, iCellIdx, szCell, &rc); if( rc ) return rc; @@ -8160,11 +8209,26 @@ } rc = balance(pCur); } if( rc==SQLITE_OK ){ - moveToRoot(pCur); + if( bSkipnext ){ + assert( bPreserve && pCur->iPage==iCellDepth ); + assert( pPage->nCell>0 && iCellIdx<=pPage->nCell ); + pCur->eState = CURSOR_SKIPNEXT; + if( iCellIdx>=pPage->nCell ){ + pCur->skipNext = -1; + pCur->aiIdx[iCellDepth] = pPage->nCell-1; + }else{ + pCur->skipNext = 1; + } + }else{ + rc = moveToRoot(pCur); + if( bPreserve ){ + pCur->eState = CURSOR_REQUIRESEEK; + } + } } return rc; } /* Index: src/btree.h ================================================================== --- src/btree.h +++ src/btree.h @@ -183,11 +183,11 @@ int bias, int *pRes ); int sqlite3BtreeCursorHasMoved(BtCursor*); int sqlite3BtreeCursorRestore(BtCursor*, int*); -int sqlite3BtreeDelete(BtCursor*); +int sqlite3BtreeDelete(BtCursor*, int); int sqlite3BtreeInsert(BtCursor*, const void *pKey, i64 nKey, const void *pData, int nData, int nZero, int bias, int seekResult); int sqlite3BtreeFirst(BtCursor*, int *pRes); int sqlite3BtreeLast(BtCursor*, int *pRes); Index: src/build.c ================================================================== --- src/build.c +++ src/build.c @@ -355,26 +355,28 @@ p = sqlite3FindTable(pParse->db, zName, zDbase); if( p==0 ){ const char *zMsg = isView ? "no such view" : "no such table"; #ifndef SQLITE_OMIT_VIRTUALTABLE - /* If zName is the not the name of a table in the schema created using - ** CREATE, then check to see if it is the name of an virtual table that - ** can be an eponymous virtual table. */ - Module *pMod = (Module*)sqlite3HashFind(&pParse->db->aModule, zName); - if( pMod && sqlite3VtabEponymousTableInit(pParse, pMod) ){ - return pMod->pEpoTab; + if( sqlite3FindDbName(pParse->db, zDbase)<1 ){ + /* If zName is the not the name of a table in the schema created using + ** CREATE, then check to see if it is the name of an virtual table that + ** can be an eponymous virtual table. */ + Module *pMod = (Module*)sqlite3HashFind(&pParse->db->aModule, zName); + if( pMod && sqlite3VtabEponymousTableInit(pParse, pMod) ){ + return pMod->pEpoTab; + } } #endif if( zDbase ){ sqlite3ErrorMsg(pParse, "%s: %s.%s", zMsg, zDbase, zName); }else{ sqlite3ErrorMsg(pParse, "%s: %s", zMsg, zName); } pParse->checkSchema = 1; } -#if SQLITE_USER_AUTHENICATION +#if SQLITE_USER_AUTHENTICATION else if( pParse->db->auth.authLevelpPartIdxWhere); + sqlite3ExprListDelete(db, p->aColExpr); sqlite3DbFree(db, p->zColAff); if( p->isResized ) sqlite3DbFree(db, p->azColl); #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 sqlite3_free(p->aiRowEst); #endif @@ -981,10 +984,12 @@ */ if( !db->init.busy && (v = sqlite3GetVdbe(pParse))!=0 ){ int j1; int fileFormat; int reg1, reg2, reg3; + /* nullRow[] is an OP_Record encoding of a row containing 5 NULLs */ + static const char nullRow[] = { 6, 0, 0, 0, 0, 0 }; sqlite3BeginWriteOperation(pParse, 1, iDb); #ifndef SQLITE_OMIT_VIRTUALTABLE if( isVirtual ){ sqlite3VdbeAddOp0(v, OP_VBegin); @@ -1025,11 +1030,11 @@ { pParse->addrCrTab = sqlite3VdbeAddOp2(v, OP_CreateTable, iDb, reg2); } sqlite3OpenMasterTable(pParse, iDb); sqlite3VdbeAddOp2(v, OP_NewRowid, 0, reg1); - sqlite3VdbeAddOp2(v, OP_Null, 0, reg3); + sqlite3VdbeAddOp4(v, OP_Blob, 6, reg3, 0, nullRow, P4_STATIC); sqlite3VdbeAddOp3(v, OP_Insert, 0, reg3, reg1); sqlite3VdbeChangeP5(v, OPFLAG_APPEND); sqlite3VdbeAddOp0(v, OP_Close); } @@ -1307,11 +1312,12 @@ nTerm = 1; }else{ nTerm = pList->nExpr; for(i=0; ia[i].pExpr); - if( pCExpr && pCExpr->op==TK_ID ){ + assert( pCExpr!=0 ); + if( pCExpr->op==TK_ID ){ const char *zCName = pCExpr->u.zToken; for(iCol=0; iColnCol; iCol++){ if( sqlite3StrICmp(zCName, pTab->aCol[iCol].zName)==0 ){ pTab->aCol[iCol].colFlags |= COLFLAG_PRIMKEY; zType = pTab->aCol[iCol].zType; @@ -2845,10 +2851,34 @@ p->nKeyCol = nCol - 1; *ppExtra = ((char*)p) + nByte; } return p; } + +/* +** Backwards Compatibility Hack: +** +** Historical versions of SQLite accepted strings as column names in +** indexes and PRIMARY KEY constraints and in UNIQUE constraints. Example: +** +** CREATE TABLE xyz(a,b,c,d,e,PRIMARY KEY('a'),UNIQUE('b','c' COLLATE trim) +** CREATE INDEX abc ON xyz('c','d' DESC,'e' COLLATE nocase DESC); +** +** This is goofy. But to preserve backwards compatibility we continue to +** accept it. This routine does the necessary conversion. It converts +** the expression given in its argument from a TK_STRING into a TK_ID +** if the expression is just a TK_STRING with an optional COLLATE clause. +** If the epxression is anything other than TK_STRING, the expression is +** unchanged. +*/ +static void sqlite3StringToId(Expr *p){ + if( p->op==TK_STRING ){ + p->op = TK_ID; + }else if( p->op==TK_COLLATE && p->pLeft->op==TK_STRING ){ + p->pLeft->op = TK_ID; + } +} /* ** Create a new index for an SQL table. pName1.pName2 is the name of the index ** and pTblList is the name of the table that is to be indexed. Both will ** be NULL for a primary key or an index that is created to satisfy a @@ -2887,11 +2917,10 @@ sqlite3 *db = pParse->db; Db *pDb; /* The specific table containing the indexed database */ int iDb; /* Index of the database that is being written */ Token *pName = 0; /* Unqualified name of the index to create */ struct ExprList_item *pListItem; /* For looping over pList */ - const Column *pTabCol; /* A column in the table */ int nExtra = 0; /* Space allocated for zExtra[] */ int nExtraCol; /* Number of extra columns needed */ char *zExtra = 0; /* Extra space after the Index object */ Index *pPk = 0; /* PRIMARY KEY index for WITHOUT ROWID tables */ @@ -3059,11 +3088,12 @@ /* Figure out how many bytes of space are required to store explicitly ** specified collation sequence names. */ for(i=0; inExpr; i++){ Expr *pExpr = pList->a[i].pExpr; - if( pExpr && pExpr->op==TK_COLLATE ){ + assert( pExpr!=0 ); + if( pExpr->op==TK_COLLATE ){ nExtra += (1 + sqlite3Strlen30(pExpr->u.zToken)); } } /* @@ -3100,67 +3130,85 @@ sortOrderMask = -1; /* Honor DESC */ }else{ sortOrderMask = 0; /* Ignore DESC */ } - /* Scan the names of the columns of the table to be indexed and - ** load the column indices into the Index structure. Report an error - ** if any column is not found. + /* Analyze the list of expressions that form the terms of the index and + ** report any errors. In the common case where the expression is exactly + ** a table column, store that column in aiColumn[]. For general expressions, + ** populate pIndex->aColExpr and store -2 in aiColumn[]. ** - ** TODO: Add a test to make sure that the same column is not named - ** more than once within the same index. Only the first instance of - ** the column will ever be used by the optimizer. Note that using the - ** same column more than once cannot be an error because that would - ** break backwards compatibility - it needs to be a warning. + ** TODO: Issue a warning if two or more columns of the index are identical. + ** TODO: Issue a warning if the table primary key is used as part of the + ** index key. */ for(i=0, pListItem=pList->a; inExpr; i++, pListItem++){ - const char *zColName; - Expr *pCExpr; - int requestedSortOrder; + Expr *pCExpr; /* The i-th index expression */ + int requestedSortOrder; /* ASC or DESC on the i-th expression */ char *zColl; /* Collation sequence name */ + sqlite3StringToId(pListItem->pExpr); + sqlite3ResolveSelfReference(pParse, pTab, NC_IdxExpr, pListItem->pExpr, 0); + if( pParse->nErr ) goto exit_create_index; pCExpr = sqlite3ExprSkipCollate(pListItem->pExpr); - if( pCExpr->op!=TK_ID ){ - sqlite3ErrorMsg(pParse, "indexes on expressions not yet supported"); - continue; - } - zColName = pCExpr->u.zToken; - for(j=0, pTabCol=pTab->aCol; jnCol; j++, pTabCol++){ - if( sqlite3StrICmp(zColName, pTabCol->zName)==0 ) break; - } - if( j>=pTab->nCol ){ - sqlite3ErrorMsg(pParse, "table %s has no column named %s", - pTab->zName, zColName); - pParse->checkSchema = 1; - goto exit_create_index; - } - assert( j<=0x7fff ); - pIndex->aiColumn[i] = (i16)j; + if( pCExpr->op!=TK_COLUMN ){ + if( pTab==pParse->pNewTable ){ + sqlite3ErrorMsg(pParse, "expressions prohibited in PRIMARY KEY and " + "UNIQUE constraints"); + goto exit_create_index; + } + if( pIndex->aColExpr==0 ){ + ExprList *pCopy = sqlite3ExprListDup(db, pList, 0); + pIndex->aColExpr = pCopy; + if( !db->mallocFailed ){ + assert( pCopy!=0 ); + pListItem = &pCopy->a[i]; + } + } + j = -2; + pIndex->aiColumn[i] = -2; + pIndex->uniqNotNull = 0; + }else{ + j = pCExpr->iColumn; + assert( j<=0x7fff ); + if( j<0 ){ + j = pTab->iPKey; + }else if( pTab->aCol[j].notNull==0 ){ + pIndex->uniqNotNull = 0; + } + pIndex->aiColumn[i] = (i16)j; + } + zColl = 0; if( pListItem->pExpr->op==TK_COLLATE ){ int nColl; zColl = pListItem->pExpr->u.zToken; nColl = sqlite3Strlen30(zColl) + 1; assert( nExtra>=nColl ); memcpy(zExtra, zColl, nColl); zColl = zExtra; zExtra += nColl; nExtra -= nColl; - }else{ + }else if( j>=0 ){ zColl = pTab->aCol[j].zColl; - if( !zColl ) zColl = "BINARY"; } + if( !zColl ) zColl = "BINARY"; if( !db->init.busy && !sqlite3LocateCollSeq(pParse, zColl) ){ goto exit_create_index; } pIndex->azColl[i] = zColl; requestedSortOrder = pListItem->sortOrder & sortOrderMask; pIndex->aSortOrder[i] = (u8)requestedSortOrder; - if( pTab->aCol[j].notNull==0 ) pIndex->uniqNotNull = 0; } + + /* Append the table key to the end of the index. For WITHOUT ROWID + ** tables (when pPk!=0) this will be the declared PRIMARY KEY. For + ** normal tables (when pPk==0) this will be the rowid. + */ if( pPk ){ for(j=0; jnKeyCol; j++){ int x = pPk->aiColumn[j]; + assert( x>=0 ); if( hasColumn(pIndex->aiColumn, pIndex->nKeyCol, x) ){ pIndex->nColumn--; }else{ pIndex->aiColumn[i] = x; pIndex->azColl[i] = pPk->azColl[j]; @@ -3207,10 +3255,11 @@ if( pIdx->nKeyCol!=pIndex->nKeyCol ) continue; for(k=0; knKeyCol; k++){ const char *z1; const char *z2; + assert( pIdx->aiColumn[k]>=0 ); if( pIdx->aiColumn[k]!=pIndex->aiColumn[k] ) break; z1 = pIdx->azColl[k]; z2 = pIndex->azColl[k]; if( z1!=z2 && sqlite3StrICmp(z1, z2) ) break; } @@ -3238,10 +3287,11 @@ } /* Link the new Index structure to its table and to the other ** in-memory database structures. */ + assert( pParse->nErr==0 ); if( db->init.busy ){ Index *p; assert( sqlite3SchemaMutexHeld(db, 0, pIndex->pSchema) ); p = sqlite3HashInsert(&pIndex->pSchema->idxHash, pIndex->zName, pIndex); @@ -3267,11 +3317,11 @@ ** If pTblName==0 it means this index is generated as an implied PRIMARY KEY ** or UNIQUE index in a CREATE TABLE statement. Since the table ** has just been created, it contains no data and the index initialization ** step can be skipped. */ - else if( pParse->nErr==0 && (HasRowid(pTab) || pTblName!=0) ){ + else if( HasRowid(pTab) || pTblName!=0 ){ Vdbe *v; char *zStmt; int iMem = ++pParse->nMem; v = sqlite3GetVdbe(pParse); @@ -4097,16 +4147,20 @@ int j; StrAccum errMsg; Table *pTab = pIdx->pTable; sqlite3StrAccumInit(&errMsg, pParse->db, 0, 0, 200); - for(j=0; jnKeyCol; j++){ - char *zCol = pTab->aCol[pIdx->aiColumn[j]].zName; - if( j ) sqlite3StrAccumAppend(&errMsg, ", ", 2); - sqlite3StrAccumAppendAll(&errMsg, pTab->zName); - sqlite3StrAccumAppend(&errMsg, ".", 1); - sqlite3StrAccumAppendAll(&errMsg, zCol); + if( pIdx->aColExpr ){ + sqlite3XPrintf(&errMsg, 0, "index '%q'", pIdx->zName); + }else{ + for(j=0; jnKeyCol; j++){ + char *zCol; + assert( pIdx->aiColumn[j]>=0 ); + zCol = pTab->aCol[pIdx->aiColumn[j]].zName; + if( j ) sqlite3StrAccumAppend(&errMsg, ", ", 2); + sqlite3XPrintf(&errMsg, 0, "%s.%s", pTab->zName, zCol); + } } zErr = sqlite3StrAccumFinish(&errMsg); sqlite3HaltConstraint(pParse, IsPrimaryKeyIndex(pIdx) ? SQLITE_CONSTRAINT_PRIMARYKEY : SQLITE_CONSTRAINT_UNIQUE, Index: src/date.c ================================================================== --- src/date.c +++ src/date.c @@ -1113,18 +1113,18 @@ ** external linkage. */ void sqlite3RegisterDateTimeFunctions(void){ static SQLITE_WSD FuncDef aDateTimeFuncs[] = { #ifndef SQLITE_OMIT_DATETIME_FUNCS - FUNCTION(julianday, -1, 0, 0, juliandayFunc ), - FUNCTION(date, -1, 0, 0, dateFunc ), - FUNCTION(time, -1, 0, 0, timeFunc ), - FUNCTION(datetime, -1, 0, 0, datetimeFunc ), - FUNCTION(strftime, -1, 0, 0, strftimeFunc ), - FUNCTION(current_time, 0, 0, 0, ctimeFunc ), - FUNCTION(current_timestamp, 0, 0, 0, ctimestampFunc), - FUNCTION(current_date, 0, 0, 0, cdateFunc ), + DFUNCTION(julianday, -1, 0, 0, juliandayFunc ), + DFUNCTION(date, -1, 0, 0, dateFunc ), + DFUNCTION(time, -1, 0, 0, timeFunc ), + DFUNCTION(datetime, -1, 0, 0, datetimeFunc ), + DFUNCTION(strftime, -1, 0, 0, strftimeFunc ), + DFUNCTION(current_time, 0, 0, 0, ctimeFunc ), + DFUNCTION(current_timestamp, 0, 0, 0, ctimestampFunc), + DFUNCTION(current_date, 0, 0, 0, cdateFunc ), #else STR_FUNCTION(current_time, 0, "%H:%M:%S", 0, currentTimeFunc), STR_FUNCTION(current_date, 0, "%Y-%m-%d", 0, currentTimeFunc), STR_FUNCTION(current_timestamp, 0, "%Y-%m-%d %H:%M:%S", 0, currentTimeFunc), #endif Index: src/dbstat.c ================================================================== --- src/dbstat.c +++ src/dbstat.c @@ -14,10 +14,13 @@ ** ** The dbstat virtual table is used to extract low-level formatting ** information from an SQLite database in order to implement the ** "sqlite3_analyzer" utility. See the ../tool/spaceanal.tcl script ** for an example implementation. +** +** Additional information is available on the "dbstat.html" page of the +** official SQLite documentation. */ #include "sqliteInt.h" /* Requires access to internal data structures */ #if (defined(SQLITE_ENABLE_DBSTAT_VTAB) || defined(SQLITE_TEST)) \ && !defined(SQLITE_OMIT_VIRTUALTABLE) @@ -62,11 +65,12 @@ " ncell INTEGER, /* Cells on page (0 for overflow) */" \ " payload INTEGER, /* Bytes of payload on this page */" \ " unused INTEGER, /* Bytes of unused space on this page */" \ " mx_payload INTEGER, /* Largest payload size of all cells */" \ " pgoffset INTEGER, /* Offset of page in file */" \ - " pgsize INTEGER /* Size of the page */" \ + " pgsize INTEGER, /* Size of the page */" \ + " schema TEXT HIDDEN /* Database schema being analyzed */" \ ");" typedef struct StatTable StatTable; typedef struct StatCursor StatCursor; @@ -100,10 +104,11 @@ struct StatCursor { sqlite3_vtab_cursor base; sqlite3_stmt *pStmt; /* Iterates through set of root pages */ int isEof; /* After pStmt has returned SQLITE_DONE */ + int iDb; /* Schema used for this query */ StatPage aPage[32]; int iPage; /* Current entry in aPage[] */ /* Values to return. */ @@ -177,13 +182,36 @@ return SQLITE_OK; } /* ** There is no "best-index". This virtual table always does a linear -** scan of the binary VFS log file. +** scan. However, a schema=? constraint should cause this table to +** operate on a different database schema, so check for it. +** +** idxNum is normally 0, but will be 1 if a schema=? constraint exists. */ static int statBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ + int i; + + pIdxInfo->estimatedCost = 1.0e6; /* Initial cost estimate */ + + /* Look for a valid schema=? constraint. If found, change the idxNum to + ** 1 and request the value of that constraint be sent to xFilter. And + ** lower the cost estimate to encourage the constrained version to be + ** used. + */ + for(i=0; inConstraint; i++){ + if( pIdxInfo->aConstraint[i].usable==0 ) continue; + if( pIdxInfo->aConstraint[i].op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue; + if( pIdxInfo->aConstraint[i].iColumn!=10 ) continue; + pIdxInfo->idxNum = 1; + pIdxInfo->estimatedCost = 1.0; + pIdxInfo->aConstraintUsage[i].argvIndex = 1; + pIdxInfo->aConstraintUsage[i].omit = 1; + break; + } + /* Records are always returned in ascending order of (name, path). ** If this will satisfy the client, set the orderByConsumed flag so that ** SQLite does not do an external sort. */ @@ -199,50 +227,31 @@ ) ){ pIdxInfo->orderByConsumed = 1; } - pIdxInfo->estimatedCost = 10.0; return SQLITE_OK; } /* ** Open a new statvfs cursor. */ static int statOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ StatTable *pTab = (StatTable *)pVTab; StatCursor *pCsr; - int rc; pCsr = (StatCursor *)sqlite3_malloc64(sizeof(StatCursor)); if( pCsr==0 ){ - rc = SQLITE_NOMEM; + return SQLITE_NOMEM; }else{ - char *zSql; memset(pCsr, 0, sizeof(StatCursor)); pCsr->base.pVtab = pVTab; - - zSql = sqlite3_mprintf( - "SELECT 'sqlite_master' AS name, 1 AS rootpage, 'table' AS type" - " UNION ALL " - "SELECT name, rootpage, type" - " FROM \"%w\".sqlite_master WHERE rootpage!=0" - " ORDER BY name", pTab->db->aDb[pTab->iDb].zName); - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pCsr->pStmt, 0); - sqlite3_free(zSql); - } - if( rc!=SQLITE_OK ){ - sqlite3_free(pCsr); - pCsr = 0; - } + pCsr->iDb = pTab->iDb; } *ppCursor = (sqlite3_vtab_cursor *)pCsr; - return rc; + return SQLITE_OK; } static void statClearPage(StatPage *p){ int i; if( p->aCell ){ @@ -263,10 +272,11 @@ statClearPage(&pCsr->aPage[i]); } pCsr->iPage = 0; sqlite3_free(pCsr->zPath); pCsr->zPath = 0; + pCsr->isEof = 0; } /* ** Close a statvfs cursor. */ @@ -425,11 +435,11 @@ int rc; int nPayload; char *z; StatCursor *pCsr = (StatCursor *)pCursor; StatTable *pTab = (StatTable *)pCursor->pVtab; - Btree *pBt = pTab->db->aDb[pTab->iDb].pBt; + Btree *pBt = pTab->db->aDb[pCsr->iDb].pBt; Pager *pPager = sqlite3BtreePager(pBt); sqlite3_free(pCsr->zPath); pCsr->zPath = 0; @@ -563,13 +573,47 @@ sqlite3_vtab_cursor *pCursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv ){ StatCursor *pCsr = (StatCursor *)pCursor; + StatTable *pTab = (StatTable*)(pCursor->pVtab); + char *zSql; + int rc = SQLITE_OK; + char *zMaster; + if( idxNum==1 ){ + const char *zDbase = (const char*)sqlite3_value_text(argv[0]); + pCsr->iDb = sqlite3FindDbName(pTab->db, zDbase); + if( pCsr->iDb<0 ){ + sqlite3_free(pCursor->pVtab->zErrMsg); + pCursor->pVtab->zErrMsg = sqlite3_mprintf("no such schema: %s", zDbase); + return pCursor->pVtab->zErrMsg ? SQLITE_ERROR : SQLITE_NOMEM; + } + }else{ + pCsr->iDb = pTab->iDb; + } statResetCsr(pCsr); - return statNext(pCursor); + sqlite3_finalize(pCsr->pStmt); + pCsr->pStmt = 0; + zMaster = pCsr->iDb==1 ? "sqlite_temp_master" : "sqlite_master"; + zSql = sqlite3_mprintf( + "SELECT 'sqlite_master' AS name, 1 AS rootpage, 'table' AS type" + " UNION ALL " + "SELECT name, rootpage, type" + " FROM \"%w\".%s WHERE rootpage!=0" + " ORDER BY name", pTab->db->aDb[pCsr->iDb].zName, zMaster); + if( zSql==0 ){ + return SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pCsr->pStmt, 0); + sqlite3_free(zSql); + } + + if( rc==SQLITE_OK ){ + rc = statNext(pCursor); + } + return rc; } static int statColumn( sqlite3_vtab_cursor *pCursor, sqlite3_context *ctx, @@ -602,14 +646,19 @@ sqlite3_result_int(ctx, pCsr->nMxPayload); break; case 8: /* pgoffset */ sqlite3_result_int64(ctx, pCsr->iOffset); break; - default: /* pgsize */ - assert( i==9 ); + case 9: /* pgsize */ sqlite3_result_int(ctx, pCsr->szPage); break; + default: { /* schema */ + sqlite3 *db = sqlite3_context_db_handle(ctx); + int iDb = pCsr->iDb; + sqlite3_result_text(ctx, db->aDb[iDb].zName, -1, SQLITE_STATIC); + break; + } } return SQLITE_OK; } static int statRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ Index: src/delete.c ================================================================== --- src/delete.c +++ src/delete.c @@ -233,11 +233,11 @@ AuthContext sContext; /* Authorization context */ NameContext sNC; /* Name context to resolve expressions in */ int iDb; /* Database number */ int memCnt = -1; /* Memory cell used for change counting */ int rcauth; /* Value returned by authorization callback */ - int okOnePass; /* True for one-pass algorithm without the FIFO */ + int eOnePass; /* ONEPASS_OFF or _SINGLE or _MULTI */ int aiCurOnePass[2]; /* The write cursors opened by WHERE_ONEPASS */ u8 *aToOpen = 0; /* Open cursor iTabCur+j if aToOpen[j] is true */ Index *pPk; /* The PRIMARY KEY index on the table */ int iPk = 0; /* First of nPk registers holding PRIMARY KEY value */ i16 nPk = 1; /* Number of columns in the PRIMARY KEY */ @@ -245,16 +245,16 @@ i16 nKey; /* Number of memory cells in the row key */ int iEphCur = 0; /* Ephemeral table holding all primary key values */ int iRowSet = 0; /* Register for rowset of rows to delete */ int addrBypass = 0; /* Address of jump over the delete logic */ int addrLoop = 0; /* Top of the delete loop */ - int addrDelete = 0; /* Jump directly to the delete logic */ int addrEphOpen = 0; /* Instruction to open the Ephemeral table */ #ifndef SQLITE_OMIT_TRIGGER int isView; /* True if attempting to delete from a view */ Trigger *pTrigger; /* List of table triggers, if required */ + int bComplex; /* True if there are either triggers or FKs */ #endif memset(&sContext, 0, sizeof(sContext)); db = pParse->db; if( pParse->nErr || db->mallocFailed ){ @@ -274,13 +274,15 @@ ** deleted from is a view */ #ifndef SQLITE_OMIT_TRIGGER pTrigger = sqlite3TriggersExist(pParse, pTab, TK_DELETE, 0, 0); isView = pTab->pSelect!=0; + bComplex = pTrigger || sqlite3FkRequired(pParse, pTab, 0, 0); #else # define pTrigger 0 # define isView 0 +# define bComplex 0 #endif #ifdef SQLITE_OMIT_VIEW # undef isView # define isView 0 #endif @@ -357,12 +359,14 @@ #ifndef SQLITE_OMIT_TRUNCATE_OPTIMIZATION /* Special case: A DELETE without a WHERE clause deletes everything. ** It is easier just to erase the whole table. Prior to version 3.6.5, ** this optimization caused the row change count (the value returned by ** API function sqlite3_count_changes) to be set incorrectly. */ - if( rcauth==SQLITE_OK && pWhere==0 && !pTrigger && !IsVirtual(pTab) - && 0==sqlite3FkRequired(pParse, pTab, 0, 0) + if( rcauth==SQLITE_OK + && pWhere==0 + && !bComplex + && !IsVirtual(pTab) ){ assert( !isView ); sqlite3TableLock(pParse, iDb, pTab->tnum, 1, pTab->zName); if( HasRowid(pTab) ){ sqlite3VdbeAddOp4(v, OP_Clear, pTab->tnum, iDb, memCnt, @@ -373,10 +377,12 @@ sqlite3VdbeAddOp2(v, OP_Clear, pIdx->tnum, iDb); } }else #endif /* SQLITE_OMIT_TRUNCATE_OPTIMIZATION */ { + u16 wcf = WHERE_ONEPASS_DESIRED|WHERE_DUPLICATES_OK; + wcf |= (bComplex ? 0 : WHERE_ONEPASS_MULTIROW); if( HasRowid(pTab) ){ /* For a rowid table, initialize the RowSet to an empty set */ pPk = 0; nPk = 1; iRowSet = ++pParse->nMem; @@ -393,26 +399,31 @@ addrEphOpen = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, iEphCur, nPk); sqlite3VdbeSetP4KeyInfo(pParse, pPk); } /* Construct a query to find the rowid or primary key for every row - ** to be deleted, based on the WHERE clause. + ** to be deleted, based on the WHERE clause. Set variable eOnePass + ** to indicate the strategy used to implement this delete: + ** + ** ONEPASS_OFF: Two-pass approach - use a FIFO for rowids/PK values. + ** ONEPASS_SINGLE: One-pass approach - at most one row deleted. + ** ONEPASS_MULTI: One-pass approach - any number of rows may be deleted. */ - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, 0, 0, - WHERE_ONEPASS_DESIRED|WHERE_DUPLICATES_OK, - iTabCur+1); + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, 0, 0, wcf, iTabCur+1); if( pWInfo==0 ) goto delete_from_cleanup; - okOnePass = sqlite3WhereOkOnePass(pWInfo, aiCurOnePass); + eOnePass = sqlite3WhereOkOnePass(pWInfo, aiCurOnePass); + assert( IsVirtual(pTab)==0 || eOnePass==ONEPASS_OFF ); /* Keep track of the number of rows to be deleted */ if( db->flags & SQLITE_CountRows ){ sqlite3VdbeAddOp2(v, OP_AddImm, memCnt, 1); } /* Extract the rowid or primary key for the current row */ if( pPk ){ for(i=0; iaiColumn[i]>=(-1) ); sqlite3ExprCodeGetColumnOfTable(v, pTab, iTabCur, pPk->aiColumn[i], iPk+i); } iKey = iPk; }else{ @@ -419,15 +430,14 @@ iKey = pParse->nMem + 1; iKey = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iTabCur, iKey, 0); if( iKey>pParse->nMem ) pParse->nMem = iKey; } - if( okOnePass ){ - /* For ONEPASS, no need to store the rowid/primary-key. There is only + if( eOnePass!=ONEPASS_OFF ){ + /* For ONEPASS, no need to store the rowid/primary-key. There is only ** one, so just keep it in its register(s) and fall through to the - ** delete code. - */ + ** delete code. */ nKey = nPk; /* OP_Found will use an unpacked key */ aToOpen = sqlite3DbMallocRaw(db, nIdx+2); if( aToOpen==0 ){ sqlite3WhereEnd(pWInfo); goto delete_from_cleanup; @@ -435,53 +445,56 @@ memset(aToOpen, 1, nIdx+1); aToOpen[nIdx+1] = 0; if( aiCurOnePass[0]>=0 ) aToOpen[aiCurOnePass[0]-iTabCur] = 0; if( aiCurOnePass[1]>=0 ) aToOpen[aiCurOnePass[1]-iTabCur] = 0; if( addrEphOpen ) sqlite3VdbeChangeToNoop(v, addrEphOpen); - addrDelete = sqlite3VdbeAddOp0(v, OP_Goto); /* Jump to DELETE logic */ - }else if( pPk ){ - /* Construct a composite key for the row to be deleted and remember it */ - iKey = ++pParse->nMem; - nKey = 0; /* Zero tells OP_Found to use a composite key */ - sqlite3VdbeAddOp4(v, OP_MakeRecord, iPk, nPk, iKey, - sqlite3IndexAffinityStr(pParse->db, pPk), nPk); - sqlite3VdbeAddOp2(v, OP_IdxInsert, iEphCur, iKey); - }else{ - /* Get the rowid of the row to be deleted and remember it in the RowSet */ - nKey = 1; /* OP_Seek always uses a single rowid */ - sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, iKey); - } - - /* End of the WHERE loop */ - sqlite3WhereEnd(pWInfo); - if( okOnePass ){ - /* Bypass the delete logic below if the WHERE loop found zero rows */ - addrBypass = sqlite3VdbeMakeLabel(v); - sqlite3VdbeGoto(v, addrBypass); - sqlite3VdbeJumpHere(v, addrDelete); + }else{ + if( pPk ){ + /* Add the PK key for this row to the temporary table */ + iKey = ++pParse->nMem; + nKey = 0; /* Zero tells OP_Found to use a composite key */ + sqlite3VdbeAddOp4(v, OP_MakeRecord, iPk, nPk, iKey, + sqlite3IndexAffinityStr(pParse->db, pPk), nPk); + sqlite3VdbeAddOp2(v, OP_IdxInsert, iEphCur, iKey); + }else{ + /* Add the rowid of the row to be deleted to the RowSet */ + nKey = 1; /* OP_Seek always uses a single rowid */ + sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, iKey); + } + } + + /* If this DELETE cannot use the ONEPASS strategy, this is the + ** end of the WHERE loop */ + if( eOnePass!=ONEPASS_OFF ){ + addrBypass = sqlite3VdbeMakeLabel(v); + }else{ + sqlite3WhereEnd(pWInfo); } /* Unless this is a view, open cursors for the table we are ** deleting from and all its indices. If this is a view, then the ** only effect this statement has is to fire the INSTEAD OF ** triggers. */ if( !isView ){ + int iAddrOnce = 0; + if( eOnePass==ONEPASS_MULTI ){ + iAddrOnce = sqlite3CodeOnce(pParse); VdbeCoverage(v); + } testcase( IsVirtual(pTab) ); sqlite3OpenTableAndIndices(pParse, pTab, OP_OpenWrite, iTabCur, aToOpen, &iDataCur, &iIdxCur); assert( pPk || IsVirtual(pTab) || iDataCur==iTabCur ); assert( pPk || IsVirtual(pTab) || iIdxCur==iDataCur+1 ); + if( eOnePass==ONEPASS_MULTI ) sqlite3VdbeJumpHere(v, iAddrOnce); } /* Set up a loop over the rowids/primary-keys that were found in the ** where-clause loop above. */ - if( okOnePass ){ - /* Just one row. Hence the top-of-loop is a no-op */ + if( eOnePass!=ONEPASS_OFF ){ assert( nKey==nPk ); /* OP_Found will use an unpacked key */ - assert( !IsVirtual(pTab) ); if( aToOpen[iDataCur-iTabCur] ){ assert( pPk!=0 || pTab->pSelect!=0 ); sqlite3VdbeAddOp4Int(v, OP_NotFound, iDataCur, addrBypass, iKey, nKey); VdbeCoverage(v); } @@ -505,17 +518,22 @@ sqlite3MayAbort(pParse); }else #endif { int count = (pParse->nested==0); /* True to count changes */ + int iIdxNoSeek = -1; + if( bComplex==0 && aiCurOnePass[1]!=iDataCur ){ + iIdxNoSeek = aiCurOnePass[1]; + } sqlite3GenerateRowDelete(pParse, pTab, pTrigger, iDataCur, iIdxCur, - iKey, nKey, count, OE_Default, okOnePass); + iKey, nKey, count, OE_Default, eOnePass, iIdxNoSeek); } /* End of the loop over all rowids/primary-keys. */ - if( okOnePass ){ + if( eOnePass!=ONEPASS_OFF ){ sqlite3VdbeResolveLabel(v, addrBypass); + sqlite3WhereEnd(pWInfo); }else if( pPk ){ sqlite3VdbeAddOp2(v, OP_Next, iEphCur, addrLoop+1); VdbeCoverage(v); sqlite3VdbeJumpHere(v, addrLoop); }else{ sqlite3VdbeGoto(v, addrLoop); @@ -583,10 +601,29 @@ ** ** 3. The primary key for the row to be deleted must be stored in a ** sequence of nPk memory cells starting at iPk. If nPk==0 that means ** that a search record formed from OP_MakeRecord is contained in the ** single memory location iPk. +** +** eMode: +** Parameter eMode may be passed either ONEPASS_OFF (0), ONEPASS_SINGLE, or +** ONEPASS_MULTI. If eMode is not ONEPASS_OFF, then the cursor +** iDataCur already points to the row to delete. If eMode is ONEPASS_OFF +** then this function must seek iDataCur to the entry identified by iPk +** and nPk before reading from it. +** +** If eMode is ONEPASS_MULTI, then this call is being made as part +** of a ONEPASS delete that affects multiple rows. In this case, if +** iIdxNoSeek is a valid cursor number (>=0), then its position should +** be preserved following the delete operation. Or, if iIdxNoSeek is not +** a valid cursor number, the position of iDataCur should be preserved +** instead. +** +** iIdxNoSeek: +** If iIdxNoSeek is a valid cursor number (>=0), then it identifies an +** index cursor (from within array of cursors starting at iIdxCur) that +** already points to the index entry to be deleted. */ void sqlite3GenerateRowDelete( Parse *pParse, /* Parsing context */ Table *pTab, /* Table containing the row to be deleted */ Trigger *pTrigger, /* List of triggers to (potentially) fire */ @@ -594,11 +631,12 @@ int iIdxCur, /* First index cursor */ int iPk, /* First memory cell containing the PRIMARY KEY */ i16 nPk, /* Number of PRIMARY KEY memory cells */ u8 count, /* If non-zero, increment the row change counter */ u8 onconf, /* Default ON CONFLICT policy for triggers */ - u8 bNoSeek /* iDataCur is already pointing to the row to delete */ + u8 eMode, /* ONEPASS_OFF, _SINGLE, or _MULTI. See above */ + int iIdxNoSeek /* Cursor number of cursor that does not need seeking */ ){ Vdbe *v = pParse->pVdbe; /* Vdbe */ int iOld = 0; /* First register in OLD.* array */ int iLabel; /* Label resolved to end of generated code */ u8 opSeek; /* Seek opcode */ @@ -611,11 +649,11 @@ /* Seek cursor iCur to the row to delete. If this row no longer exists ** (this can happen if a trigger program has already deleted it), do ** not attempt to delete it or fire any DELETE triggers. */ iLabel = sqlite3VdbeMakeLabel(v); opSeek = HasRowid(pTab) ? OP_NotExists : OP_NotFound; - if( !bNoSeek ){ + if( eMode==ONEPASS_OFF ){ sqlite3VdbeAddOp4Int(v, opSeek, iDataCur, iLabel, iPk, nPk); VdbeCoverageIf(v, opSeek==OP_NotExists); VdbeCoverageIf(v, opSeek==OP_NotFound); } @@ -671,15 +709,19 @@ /* Delete the index and table entries. Skip this step if pTab is really ** a view (in which case the only effect of the DELETE statement is to ** fire the INSTEAD OF triggers). */ if( pTab->pSelect==0 ){ - sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur, 0); + sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur,0,iIdxNoSeek); sqlite3VdbeAddOp2(v, OP_Delete, iDataCur, (count?OPFLAG_NCHANGE:0)); if( count ){ sqlite3VdbeChangeP4(v, -1, pTab->zName, P4_TRANSIENT); } + if( iIdxNoSeek>=0 ){ + sqlite3VdbeAddOp1(v, OP_Delete, iIdxNoSeek); + } + sqlite3VdbeChangeP5(v, eMode==ONEPASS_MULTI); } /* Do any ON CASCADE, SET NULL or SET DEFAULT operations required to ** handle rows (possibly in other tables) that refer via a foreign key ** to the row just deleted. */ @@ -718,11 +760,12 @@ void sqlite3GenerateRowIndexDelete( Parse *pParse, /* Parsing and code generating context */ Table *pTab, /* Table containing the row to be deleted */ int iDataCur, /* Cursor of table holding data. */ int iIdxCur, /* First index cursor */ - int *aRegIdx /* Only delete if aRegIdx!=0 && aRegIdx[i]>0 */ + int *aRegIdx, /* Only delete if aRegIdx!=0 && aRegIdx[i]>0 */ + int iIdxNoSeek /* Do not delete from this cursor */ ){ int i; /* Index loop counter */ int r1 = -1; /* Register holding an index key */ int iPartIdxLabel; /* Jump destination for skipping partial index entries */ Index *pIdx; /* Current index */ @@ -734,15 +777,16 @@ pPk = HasRowid(pTab) ? 0 : sqlite3PrimaryKeyIndex(pTab); for(i=0, pIdx=pTab->pIndex; pIdx; i++, pIdx=pIdx->pNext){ assert( iIdxCur+i!=iDataCur || pPk==pIdx ); if( aRegIdx!=0 && aRegIdx[i]==0 ) continue; if( pIdx==pPk ) continue; + if( iIdxCur+i==iIdxNoSeek ) continue; VdbeModuleComment((v, "GenRowIdxDel for %s", pIdx->zName)); r1 = sqlite3GenerateIndexKey(pParse, pIdx, iDataCur, 0, 1, - &iPartIdxLabel, pPrior, r1); + &iPartIdxLabel, pPrior, r1); sqlite3VdbeAddOp3(v, OP_IdxDelete, iIdxCur+i, r1, - pIdx->uniqNotNull ? pIdx->nKeyCol : pIdx->nColumn); + pIdx->uniqNotNull ? pIdx->nKeyCol : pIdx->nColumn); sqlite3ResolvePartIdxLabel(pParse, iPartIdxLabel); pPrior = pIdx; } } @@ -787,18 +831,17 @@ Index *pPrior, /* Previously generated index key */ int regPrior /* Register holding previous generated key */ ){ Vdbe *v = pParse->pVdbe; int j; - Table *pTab = pIdx->pTable; int regBase; int nCol; if( piPartIdxLabel ){ if( pIdx->pPartIdxWhere ){ *piPartIdxLabel = sqlite3VdbeMakeLabel(v); - pParse->iPartIdxTab = iDataCur; + pParse->iSelfTab = iDataCur; sqlite3ExprCachePush(pParse); sqlite3ExprIfFalseDup(pParse, pIdx->pPartIdxWhere, *piPartIdxLabel, SQLITE_JUMPIFNULL); }else{ *piPartIdxLabel = 0; @@ -806,13 +849,18 @@ } nCol = (prefixOnly && pIdx->uniqNotNull) ? pIdx->nKeyCol : pIdx->nColumn; regBase = sqlite3GetTempRange(pParse, nCol); if( pPrior && (regBase!=regPrior || pPrior->pPartIdxWhere) ) pPrior = 0; for(j=0; jaiColumn[j]==pIdx->aiColumn[j] ) continue; - sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, pIdx->aiColumn[j], - regBase+j); + if( pPrior + && pPrior->aiColumn[j]==pIdx->aiColumn[j] + && pPrior->aiColumn[j]>=(-1) + ){ + /* This column was already computed by the previous index */ + continue; + } + sqlite3ExprCodeLoadIndexColumn(pParse, pIdx, iDataCur, j, regBase+j); /* If the column affinity is REAL but the number is an integer, then it ** might be stored in the table as an integer (using a compact ** representation) then converted to REAL by an OP_RealAffinity opcode. ** But we are getting ready to store this value back into an index, where ** it should be converted by to INTEGER again. So omit the OP_RealAffinity Index: src/expr.c ================================================================== --- src/expr.c +++ src/expr.c @@ -89,11 +89,11 @@ s.n = sqlite3Strlen30(s.z); return sqlite3ExprAddCollateToken(pParse, pExpr, &s, 0); } /* -** Skip over any TK_COLLATE or TK_AS operators and any unlikely() +** Skip over any TK_COLLATE operators and any unlikely() ** or likelihood() function at the root of an expression. */ Expr *sqlite3ExprSkipCollate(Expr *pExpr){ while( pExpr && ExprHasProperty(pExpr, EP_Skip) ){ if( ExprHasProperty(pExpr, EP_Unlikely) ){ @@ -100,11 +100,11 @@ assert( !ExprHasProperty(pExpr, EP_xIsSelect) ); assert( pExpr->x.pList->nExpr>0 ); assert( pExpr->op==TK_FUNCTION ); pExpr = pExpr->x.pList->a[0].pExpr; }else{ - assert( pExpr->op==TK_COLLATE || pExpr->op==TK_AS ); + assert( pExpr->op==TK_COLLATE ); pExpr = pExpr->pLeft; } } return pExpr; } @@ -2429,10 +2429,32 @@ if( p->iReg==iReg ){ p->tempReg = 0; } } } + +/* Generate code that will load into register regOut a value that is +** appropriate for the iIdxCol-th column of index pIdx. +*/ +void sqlite3ExprCodeLoadIndexColumn( + Parse *pParse, /* The parsing context */ + Index *pIdx, /* The index whose column is to be loaded */ + int iTabCur, /* Cursor pointing to a table row */ + int iIdxCol, /* The column of the index to be loaded */ + int regOut /* Store the index column value in this register */ +){ + i16 iTabCol = pIdx->aiColumn[iIdxCol]; + if( iTabCol>=(-1) ){ + sqlite3ExprCodeGetColumnOfTable(pParse->pVdbe, pIdx->pTable, iTabCur, + iTabCol, regOut); + return; + } + assert( pIdx->aColExpr ); + assert( pIdx->aColExpr->nExpr>iIdxCol ); + pParse->iSelfTab = iTabCur; + sqlite3ExprCode(pParse, pIdx->aColExpr->a[iIdxCol].pExpr, regOut); +} /* ** Generate code to extract the value of the iCol-th column of a table. */ void sqlite3ExprCodeGetColumnOfTable( @@ -2615,12 +2637,13 @@ if( pParse->ckBase>0 ){ /* Generating CHECK constraints or inserting into partial index */ inReg = pExpr->iColumn + pParse->ckBase; break; }else{ - /* Deleting from a partial index */ - iTab = pParse->iPartIdxTab; + /* Coding an expression that is part of an index where column names + ** in the index refer to the table to which the index belongs */ + iTab = pParse->iSelfTab; } } inReg = sqlite3ExprCodeGetColumn(pParse, pExpr->pTab, pExpr->iColumn, iTab, target, pExpr->op2); @@ -2676,14 +2699,10 @@ } case TK_REGISTER: { inReg = pExpr->iTable; break; } - case TK_AS: { - inReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft, target); - break; - } #ifndef SQLITE_OMIT_CAST case TK_CAST: { /* Expressions of the form: CAST(pLeft AS token) */ inReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft, target); if( inReg!=target ){ @@ -3763,11 +3782,13 @@ return 1; } return 2; } if( pA->op!=TK_COLUMN && ALWAYS(pA->op!=TK_AGG_COLUMN) && pA->u.zToken ){ - if( strcmp(pA->u.zToken,pB->u.zToken)!=0 ){ + if( pA->op==TK_FUNCTION ){ + if( sqlite3StrICmp(pA->u.zToken,pB->u.zToken)!=0 ) return 2; + }else if( strcmp(pA->u.zToken,pB->u.zToken)!=0 ){ return pA->op==TK_COLLATE ? 1 : 2; } } if( (pA->flags & EP_Distinct)!=(pB->flags & EP_Distinct) ) return 2; if( ALWAYS((combinedFlags & EP_TokenOnly)==0) ){ Index: src/func.c ================================================================== --- src/func.c +++ src/func.c @@ -1735,19 +1735,19 @@ FUNCTION2(likelihood, 2, 0, 0, noopFunc, SQLITE_FUNC_UNLIKELY), FUNCTION2(likely, 1, 0, 0, noopFunc, SQLITE_FUNC_UNLIKELY), VFUNCTION(random, 0, 0, 0, randomFunc ), VFUNCTION(randomblob, 1, 0, 0, randomBlob ), FUNCTION(nullif, 2, 0, 1, nullifFunc ), - FUNCTION(sqlite_version, 0, 0, 0, versionFunc ), - FUNCTION(sqlite_source_id, 0, 0, 0, sourceidFunc ), + DFUNCTION(sqlite_version, 0, 0, 0, versionFunc ), + DFUNCTION(sqlite_source_id, 0, 0, 0, sourceidFunc ), FUNCTION(sqlite_log, 2, 0, 0, errlogFunc ), #if SQLITE_USER_AUTHENTICATION FUNCTION(sqlite_crypt, 2, 0, 0, sqlite3CryptFunc ), #endif #ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS - FUNCTION(sqlite_compileoption_used,1, 0, 0, compileoptionusedFunc ), - FUNCTION(sqlite_compileoption_get, 1, 0, 0, compileoptiongetFunc ), + DFUNCTION(sqlite_compileoption_used,1, 0, 0, compileoptionusedFunc ), + DFUNCTION(sqlite_compileoption_get, 1, 0, 0, compileoptiongetFunc ), #endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */ FUNCTION(quote, 1, 0, 0, quoteFunc ), VFUNCTION(last_insert_rowid, 0, 0, 0, last_insert_rowid), VFUNCTION(changes, 0, 0, 0, changes ), VFUNCTION(total_changes, 0, 0, 0, total_changes ), @@ -1755,12 +1755,12 @@ FUNCTION(zeroblob, 1, 0, 0, zeroblobFunc ), #ifdef SQLITE_SOUNDEX FUNCTION(soundex, 1, 0, 0, soundexFunc ), #endif #ifndef SQLITE_OMIT_LOAD_EXTENSION - FUNCTION(load_extension, 1, 0, 0, loadExt ), - FUNCTION(load_extension, 2, 0, 0, loadExt ), + VFUNCTION(load_extension, 1, 0, 0, loadExt ), + VFUNCTION(load_extension, 2, 0, 0, loadExt ), #endif AGGREGATE(sum, 1, 0, 0, sumStep, sumFinalize ), AGGREGATE(total, 1, 0, 0, sumStep, totalFinalize ), AGGREGATE(avg, 1, 0, 0, sumStep, avgFinalize ), AGGREGATE2(count, 0, 0, 0, countStep, countFinalize, Index: src/insert.c ================================================================== --- src/insert.c +++ src/insert.c @@ -86,11 +86,22 @@ db->mallocFailed = 1; return 0; } for(n=0; nnColumn; n++){ i16 x = pIdx->aiColumn[n]; - pIdx->zColAff[n] = x<0 ? SQLITE_AFF_INTEGER : pTab->aCol[x].affinity; + if( x>=0 ){ + pIdx->zColAff[n] = pTab->aCol[x].affinity; + }else if( x==(-1) ){ + pIdx->zColAff[n] = SQLITE_AFF_INTEGER; + }else{ + char aff; + assert( x==(-2) ); + assert( pIdx->aColExpr!=0 ); + aff = sqlite3ExprAffinity(pIdx->aColExpr->a[n].pExpr); + if( aff==0 ) aff = SQLITE_AFF_BLOB; + pIdx->zColAff[n] = aff; + } } pIdx->zColAff[n] = 0; } return pIdx->zColAff; @@ -1334,14 +1345,17 @@ pTrigger = sqlite3TriggersExist(pParse, pTab, TK_DELETE, 0, 0); } if( pTrigger || sqlite3FkRequired(pParse, pTab, 0, 0) ){ sqlite3MultiWrite(pParse); sqlite3GenerateRowDelete(pParse, pTab, pTrigger, iDataCur, iIdxCur, - regNewData, 1, 0, OE_Replace, 1); - }else if( pTab->pIndex ){ - sqlite3MultiWrite(pParse); - sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur, 0); + regNewData, 1, 0, OE_Replace, + ONEPASS_SINGLE, -1); + }else{ + if( pTab->pIndex ){ + sqlite3MultiWrite(pParse); + sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur,0,-1); + } } seenReplace = 1; break; } case OE_Ignore: { @@ -1392,19 +1406,26 @@ */ regIdx = sqlite3GetTempRange(pParse, pIdx->nColumn); for(i=0; inColumn; i++){ int iField = pIdx->aiColumn[i]; int x; - if( iField<0 || iField==pTab->iPKey ){ - if( regRowid==regIdx+i ) continue; /* ROWID already in regIdx+i */ - x = regNewData; - regRowid = pIdx->pPartIdxWhere ? -1 : regIdx+i; + if( iField==(-2) ){ + pParse->ckBase = regNewData+1; + sqlite3ExprCode(pParse, pIdx->aColExpr->a[i].pExpr, regIdx+i); + pParse->ckBase = 0; + VdbeComment((v, "%s column %d", pIdx->zName, i)); }else{ - x = iField + regNewData + 1; + if( iField==(-1) || iField==pTab->iPKey ){ + if( regRowid==regIdx+i ) continue; /* ROWID already in regIdx+i */ + x = regNewData; + regRowid = pIdx->pPartIdxWhere ? -1 : regIdx+i; + }else{ + x = iField + regNewData + 1; + } + sqlite3VdbeAddOp2(v, OP_SCopy, x, regIdx+i); + VdbeComment((v, "%s", iField<0 ? "rowid" : pTab->aCol[iField].zName)); } - sqlite3VdbeAddOp2(v, OP_SCopy, x, regIdx+i); - VdbeComment((v, "%s", iField<0 ? "rowid" : pTab->aCol[iField].zName)); } sqlite3VdbeAddOp3(v, OP_MakeRecord, regIdx, pIdx->nColumn, aRegIdx[ix]); VdbeComment((v, "for %s", pIdx->zName)); sqlite3ExprCacheAffinityChange(pParse, regIdx, pIdx->nColumn); @@ -1508,11 +1529,12 @@ sqlite3MultiWrite(pParse); if( db->flags&SQLITE_RecTriggers ){ pTrigger = sqlite3TriggersExist(pParse, pTab, TK_DELETE, 0, 0); } sqlite3GenerateRowDelete(pParse, pTab, pTrigger, iDataCur, iIdxCur, - regR, nPkField, 0, OE_Replace, pIdx==pPk); + regR, nPkField, 0, OE_Replace, + (pIdx==pPk ? ONEPASS_SINGLE : ONEPASS_OFF), -1); seenReplace = 1; break; } } sqlite3VdbeResolveLabel(v, addrUniqueOk); @@ -1720,10 +1742,17 @@ return 0; /* Different conflict resolution strategies */ } for(i=0; inKeyCol; i++){ if( pSrc->aiColumn[i]!=pDest->aiColumn[i] ){ return 0; /* Different columns indexed */ + } + if( pSrc->aiColumn[i]==(-2) ){ + assert( pSrc->aColExpr!=0 && pDest->aColExpr!=0 ); + if( sqlite3ExprCompare(pSrc->aColExpr->a[i].pExpr, + pDest->aColExpr->a[i].pExpr, -1)!=0 ){ + return 0; /* Different expressions in the index */ + } } if( pSrc->aSortOrder[i]!=pDest->aSortOrder[i] ){ return 0; /* Different sort orders */ } if( !xferCompatibleCollation(pSrc->azColl[i],pDest->azColl[i]) ){ Index: src/lempar.c ================================================================== --- src/lempar.c +++ src/lempar.c @@ -54,19 +54,23 @@ ** zero the stack is dynamically sized using realloc() ** ParseARG_SDECL A static variable declaration for the %extra_argument ** ParseARG_PDECL A parameter declaration for the %extra_argument ** ParseARG_STORE Code to store %extra_argument into yypParser ** ParseARG_FETCH Code to extract %extra_argument from yypParser -** YYNSTATE the combined number of states. -** YYNRULE the number of rules in the grammar ** YYERRORSYMBOL is the code number of the error symbol. If not ** defined, then do no error processing. +** YYNSTATE the combined number of states. +** YYNRULE the number of rules in the grammar +** YY_MAX_SHIFT Maximum value for shift actions +** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions +** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions +** YY_MIN_REDUCE Maximum value for reduce actions +** YY_ERROR_ACTION The yy_action[] code for syntax error +** YY_ACCEPT_ACTION The yy_action[] code for accept +** YY_NO_ACTION The yy_action[] code for no-op */ %% -#define YY_NO_ACTION (YYNSTATE+YYNRULE+2) -#define YY_ACCEPT_ACTION (YYNSTATE+YYNRULE+1) -#define YY_ERROR_ACTION (YYNSTATE+YYNRULE) /* The yyzerominor constant is used to initialize instances of ** YYMINORTYPE objects to zero. */ static const YYMINORTYPE yyzerominor = { 0 }; @@ -89,20 +93,24 @@ ** action integer. ** ** Suppose the action integer is N. Then the action is determined as ** follows ** -** 0 <= N < YYNSTATE Shift N. That is, push the lookahead +** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead ** token onto the stack and goto state N. ** -** YYNSTATE <= N < YYNSTATE+YYNRULE Reduce by rule N-YYNSTATE. +** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then +** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE. +** +** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE +** and YY_MAX_REDUCE + +** N == YY_ERROR_ACTION A syntax error has occurred. ** -** N == YYNSTATE+YYNRULE A syntax error has occurred. +** N == YY_ACCEPT_ACTION The parser accepts its input. ** -** N == YYNSTATE+YYNRULE+1 The parser accepts its input. -** -** N == YYNSTATE+YYNRULE+2 No such action. Denotes unused +** N == YY_NO_ACTION No such action. Denotes unused ** slots in the yy_action[] table. ** ** The action table is constructed as a single large table named yy_action[]. ** Given state S and lookahead X, the action is computed as ** @@ -157,13 +165,17 @@ ** (In other words, the "major" token.) ** ** + The semantic value stored at this level of the stack. This is ** the information used by the action routines in the grammar. ** It is sometimes called the "minor" token. +** +** After the "shift" half of a SHIFTREDUCE action, the stateno field +** actually contains the reduce action for the second half of the +** SHIFTREDUCE. */ struct yyStackEntry { - YYACTIONTYPE stateno; /* The state-number */ + YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */ YYCODETYPE major; /* The major token value. This is the code ** number for the token at this stack level */ YYMINORTYPE minor; /* The user-supplied minor token value. This ** is the value of the token */ }; @@ -393,14 +405,14 @@ YYCODETYPE iLookAhead /* The look-ahead token */ ){ int i; int stateno = pParser->yystack[pParser->yyidx].stateno; - if( stateno>YY_SHIFT_COUNT - || (i = yy_shift_ofst[stateno])==YY_SHIFT_USE_DFLT ){ - return yy_default[stateno]; - } + if( stateno>=YY_MIN_REDUCE ) return stateno; + assert( stateno <= YY_SHIFT_COUNT ); + i = yy_shift_ofst[stateno]; + if( i==YY_SHIFT_USE_DFLT ) return yy_default[stateno]; assert( iLookAhead!=YYNOCODE ); i += iLookAhead; if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ if( iLookAhead>0 ){ #ifdef YYFALLBACK @@ -497,11 +509,33 @@ %% ParseARG_STORE; /* Suppress warning about unused %extra_argument var */ } /* -** Perform a shift action. +** Print tracing information for a SHIFT action +*/ +#ifndef NDEBUG +static void yyTraceShift(yyParser *yypParser, int yyNewState){ + if( yyTraceFILE ){ + int i; + if( yyNewStateyyidx; i++) + fprintf(yyTraceFILE," %s",yyTokenName[yypParser->yystack[i].major]); + fprintf(yyTraceFILE,"\n"); + }else{ + fprintf(yyTraceFILE,"%sShift *\n",yyTracePrompt); + } + } +} +#else +# define yyTraceShift(X,Y) +#endif + +/* +** Perform a shift action. Return the number of errors. */ static void yy_shift( yyParser *yypParser, /* The parser to be shifted */ int yyNewState, /* The new state to shift in */ int yyMajor, /* The major token to shift in */ @@ -530,20 +564,11 @@ #endif yytos = &yypParser->yystack[yypParser->yyidx]; yytos->stateno = (YYACTIONTYPE)yyNewState; yytos->major = (YYCODETYPE)yyMajor; yytos->minor = *yypMinor; -#ifndef NDEBUG - if( yyTraceFILE && yypParser->yyidx>0 ){ - int i; - fprintf(yyTraceFILE,"%sShift %d\n",yyTracePrompt,yyNewState); - fprintf(yyTraceFILE,"%sStack:",yyTracePrompt); - for(i=1; i<=yypParser->yyidx; i++) - fprintf(yyTraceFILE," %s",yyTokenName[yypParser->yystack[i].major]); - fprintf(yyTraceFILE,"\n"); - } -#endif + yyTraceShift(yypParser, yyNewState); } /* The following table contains information about every rule that ** is used during the reduce. */ @@ -572,12 +597,13 @@ ParseARG_FETCH; yymsp = &yypParser->yystack[yypParser->yyidx]; #ifndef NDEBUG if( yyTraceFILE && yyruleno>=0 && yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){ - fprintf(yyTraceFILE, "%sReduce [%s].\n", yyTracePrompt, - yyRuleName[yyruleno]); + yysize = yyRuleInfo[yyruleno].nrhs; + fprintf(yyTraceFILE, "%sReduce [%s] -> state %d.\n", yyTracePrompt, + yyRuleName[yyruleno], yymsp[-yysize].stateno); } #endif /* NDEBUG */ /* Silence complaints from purify about yygotominor being uninitialized ** in some cases when it is copied into the stack after the following @@ -611,29 +637,28 @@ assert( yyruleno>=0 && yyrulenoyyidx -= yysize; yyact = yy_find_reduce_action(yymsp[-yysize].stateno,(YYCODETYPE)yygoto); - if( yyact < YYNSTATE ){ -#ifdef NDEBUG - /* If we are not debugging and the reduce action popped at least + if( yyact <= YY_MAX_SHIFTREDUCE ){ + if( yyact>YY_MAX_SHIFT ) yyact += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; + /* If the reduce action popped at least ** one element off the stack, then we can push the new element back ** onto the stack here, and skip the stack overflow test in yy_shift(). ** That gives a significant speed improvement. */ if( yysize ){ yypParser->yyidx++; yymsp -= yysize-1; yymsp->stateno = (YYACTIONTYPE)yyact; yymsp->major = (YYCODETYPE)yygoto; yymsp->minor = yygotominor; - }else -#endif - { + yyTraceShift(yypParser, yyact); + }else{ yy_shift(yypParser,yyact,yygoto,&yygotominor); } }else{ - assert( yyact == YYNSTATE + YYNRULE + 1 ); + assert( yyact == YY_ACCEPT_ACTION ); yy_accept(yypParser); } } /* @@ -753,16 +778,17 @@ } #endif do{ yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor); - if( yyact YY_MAX_SHIFT ) yyact += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; yy_shift(yypParser,yyact,yymajor,&yyminorunion); yypParser->yyerrcnt--; yymajor = YYNOCODE; - }else if( yyact < YYNSTATE + YYNRULE ){ - yy_reduce(yypParser,yyact-YYNSTATE); + }else if( yyact <= YY_MAX_REDUCE ){ + yy_reduce(yypParser,yyact-YY_MIN_REDUCE); }else{ assert( yyact == YY_ERROR_ACTION ); #ifdef YYERRORSYMBOL int yymx; #endif @@ -808,11 +834,11 @@ while( yypParser->yyidx >= 0 && yymx != YYERRORSYMBOL && (yyact = yy_find_reduce_action( yypParser->yystack[yypParser->yyidx].stateno, - YYERRORSYMBOL)) >= YYNSTATE + YYERRORSYMBOL)) >= YY_MIN_REDUCE ){ yy_pop_parser_stack(yypParser); } if( yypParser->yyidx < 0 || yymajor==0 ){ yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); @@ -858,7 +884,12 @@ } yymajor = YYNOCODE; #endif } }while( yymajor!=YYNOCODE && yypParser->yyidx>=0 ); +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sReturn\n",yyTracePrompt); + } +#endif return; } Index: src/loadext.c ================================================================== --- src/loadext.c +++ src/loadext.c @@ -405,11 +405,14 @@ sqlite3_strglob, /* Version 3.8.11 and later */ (sqlite3_value*(*)(const sqlite3_value*))sqlite3_value_dup, sqlite3_value_free, sqlite3_result_zeroblob64, - sqlite3_bind_zeroblob64 + sqlite3_bind_zeroblob64, + /* Version 3.8.12 and later */ + sqlite3_value_subtype, + sqlite3_result_subtype }; /* ** Attempt to load an SQLite extension library contained in the file ** zFile. The entry point is zProc. zProc may be 0 in which case a Index: src/malloc.c ================================================================== --- src/malloc.c +++ src/malloc.c @@ -43,11 +43,11 @@ /* ** State information local to the memory allocation subsystem. */ static SQLITE_WSD struct Mem0Global { sqlite3_mutex *mutex; /* Mutex to serialize access */ - sqlite3_int64 alarmThreshold; /* The soft heap limit */ + sqlite3_int64 alarmThreshold; /* The soft heap limit */ /* ** Pointers to the end of sqlite3GlobalConfig.pScratch memory ** (so that a range test can be used to determine if an allocation ** being freed came from pScratch) and a pointer to the list of @@ -71,63 +71,24 @@ */ sqlite3_mutex *sqlite3MallocMutex(void){ return mem0.mutex; } -/* -** Return the amount of memory currently in use. -*/ -static sqlite3_int64 memInUse(void){ - assert( sqlite3_mutex_held(mem0.mutex) ); - return sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED); -} - -/* -** Called when the soft heap limit is exceeded for an allocation -** of nBytes. -*/ -#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT -static void sqlite3HeapLimitExceeded(int nByte){ - sqlite3_int64 excess = memInUse() + nByte - mem0.alarmThreshold; - sqlite3_mutex_leave(mem0.mutex); - sqlite3_release_memory((int)(excess & 0x7fffffff)); - sqlite3_mutex_enter(mem0.mutex); -} -#else -# define sqlite3HeapLimitExceeded(X) /* no-op */ -#endif - -/* -** Check to see if increasing the total memory usage by nNew bytes -** will exceed the soft heap limit. -** -** If the soft heap limit is exceeded, set the mem0.nearlyFull flag -** and invoke sqlite3HeapLimitExceeded() to try to free up some -** memory. -*/ -static void sqlite3CheckSoftHeapLimit(int nNew){ - assert( sqlite3_mutex_held(mem0.mutex) ); - if( mem0.alarmThreshold>0 ){ - if( mem0.alarmThreshold-nNew >= memInUse() ){ - mem0.nearlyFull = 1; - sqlite3HeapLimitExceeded(nNew); - }else{ - mem0.nearlyFull = 0; - } - } -} - #ifndef SQLITE_OMIT_DEPRECATED /* -** Deprecated external interface. First deprecated 2007-11-05. Changed -** into a no-op on 2015-09-02. +** Deprecated external interface. It used to set an alarm callback +** that was invoked when memory usage grew too large. Now it is a +** no-op. */ int sqlite3_memory_alarm( void(*xCallback)(void *pArg, sqlite3_int64 used,int N), void *pArg, sqlite3_int64 iThreshold ){ + (void)xCallback; + (void)pArg; + (void)iThreshold; return SQLITE_OK; } #endif /* @@ -134,24 +95,28 @@ ** Set the soft heap-size limit for the library. Passing a zero or ** negative value indicates no limit. */ sqlite3_int64 sqlite3_soft_heap_limit64(sqlite3_int64 n){ sqlite3_int64 priorLimit; + sqlite3_int64 excess; + sqlite3_int64 nUsed; #ifndef SQLITE_OMIT_AUTOINIT int rc = sqlite3_initialize(); if( rc ) return -1; #endif sqlite3_mutex_enter(mem0.mutex); priorLimit = mem0.alarmThreshold; - if( n>0 ){ - mem0.alarmThreshold = n; - sqlite3CheckSoftHeapLimit(0); - }else if( n==0 ){ - mem0.alarmThreshold = 0; - mem0.nearlyFull = 0; + if( n<0 ){ + sqlite3_mutex_leave(mem0.mutex); + return priorLimit; } + mem0.alarmThreshold = n; + nUsed = sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED); + mem0.nearlyFull = (n>0 && n<=nUsed); sqlite3_mutex_leave(mem0.mutex); + excess = sqlite3_memory_used() - n; + if( excess>0 ) sqlite3_release_memory((int)(excess & 0x7fffffff)); return priorLimit; } void sqlite3_soft_heap_limit(int n){ if( n<0 ) n = 0; sqlite3_soft_heap_limit64(n); @@ -237,10 +202,20 @@ sqlite3_int64 sqlite3_memory_highwater(int resetFlag){ sqlite3_int64 res, mx; sqlite3_status64(SQLITE_STATUS_MEMORY_USED, &res, &mx, resetFlag); return mx; } + +/* +** Trigger the alarm +*/ +static void sqlite3MallocAlarm(int nByte){ + if( mem0.alarmThreshold<=0 ) return; + sqlite3_mutex_leave(mem0.mutex); + sqlite3_release_memory(nByte); + sqlite3_mutex_enter(mem0.mutex); +} /* ** Do a memory allocation with statistics and alarms. Assume the ** lock is already held. */ @@ -248,15 +223,23 @@ int nFull; void *p; assert( sqlite3_mutex_held(mem0.mutex) ); nFull = sqlite3GlobalConfig.m.xRoundup(n); sqlite3StatusSet(SQLITE_STATUS_MALLOC_SIZE, n); - sqlite3CheckSoftHeapLimit(nFull); + if( mem0.alarmThreshold>0 ){ + sqlite3_int64 nUsed = sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED); + if( nUsed >= mem0.alarmThreshold - nFull ){ + mem0.nearlyFull = 1; + sqlite3MallocAlarm(nFull); + }else{ + mem0.nearlyFull = 0; + } + } p = sqlite3GlobalConfig.m.xMalloc(nFull); #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT - if( p==0 && mem0.alarmThreshold ){ - sqlite3HeapLimitExceeded(nFull); + if( p==0 && mem0.alarmThreshold>0 ){ + sqlite3MallocAlarm(nFull); p = sqlite3GlobalConfig.m.xMalloc(nFull); } #endif if( p ){ nFull = sqlite3MallocSize(p); @@ -535,18 +518,19 @@ pNew = pOld; }else if( sqlite3GlobalConfig.bMemstat ){ sqlite3_mutex_enter(mem0.mutex); sqlite3StatusSet(SQLITE_STATUS_MALLOC_SIZE, (int)nBytes); nDiff = nNew - nOld; - sqlite3CheckSoftHeapLimit(nDiff); + if( sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED) >= + mem0.alarmThreshold-nDiff ){ + sqlite3MallocAlarm(nDiff); + } pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew); -#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT - if( pNew==0 && mem0.alarmThreshold ){ - sqlite3HeapLimitExceeded((int)nBytes); + if( pNew==0 && mem0.alarmThreshold>0 ){ + sqlite3MallocAlarm((int)nBytes); pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew); } -#endif if( pNew ){ nNew = sqlite3MallocSize(pNew); sqlite3StatusUp(SQLITE_STATUS_MEMORY_USED, nNew-nOld); } sqlite3_mutex_leave(mem0.mutex); Index: src/mutex.c ================================================================== --- src/mutex.c +++ src/mutex.c @@ -20,11 +20,11 @@ ** For debugging purposes, record when the mutex subsystem is initialized ** and uninitialized so that we can assert() if there is an attempt to ** allocate a mutex while the system is uninitialized. */ static SQLITE_WSD int mutexIsInit = 0; -#endif /* SQLITE_DEBUG */ +#endif /* SQLITE_DEBUG && !defined(SQLITE_MUTEX_OMIT) */ #ifndef SQLITE_MUTEX_OMIT /* ** Initialize the mutex system. @@ -51,12 +51,14 @@ pTo->xMutexEnter = pFrom->xMutexEnter; pTo->xMutexTry = pFrom->xMutexTry; pTo->xMutexLeave = pFrom->xMutexLeave; pTo->xMutexHeld = pFrom->xMutexHeld; pTo->xMutexNotheld = pFrom->xMutexNotheld; + sqlite3MemoryBarrier(); pTo->xMutexAlloc = pFrom->xMutexAlloc; } + assert( sqlite3GlobalConfig.mutex.xMutexInit ); rc = sqlite3GlobalConfig.mutex.xMutexInit(); #ifdef SQLITE_DEBUG GLOBAL(int, mutexIsInit) = 1; #endif @@ -87,26 +89,29 @@ sqlite3_mutex *sqlite3_mutex_alloc(int id){ #ifndef SQLITE_OMIT_AUTOINIT if( id<=SQLITE_MUTEX_RECURSIVE && sqlite3_initialize() ) return 0; if( id>SQLITE_MUTEX_RECURSIVE && sqlite3MutexInit() ) return 0; #endif + assert( sqlite3GlobalConfig.mutex.xMutexAlloc ); return sqlite3GlobalConfig.mutex.xMutexAlloc(id); } sqlite3_mutex *sqlite3MutexAlloc(int id){ if( !sqlite3GlobalConfig.bCoreMutex ){ return 0; } assert( GLOBAL(int, mutexIsInit) ); + assert( sqlite3GlobalConfig.mutex.xMutexAlloc ); return sqlite3GlobalConfig.mutex.xMutexAlloc(id); } /* ** Free a dynamic mutex. */ void sqlite3_mutex_free(sqlite3_mutex *p){ if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexFree ); sqlite3GlobalConfig.mutex.xMutexFree(p); } } /* @@ -113,10 +118,11 @@ ** Obtain the mutex p. If some other thread already has the mutex, block ** until it can be obtained. */ void sqlite3_mutex_enter(sqlite3_mutex *p){ if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexEnter ); sqlite3GlobalConfig.mutex.xMutexEnter(p); } } /* @@ -124,10 +130,11 @@ ** thread holds the mutex and it cannot be obtained, return SQLITE_BUSY. */ int sqlite3_mutex_try(sqlite3_mutex *p){ int rc = SQLITE_OK; if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexTry ); return sqlite3GlobalConfig.mutex.xMutexTry(p); } return rc; } @@ -137,10 +144,11 @@ ** is not currently entered. If a NULL pointer is passed as an argument ** this function is a no-op. */ void sqlite3_mutex_leave(sqlite3_mutex *p){ if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexLeave ); sqlite3GlobalConfig.mutex.xMutexLeave(p); } } #ifndef NDEBUG @@ -147,13 +155,15 @@ /* ** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are ** intended for use inside assert() statements. */ int sqlite3_mutex_held(sqlite3_mutex *p){ + assert( p==0 || sqlite3GlobalConfig.mutex.xMutexHeld ); return p==0 || sqlite3GlobalConfig.mutex.xMutexHeld(p); } int sqlite3_mutex_notheld(sqlite3_mutex *p){ + assert( p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld ); return p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld(p); } #endif #endif /* !defined(SQLITE_MUTEX_OMIT) */ Index: src/mutex_unix.c ================================================================== --- src/mutex_unix.c +++ src/mutex_unix.c @@ -77,10 +77,21 @@ } static int pthreadMutexNotheld(sqlite3_mutex *p){ return p->nRef==0 || pthread_equal(p->owner, pthread_self())==0; } #endif + +/* +** Try to provide a memory barrier operation, needed for initialization only. +*/ +void sqlite3MemoryBarrier(void){ +#if defined(SQLITE_MEMORY_BARRIER) + SQLITE_MEMORY_BARRIER; +#elif defined(__GNUC__) && GCC_VERSION>=4001000 + __sync_synchronize(); +#endif +} /* ** Initialize and deinitialize the mutex subsystem. */ static int pthreadMutexInit(void){ return SQLITE_OK; } Index: src/mutex_w32.c ================================================================== --- src/mutex_w32.c +++ src/mutex_w32.c @@ -74,10 +74,23 @@ static int winMutexNotheld(sqlite3_mutex *p){ DWORD tid = GetCurrentThreadId(); return winMutexNotheld2(p, tid); } #endif + +/* +** Try to provide a memory barrier operation, needed for initialization only. +*/ +void sqlite3MemoryBarrier(void){ +#if defined(SQLITE_MEMORY_BARRIER) + SQLITE_MEMORY_BARRIER; +#elif defined(__GNUC__) + __sync_synchronize(); +#else + MemoryBarrier(); +#endif +} /* ** Initialize and deinitialize the mutex subsystem. */ static sqlite3_mutex winMutex_staticMutexes[] = { Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -645,11 +645,11 @@ u8 changeCountDone; /* Set after incrementing the change-counter */ u8 setMaster; /* True if a m-j name has been written to jrnl */ u8 doNotSpill; /* Do not spill the cache when non-zero */ u8 subjInMemory; /* True to use in-memory sub-journals */ u8 bUseFetch; /* True to use xFetch() */ - u8 hasBeenUsed; /* True if any content previously read */ + u8 hasHeldSharedLock; /* True if a shared lock has ever been held */ Pgno dbSize; /* Number of pages in the database */ Pgno dbOrigSize; /* dbSize before the current transaction */ Pgno dbFileSize; /* Number of pages in the database file */ Pgno dbHintSize; /* Value passed to FCNTL_SIZE_HINT call */ int errCode; /* One of several kinds of errors */ @@ -5118,14 +5118,14 @@ assert( (pPager->eLock==SHARED_LOCK) || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK) ); } - if( !pPager->tempFile && pPager->hasBeenUsed ){ + if( !pPager->tempFile && pPager->hasHeldSharedLock ){ /* The shared-lock has just been acquired then check to ** see if the database has been modified. If the database has changed, - ** flush the cache. The pPager->hasBeenUsed flag prevents this from + ** flush the cache. The hasHeldSharedLock flag prevents this from ** occurring on the very first access to a file, in order to save a ** single unnecessary sqlite3OsRead() call at the start-up. ** ** Database changes are detected by looking at 15 bytes beginning ** at offset 24 into the file. The first 4 of these 16 bytes are @@ -5191,10 +5191,11 @@ assert( !MEMDB ); pager_unlock(pPager); assert( pPager->eState==PAGER_OPEN ); }else{ pPager->eState = PAGER_READER; + pPager->hasHeldSharedLock = 1; } return rc; } /* @@ -5274,25 +5275,29 @@ /* It is acceptable to use a read-only (mmap) page for any page except ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY ** flag was specified by the caller. And so long as the db is not a ** temporary or in-memory database. */ - const int bMmapOk = (pgno!=1 && USEFETCH(pPager) + const int bMmapOk = (pgno>1 && USEFETCH(pPager) && (pPager->eState==PAGER_READER || (flags & PAGER_GET_READONLY)) #ifdef SQLITE_HAS_CODEC && pPager->xCodec==0 #endif ); + /* Optimization note: Adding the "pgno<=1" term before "pgno==0" here + ** allows the compiler optimizer to reuse the results of the "pgno>1" + ** test in the previous statement, and avoid testing pgno==0 in the + ** common case where pgno is large. */ + if( pgno<=1 && pgno==0 ){ + return SQLITE_CORRUPT_BKPT; + } assert( pPager->eState>=PAGER_READER ); assert( assert_pager_state(pPager) ); assert( noContent==0 || bMmapOk==0 ); - if( pgno==0 ){ - return SQLITE_CORRUPT_BKPT; - } - pPager->hasBeenUsed = 1; + assert( pPager->hasHeldSharedLock==1 ); /* If the pager is in the error state, return an error immediately. ** Otherwise, request the page from the PCache layer. */ if( pPager->errCode!=SQLITE_OK ){ rc = pPager->errCode; @@ -5443,11 +5448,11 @@ sqlite3_pcache_page *pPage; assert( pPager!=0 ); assert( pgno!=0 ); assert( pPager->pPCache!=0 ); pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0); - assert( pPage==0 || pPager->hasBeenUsed ); + assert( pPage==0 || pPager->hasHeldSharedLock ); if( pPage==0 ) return 0; return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage); } /* @@ -6413,11 +6418,11 @@ return pPager->readOnly; } #ifdef SQLITE_DEBUG /* -** Return the number of references to the pager. +** Return the sum of the reference counts for all pages held by pPager. */ int sqlite3PagerRefcount(Pager *pPager){ return sqlite3PcacheRefCount(pPager->pPCache); } #endif Index: src/pcache.c ================================================================== --- src/pcache.c +++ src/pcache.c @@ -17,11 +17,11 @@ ** A complete page cache is an instance of this structure. */ struct PCache { PgHdr *pDirty, *pDirtyTail; /* List of dirty pages in LRU order */ PgHdr *pSynced; /* Last synced page in dirty page list */ - int nRef; /* Number of referenced pages */ + int nRefSum; /* Sum of ref counts over all pages */ int szCache; /* Configured cache size */ int szPage; /* Size of every page in this cache */ int szExtra; /* Size of extra space for each page */ u8 bPurgeable; /* True if pages are on backing store */ u8 eCreate; /* eCreate value for for xFetch() */ @@ -182,11 +182,11 @@ /* ** Change the page size for PCache object. The caller must ensure that there ** are no outstanding page references when this function is called. */ int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){ - assert( pCache->nRef==0 && pCache->pDirty==0 ); + assert( pCache->nRefSum==0 && pCache->pDirty==0 ); if( pCache->szPage ){ sqlite3_pcache *pNew; pNew = sqlite3GlobalConfig.pcache2.xCreate( szPage, pCache->szExtra + ROUND8(sizeof(PgHdr)), pCache->bPurgeable @@ -349,13 +349,11 @@ pPgHdr = (PgHdr *)pPage->pExtra; if( !pPgHdr->pPage ){ return pcacheFetchFinishWithInit(pCache, pgno, pPage); } - if( 0==pPgHdr->nRef ){ - pCache->nRef++; - } + pCache->nRefSum++; pPgHdr->nRef++; return pPgHdr; } /* @@ -362,13 +360,12 @@ ** Decrement the reference count on a page. If the page is clean and the ** reference count drops to 0, then it is made eligible for recycling. */ void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){ assert( p->nRef>0 ); - p->nRef--; - if( p->nRef==0 ){ - p->pCache->nRef--; + p->pCache->nRefSum--; + if( (--p->nRef)==0 ){ if( p->flags&PGHDR_CLEAN ){ pcacheUnpin(p); }else if( p->pDirtyPrev!=0 ){ /* Move the page to the head of the dirty list. */ pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT); @@ -380,10 +377,11 @@ ** Increase the reference count of a supplied page by 1. */ void sqlite3PcacheRef(PgHdr *p){ assert(p->nRef>0); p->nRef++; + p->pCache->nRefSum++; } /* ** Drop a page from the cache. There must be exactly one reference to the ** page. This function deletes that reference, so after it returns the @@ -392,11 +390,11 @@ void sqlite3PcacheDrop(PgHdr *p){ assert( p->nRef==1 ); if( p->flags&PGHDR_DIRTY ){ pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); } - p->pCache->nRef--; + p->pCache->nRefSum--; sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1); } /* ** Make sure the page is marked as dirty. If it isn't dirty already, @@ -488,15 +486,15 @@ if( ALWAYS(p->pgno>pgno) ){ assert( p->flags&PGHDR_DIRTY ); sqlite3PcacheMakeClean(p); } } - if( pgno==0 && pCache->nRef ){ + if( pgno==0 && pCache->nRefSum ){ sqlite3_pcache_page *pPage1; pPage1 = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache,1,0); if( ALWAYS(pPage1) ){ /* Page 1 is always available in cache, because - ** pCache->nRef>0 */ + ** pCache->nRefSum>0 */ memset(pPage1->pBuf, 0, pCache->szPage); pgno = 1; } } sqlite3GlobalConfig.pcache2.xTruncate(pCache->pCache, pgno+1); @@ -598,14 +596,17 @@ } return pcacheSortDirtyList(pCache->pDirty); } /* -** Return the total number of referenced pages held by the cache. +** Return the total number of references to all pages held by the cache. +** +** This is not the total number of pages referenced, but the sum of the +** reference count for all pages. */ int sqlite3PcacheRefCount(PCache *pCache){ - return pCache->nRef; + return pCache->nRefSum; } /* ** Return the number of references to the page supplied as an argument. */ Index: src/pcache1.c ================================================================== --- src/pcache1.c +++ src/pcache1.c @@ -84,10 +84,28 @@ typedef struct PCache1 PCache1; typedef struct PgHdr1 PgHdr1; typedef struct PgFreeslot PgFreeslot; typedef struct PGroup PGroup; + +/* +** Each cache entry is represented by an instance of the following +** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of +** PgHdr1.pCache->szPage bytes is allocated directly before this structure +** in memory. +*/ +struct PgHdr1 { + sqlite3_pcache_page page; /* Base class. Must be first. pBuf & pExtra */ + unsigned int iKey; /* Key value (page number) */ + u8 isPinned; /* Page in use, not on the LRU list */ + u8 isBulkLocal; /* This page from bulk local storage */ + u8 isAnchor; /* This is the PGroup.lru element */ + PgHdr1 *pNext; /* Next in hash table chain */ + PCache1 *pCache; /* Cache that currently owns this page */ + PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */ + PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */ +}; /* Each page cache (or PCache) belongs to a PGroup. A PGroup is a set ** of one or more PCaches that are able to recycle each other's unpinned ** pages when they are under memory pressure. A PGroup is an instance of ** the following object. @@ -113,11 +131,11 @@ sqlite3_mutex *mutex; /* MUTEX_STATIC_LRU or NULL */ unsigned int nMaxPage; /* Sum of nMax for purgeable caches */ unsigned int nMinPage; /* Sum of nMin for purgeable caches */ unsigned int mxPinned; /* nMaxpage + 10 - nMinPage */ unsigned int nCurrentPage; /* Number of purgeable pages allocated */ - PgHdr1 *pLruHead, *pLruTail; /* LRU list of unpinned pages */ + PgHdr1 lru; /* The beginning and end of the LRU list */ }; /* Each page cache is an instance of the following object. Every ** open database file (including each in-memory database and each ** temporary or transient database) has a single page cache which @@ -151,27 +169,10 @@ PgHdr1 **apHash; /* Hash table for fast lookup by key */ PgHdr1 *pFree; /* List of unused pcache-local pages */ void *pBulk; /* Bulk memory used by pcache-local */ }; -/* -** Each cache entry is represented by an instance of the following -** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of -** PgHdr1.pCache->szPage bytes is allocated directly before this structure -** in memory. -*/ -struct PgHdr1 { - sqlite3_pcache_page page; - unsigned int iKey; /* Key value (page number) */ - u8 isPinned; /* Page in use, not on the LRU list */ - u8 isBulkLocal; /* This page from bulk local storage */ - PgHdr1 *pNext; /* Next in hash table chain */ - PCache1 *pCache; /* Cache that currently owns this page */ - PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */ - PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */ -}; - /* ** Free slots in the allocator used to divide up the global page cache ** buffer provided using the SQLITE_CONFIG_PAGECACHE mechanism. */ struct PgFreeslot { @@ -227,10 +228,11 @@ # define PCACHE1_MIGHT_USE_GROUP_MUTEX 1 #endif /******************************************************************************/ /******** Page Allocation/SQLITE_CONFIG_PCACHE Related Functions **************/ + /* ** This function is called during initialization if a static buffer is ** supplied to use for the page-cache by passing the SQLITE_CONFIG_PAGECACHE ** verb to sqlite3_config(). Parameter pBuf points to an allocation large @@ -287,10 +289,11 @@ for(i=0; iszPage]; pX->page.pBuf = zBulk; pX->page.pExtra = &pX[1]; pX->isBulkLocal = 1; + pX->isAnchor = 0; pX->pNext = pCache->pFree; pCache->pFree = pX; zBulk += pCache->szAlloc; } } @@ -429,10 +432,11 @@ #endif if( pPg==0 ) return 0; p->page.pBuf = pPg; p->page.pExtra = &p[1]; p->isBulkLocal = 0; + p->isAnchor = 0; } if( pCache->bPurgeable ){ pCache->pGroup->nCurrentPage++; } return p; @@ -555,26 +559,20 @@ PCache1 *pCache; assert( pPage!=0 ); assert( pPage->isPinned==0 ); pCache = pPage->pCache; - assert( pPage->pLruNext || pPage==pCache->pGroup->pLruTail ); - assert( pPage->pLruPrev || pPage==pCache->pGroup->pLruHead ); + assert( pPage->pLruNext ); + assert( pPage->pLruPrev ); assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); - if( pPage->pLruPrev ){ - pPage->pLruPrev->pLruNext = pPage->pLruNext; - }else{ - pCache->pGroup->pLruHead = pPage->pLruNext; - } - if( pPage->pLruNext ){ - pPage->pLruNext->pLruPrev = pPage->pLruPrev; - }else{ - pCache->pGroup->pLruTail = pPage->pLruPrev; - } + pPage->pLruPrev->pLruNext = pPage->pLruNext; + pPage->pLruNext->pLruPrev = pPage->pLruPrev; pPage->pLruNext = 0; pPage->pLruPrev = 0; pPage->isPinned = 1; + assert( pPage->isAnchor==0 ); + assert( pCache->pGroup->lru.isAnchor==1 ); pCache->nRecyclable--; return pPage; } @@ -603,13 +601,15 @@ ** If there are currently more than nMaxPage pages allocated, try ** to recycle pages to reduce the number allocated to nMaxPage. */ static void pcache1EnforceMaxPage(PCache1 *pCache){ PGroup *pGroup = pCache->pGroup; + PgHdr1 *p; assert( sqlite3_mutex_held(pGroup->mutex) ); - while( pGroup->nCurrentPage>pGroup->nMaxPage && pGroup->pLruTail ){ - PgHdr1 *p = pGroup->pLruTail; + while( pGroup->nCurrentPage>pGroup->nMaxPage + && (p=pGroup->lru.pLruPrev)->isAnchor==0 + ){ assert( p->pCache->pGroup==pGroup ); assert( p->isPinned==0 ); pcache1PinPage(p); pcache1RemoveFromHash(p, 1); } @@ -739,10 +739,14 @@ pGroup = (PGroup*)&pCache[1]; pGroup->mxPinned = 10; }else{ pGroup = &pcache1.grp; } + if( pGroup->lru.isAnchor==0 ){ + pGroup->lru.isAnchor = 1; + pGroup->lru.pLruPrev = pGroup->lru.pLruNext = &pGroup->lru; + } pCache->pGroup = pGroup; pCache->szPage = szPage; pCache->szExtra = szExtra; pCache->szAlloc = szPage + szExtra + ROUND8(sizeof(PgHdr1)); pCache->bPurgeable = (bPurgeable ? 1 : 0); @@ -846,15 +850,15 @@ if( pCache->nPage>=pCache->nHash ) pcache1ResizeHash(pCache); assert( pCache->nHash>0 && pCache->apHash ); /* Step 4. Try to recycle a page. */ if( pCache->bPurgeable - && pGroup->pLruTail + && !pGroup->lru.pLruPrev->isAnchor && ((pCache->nPage+1>=pCache->nMax) || pcache1UnderMemoryPressure(pCache)) ){ PCache1 *pOther; - pPage = pGroup->pLruTail; + pPage = pGroup->lru.pLruPrev; assert( pPage->isPinned==0 ); pcache1RemoveFromHash(pPage, 0); pcache1PinPage(pPage); pOther = pPage->pCache; if( pOther->szAlloc != pCache->szAlloc ){ @@ -959,11 +963,14 @@ /* Step 1: Search the hash table for an existing entry. */ pPage = pCache->apHash[iKey % pCache->nHash]; while( pPage && pPage->iKey!=iKey ){ pPage = pPage->pNext; } - /* Step 2: Abort if no existing page is found and createFlag is 0 */ + /* Step 2: If the page was found in the hash table, then return it. + ** If the page was not in the hash table and createFlag is 0, abort. + ** Otherwise (page not in hash and createFlag!=0) continue with + ** subsequent steps to try to create the page. */ if( pPage ){ if( !pPage->isPinned ){ return pcache1PinPage(pPage); }else{ return pPage; @@ -1036,25 +1043,20 @@ /* It is an error to call this function if the page is already ** part of the PGroup LRU list. */ assert( pPage->pLruPrev==0 && pPage->pLruNext==0 ); - assert( pGroup->pLruHead!=pPage && pGroup->pLruTail!=pPage ); assert( pPage->isPinned==1 ); if( reuseUnlikely || pGroup->nCurrentPage>pGroup->nMaxPage ){ pcache1RemoveFromHash(pPage, 1); }else{ /* Add the page to the PGroup LRU list. */ - if( pGroup->pLruHead ){ - pGroup->pLruHead->pLruPrev = pPage; - pPage->pLruNext = pGroup->pLruHead; - pGroup->pLruHead = pPage; - }else{ - pGroup->pLruTail = pPage; - pGroup->pLruHead = pPage; - } + PgHdr1 **ppFirst = &pGroup->lru.pLruNext; + pPage->pLruPrev = &pGroup->lru; + (pPage->pLruNext = *ppFirst)->pLruPrev = pPage; + *ppFirst = pPage; pCache->nRecyclable++; pPage->isPinned = 0; } pcache1LeaveMutex(pCache->pGroup); @@ -1188,11 +1190,14 @@ assert( sqlite3_mutex_notheld(pcache1.grp.mutex) ); assert( sqlite3_mutex_notheld(pcache1.mutex) ); if( sqlite3GlobalConfig.nPage==0 ){ PgHdr1 *p; pcache1EnterMutex(&pcache1.grp); - while( (nReq<0 || nFreeisAnchor==0 + ){ nFree += pcache1MemSize(p->page.pBuf); #ifdef SQLITE_PCACHE_SEPARATE_HEADER nFree += sqlite3MemSize(p); #endif assert( p->isPinned==0 ); @@ -1216,15 +1221,15 @@ int *pnMin, /* OUT: Sum of PCache1.nMin for purgeable caches */ int *pnRecyclable /* OUT: Total number of pages available for recycling */ ){ PgHdr1 *p; int nRecyclable = 0; - for(p=pcache1.grp.pLruHead; p; p=p->pLruNext){ + for(p=pcache1.grp.lru.pLruNext; p && !p->isAnchor; p=p->pLruNext){ assert( p->isPinned==0 ); nRecyclable++; } *pnCurrent = pcache1.grp.nCurrentPage; *pnMax = (int)pcache1.grp.nMaxPage; *pnMin = (int)pcache1.grp.nMinPage; *pnRecyclable = nRecyclable; } #endif Index: src/resolve.c ================================================================== --- src/resolve.c +++ src/resolve.c @@ -43,34 +43,10 @@ /* ** Turn the pExpr expression into an alias for the iCol-th column of the ** result set in pEList. ** -** If the result set column is a simple column reference, then this routine -** makes an exact copy. But for any other kind of expression, this -** routine make a copy of the result set column as the argument to the -** TK_AS operator. The TK_AS operator causes the expression to be -** evaluated just once and then reused for each alias. -** -** The reason for suppressing the TK_AS term when the expression is a simple -** column reference is so that the column reference will be recognized as -** usable by indices within the WHERE clause processing logic. -** -** The TK_AS operator is inhibited if zType[0]=='G'. This means -** that in a GROUP BY clause, the expression is evaluated twice. Hence: -** -** SELECT random()%5 AS x, count(*) FROM tab GROUP BY x -** -** Is equivalent to: -** -** SELECT random()%5 AS x, count(*) FROM tab GROUP BY random()%5 -** -** The result of random()%5 in the GROUP BY clause is probably different -** from the result in the result-set. On the other hand Standard SQL does -** not allow the GROUP BY clause to contain references to result-set columns. -** So this should never come up in well-formed queries. -** ** If the reference is followed by a COLLATE operator, then make sure ** the COLLATE operator is preserved. For example: ** ** SELECT a+b, c+d FROM t1 ORDER BY 1 COLLATE nocase; ** @@ -100,23 +76,15 @@ pOrig = pEList->a[iCol].pExpr; assert( pOrig!=0 ); db = pParse->db; pDup = sqlite3ExprDup(db, pOrig, 0); if( pDup==0 ) return; - if( pOrig->op!=TK_COLUMN && zType[0]!='G' ){ - incrAggFunctionDepth(pDup, nSubquery); - pDup = sqlite3PExpr(pParse, TK_AS, pDup, 0, 0); - if( pDup==0 ) return; - ExprSetProperty(pDup, EP_Skip); - if( pEList->a[iCol].u.x.iAlias==0 ){ - pEList->a[iCol].u.x.iAlias = (u16)(++pParse->nAlias); - } - pDup->iTable = pEList->a[iCol].u.x.iAlias; - } + if( zType[0]!='G' ) incrAggFunctionDepth(pDup, nSubquery); if( pExpr->op==TK_COLLATE ){ pDup = sqlite3ExprAddCollateString(pParse, pDup, pExpr->u.zToken); } + ExprSetProperty(pDup, EP_Alias); /* Before calling sqlite3ExprDelete(), set the EP_Static flag. This ** prevents ExprDelete() from deleting the Expr structure itself, ** allowing it to be repopulated by the memcpy() on the following line. ** The pExpr->u.zToken might point into memory that will be freed by the @@ -504,11 +472,11 @@ pExpr->pRight = 0; pExpr->op = (isTrigger ? TK_TRIGGER : TK_COLUMN); lookupname_end: if( cnt==1 ){ assert( pNC!=0 ); - if( pExpr->op!=TK_AS ){ + if( !ExprHasProperty(pExpr, EP_Alias) ){ sqlite3AuthRead(pParse, pExpr, pSchema, pNC->pSrcList); } /* Increment the nRef value on all name contexts from TopNC up to ** the point where the name matched. */ for(;;){ @@ -545,40 +513,29 @@ } return p; } /* -** Report an error that an expression is not valid for a partial index WHERE -** clause. -*/ -static void notValidPartIdxWhere( - Parse *pParse, /* Leave error message here */ - NameContext *pNC, /* The name context */ - const char *zMsg /* Type of error */ -){ - if( (pNC->ncFlags & NC_PartIdx)!=0 ){ - sqlite3ErrorMsg(pParse, "%s prohibited in partial index WHERE clauses", - zMsg); - } -} - -#ifndef SQLITE_OMIT_CHECK -/* -** Report an error that an expression is not valid for a CHECK constraint. -*/ -static void notValidCheckConstraint( - Parse *pParse, /* Leave error message here */ - NameContext *pNC, /* The name context */ - const char *zMsg /* Type of error */ -){ - if( (pNC->ncFlags & NC_IsCheck)!=0 ){ - sqlite3ErrorMsg(pParse,"%s prohibited in CHECK constraints", zMsg); - } -} -#else -# define notValidCheckConstraint(P,N,M) -#endif +** Report an error that an expression is not valid for some set of +** pNC->ncFlags values determined by validMask. +*/ +static void notValid( + Parse *pParse, /* Leave error message here */ + NameContext *pNC, /* The name context */ + const char *zMsg, /* Type of error */ + int validMask /* Set of contexts for which prohibited */ +){ + assert( (validMask&~(NC_IsCheck|NC_PartIdx|NC_IdxExpr))==0 ); + if( (pNC->ncFlags & validMask)!=0 ){ + const char *zIn = "partial index WHERE clauses"; + if( pNC->ncFlags & NC_IdxExpr ) zIn = "index expressions"; +#ifndef SQLITE_OMIT_CHECK + else if( pNC->ncFlags & NC_IsCheck ) zIn = "CHECK constraints"; +#endif + sqlite3ErrorMsg(pParse, "%s prohibited in %s", zMsg, zIn); + } +} /* ** Expression p should encode a floating point value between 1.0 and 0.0. ** Return 1024 times this value. Or return -1 if p is not a floating point ** value between 1.0 and 0.0. @@ -659,10 +616,12 @@ const char *zTable; const char *zDb; Expr *pRight; /* if( pSrcList==0 ) break; */ + notValid(pParse, pNC, "the \".\" operator", NC_IdxExpr); + /*notValid(pParse, pNC, "the \".\" operator", NC_PartIdx|NC_IsCheck, 1);*/ pRight = pExpr->pRight; if( pRight->op==TK_ID ){ zDb = 0; zTable = pExpr->pLeft->u.zToken; zColumn = pRight->u.zToken; @@ -688,11 +647,11 @@ const char *zId; /* The function name. */ FuncDef *pDef; /* Information about the function */ u8 enc = ENC(pParse->db); /* The database encoding */ assert( !ExprHasProperty(pExpr, EP_xIsSelect) ); - notValidPartIdxWhere(pParse, pNC, "functions"); + notValid(pParse, pNC, "functions", NC_PartIdx); zId = pExpr->u.zToken; nId = sqlite3Strlen30(zId); pDef = sqlite3FindFunction(pParse->db, zId, nId, n, enc, 0); if( pDef==0 ){ pDef = sqlite3FindFunction(pParse->db, zId, nId, -2, enc, 0); @@ -736,13 +695,22 @@ } pExpr->op = TK_NULL; return WRC_Prune; } #endif - if( pDef->funcFlags & SQLITE_FUNC_CONSTANT ){ + if( pDef->funcFlags & (SQLITE_FUNC_CONSTANT|SQLITE_FUNC_SLOCHNG) ){ + /* For the purposes of the EP_ConstFunc flag, date and time + ** functions and other functions that change slowly are considered + ** constant because they are constant for the duration of one query */ ExprSetProperty(pExpr,EP_ConstFunc); } + if( (pDef->funcFlags & SQLITE_FUNC_CONSTANT)==0 ){ + /* Date/time functions that use 'now', and other functions like + ** sqlite_version() that might change over time cannot be used + ** in an index. */ + notValid(pParse, pNC, "non-deterministic functions", NC_IdxExpr); + } } if( is_agg && (pNC->ncFlags & NC_AllowAgg)==0 ){ sqlite3ErrorMsg(pParse, "misuse of aggregate function %.*s()", nId,zId); pNC->nErr++; is_agg = 0; @@ -784,23 +752,21 @@ #endif case TK_IN: { testcase( pExpr->op==TK_IN ); if( ExprHasProperty(pExpr, EP_xIsSelect) ){ int nRef = pNC->nRef; - notValidCheckConstraint(pParse, pNC, "subqueries"); - notValidPartIdxWhere(pParse, pNC, "subqueries"); + notValid(pParse, pNC, "subqueries", NC_IsCheck|NC_PartIdx|NC_IdxExpr); sqlite3WalkSelect(pWalker, pExpr->x.pSelect); assert( pNC->nRef>=nRef ); if( nRef!=pNC->nRef ){ ExprSetProperty(pExpr, EP_VarSelect); } } break; } case TK_VARIABLE: { - notValidCheckConstraint(pParse, pNC, "parameters"); - notValidPartIdxWhere(pParse, pNC, "parameters"); + notValid(pParse, pNC, "parameters", NC_IsCheck|NC_PartIdx|NC_IdxExpr); break; } } return (pParse->nErr || pParse->db->mallocFailed) ? WRC_Abort : WRC_Continue; } @@ -1499,18 +1465,18 @@ ** Any errors cause an error message to be set in pParse. */ void sqlite3ResolveSelfReference( Parse *pParse, /* Parsing context */ Table *pTab, /* The table being referenced */ - int type, /* NC_IsCheck or NC_PartIdx */ + int type, /* NC_IsCheck or NC_PartIdx or NC_IdxExpr */ Expr *pExpr, /* Expression to resolve. May be NULL. */ ExprList *pList /* Expression list to resolve. May be NUL. */ ){ SrcList sSrc; /* Fake SrcList for pParse->pNewTable */ NameContext sNC; /* Name context for pParse->pNewTable */ - assert( type==NC_IsCheck || type==NC_PartIdx ); + assert( type==NC_IsCheck || type==NC_PartIdx || type==NC_IdxExpr ); memset(&sNC, 0, sizeof(sNC)); memset(&sSrc, 0, sizeof(sSrc)); sSrc.nSrc = 1; sSrc.a[0].zName = pTab->zName; sSrc.a[0].pTab = pTab; Index: src/shell.c ================================================================== --- src/shell.c +++ src/shell.c @@ -4250,12 +4250,12 @@ if( nSql ){ if( !_all_whitespace(zSql) ){ fprintf(stderr, "Error: incomplete SQL: %s\n", zSql); errCnt++; } - free(zSql); } + free(zSql); free(zLine); return errCnt>0; } /* Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -122,11 +122,11 @@ ** These interfaces provide the same information as the [SQLITE_VERSION], ** [SQLITE_VERSION_NUMBER], and [SQLITE_SOURCE_ID] C preprocessor macros ** but are associated with the library instead of the header file. ^(Cautious ** programmers might include assert() statements in their application to ** verify that values returned by these interfaces match the macros in -** the header, and thus insure that the application is +** the header, and thus ensure that the application is ** compiled with matching library and header files. ** **
 ** assert( sqlite3_libversion_number()==SQLITE_VERSION_NUMBER );
 ** assert( strcmp(sqlite3_sourceid(),SQLITE_SOURCE_ID)==0 );
@@ -372,11 +372,11 @@
 ** is not changed.
 **
 ** Restrictions:
 **
 ** 
    -**
  • The application must insure that the 1st parameter to sqlite3_exec() +**
  • The application must ensure that the 1st parameter to sqlite3_exec() ** is a valid and open [database connection]. **
  • The application must not close the [database connection] specified by ** the 1st parameter to sqlite3_exec() while sqlite3_exec() is running. **
  • The application must not modify the SQL statement text passed into ** the 2nd parameter of sqlite3_exec() while sqlite3_exec() is running. @@ -1366,13 +1366,15 @@ ** changes to SQLite in order to tune SQLite to the specific needs of ** the application. The default configuration is recommended for most ** applications and so this routine is usually not necessary. It is ** provided to support rare applications with unusual needs. ** -** The sqlite3_config() interface is not threadsafe. The application -** must insure that no other SQLite interfaces are invoked by other -** threads while sqlite3_config() is running. Furthermore, sqlite3_config() +** The sqlite3_config() interface is not threadsafe. The application +** must ensure that no other SQLite interfaces are invoked by other +** threads while sqlite3_config() is running. +** +** The sqlite3_config() interface ** may only be invoked prior to library initialization using ** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()]. ** ^If sqlite3_config() is called after [sqlite3_initialize()] and before ** [sqlite3_shutdown()] then it will return SQLITE_MISUSE. ** Note, however, that ^sqlite3_config() can be called as part of the @@ -4356,10 +4358,26 @@ const void *sqlite3_value_text16le(sqlite3_value*); const void *sqlite3_value_text16be(sqlite3_value*); int sqlite3_value_type(sqlite3_value*); int sqlite3_value_numeric_type(sqlite3_value*); +/* +** CAPI3REF: Obtaining SQL Values +** METHOD: sqlite3_value +** +** The sqlite3_value_subtype(V) function returns the subtype for +** an [application-defined SQL function] argument V. The subtype +** information can be used to pass a limited amount of context from +** one SQL function to another. Use the [sqlite3_result_subtype()] +** routine to set the subtype for the return value of an SQL function. +** +** SQLite makes no use of subtype itself. It merely passes the subtype +** from the result of one [application-defined SQL function] into the +** input of another. +*/ +unsigned int sqlite3_value_subtype(sqlite3_value*); + /* ** CAPI3REF: Copy And Free SQL Values ** METHOD: sqlite3_value ** ** ^The sqlite3_value_dup(V) interface makes a copy of the [sqlite3_value] @@ -4655,10 +4673,25 @@ void sqlite3_result_text16be(sqlite3_context*, const void*, int,void(*)(void*)); void sqlite3_result_value(sqlite3_context*, sqlite3_value*); void sqlite3_result_zeroblob(sqlite3_context*, int n); int sqlite3_result_zeroblob64(sqlite3_context*, sqlite3_uint64 n); + +/* +** CAPI3REF: Setting The Subtype Of An SQL Function +** METHOD: sqlite3_context +** +** The sqlite3_result_subtype(C,T) function causes the subtype of +** the result from the [application-defined SQL function] with +** [sqlite3_context] C to be the value T. Only the lower 8 bits +** of the subtype T are preserved in current versions of SQLite; +** higher order bits are discarded. +** The number of subtype bytes preserved by SQLite might increase +** in future releases of SQLite. +*/ +void sqlite3_result_subtype(sqlite3_context*,unsigned int); + /* ** CAPI3REF: Define New Collating Sequences ** METHOD: sqlite3 ** ** ^These functions add, remove, or modify a [collation] associated @@ -6093,10 +6126,13 @@ **
  • SQLITE_MUTEX_STATIC_LRU **
  • SQLITE_MUTEX_STATIC_PMEM **
  • SQLITE_MUTEX_STATIC_APP1 **
  • SQLITE_MUTEX_STATIC_APP2 **
  • SQLITE_MUTEX_STATIC_APP3 +**
  • SQLITE_MUTEX_STATIC_VFS1 +**
  • SQLITE_MUTEX_STATIC_VFS2 +**
  • SQLITE_MUTEX_STATIC_VFS3 **
** ** ^The first two constants (SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE) ** cause sqlite3_mutex_alloc() to create ** a new mutex. ^The new mutex is recursive when SQLITE_MUTEX_RECURSIVE Index: src/sqlite3ext.h ================================================================== --- src/sqlite3ext.h +++ src/sqlite3ext.h @@ -270,10 +270,13 @@ /* Version 3.8.11 and later */ sqlite3_value *(*value_dup)(const sqlite3_value*); void (*value_free)(sqlite3_value*); int (*result_zeroblob64)(sqlite3_context*,sqlite3_uint64); int (*bind_zeroblob64)(sqlite3_stmt*, int, sqlite3_uint64); + /* Version 3.8.12 and later */ + unsigned int (*value_subtype)(sqlite3_value*); + void (*result_subtype)(sqlite3_context*,unsigned int); }; /* ** The following macros redefine the API routines so that they are ** redirected through the global sqlite3_api structure. @@ -283,11 +286,11 @@ ** it can get access to the sqlite3_api_routines structure ** definition. But the main library does not want to redefine ** the API. So the redefinition macros are only valid if the ** SQLITE_CORE macros is undefined. */ -#ifndef SQLITE_CORE +#if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) #define sqlite3_aggregate_context sqlite3_api->aggregate_context #ifndef SQLITE_OMIT_DEPRECATED #define sqlite3_aggregate_count sqlite3_api->aggregate_count #endif #define sqlite3_bind_blob sqlite3_api->bind_blob @@ -506,13 +509,16 @@ /* Version 3.8.11 and later */ #define sqlite3_value_dup sqlite3_api->value_dup #define sqlite3_value_free sqlite3_api->value_free #define sqlite3_result_zeroblob64 sqlite3_api->result_zeroblob64 #define sqlite3_bind_zeroblob64 sqlite3_api->bind_zeroblob64 -#endif /* SQLITE_CORE */ +/* Version 3.8.12 and later */ +#define sqlite3_value_subtype sqlite3_api->value_subtype +#define sqlite3_result_subtype sqlite3_api->result_subtype +#endif /* !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) */ -#ifndef SQLITE_CORE +#if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) /* This case when the file really is being compiled as a loadable ** extension */ # define SQLITE_EXTENSION_INIT1 const sqlite3_api_routines *sqlite3_api=0; # define SQLITE_EXTENSION_INIT2(v) sqlite3_api=v; # define SQLITE_EXTENSION_INIT3 \ Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -1392,22 +1392,24 @@ /* ** Possible values for FuncDef.flags. Note that the _LENGTH and _TYPEOF ** values must correspond to OPFLAG_LENGTHARG and OPFLAG_TYPEOFARG. There ** are assert() statements in the code to verify this. */ -#define SQLITE_FUNC_ENCMASK 0x003 /* SQLITE_UTF8, SQLITE_UTF16BE or UTF16LE */ -#define SQLITE_FUNC_LIKE 0x004 /* Candidate for the LIKE optimization */ -#define SQLITE_FUNC_CASE 0x008 /* Case-sensitive LIKE-type function */ -#define SQLITE_FUNC_EPHEM 0x010 /* Ephemeral. Delete with VDBE */ -#define SQLITE_FUNC_NEEDCOLL 0x020 /* sqlite3GetFuncCollSeq() might be called */ -#define SQLITE_FUNC_LENGTH 0x040 /* Built-in length() function */ -#define SQLITE_FUNC_TYPEOF 0x080 /* Built-in typeof() function */ -#define SQLITE_FUNC_COUNT 0x100 /* Built-in count(*) aggregate */ -#define SQLITE_FUNC_COALESCE 0x200 /* Built-in coalesce() or ifnull() */ -#define SQLITE_FUNC_UNLIKELY 0x400 /* Built-in unlikely() function */ -#define SQLITE_FUNC_CONSTANT 0x800 /* Constant inputs give a constant output */ -#define SQLITE_FUNC_MINMAX 0x1000 /* True for min() and max() aggregates */ +#define SQLITE_FUNC_ENCMASK 0x0003 /* SQLITE_UTF8, SQLITE_UTF16BE or UTF16LE */ +#define SQLITE_FUNC_LIKE 0x0004 /* Candidate for the LIKE optimization */ +#define SQLITE_FUNC_CASE 0x0008 /* Case-sensitive LIKE-type function */ +#define SQLITE_FUNC_EPHEM 0x0010 /* Ephemeral. Delete with VDBE */ +#define SQLITE_FUNC_NEEDCOLL 0x0020 /* sqlite3GetFuncCollSeq() might be called*/ +#define SQLITE_FUNC_LENGTH 0x0040 /* Built-in length() function */ +#define SQLITE_FUNC_TYPEOF 0x0080 /* Built-in typeof() function */ +#define SQLITE_FUNC_COUNT 0x0100 /* Built-in count(*) aggregate */ +#define SQLITE_FUNC_COALESCE 0x0200 /* Built-in coalesce() or ifnull() */ +#define SQLITE_FUNC_UNLIKELY 0x0400 /* Built-in unlikely() function */ +#define SQLITE_FUNC_CONSTANT 0x0800 /* Constant inputs give a constant output */ +#define SQLITE_FUNC_MINMAX 0x1000 /* True for min() and max() aggregates */ +#define SQLITE_FUNC_SLOCHNG 0x2000 /* "Slow Change". Value constant during a + ** single query - might change over time */ /* ** The following three macros, FUNCTION(), LIKEFUNC() and AGGREGATE() are ** used to create the initializers for the FuncDef structures. ** @@ -1418,10 +1420,16 @@ ** as the user-data (sqlite3_user_data()) for the function. If ** argument bNC is true, then the SQLITE_FUNC_NEEDCOLL flag is set. ** ** VFUNCTION(zName, nArg, iArg, bNC, xFunc) ** Like FUNCTION except it omits the SQLITE_FUNC_CONSTANT flag. +** +** DFUNCTION(zName, nArg, iArg, bNC, xFunc) +** Like FUNCTION except it omits the SQLITE_FUNC_CONSTANT flag and +** adds the SQLITE_FUNC_SLOCHNG flag. Used for date & time functions +** and functions like sqlite_version() that can change, but not during +** a single query. ** ** AGGREGATE(zName, nArg, iArg, bNC, xStep, xFinal) ** Used to create an aggregate function definition implemented by ** the C functions xStep and xFinal. The first four parameters ** are interpreted in the same way as the first 4 parameters to @@ -1439,15 +1447,18 @@ {nArg, SQLITE_FUNC_CONSTANT|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL), \ SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, #zName, 0, 0} #define VFUNCTION(zName, nArg, iArg, bNC, xFunc) \ {nArg, SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL), \ SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, #zName, 0, 0} +#define DFUNCTION(zName, nArg, iArg, bNC, xFunc) \ + {nArg, SQLITE_FUNC_SLOCHNG|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL), \ + SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, #zName, 0, 0} #define FUNCTION2(zName, nArg, iArg, bNC, xFunc, extraFlags) \ {nArg,SQLITE_FUNC_CONSTANT|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL)|extraFlags,\ SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, #zName, 0, 0} #define STR_FUNCTION(zName, nArg, pArg, bNC, xFunc) \ - {nArg, SQLITE_FUNC_CONSTANT|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL), \ + {nArg, SQLITE_FUNC_SLOCHNG|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL), \ pArg, 0, xFunc, 0, 0, #zName, 0, 0} #define LIKEFUNC(zName, nArg, arg, flags) \ {nArg, SQLITE_FUNC_CONSTANT|SQLITE_UTF8|flags, \ (void *)arg, 0, likeFunc, 0, 0, #zName, 0, 0} #define AGGREGATE(zName, nArg, arg, nc, xStep, xFinal) \ @@ -1879,10 +1890,11 @@ Index *pNext; /* The next index associated with the same table */ Schema *pSchema; /* Schema containing this index */ u8 *aSortOrder; /* for each column: True==DESC, False==ASC */ char **azColl; /* Array of collation sequence names for index */ Expr *pPartIdxWhere; /* WHERE clause for partial indices */ + ExprList *aColExpr; /* Column expressions */ int tnum; /* DB Page containing root of this index */ LogEst szIdxRow; /* Estimated average row size in bytes */ u16 nKeyCol; /* Number of columns forming the key */ u16 nColumn; /* Number of columns stored in the index */ u8 onError; /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */ @@ -2128,13 +2140,14 @@ #define EP_TokenOnly 0x004000 /* Expr struct EXPR_TOKENONLYSIZE bytes only */ #define EP_Static 0x008000 /* Held in memory not obtained from malloc() */ #define EP_MemToken 0x010000 /* Need to sqlite3DbFree() Expr.zToken */ #define EP_NoReduce 0x020000 /* Cannot EXPRDUP_REDUCE this Expr */ #define EP_Unlikely 0x040000 /* unlikely() or likelihood() function */ -#define EP_ConstFunc 0x080000 /* Node is a SQLITE_FUNC_CONSTANT function */ +#define EP_ConstFunc 0x080000 /* A SQLITE_FUNC_CONSTANT or _SLOCHNG function */ #define EP_CanBeNull 0x100000 /* Can be null despite NOT NULL constraint */ #define EP_Subquery 0x200000 /* Tree contains a TK_SELECT operator */ +#define EP_Alias 0x400000 /* Is an alias for a result set column */ /* ** Combinations of two or more EP_* flags */ #define EP_Propagate (EP_Collate|EP_Subquery) /* Propagate these bits up tree */ @@ -2347,10 +2360,11 @@ #define WHERE_GROUPBY 0x0100 /* pOrderBy is really a GROUP BY */ #define WHERE_DISTINCTBY 0x0200 /* pOrderby is really a DISTINCT clause */ #define WHERE_WANT_DISTINCT 0x0400 /* All output needs to be distinct */ #define WHERE_SORTBYGROUP 0x0800 /* Support sqlite3WhereIsSorted() */ #define WHERE_REOPEN_IDX 0x1000 /* Try to use OP_ReopenIdx */ +#define WHERE_ONEPASS_MULTIROW 0x2000 /* ONEPASS is ok with multiple rows */ /* Allowed return values from sqlite3WhereIsDistinct() */ #define WHERE_DISTINCT_NOOP 0 /* DISTINCT keyword not used */ #define WHERE_DISTINCT_UNIQUE 1 /* No duplicates */ @@ -2399,10 +2413,11 @@ #define NC_AllowAgg 0x0001 /* Aggregate functions are allowed here */ #define NC_HasAgg 0x0002 /* One or more aggregate functions seen */ #define NC_IsCheck 0x0004 /* True if resolving names in a CHECK constraint */ #define NC_InAggFunc 0x0008 /* True if analyzing arguments to an agg func */ #define NC_PartIdx 0x0010 /* True if resolving a partial index WHERE */ +#define NC_IdxExpr 0x0020 /* True if resolving columns of CREATE INDEX */ #define NC_MinMaxAgg 0x1000 /* min/max aggregates seen. See note above */ /* ** An instance of the following structure contains all information ** needed to generate code for a single SELECT statement. @@ -2668,11 +2683,11 @@ int nSet; /* Number of sets used so far */ int nOnce; /* Number of OP_Once instructions so far */ int nOpAlloc; /* Number of slots allocated for Vdbe.aOp[] */ int iFixedOp; /* Never back out opcodes iFixedOp-1 or earlier */ int ckBase; /* Base register of data during check constraints */ - int iPartIdxTab; /* Table corresponding to a partial index */ + int iSelfTab; /* Table of an index whose exprs are being coded */ int iCacheLevel; /* ColCache valid when aColCache[].iLevel<=iCacheLevel */ int iCacheCnt; /* Counter used to generate aColCache[].lru values */ int nLabel; /* Number of labels used */ int *aLabel; /* Space to hold the labels */ struct yColCache { @@ -3186,10 +3201,15 @@ sqlite3_mutex_methods const *sqlite3NoopMutex(void); sqlite3_mutex *sqlite3MutexAlloc(int); int sqlite3MutexInit(void); int sqlite3MutexEnd(void); #endif +#if !defined(SQLITE_MUTEX_OMIT) && !defined(SQLITE_MUTEX_NOOP) + void sqlite3MemoryBarrier(void); +#else +# define sqlite3MemoryBarrier() +#endif sqlite3_int64 sqlite3StatusValue(int); void sqlite3StatusUp(int, int); void sqlite3StatusDown(int, int); void sqlite3StatusSet(int, int); @@ -3369,10 +3389,14 @@ int sqlite3WhereIsOrdered(WhereInfo*); int sqlite3WhereIsSorted(WhereInfo*); int sqlite3WhereContinueLabel(WhereInfo*); int sqlite3WhereBreakLabel(WhereInfo*); int sqlite3WhereOkOnePass(WhereInfo*, int*); +#define ONEPASS_OFF 0 /* Use of ONEPASS not allowed */ +#define ONEPASS_SINGLE 1 /* ONEPASS valid for a single row update */ +#define ONEPASS_MULTI 2 /* ONEPASS is valid for multiple rows */ +void sqlite3ExprCodeLoadIndexColumn(Parse*, Index*, int, int, int); int sqlite3ExprCodeGetColumn(Parse*, Table*, int, int, int, u8); void sqlite3ExprCodeGetColumnOfTable(Vdbe*, Table*, int, int, int); void sqlite3ExprCodeMove(Parse*, int, int, int); void sqlite3ExprCacheStore(Parse*, int, int, int); void sqlite3ExprCachePush(Parse*); @@ -3428,12 +3452,13 @@ int sqlite3ExprIsTableConstant(Expr*,int); int sqlite3ExprIsInteger(Expr*, int*); int sqlite3ExprCanBeNull(const Expr*); int sqlite3ExprNeedsNoAffinityChange(const Expr*, char); int sqlite3IsRowid(const char*); -void sqlite3GenerateRowDelete(Parse*,Table*,Trigger*,int,int,int,i16,u8,u8,u8); -void sqlite3GenerateRowIndexDelete(Parse*, Table*, int, int, int*); +void sqlite3GenerateRowDelete( + Parse*,Table*,Trigger*,int,int,int,i16,u8,u8,u8,int); +void sqlite3GenerateRowIndexDelete(Parse*, Table*, int, int, int*, int); int sqlite3GenerateIndexKey(Parse*, Index*, int, int, int, int*,Index*,int); void sqlite3ResolvePartIdxLabel(Parse*,int); void sqlite3GenerateConstraintChecks(Parse*,Table*,int*,int,int,int,int, u8,u8,int,int*); void sqlite3CompleteInsertion(Parse*,Table*,int,int,int,int*,int,int,int); Index: src/test_func.c ================================================================== --- src/test_func.c +++ src/test_func.c @@ -460,11 +460,11 @@ zOut[16] = 0; sqlite3_result_text(context, zOut, -1, SQLITE_TRANSIENT); } /* -** tclcmd: test_extract(record, field) +** test_extract(record, field) ** ** This function implements an SQL user-function that accepts a blob ** containing a formatted database record as the first argument. The ** second argument is the index of the field within that record to ** extract and return. @@ -507,11 +507,11 @@ if( mem.szMalloc ) sqlite3DbFree(db, mem.zMalloc); } } /* -** tclcmd: test_decode(record) +** test_decode(record) ** ** This function implements an SQL user-function that accepts a blob ** containing a formatted database record as its only argument. It returns ** a tcl list (type SQLITE_TEXT) containing each of the values stored ** in the record. @@ -599,10 +599,12 @@ sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); Tcl_DecrRefCount(pRet); } /* +** test_zeroblob(N) +** ** The implementation of scalar SQL function "test_zeroblob()". This is ** similar to the built-in zeroblob() function, except that it does not ** check that the integer parameter is within range before passing it ** to sqlite3_result_zeroblob(). */ @@ -612,10 +614,35 @@ sqlite3_value **argv ){ int nZero = sqlite3_value_int(argv[0]); sqlite3_result_zeroblob(context, nZero); } + +/* test_getsubtype(V) +** +** Return the subtype for value V. +*/ +static void test_getsubtype( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + sqlite3_result_int(context, (int)sqlite3_value_subtype(argv[0])); +} + +/* test_setsubtype(V, T) +** +** Return the value V with its subtype changed to T +*/ +static void test_setsubtype( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + sqlite3_result_value(context, argv[0]); + sqlite3_result_subtype(context, (unsigned int)sqlite3_value_int(argv[1])); +} static int registerTestFunctions(sqlite3 *db){ static const struct { char *zName; signed char nArg; @@ -639,10 +666,12 @@ { "test_counter", 1, SQLITE_UTF8, counterFunc}, { "real2hex", 1, SQLITE_UTF8, real2hex}, { "test_decode", 1, SQLITE_UTF8, test_decode}, { "test_extract", 2, SQLITE_UTF8, test_extract}, { "test_zeroblob", 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, test_zeroblob}, + { "test_getsubtype", 1, SQLITE_UTF8, test_getsubtype}, + { "test_setsubtype", 2, SQLITE_UTF8, test_setsubtype}, }; int i; for(i=0; irc = SQLITE_OK; pParse->zTail = zSql; i = 0; assert( pzErrMsg!=0 ); + /* sqlite3ParserTrace(stdout, "parser: "); */ pEngine = sqlite3ParserAlloc(sqlite3Malloc); if( pEngine==0 ){ db->mallocFailed = 1; return SQLITE_NOMEM; } Index: src/treeview.c ================================================================== --- src/treeview.c +++ src/treeview.c @@ -251,15 +251,10 @@ } case TK_REGISTER: { sqlite3TreeViewLine(pView,"REGISTER(%d)", pExpr->iTable); break; } - case TK_AS: { - sqlite3TreeViewLine(pView,"AS %Q", pExpr->u.zToken); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); - break; - } case TK_ID: { sqlite3TreeViewLine(pView,"ID \"%w\"", pExpr->u.zToken); break; } #ifndef SQLITE_OMIT_CAST Index: src/update.c ================================================================== --- src/update.c +++ src/update.c @@ -270,20 +270,23 @@ hasFK = sqlite3FkRequired(pParse, pTab, aXRef, chngKey); /* There is one entry in the aRegIdx[] array for each index on the table ** being updated. Fill in aRegIdx[] with a register number that will hold - ** the key for accessing each index. + ** the key for accessing each index. + ** + ** FIXME: Be smarter about omitting indexes that use expressions. */ for(j=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, j++){ int reg; if( chngKey || hasFK || pIdx->pPartIdxWhere || pIdx==pPk ){ reg = ++pParse->nMem; }else{ reg = 0; for(i=0; inKeyCol; i++){ - if( aXRef[pIdx->aiColumn[i]]>=0 ){ + i16 iIdxCol = pIdx->aiColumn[i]; + if( iIdxCol<0 || aXRef[iIdxCol]>=0 ){ reg = ++pParse->nMem; break; } } } @@ -379,10 +382,11 @@ pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, 0, 0, WHERE_ONEPASS_DESIRED, iIdxCur); if( pWInfo==0 ) goto update_cleanup; okOnePass = sqlite3WhereOkOnePass(pWInfo, aiCurOnePass); for(i=0; iaiColumn[i]>=(-1) ); sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, pPk->aiColumn[i], iPk+i); } if( okOnePass ){ sqlite3VdbeChangeToNoop(v, addrOpen); @@ -581,11 +585,11 @@ }else{ j1 = sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, 0, regOldRowid); } VdbeCoverageNeverTaken(v); } - sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur, aRegIdx); + sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur, aRegIdx, -1); /* If changing the record number, delete the old record. */ if( hasFK || chngKey || pPk!=0 ){ sqlite3VdbeAddOp2(v, OP_Delete, iDataCur, 0); } Index: src/vdbe.c ================================================================== --- src/vdbe.c +++ src/vdbe.c @@ -3967,13 +3967,14 @@ /* Opcode: NotExists P1 P2 P3 * * ** Synopsis: intkey=r[P3] ** ** P1 is the index of a cursor open on an SQL table btree (with integer ** keys). P3 is an integer rowid. If P1 does not contain a record with -** rowid P3 then jump immediately to P2. If P1 does contain a record -** with rowid P3 then leave the cursor pointing at that record and fall -** through to the next instruction. +** rowid P3 then jump immediately to P2. Or, if P2 is 0, raise an +** SQLITE_CORRUPT error. If P1 does contain a record with rowid P3 then +** leave the cursor pointing at that record and fall through to the next +** instruction. ** ** The OP_NotFound opcode performs the same operation on index btrees ** (with arbitrary multi-value keys). ** ** This opcode leaves the cursor in a state where it cannot be advanced @@ -4001,17 +4002,25 @@ pCrsr = pC->pCursor; assert( pCrsr!=0 ); res = 0; iKey = pIn3->u.i; rc = sqlite3BtreeMovetoUnpacked(pCrsr, 0, iKey, 0, &res); + assert( rc==SQLITE_OK || res==0 ); pC->movetoTarget = iKey; /* Used by OP_Delete */ pC->nullRow = 0; pC->cacheStatus = CACHE_STALE; pC->deferredMoveto = 0; VdbeBranchTaken(res!=0,2); pC->seekResult = res; - if( res!=0 ) goto jump_to_p2; + if( res!=0 ){ + assert( rc==SQLITE_OK ); + if( pOp->p2==0 ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + goto jump_to_p2; + } + } break; } /* Opcode: Sequence P1 P2 * * * ** Synopsis: r[P2]=cursor[P1].ctr++ @@ -4273,18 +4282,19 @@ assert( pC->iDb>=0 ); } break; } -/* Opcode: Delete P1 P2 * P4 * +/* Opcode: Delete P1 P2 * P4 P5 ** ** Delete the record at which the P1 cursor is currently pointing. ** -** The cursor will be left pointing at either the next or the previous -** record in the table. If it is left pointing at the next record, then -** the next Next instruction will be a no-op. Hence it is OK to delete -** a record from within a Next loop. +** If the P5 parameter is non-zero, the cursor will be left pointing at +** either the next or the previous record in the table. If it is left +** pointing at the next record, then the next Next instruction will be a +** no-op. As a result, in this case it is OK to delete a record from within a +** Next loop. If P5 is zero, then the cursor is left in an undefined state. ** ** If the OPFLAG_NCHANGE flag of P2 is set, then the row change count is ** incremented (otherwise not). ** ** P1 must not be pseudo-table. It has to be a real table with @@ -4295,33 +4305,39 @@ ** If P4 is not NULL then the P1 cursor must have been positioned ** using OP_NotFound prior to invoking this opcode. */ case OP_Delete: { VdbeCursor *pC; + u8 hasUpdateCallback; assert( pOp->p1>=0 && pOp->p1nCursor ); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); assert( pC->pCursor!=0 ); /* Only valid for real tables, no pseudotables */ assert( pC->deferredMoveto==0 ); + + hasUpdateCallback = db->xUpdateCallback && pOp->p4.z && pC->isTable; + if( pOp->p5 && hasUpdateCallback ){ + sqlite3BtreeKeySize(pC->pCursor, &pC->movetoTarget); + } #ifdef SQLITE_DEBUG /* The seek operation that positioned the cursor prior to OP_Delete will ** have also set the pC->movetoTarget field to the rowid of the row that ** is being deleted */ - if( pOp->p4.z && pC->isTable ){ + if( pOp->p4.z && pC->isTable && pOp->p5==0 ){ i64 iKey = 0; sqlite3BtreeKeySize(pC->pCursor, &iKey); assert( pC->movetoTarget==iKey ); } #endif - rc = sqlite3BtreeDelete(pC->pCursor); + rc = sqlite3BtreeDelete(pC->pCursor, pOp->p5); pC->cacheStatus = CACHE_STALE; /* Invoke the update-hook if required. */ - if( rc==SQLITE_OK && db->xUpdateCallback && pOp->p4.z && pC->isTable ){ + if( rc==SQLITE_OK && hasUpdateCallback ){ db->xUpdateCallback(db->pUpdateArg, SQLITE_DELETE, db->aDb[pC->iDb].zName, pOp->p4.z, pC->movetoTarget); assert( pC->iDb>=0 ); } if( pOp->p2 & OPFLAG_NCHANGE ) p->nChange++; @@ -4856,11 +4872,11 @@ #ifdef SQLITE_DEBUG { int i; for(i=0; ideferredMoveto==0 ); pC->cacheStatus = CACHE_STALE; break; } Index: src/vdbeInt.h ================================================================== --- src/vdbeInt.h +++ src/vdbeInt.h @@ -173,10 +173,11 @@ RowSet *pRowSet; /* Used only when flags==MEM_RowSet */ VdbeFrame *pFrame; /* Used when flags==MEM_Frame */ } u; u16 flags; /* Some combination of MEM_Null, MEM_Str, MEM_Dyn, etc. */ u8 enc; /* SQLITE_UTF8, SQLITE_UTF16BE, SQLITE_UTF16LE */ + u8 eSubtype; /* Subtype for this value */ int n; /* Number of characters in string value, excluding '\0' */ char *z; /* String or BLOB value */ /* ShallowCopy only needs to copy the information above */ char *zMalloc; /* Space to hold MEM_Str or MEM_Blob if szMalloc>0 */ int szMalloc; /* Size of the zMalloc allocation */ Index: src/vdbeapi.c ================================================================== --- src/vdbeapi.c +++ src/vdbeapi.c @@ -200,10 +200,13 @@ int sqlite3_value_int(sqlite3_value *pVal){ return (int)sqlite3VdbeIntValue((Mem*)pVal); } sqlite_int64 sqlite3_value_int64(sqlite3_value *pVal){ return sqlite3VdbeIntValue((Mem*)pVal); +} +unsigned int sqlite3_value_subtype(sqlite3_value *pVal){ + return ((Mem*)pVal)->eSubtype; } const unsigned char *sqlite3_value_text(sqlite3_value *pVal){ return (const unsigned char *)sqlite3ValueText(pVal, SQLITE_UTF8); } #ifndef SQLITE_OMIT_UTF16 @@ -378,10 +381,14 @@ sqlite3VdbeMemSetInt64(pCtx->pOut, iVal); } void sqlite3_result_null(sqlite3_context *pCtx){ assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); sqlite3VdbeMemSetNull(pCtx->pOut); +} +void sqlite3_result_subtype(sqlite3_context *pCtx, unsigned int eSubtype){ + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); + pCtx->pOut->eSubtype = eSubtype & 0xff; } void sqlite3_result_text( sqlite3_context *pCtx, const char *z, int n, @@ -710,11 +717,11 @@ ** parameter) of the sqlite3_create_function() and ** sqlite3_create_function16() routines that originally registered the ** application defined function. */ sqlite3 *sqlite3_context_db_handle(sqlite3_context *p){ - assert( p && p->pFunc ); + assert( p && p->pOut ); return p->pOut->db; } /* ** Return the current time for a statement. If the current time Index: src/vdbeblob.c ================================================================== --- src/vdbeblob.c +++ src/vdbeblob.c @@ -245,11 +245,12 @@ } #endif for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ int j; for(j=0; jnKeyCol; j++){ - if( pIdx->aiColumn[j]==iCol ){ + /* FIXME: Be smarter about indexes that use expressions */ + if( pIdx->aiColumn[j]==iCol || pIdx->aiColumn[j]==(-2) ){ zFault = "indexed"; } } } if( zFault ){ Index: src/vdbemem.c ================================================================== --- src/vdbemem.c +++ src/vdbemem.c @@ -1153,11 +1153,11 @@ /* ** The expression object indicated by the second argument is guaranteed ** to be a scalar SQL function. If ** ** * all function arguments are SQL literals, -** * the SQLITE_FUNC_CONSTANT function flag is set, and +** * one of the SQLITE_FUNC_CONSTANT or _SLOCHNG function flags is set, and ** * the SQLITE_FUNC_NEEDCOLL function flag is not set, ** ** then this routine attempts to invoke the SQL function. Assuming no ** error occurs, output parameter (*ppVal) is set to point to a value ** object containing the result before returning SQLITE_OK. @@ -1194,11 +1194,11 @@ pList = p->x.pList; if( pList ) nVal = pList->nExpr; nName = sqlite3Strlen30(p->u.zToken); pFunc = sqlite3FindFunction(db, p->u.zToken, nName, nVal, enc, 0); assert( pFunc ); - if( (pFunc->funcFlags & SQLITE_FUNC_CONSTANT)==0 + if( (pFunc->funcFlags & (SQLITE_FUNC_CONSTANT|SQLITE_FUNC_SLOCHNG))==0 || (pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL) ){ return SQLITE_OK; } Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -2468,11 +2468,12 @@ ** of the wal-index file content. Make sure the results agree with the ** result obtained using the hash indexes above. */ { u32 iRead2 = 0; u32 iTest; - for(iTest=iLast; iTest>0; iTest--){ + assert( pWal->minFrame>0 ); + for(iTest=iLast; iTest>=pWal->minFrame; iTest--){ if( walFramePgno(pWal, iTest)==pgno ){ iRead2 = iTest; break; } } Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -67,13 +67,15 @@ int sqlite3WhereBreakLabel(WhereInfo *pWInfo){ return pWInfo->iBreak; } /* -** Return TRUE if an UPDATE or DELETE statement can operate directly on -** the rowids returned by a WHERE clause. Return FALSE if doing an -** UPDATE or DELETE might change subsequent WHERE clause results. +** Return ONEPASS_OFF (0) if an UPDATE or DELETE statement is unable to +** operate directly on the rowis returned by a WHERE clause. Return +** ONEPASS_SINGLE (1) if the statement can operation directly because only +** a single row is to be changed. Return ONEPASS_MULTI (2) if the one-pass +** optimization can be used on multiple ** ** If the ONEPASS optimization is used (if this routine returns true) ** then also write the indices of open cursors used by ONEPASS ** into aiCur[0] and aiCur[1]. iaCur[0] gets the cursor of the data ** table and iaCur[1] gets the cursor used by an auxiliary index. @@ -83,11 +85,11 @@ ** aiCur[0] and aiCur[1] both get -1 if the where-clause logic is ** unable to use the ONEPASS optimization. */ int sqlite3WhereOkOnePass(WhereInfo *pWInfo, int *aiCur){ memcpy(aiCur, pWInfo->aiCurOnePass, sizeof(int)*2); - return pWInfo->okOnePass; + return pWInfo->eOnePass; } /* ** Move the content of pSrc into pDest */ @@ -178,14 +180,17 @@ int k = pScan->k; /* Where to start scanning */ while( pScan->iEquiv<=pScan->nEquiv ){ iCur = pScan->aiCur[pScan->iEquiv-1]; iColumn = pScan->aiColumn[pScan->iEquiv-1]; + assert( iColumn!=(-2) || pScan->pIdxExpr!=0 ); while( (pWC = pScan->pWC)!=0 ){ for(pTerm=pWC->a+k; knTerm; k++, pTerm++){ if( pTerm->leftCursor==iCur && pTerm->u.leftColumn==iColumn + && (iColumn!=(-2) + || sqlite3ExprCompare(pTerm->pExpr->pLeft,pScan->pIdxExpr,iCur)==0) && (pScan->iEquiv<=1 || !ExprHasProperty(pTerm->pExpr, EP_FromJoin)) ){ if( (pTerm->eOperator & WO_EQUIV)!=0 && pScan->nEquivaiCur) ){ @@ -266,20 +271,23 @@ int iCur, /* Cursor to scan for */ int iColumn, /* Column to scan for */ u32 opMask, /* Operator(s) to scan for */ Index *pIdx /* Must be compatible with this index */ ){ - int j; + int j = 0; /* memset(pScan, 0, sizeof(*pScan)); */ pScan->pOrigWC = pWC; pScan->pWC = pWC; + pScan->pIdxExpr = 0; + if( pIdx ){ + j = iColumn; + iColumn = pIdx->aiColumn[j]; + if( iColumn==(-2) ) pScan->pIdxExpr = pIdx->aColExpr->a[j].pExpr; + } if( pIdx && iColumn>=0 ){ pScan->idxaff = pIdx->pTable->aCol[iColumn].affinity; - for(j=0; pIdx->aiColumn[j]!=iColumn; j++){ - if( NEVER(j>pIdx->nColumn) ) return 0; - } pScan->zCollName = pIdx->azColl[j]; }else{ pScan->idxaff = 0; pScan->zCollName = 0; } @@ -295,10 +303,13 @@ /* ** Search for a term in the WHERE clause that is of the form "X " ** where X is a reference to the iColumn of table iCur and is one of ** the WO_xx operator codes specified by the op parameter. ** Return a pointer to the term. Return 0 if not found. +** +** If pIdx!=0 then search for terms matching the iColumn-th column of pIdx +** rather than the iColumn-th column of table iCur. ** ** The term returned might by Y= if there is another constraint in ** the WHERE clause that specifies that X=Y. Any such constraints will be ** identified by the WO_EQUIV bit in the pTerm->eOperator field. The ** aiCur[]/iaColumn[] arrays hold X and all its equivalents. There are 11 @@ -370,10 +381,29 @@ } } return -1; } + +/* +** Return TRUE if the iCol-th column of index pIdx is NOT NULL +*/ +static int indexColumnNotNull(Index *pIdx, int iCol){ + int j; + assert( pIdx!=0 ); + assert( iCol>=0 && iColnColumn ); + j = pIdx->aiColumn[iCol]; + if( j>=0 ){ + return pIdx->pTable->aCol[j].notNull; + }else if( j==(-1) ){ + return 1; + }else{ + assert( j==(-2) ); + return 0; /* Assume an indexed expression can always yield a NULL */ + + } +} /* ** Return true if the DISTINCT expression-list passed as the third argument ** is redundant. ** @@ -421,16 +451,13 @@ ** contain a "col=X" term are subject to a NOT NULL constraint. */ for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ if( !IsUniqueIndex(pIdx) ) continue; for(i=0; inKeyCol; i++){ - i16 iCol = pIdx->aiColumn[i]; - if( 0==sqlite3WhereFindTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){ - int iIdxCol = findIndexCol(pParse, pDistinct, iBase, pIdx, i); - if( iIdxCol<0 || pTab->aCol[iCol].notNull==0 ){ - break; - } + if( 0==sqlite3WhereFindTerm(pWC, iBase, i, ~(Bitmask)0, WO_EQ, pIdx) ){ + if( findIndexCol(pParse, pDistinct, iBase, pIdx, i)<0 ) break; + if( indexColumnNotNull(pIdx, i)==0 ) break; } } if( i==pIdx->nKeyCol ){ /* This index implies that the DISTINCT qualifier is redundant. */ return 1; @@ -778,10 +805,11 @@ testcase( pTerm->eOperator & WO_ISNULL ); testcase( pTerm->eOperator & WO_IS ); testcase( pTerm->eOperator & WO_ALL ); if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV|WO_IS))==0 ) continue; if( pTerm->wtFlags & TERM_VNULL ) continue; + assert( pTerm->u.leftColumn>=(-1) ); nTerm++; } /* If the ORDER BY clause contains only columns in the current ** virtual table then allocate space for the aOrderBy part of @@ -833,10 +861,11 @@ testcase( pTerm->eOperator & WO_IS ); testcase( pTerm->eOperator & WO_ISNULL ); testcase( pTerm->eOperator & WO_ALL ); if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV|WO_IS))==0 ) continue; if( pTerm->wtFlags & TERM_VNULL ) continue; + assert( pTerm->u.leftColumn>=(-1) ); pIdxCons[j].iColumn = pTerm->u.leftColumn; pIdxCons[j].iTermOffset = i; op = (u8)pTerm->eOperator & WO_ALL; if( op==WO_IN ) op = WO_EQ; pIdxCons[j].op = op; @@ -2124,11 +2153,10 @@ u16 saved_nLTerm; /* Original value of pNew->nLTerm */ u16 saved_nEq; /* Original value of pNew->u.btree.nEq */ u16 saved_nSkip; /* Original value of pNew->nSkip */ u32 saved_wsFlags; /* Original value of pNew->wsFlags */ LogEst saved_nOut; /* Original value of pNew->nOut */ - int iCol; /* Index of the column in the table */ int rc = SQLITE_OK; /* Return code */ LogEst rSize; /* Number of rows in the table */ LogEst rLogSize; /* Logarithm of table size */ WhereTerm *pTop = 0, *pBtm = 0; /* Top and bottom range constraints */ @@ -2145,20 +2173,19 @@ opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE|WO_ISNULL|WO_IS; } if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE); assert( pNew->u.btree.nEqnColumn ); - iCol = pProbe->aiColumn[pNew->u.btree.nEq]; - pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, iCol, - opMask, pProbe); saved_nEq = pNew->u.btree.nEq; saved_nSkip = pNew->nSkip; saved_nLTerm = pNew->nLTerm; saved_wsFlags = pNew->wsFlags; saved_prereq = pNew->prereq; saved_nOut = pNew->nOut; + pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, saved_nEq, + opMask, pProbe); pNew->rSetup = 0; rSize = pProbe->aiRowLogEst[0]; rLogSize = estLog(rSize); for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){ u16 eOp = pTerm->eOperator; /* Shorthand for pTerm->eOperator */ @@ -2167,11 +2194,11 @@ int nIn = 0; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 int nRecValid = pBuilder->nRecValid; #endif if( (eOp==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0) - && (iCol<0 || pSrc->pTab->aCol[iCol].notNull) + && indexColumnNotNull(pProbe, saved_nEq) ){ continue; /* ignore IS [NOT] NULL constraints on NOT NULL columns */ } if( pTerm->prereqRight & pNew->maskSelf ) continue; @@ -2204,12 +2231,14 @@ } assert( nIn>0 ); /* RHS always has 2 or more terms... The parser ** changes "x IN (?)" into "x=?". */ }else if( eOp & (WO_EQ|WO_IS) ){ + int iCol = pProbe->aiColumn[saved_nEq]; pNew->wsFlags |= WHERE_COLUMN_EQ; - if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1) ){ + assert( saved_nEq==pNew->u.btree.nEq ); + if( iCol==(-1) || (iCol>0 && nInMul==0 && saved_nEq==pProbe->nKeyCol-1) ){ if( iCol>=0 && pProbe->uniqNotNull==0 ){ pNew->wsFlags |= WHERE_UNQ_WANTED; }else{ pNew->wsFlags |= WHERE_ONEROW; } @@ -2256,11 +2285,11 @@ }else{ int nEq = ++pNew->u.btree.nEq; assert( eOp & (WO_ISNULL|WO_EQ|WO_IN|WO_IS) ); assert( pNew->nOut==saved_nOut ); - if( pTerm->truthProb<=0 && iCol>=0 ){ + if( pTerm->truthProb<=0 && pProbe->aiColumn[saved_nEq]>=0 ){ assert( (eOp & WO_IN) || nIn==0 ); testcase( eOp & WO_IN ); pNew->nOut += pTerm->truthProb; pNew->nOut -= nIn; }else{ @@ -3783,11 +3812,11 @@ || pIdx->pPartIdxWhere!=0 || pIdx->nKeyCol>ArraySize(pLoop->aLTermSpace) ) continue; opMask = pIdx->uniqNotNull ? (WO_EQ|WO_IS) : WO_EQ; for(j=0; jnKeyCol; j++){ - pTerm = sqlite3WhereFindTerm(pWC, iCur, pIdx->aiColumn[j], 0, opMask, pIdx); + pTerm = sqlite3WhereFindTerm(pWC, iCur, j, 0, opMask, pIdx); if( pTerm==0 ) break; testcase( pTerm->eOperator & WO_IS ); pLoop->aLTerm[j] = pTerm; } if( j!=pIdx->nKeyCol ) continue; @@ -3929,10 +3958,14 @@ WhereLoop *pLoop; /* Pointer to a single WhereLoop object */ int ii; /* Loop counter */ sqlite3 *db; /* Database connection */ int rc; /* Return code */ + assert( (wctrlFlags & WHERE_ONEPASS_MULTIROW)==0 || ( + (wctrlFlags & WHERE_ONEPASS_DESIRED)!=0 + && (wctrlFlags & WHERE_OMIT_OPEN_CLOSE)==0 + )); /* Variable initialization */ db = pParse->db; memset(&sWLB, 0, sizeof(sWLB)); @@ -3984,10 +4017,11 @@ pWInfo->pOrderBy = pOrderBy; pWInfo->pResultSet = pResultSet; pWInfo->iBreak = pWInfo->iContinue = sqlite3VdbeMakeLabel(v); pWInfo->wctrlFlags = wctrlFlags; pWInfo->savedNQueryLoop = pParse->nQueryLoop; + assert( pWInfo->eOnePass==ONEPASS_OFF ); /* ONEPASS defaults to OFF */ pMaskSet = &pWInfo->sMaskSet; sWLB.pWInfo = pWInfo; sWLB.pWC = &pWInfo->sWC; sWLB.pNew = (WhereLoop*)(((char*)pWInfo)+nByteWInfo); assert( EIGHT_BYTE_ALIGNMENT(sWLB.pNew) ); @@ -4023,18 +4057,16 @@ } } /* Assign a bit from the bitmask to every term in the FROM clause. ** - ** When assigning bitmask values to FROM clause cursors, it must be - ** the case that if X is the bitmask for the N-th FROM clause term then - ** the bitmask for all FROM clause terms to the left of the N-th term - ** is (X-1). An expression from the ON clause of a LEFT JOIN can use - ** its Expr.iRightJoinTable value to find the bitmask of the right table - ** of the join. Subtracting one from the right table bitmask gives a - ** bitmask for all tables to the left of the join. Knowing the bitmask - ** for all tables to the left of a left join is important. Ticket #3015. + ** The N-th term of the FROM clause is assigned a bitmask of 1<nSrc tables in ** pTabList, not just the first nTabList tables. nTabList is normally ** equal to pTabList->nSrc but might be shortened to 1 if the ** WHERE_ONETABLE_ONLY flag is set. @@ -4041,18 +4073,14 @@ */ for(ii=0; iinSrc; ii++){ createMask(pMaskSet, pTabList->a[ii].iCursor); sqlite3WhereTabFuncArgs(pParse, &pTabList->a[ii], &pWInfo->sWC); } -#ifndef NDEBUG - { - Bitmask toTheLeft = 0; - for(ii=0; iinSrc; ii++){ - Bitmask m = sqlite3WhereGetMask(pMaskSet, pTabList->a[ii].iCursor); - assert( (m-1)==toTheLeft ); - toTheLeft |= m; - } +#ifdef SQLITE_DEBUG + for(ii=0; iinSrc; ii++){ + Bitmask m = sqlite3WhereGetMask(pMaskSet, pTabList->a[ii].iCursor); + assert( m==MASKBIT(ii) ); } #endif /* Analyze all of the subexpressions. */ sqlite3WhereExprAnalyze(pTabList, &pWInfo->sWC); @@ -4176,15 +4204,20 @@ ** to use a one-pass algorithm, determine if this is appropriate. ** The one-pass algorithm only works if the WHERE clause constrains ** the statement to update or delete a single row. */ assert( (wctrlFlags & WHERE_ONEPASS_DESIRED)==0 || pWInfo->nLevel==1 ); - if( (wctrlFlags & WHERE_ONEPASS_DESIRED)!=0 - && (pWInfo->a[0].pWLoop->wsFlags & WHERE_ONEROW)!=0 ){ - pWInfo->okOnePass = 1; - if( HasRowid(pTabList->a[0].pTab) ){ - pWInfo->a[0].pWLoop->wsFlags &= ~WHERE_IDX_ONLY; + if( (wctrlFlags & WHERE_ONEPASS_DESIRED)!=0 ){ + int wsFlags = pWInfo->a[0].pWLoop->wsFlags; + int bOnerow = (wsFlags & WHERE_ONEROW)!=0; + if( bOnerow || ( (wctrlFlags & WHERE_ONEPASS_MULTIROW) + && 0==(wsFlags & WHERE_VIRTUALTABLE) + )){ + pWInfo->eOnePass = bOnerow ? ONEPASS_SINGLE : ONEPASS_MULTI; + if( HasRowid(pTabList->a[0].pTab) ){ + pWInfo->a[0].pWLoop->wsFlags &= ~WHERE_IDX_ONLY; + } } } /* Open all tables in the pTabList and any indices selected for ** searching those tables. @@ -4211,19 +4244,19 @@ }else #endif if( (pLoop->wsFlags & WHERE_IDX_ONLY)==0 && (wctrlFlags & WHERE_OMIT_OPEN_CLOSE)==0 ){ int op = OP_OpenRead; - if( pWInfo->okOnePass ){ + if( pWInfo->eOnePass!=ONEPASS_OFF ){ op = OP_OpenWrite; pWInfo->aiCurOnePass[0] = pTabItem->iCursor; }; sqlite3OpenTable(pParse, pTabItem->iCursor, iDb, pTab, op); assert( pTabItem->iCursor==pLevel->iTabCur ); - testcase( !pWInfo->okOnePass && pTab->nCol==BMS-1 ); - testcase( !pWInfo->okOnePass && pTab->nCol==BMS ); - if( !pWInfo->okOnePass && pTab->nColeOnePass==ONEPASS_OFF && pTab->nCol==BMS-1 ); + testcase( pWInfo->eOnePass==ONEPASS_OFF && pTab->nCol==BMS ); + if( pWInfo->eOnePass==ONEPASS_OFF && pTab->nColcolUsed; int n = 0; for(; b; b=b>>1, n++){} sqlite3VdbeChangeP4(v, sqlite3VdbeCurrentAddr(v)-1, SQLITE_INT_TO_PTR(n), P4_INT32); @@ -4247,11 +4280,11 @@ ){ /* This is one term of an OR-optimization using the PRIMARY KEY of a ** WITHOUT ROWID table. No need for a separate index */ iIndexCur = pLevel->iTabCur; op = 0; - }else if( pWInfo->okOnePass ){ + }else if( pWInfo->eOnePass!=ONEPASS_OFF ){ Index *pJ = pTabItem->pTab->pIndex; iIndexCur = iIdxCur; assert( wctrlFlags & WHERE_ONEPASS_DESIRED ); while( ALWAYS(pJ) && pJ!=pIx ){ iIndexCur++; @@ -4455,11 +4488,11 @@ if( (pTab->tabFlags & TF_Ephemeral)==0 && pTab->pSelect==0 && (pWInfo->wctrlFlags & WHERE_OMIT_OPEN_CLOSE)==0 ){ int ws = pLoop->wsFlags; - if( !pWInfo->okOnePass && (ws & WHERE_IDX_ONLY)==0 ){ + if( pWInfo->eOnePass==ONEPASS_OFF && (ws & WHERE_IDX_ONLY)==0 ){ sqlite3VdbeAddOp1(v, OP_Close, pTabItem->iCursor); } if( (ws & WHERE_INDEXED)!=0 && (ws & (WHERE_IPK|WHERE_AUTO_INDEX))==0 && pLevel->iIdxCur!=pWInfo->aiCurOnePass[1] Index: src/whereInt.h ================================================================== --- src/whereInt.h +++ src/whereInt.h @@ -284,10 +284,11 @@ */ struct WhereScan { WhereClause *pOrigWC; /* Original, innermost WhereClause */ WhereClause *pWC; /* WhereClause currently being scanned */ char *zCollName; /* Required collating sequence, if not NULL */ + Expr *pIdxExpr; /* Search for this index expression */ char idxaff; /* Must match this affinity, if zCollName!=NULL */ unsigned char nEquiv; /* Number of entries in aEquiv[] */ unsigned char iEquiv; /* Next unused slot in aEquiv[] */ u32 opMask; /* Acceptable operators */ int k; /* Resume scanning at this->pWC->a[this->k] */ @@ -409,11 +410,11 @@ Bitmask revMask; /* Mask of ORDER BY terms that need reversing */ LogEst nRowOut; /* Estimated number of output rows */ u16 wctrlFlags; /* Flags originally passed to sqlite3WhereBegin() */ i8 nOBSat; /* Number of ORDER BY terms satisfied by indices */ u8 sorted; /* True if really sorted (not just grouped) */ - u8 okOnePass; /* Ok to use one-pass algorithm for UPDATE/DELETE */ + u8 eOnePass; /* ONEPASS_OFF, or _SINGLE, or _MULTI */ u8 untestedTerms; /* Not all WHERE terms resolved by outer loop */ u8 eDistinct; /* One of the WHERE_DISTINCT_* values below */ u8 nLevel; /* Number of nested loop */ int iTop; /* The very beginning of the WHERE loop */ int iContinue; /* Jump here to continue with next record */ Index: src/wherecode.c ================================================================== --- src/wherecode.c +++ src/wherecode.c @@ -38,10 +38,20 @@ if( iTerm ) sqlite3StrAccumAppend(pStr, " AND ", 5); sqlite3StrAccumAppendAll(pStr, zColumn); sqlite3StrAccumAppend(pStr, zOp, 1); sqlite3StrAccumAppend(pStr, "?", 1); } + +/* +** Return the name of the i-th column of the pIdx index. +*/ +static const char *explainIndexColumnName(Index *pIdx, int i){ + i = pIdx->aiColumn[i]; + if( i==(-2) ) return ""; + if( i==(-1) ) return "rowid"; + return pIdx->pTable->aCol[i].zName; +} /* ** Argument pLevel describes a strategy for scanning table pTab. This ** function appends text to pStr that describes the subset of table ** rows scanned by the strategy in the form of an SQL expression. @@ -58,28 +68,26 @@ static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop, Table *pTab){ Index *pIndex = pLoop->u.btree.pIndex; u16 nEq = pLoop->u.btree.nEq; u16 nSkip = pLoop->nSkip; int i, j; - Column *aCol = pTab->aCol; - i16 *aiColumn = pIndex->aiColumn; if( nEq==0 && (pLoop->wsFlags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))==0 ) return; sqlite3StrAccumAppend(pStr, " (", 2); for(i=0; i=nSkip ? "%s=?" : "ANY(%s)", z); } j = i; if( pLoop->wsFlags&WHERE_BTM_LIMIT ){ - char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName; + const char *z = explainIndexColumnName(pIndex, i); explainAppendTerm(pStr, i++, z, ">"); } if( pLoop->wsFlags&WHERE_TOP_LIMIT ){ - char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName; + const char *z = explainIndexColumnName(pIndex, j); explainAppendTerm(pStr, i, z, "<"); } sqlite3StrAccumAppend(pStr, ")", 1); } @@ -1059,11 +1067,16 @@ /* pIdx is a covering index. No need to access the main table. */ }else if( HasRowid(pIdx->pTable) ){ iRowidReg = ++pParse->nMem; sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, iRowidReg); sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); - sqlite3VdbeAddOp2(v, OP_Seek, iCur, iRowidReg); /* Deferred seek */ + if( pWInfo->eOnePass!=ONEPASS_OFF ){ + sqlite3VdbeAddOp3(v, OP_NotExists, iCur, 0, iRowidReg); + VdbeCoverage(v); + }else{ + sqlite3VdbeAddOp2(v, OP_Seek, iCur, iRowidReg); /* Deferred seek */ + } }else if( iCur!=iIdxCur ){ Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable); iRowidReg = sqlite3GetTempRange(pParse, pPk->nKeyCol); for(j=0; jnKeyCol; j++){ k = sqlite3ColumnOfIndex(pIdx, pPk->aiColumn[j]); Index: src/whereexpr.c ================================================================== --- src/whereexpr.c +++ src/whereexpr.c @@ -792,10 +792,55 @@ } pS = pS->pPrior; } return mask; } + +/* +** Expression pExpr is one operand of a comparison operator that might +** be useful for indexing. This routine checks to see if pExpr appears +** in any index. Return TRUE (1) if pExpr is an indexed term and return +** FALSE (0) if not. If TRUE is returned, also set *piCur to the cursor +** number of the table that is indexed and *piColumn to the column number +** of the column that is indexed, or -2 if an expression is being indexed. +** +** If pExpr is a TK_COLUMN column reference, then this routine always returns +** true even if that particular column is not indexed, because the column +** might be added to an automatic index later. +*/ +static int exprMightBeIndexed( + SrcList *pFrom, /* The FROM clause */ + Bitmask mPrereq, /* Bitmask of FROM clause terms referenced by pExpr */ + Expr *pExpr, /* An operand of a comparison operator */ + int *piCur, /* Write the referenced table cursor number here */ + int *piColumn /* Write the referenced table column number here */ +){ + Index *pIdx; + int i; + int iCur; + if( pExpr->op==TK_COLUMN ){ + *piCur = pExpr->iTable; + *piColumn = pExpr->iColumn; + return 1; + } + if( mPrereq==0 ) return 0; /* No table references */ + if( (mPrereq&(mPrereq-1))!=0 ) return 0; /* Refs more than one table */ + for(i=0; mPrereq>1; i++, mPrereq>>=1){} + iCur = pFrom->a[i].iCursor; + for(pIdx=pFrom->a[i].pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->aColExpr==0 ) continue; + for(i=0; inKeyCol; i++){ + if( pIdx->aiColumn[i]!=(-2) ) continue; + if( sqlite3ExprCompare(pExpr, pIdx->aColExpr->a[i].pExpr, iCur)==0 ){ + *piCur = iCur; + *piColumn = -2; + return 1; + } + } + } + return 0; +} /* ** The input to this routine is an WhereTerm structure with only the ** "pExpr" field filled in. The job of this routine is to analyze the ** subexpression and populate all the other fields of the WhereTerm @@ -863,20 +908,23 @@ pTerm->prereqAll = prereqAll; pTerm->leftCursor = -1; pTerm->iParent = -1; pTerm->eOperator = 0; if( allowedOp(op) ){ + int iCur, iColumn; Expr *pLeft = sqlite3ExprSkipCollate(pExpr->pLeft); Expr *pRight = sqlite3ExprSkipCollate(pExpr->pRight); u16 opMask = (pTerm->prereqRight & prereqLeft)==0 ? WO_ALL : WO_EQUIV; - if( pLeft->op==TK_COLUMN ){ - pTerm->leftCursor = pLeft->iTable; - pTerm->u.leftColumn = pLeft->iColumn; + if( exprMightBeIndexed(pSrc, prereqLeft, pLeft, &iCur, &iColumn) ){ + pTerm->leftCursor = iCur; + pTerm->u.leftColumn = iColumn; pTerm->eOperator = operatorMask(op) & opMask; } if( op==TK_IS ) pTerm->wtFlags |= TERM_IS; - if( pRight && pRight->op==TK_COLUMN ){ + if( pRight + && exprMightBeIndexed(pSrc, pTerm->prereqRight, pRight, &iCur, &iColumn) + ){ WhereTerm *pNew; Expr *pDup; u16 eExtraOp = 0; /* Extra bits for pNew->eOperator */ if( pTerm->leftCursor>=0 ){ int idxNew; @@ -901,12 +949,12 @@ pDup = pExpr; pNew = pTerm; } exprCommute(pParse, pDup); pLeft = sqlite3ExprSkipCollate(pDup->pLeft); - pNew->leftCursor = pLeft->iTable; - pNew->u.leftColumn = pLeft->iColumn; + pNew->leftCursor = iCur; + pNew->u.leftColumn = iColumn; testcase( (prereqLeft | extraRight) != prereqLeft ); pNew->prereqRight = prereqLeft | extraRight; pNew->prereqAll = prereqAll; pNew->eOperator = (operatorMask(pDup->op) + eExtraOp) & opMask; } Index: test/delete.test ================================================================== --- test/delete.test +++ test/delete.test @@ -65,11 +65,10 @@ } 1 do_test delete-3.1.7 { execsql {SELECT * FROM table1 ORDER BY f1} } {1 2 4 16} integrity_check delete-3.2 - # Semantic errors in the WHERE clause # do_test delete-4.1 { execsql {CREATE TABLE table2(f1 int, f2 int)} ADDED test/delete4.test Index: test/delete4.test ================================================================== --- /dev/null +++ test/delete4.test @@ -0,0 +1,102 @@ +# 2005 August 24 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this script is a test of the DELETE command. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix delete4 + +do_execsql_test 1.1 { + CREATE TABLE t1(x INTEGER PRIMARY KEY, y); + INSERT INTO t1 VALUES(1, 0); + INSERT INTO t1 VALUES(2, 1); + INSERT INTO t1 VALUES(3, 0); + INSERT INTO t1 VALUES(4, 1); + INSERT INTO t1 VALUES(5, 0); + INSERT INTO t1 VALUES(6, 1); + INSERT INTO t1 VALUES(7, 0); + INSERT INTO t1 VALUES(8, 1); +} +do_execsql_test 1.2 { + DELETE FROM t1 WHERE y=1; +} +do_execsql_test 1.3 { + SELECT x FROM t1; +} {1 3 5 7} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 2.1 { + CREATE TABLE t1(x INTEGER PRIMARY KEY, y, z); + INSERT INTO t1 VALUES(1, 0, randomblob(200)); + INSERT INTO t1 VALUES(2, 1, randomblob(200)); + INSERT INTO t1 VALUES(3, 0, randomblob(200)); + INSERT INTO t1 VALUES(4, 1, randomblob(200)); + INSERT INTO t1 VALUES(5, 0, randomblob(200)); + INSERT INTO t1 VALUES(6, 1, randomblob(200)); + INSERT INTO t1 VALUES(7, 0, randomblob(200)); + INSERT INTO t1 VALUES(8, 1, randomblob(200)); +} +do_execsql_test 2.2 { + DELETE FROM t1 WHERE y=1; +} +do_execsql_test 2.3 { + SELECT x FROM t1; +} {1 3 5 7} + + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 3.1 { + CREATE TABLE t1(a, b, PRIMARY KEY(a, b)) WITHOUT ROWID; + INSERT INTO t1 VALUES(1, 2); + INSERT INTO t1 VALUES(2, 4); + INSERT INTO t1 VALUES(1, 5); + DELETE FROM t1 WHERE a=1; + SELECT * FROM t1; +} {2 4} + +#------------------------------------------------------------------------- +# DELETE statement that uses the OR optimization +# +reset_db +do_execsql_test 3.1 { + CREATE TABLE t1(i INTEGER PRIMARY KEY, a, b); + CREATE INDEX i1a ON t1(a); + CREATE INDEX i1b ON t1(b); + INSERT INTO t1 VALUES(1, 'one', 'i'); + INSERT INTO t1 VALUES(2, 'two', 'ii'); + INSERT INTO t1 VALUES(3, 'three', 'iii'); + INSERT INTO t1 VALUES(4, 'four', 'iv'); + INSERT INTO t1 VALUES(5, 'one', 'i'); + INSERT INTO t1 VALUES(6, 'two', 'ii'); + INSERT INTO t1 VALUES(7, 'three', 'iii'); + INSERT INTO t1 VALUES(8, 'four', 'iv'); +} {} + +do_execsql_test 3.2 { + DELETE FROM t1 WHERE a='two' OR b='iv'; +} + +do_execsql_test 3.3 { + SELECT i FROM t1 ORDER BY i; +} {1 3 5 7} + +do_execsql_test 3.4 { + PRAGMA integrity_check; +} {ok} + + +finish_test Index: test/e_createtable.test ================================================================== --- test/e_createtable.test +++ test/e_createtable.test @@ -371,12 +371,12 @@ 4 {CREATE TABLE auxb."sqlite-"(z)} {} 5 {CREATE TABLE "SQLITE-TBL"(z)} {} } -# EVIDENCE-OF: R-10195-31023 If a is specified, it -# must be either "main", "temp", or the name of an attached database. +# EVIDENCE-OF: R-18448-33677 If a schema-name is specified, it must be +# either "main", "temp", or the name of an attached database. # # EVIDENCE-OF: R-39822-07822 In this case the new table is created in # the named database. # # Test cases 1.2.* test the first of the two requirements above. The @@ -420,13 +420,13 @@ } { 1 "CREATE TEMP TABLE t1(a, b)" {{} t1 {} {}} 2 "CREATE TEMPORARY TABLE t2(a, b)" {{} {t1 t2} {} {}} } -# EVIDENCE-OF: R-49439-47561 It is an error to specify both a -# and the TEMP or TEMPORARY keyword, unless the -# is "temp". +# EVIDENCE-OF: R-23976-43329 It is an error to specify both a +# schema-name and the TEMP or TEMPORARY keyword, unless the schema-name +# is "temp". # drop_all_tables do_createtable_tests 1.5.1 -error { temporary table name must be unqualified } { @@ -445,13 +445,12 @@ 2 "CREATE TEMPORARY TABLE temp.t2(a, b)" {{} {t1 t2} {} {}} 3 "CREATE TEMP TABLE TEMP.t3(a, b)" {{} {t1 t2 t3} {} {}} 4 "CREATE TEMPORARY TABLE TEMP.xxx(x)" {{} {t1 t2 t3 xxx} {} {}} } -# EVIDENCE-OF: R-00917-09393 If no database name is specified and the -# TEMP keyword is not present then the table is created in the main -# database. +# EVIDENCE-OF: R-31997-24564 If no schema name is specified and the TEMP +# keyword is not present then the table is created in the main database. # drop_all_tables do_createtable_tests 1.6 -tclquery { unset -nocomplain X array set X [table_list] Index: test/e_delete.test ================================================================== --- test/e_delete.test +++ test/e_delete.test @@ -68,13 +68,16 @@ do_delete_tests e_delete-1.1 { 1 "DELETE FROM t1 ; SELECT * FROM t1" {} 2 "DELETE FROM main.t2 ; SELECT * FROM t2" {} } -# EVIDENCE-OF: R-30203-16177 If a WHERE clause is supplied, then only -# those rows for which the result of evaluating the WHERE clause as a -# boolean expression is true are deleted. +# EVIDENCE-OF: R-26300-50198 If a WHERE clause is supplied, then only +# those rows for which the WHERE clause boolean expression is true are +# deleted. +# +# EVIDENCE-OF: R-23360-48280 Rows for which the expression is false or +# NULL are retained. # do_delete_tests e_delete-1.2 { 1 "DELETE FROM t3 WHERE 1 ; SELECT x FROM t3" {} 2 "DELETE FROM main.t4 WHERE 0 ; SELECT x FROM t4" {1 2 3 4 5} 3 "DELETE FROM t4 WHERE 0.0 ; SELECT x FROM t4" {1 2 3 4 5} @@ -115,12 +118,12 @@ # EVIDENCE-OF: R-09681-58560 The table-name specified as part of a # DELETE statement within a trigger body must be unqualified. # -# EVIDENCE-OF: R-36771-43788 In other words, the database-name. prefix -# on the table name is not allowed within triggers. +# EVIDENCE-OF: R-12275-20298 In other words, the schema-name. prefix on +# the table name is not allowed within triggers. # do_delete_tests e_delete-2.1 -error { qualified table names are not allowed on INSERT, UPDATE, and DELETE statements within triggers } { 1 { Index: test/e_expr.test ================================================================== --- test/e_expr.test +++ test/e_expr.test @@ -1424,20 +1424,20 @@ typeof(CAST(X'555655' as TEXT)), CAST(X'555655' as TEXT), typeof(CAST('1.23abc' as REAL)), CAST('1.23abc' as REAL), typeof(CAST(4.5 as INTEGER)), CAST(4.5 as INTEGER) } {text UVU real 1.23 integer 4} -# EVIDENCE-OF: R-27225-65050 If the value of is NULL, then -# the result of the CAST expression is also NULL. +# EVIDENCE-OF: R-32434-09092 If the value of expr is NULL, then the +# result of the CAST expression is also NULL. # do_expr_test e_expr-27.2.1 { CAST(NULL AS integer) } null {} do_expr_test e_expr-27.2.2 { CAST(NULL AS text) } null {} do_expr_test e_expr-27.2.3 { CAST(NULL AS blob) } null {} do_expr_test e_expr-27.2.4 { CAST(NULL AS number) } null {} -# EVIDENCE-OF: R-31076-23575 Casting a value to a with -# no affinity causes the value to be converted into a BLOB. +# EVIDENCE-OF: R-43522-35548 Casting a value to a type-name with no +# affinity causes the value to be converted into a BLOB. # do_expr_test e_expr-27.3.1 { CAST('abc' AS blob) } blob abc do_expr_test e_expr-27.3.2 { CAST('def' AS shobblob_x) } blob def do_expr_test e_expr-27.3.3 { CAST('ghi' AS abbLOb10) } blob ghi Index: test/e_insert.test ================================================================== --- test/e_insert.test +++ test/e_insert.test @@ -155,13 +155,13 @@ 3a "INSERT INTO a2(a) VALUES(3),(4)" {} 3b "SELECT count(*) FROM a2" {4} } -# EVIDENCE-OF: R-53616-44976 If no column-list is specified then the -# number of values inserted into each row must be the same as the number -# of columns in the table. +# EVIDENCE-OF: R-19218-01018 If the column-name list after table-name is +# omitted then the number of values inserted into each row must be the +# same as the number of columns in the table. # # A test in the block above verifies that if the VALUES list has the # correct number of columns (for table a2, 3 columns) works. So these # tests just show that other values cause an error. # @@ -189,13 +189,13 @@ 3a "INSERT INTO a2 VALUES((SELECT count(*) FROM a2), 'x', 'y')" {} 3b "SELECT * FROM a2 WHERE oid=last_insert_rowid()" {2 x y} } -# EVIDENCE-OF: R-09234-17933 If a column-list is specified, then the -# number of values in each term of the VALUE list must match the number -# of specified columns. +# EVIDENCE-OF: R-21115-58321 If a column-name list is specified, then +# the number of values in each term of the VALUE list must match the +# number of specified columns. # do_insert_tests e_insert-1.4 -error { %d values for %d columns } { 1 "INSERT INTO a2(a, b, c) VALUES(1)" {1 3} @@ -392,12 +392,12 @@ do_catchsql_test e_insert-4.1.$tn.1 $sql [list [expr {$error!=""}] $error] do_execsql_test e_insert-4.1.$tn.2 {SELECT * FROM a4} [list {*}$data] do_test e_insert-4.1.$tn.3 {sqlite3_get_autocommit db} $ac } -# EVIDENCE-OF: R-64196-02418 The optional "database-name." prefix on the -# table-name is support for top-level INSERT statements only. +# EVIDENCE-OF: R-59829-49719 The optional "schema-name." prefix on the +# table-name is supported for top-level INSERT statements only. # # EVIDENCE-OF: R-05731-00924 The table name must be unqualified for # INSERT statements that occur within CREATE TRIGGER statements. # set err {1 {qualified table names are not allowed on INSERT, UPDATE, and DELETE statements within triggers}} Index: test/e_reindex.test ================================================================== --- test/e_reindex.test +++ test/e_reindex.test @@ -263,14 +263,13 @@ test_index 5.39 t2 collA length test_index 5.40 t2 collB value test_index 5.41 aux.t1 collA length test_index 5.42 aux.t1 collB value -# EVIDENCE-OF: R-15639-02023 If no database-name is specified and there -# exists both a table or index and a collation sequence of the specified -# name, SQLite interprets this as a request to rebuild the indices that -# use the named collation sequence. +# EVIDENCE-OF: R-35892-30289 For a command of the form "REINDEX name", a +# match against collation-name takes precedence over a match against +# index-name or table-name. # set_collations value length do_execsql_test e_reindex-2.6.0 { CREATE TABLE collA(x); CREATE INDEX icolla_a ON collA(x COLLATE collA); Index: test/e_resolve.test ================================================================== --- test/e_resolve.test +++ test/e_resolve.test @@ -63,13 +63,13 @@ do_execsql_test 1.1 { SELECT * FROM n1 } {temp n1} do_execsql_test 1.2 { SELECT * FROM n2 } {main n2} do_execsql_test 1.3 { SELECT * FROM n3 } {at1 n3} do_execsql_test 1.4 { SELECT * FROM n4 } {at2 n4} -# EVIDENCE-OF: R-54577-28142 If a database name is specified as part of -# an object reference, it must be either "main", or "temp" or the name -# of an attached database. +# EVIDENCE-OF: R-00634-08585 If a schema name is specified as part of an +# object reference, it must be either "main", or "temp" or the +# schema-name of an attached database. # # Or else it is a "no such table: xxx" error. # resolve_reopen_db do_execsql_test 2.1.1 { SELECT * FROM main.n1 } {main n1} @@ -77,21 +77,21 @@ do_execsql_test 2.1.3 { SELECT * FROM at1.n1 } {at1 n1} do_execsql_test 2.1.4 { SELECT * FROM at2.n1 } {at2 n1} do_catchsql_test 2.2 { SELECT * FROM xxx.n1 } {1 {no such table: xxx.n1}} -# EVIDENCE-OF: R-26223-47623 Like other SQL identifiers, database names +# EVIDENCE-OF: R-17446-42210 Like other SQL identifiers, schema names # are case-insensitive. # resolve_reopen_db do_execsql_test 3.1 { SELECT * FROM MAIN.n1 } {main n1} do_execsql_test 3.2 { SELECT * FROM tEmP.n1 } {temp n1} do_execsql_test 3.3 { SELECT * FROM aT1.n1 } {at1 n1} do_execsql_test 3.4 { SELECT * FROM At2.n1 } {at2 n1} -# EVIDENCE-OF: R-15639-28392 If a database name is specified, then only -# the named database is searched for the named object. +# EVIDENCE-OF: R-14755-58619 If a schema name is specified, then only +# that one schema is searched for the named object. # do_catchsql_test 4.1 { SELECT * FROM temp.n2 } {1 {no such table: temp.n2}} do_catchsql_test 4.2 { SELECT * FROM main.n2 } {0 {main n2}} do_catchsql_test 4.3 { SELECT * FROM at1.n2 } {0 {at1 n2}} do_catchsql_test 4.4 { SELECT * FROM at2.n2 } {0 {at2 n2}} Index: test/e_update.test ================================================================== --- test/e_update.test +++ test/e_update.test @@ -144,13 +144,12 @@ 2 "UPDATE t1 SET a = 'greek' ; SELECT * FROM t1" {greek roman greek roman greek roman} } -# EVIDENCE-OF: R-42117-40023 Otherwise, the UPDATE affects only those -# rows for which the result of evaluating the WHERE clause expression as -# a boolean expression is true. +# EVIDENCE-OF: R-58095-46013 Otherwise, the UPDATE affects only those +# rows for which the WHERE clause boolean expression is true. # do_execsql_test e_update-1.3.0 { DELETE FROM main.t1; INSERT INTO main.t1 VALUES(NULL, ''); INSERT INTO main.t1 VALUES(1, 'i'); @@ -263,13 +262,14 @@ 1 "UPDATE t2 SET a=b+c" {5 1 4 14 5 9 11 6 5} 2 "UPDATE t2 SET a=b, b=a" {1 5 4 5 14 9 6 11 5} 3 "UPDATE t2 SET a=c||c, c=NULL" {44 5 {} 99 14 {} 55 11 {}} } -# EVIDENCE-OF: R-12619-24112 The optional conflict-clause allows the -# user to nominate a specific constraint conflict resolution algorithm -# to use during this one UPDATE command. +# EVIDENCE-OF: R-28518-13457 The optional "OR action" conflict clause +# that follows the UPDATE keyword allows the user to nominate a specific +# constraint conflict resolution algorithm to use during this one UPDATE +# command. # do_execsql_test e_update-1.8.0 { DELETE FROM t3; INSERT INTO t3 VALUES(1, 'one'); INSERT INTO t3 VALUES(2, 'two'); @@ -320,12 +320,12 @@ # EVIDENCE-OF: R-12123-54095 The table-name specified as part of an # UPDATE statement within a trigger body must be unqualified. # -# EVIDENCE-OF: R-09690-36749 In other words, the database-name. prefix -# on the table name of the UPDATE is not allowed within triggers. +# EVIDENCE-OF: R-43190-62442 In other words, the schema-name. prefix on +# the table name of the UPDATE is not allowed within triggers. # do_update_tests e_update-2.1 -error { qualified table names are not allowed on INSERT, UPDATE, and DELETE statements within triggers } { 1 { Index: test/fkey5.test ================================================================== --- test/fkey5.test +++ test/fkey5.test @@ -10,12 +10,12 @@ #*********************************************************************** # This file implements regression tests for SQLite library. # # This file tests the PRAGMA foreign_key_check command. # -# EVIDENCE-OF: R-01427-50262 PRAGMA database.foreign_key_check; PRAGMA -# database.foreign_key_check(table-name); +# EVIDENCE-OF: R-15402-03103 PRAGMA schema.foreign_key_check; PRAGMA +# schema.foreign_key_check(table-name); # # EVIDENCE-OF: R-23918-17301 The foreign_key_check pragma checks the # database, or the table called "table-name", for foreign key # constraints that are violated and returns one row of output for each # violation. Index: test/index.test ================================================================== --- test/index.test +++ test/index.test @@ -54,23 +54,23 @@ } {1 {no such table: main.test1}} # Try adding an index on a column of a table where the table # exists but the column does not. # -do_test index-2.1 { +do_test index-2.1b { execsql {CREATE TABLE test1(f1 int, f2 int, f3 int)} set v [catch {execsql {CREATE INDEX index1 ON test1(f4)}} msg] lappend v $msg -} {1 {table test1 has no column named f4}} +} {1 {no such column: f4}} # Try an index with some columns that match and others that do now. # do_test index-2.2 { set v [catch {execsql {CREATE INDEX index1 ON test1(f1, f2, f4, f3)}} msg] execsql {DROP TABLE test1} lappend v $msg -} {1 {table test1 has no column named f4}} +} {1 {no such column: f4}} # Try creating a bunch of indices on the same table # set r {} for {set i 1} {$i<100} {incr i} { Index: test/index2.test ================================================================== --- test/index2.test +++ test/index2.test @@ -1,6 +1,6 @@ -# 2005 January 11 +# 2005-01-11 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. @@ -9,11 +9,10 @@ # #*********************************************************************** # This file implements regression tests for SQLite library. The # focus of this file is testing the CREATE INDEX statement. # -# $Id: index2.test,v 1.3 2006/03/03 19:12:30 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl # Create a table with a large number of columns Index: test/index3.test ================================================================== --- test/index3.test +++ test/index3.test @@ -1,6 +1,6 @@ -# 2005 February 14 +# 2005-02-14 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. @@ -9,11 +9,10 @@ # #*********************************************************************** # This file implements regression tests for SQLite library. The # focus of this file is testing the CREATE INDEX statement. # -# $Id: index3.test,v 1.3 2008/03/19 13:03:34 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl @@ -37,20 +36,45 @@ } {1 {UNIQUE constraint failed: t1.a}} do_test index3-1.3 { catchsql COMMIT; } {0 {}} integrity_check index3-1.4 + +# Backwards compatibility test: +# +# Verify that CREATE INDEX statements that use strings instead of +# identifiers for the the column names continue to work correctly. +# This is undocumented behavior retained for backwards compatiblity. +# +do_execsql_test index3-2.1 { + DROP TABLE t1; + CREATE TABLE t1(a, b, c, d, e, + PRIMARY KEY('a'), UNIQUE('b' COLLATE nocase DESC)); + CREATE INDEX t1c ON t1('c'); + CREATE INDEX t1d ON t1('d' COLLATE binary ASC); + WITH RECURSIVE c(x) AS (VALUES(1) UNION SELECT x+1 FROM c WHERE x<30) + INSERT INTO t1(a,b,c,d,e) + SELECT x, printf('ab%03xxy',x), x, x, x FROM c; +} {} +do_execsql_test index3-2.2 { + SELECT a FROM t1 WHERE b='ab005xy' COLLATE nocase; +} {5} +do_execsql_test index3-2.2eqp { + EXPLAIN QUERY PLAN + SELECT a FROM t1 WHERE b='ab005xy' COLLATE nocase; +} {/USING INDEX/} + # This test corrupts the database file so it must be the last test # in the series. # do_test index3-99.1 { execsql { PRAGMA writable_schema=on; - UPDATE sqlite_master SET sql='nonsense'; + UPDATE sqlite_master SET sql='nonsense' WHERE name='t1d' } db close catch { sqlite3 db test.db } - catchsql { DROP INDEX i1 } -} {1 {malformed database schema (t1)}} + catchsql { DROP INDEX t1c } +} {1 {malformed database schema (t1d)}} finish_test Index: test/indexedby.test ================================================================== --- test/indexedby.test +++ test/indexedby.test @@ -56,11 +56,11 @@ # Parser tests. Test that an INDEXED BY or NOT INDEX clause can be # attached to a table in the FROM clause, but not to a sub-select or # SQL view. Also test that specifying an index that does not exist or # is attached to a different table is detected as an error. # -# EVIDENCE-OF: R-63761-48810 -- syntax diagram qualified-table-name +# EVIDENCE-OF: R-07004-11522 -- syntax diagram qualified-table-name # # EVIDENCE-OF: R-58230-57098 The "INDEXED BY index-name" phrase # specifies that the named index must be used in order to look up values # on the preceding table. # @@ -229,17 +229,17 @@ # # Test that "INDEXED BY" can be used in a DELETE statement. # do_execsql_test indexedby-7.1 { EXPLAIN QUERY PLAN DELETE FROM t1 WHERE a = 5 -} {0 0 0 {SEARCH TABLE t1 USING COVERING INDEX i1 (a=?)}} +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}} do_execsql_test indexedby-7.2 { EXPLAIN QUERY PLAN DELETE FROM t1 NOT INDEXED WHERE a = 5 } {0 0 0 {SCAN TABLE t1}} do_execsql_test indexedby-7.3 { EXPLAIN QUERY PLAN DELETE FROM t1 INDEXED BY i1 WHERE a = 5 -} {0 0 0 {SEARCH TABLE t1 USING COVERING INDEX i1 (a=?)}} +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}} do_execsql_test indexedby-7.4 { EXPLAIN QUERY PLAN DELETE FROM t1 INDEXED BY i1 WHERE a = 5 AND b = 10 } {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}} do_execsql_test indexedby-7.5 { EXPLAIN QUERY PLAN DELETE FROM t1 INDEXED BY i2 WHERE a = 5 AND b = 10 ADDED test/indexexpr1.test Index: test/indexexpr1.test ================================================================== --- /dev/null +++ test/indexexpr1.test @@ -0,0 +1,222 @@ +# 2015-08-31 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing indexes on expressions. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +do_execsql_test indexexpr1-100 { + CREATE TABLE t1(a,b,c); + INSERT INTO t1(a,b,c) + /* 123456789 123456789 123456789 123456789 123456789 123456789 */ + VALUES('In_the_beginning_was_the_Word',1,1), + ('and_the_Word_was_with_God',1,2), + ('and_the_Word_was_God',1,3), + ('The_same_was_in_the_beginning_with_God',2,1), + ('All_things_were_made_by_him',3,1), + ('and_without_him_was_not_any_thing_made_that_was_made',3,2); + CREATE INDEX t1a1 ON t1(substr(a,1,12)); +} {} +do_execsql_test indexexpr1-110 { + SELECT b, c, '|' FROM t1 WHERE substr(a,1,12)=='and_the_Word' ORDER BY b, c; +} {1 2 | 1 3 |} +do_execsql_test indexexpr1-110eqp { + EXPLAIN QUERY PLAN + SELECT b, c, '|' FROM t1 WHERE substr(a,1,12)=='and_the_Word' ORDER BY b, c; +} {/USING INDEX t1a1/} +do_execsql_test indexexpr1-120 { + SELECT b, c, '|' FROM t1 WHERE 'and_the_Word'==substr(a,1,12) ORDER BY b, c; +} {1 2 | 1 3 |} +do_execsql_test indexexpr1-120eqp { + EXPLAIN QUERY PLAN + SELECT b, c, '|' FROM t1 WHERE 'and_the_Word'==substr(a,1,12) ORDER BY b, c; +} {/USING INDEX t1a1/} + +do_execsql_test indexexpr1-130 { + CREATE INDEX t1ba ON t1(b,substr(a,2,3),c); + SELECT c FROM t1 WHERE b=1 AND substr(a,2,3)='nd_' ORDER BY c; +} {2 3} +do_execsql_test indexexpr1-130eqp { + EXPLAIN QUERY PLAN + SELECT c FROM t1 WHERE b=1 AND substr(a,2,3)='nd_' ORDER BY c; +} {/USING INDEX t1ba/} + +do_execsql_test indexexpr1-140 { + SELECT rowid, substr(a,b,3), '|' FROM t1 ORDER BY 2; +} {1 In_ | 2 and | 3 and | 6 d_w | 4 he_ | 5 l_t |} +do_execsql_test indexexpr1-141 { + CREATE INDEX t1abx ON t1(substr(a,b,3)); + SELECT rowid FROM t1 WHERE substr(a,b,3)<='and' ORDER BY +rowid; +} {1 2 3} +do_execsql_test indexexpr1-141eqp { + EXPLAIN QUERY PLAN + SELECT rowid FROM t1 WHERE substr(a,b,3)<='and' ORDER BY +rowid; +} {/USING INDEX t1abx/} +do_execsql_test indexexpr1-142 { + SELECT rowid FROM t1 WHERE +substr(a,b,3)<='and' ORDER BY +rowid; +} {1 2 3} +do_execsql_test indexexpr1-150 { + SELECT rowid FROM t1 WHERE substr(a,b,3) IN ('and','l_t','xyz') + ORDER BY +rowid; +} {2 3 5} +do_execsql_test indexexpr1-150eqp { + EXPLAIN QUERY PLAN + SELECT rowid FROM t1 WHERE substr(a,b,3) IN ('and','l_t','xyz') + ORDER BY +rowid; +} {/USING INDEX t1abx/} + +do_execsql_test indexexpr1-160 { + ALTER TABLE t1 ADD COLUMN d; + UPDATE t1 SET d=length(a); + CREATE INDEX t1a2 ON t1(SUBSTR(a, 27, 3)) WHERE d>=29; + SELECT rowid, b, c FROM t1 + WHERE substr(a,27,3)=='ord' AND d>=29; +} {1 1 1} +do_execsql_test indexexpr1-160eqp { + EXPLAIN QUERY PLAN + SELECT rowid, b, c FROM t1 + WHERE substr(a,27,3)=='ord' AND d>=29; +} {/USING INDEX t1a2/} + + +do_execsql_test indexexpr1-200 { + DROP TABLE t1; + CREATE TABLE t1(id ANY PRIMARY KEY, a,b,c) WITHOUT ROWID; + INSERT INTO t1(id,a,b,c) + VALUES(1,'In_the_beginning_was_the_Word',1,1), + (2,'and_the_Word_was_with_God',1,2), + (3,'and_the_Word_was_God',1,3), + (4,'The_same_was_in_the_beginning_with_God',2,1), + (5,'All_things_were_made_by_him',3,1), + (6,'and_without_him_was_not_any_thing_made_that_was_made',3,2); + CREATE INDEX t1a1 ON t1(substr(a,1,12)); +} {} +do_execsql_test indexexpr1-210 { + SELECT b, c, '|' FROM t1 WHERE substr(a,1,12)=='and_the_Word' ORDER BY b, c; +} {1 2 | 1 3 |} +do_execsql_test indexexpr1-210eqp { + EXPLAIN QUERY PLAN + SELECT b, c, '|' FROM t1 WHERE substr(a,1,12)=='and_the_Word' ORDER BY b, c; +} {/USING INDEX t1a1/} +do_execsql_test indexexpr1-220 { + SELECT b, c, '|' FROM t1 WHERE 'and_the_Word'==substr(a,1,12) ORDER BY b, c; +} {1 2 | 1 3 |} +do_execsql_test indexexpr1-220eqp { + EXPLAIN QUERY PLAN + SELECT b, c, '|' FROM t1 WHERE 'and_the_Word'==substr(a,1,12) ORDER BY b, c; +} {/USING INDEX t1a1/} + +do_execsql_test indexexpr1-230 { + CREATE INDEX t1ba ON t1(b,substr(a,2,3),c); + SELECT c FROM t1 WHERE b=1 AND substr(a,2,3)='nd_' ORDER BY c; +} {2 3} +do_execsql_test indexexpr1-230eqp { + EXPLAIN QUERY PLAN + SELECT c FROM t1 WHERE b=1 AND substr(a,2,3)='nd_' ORDER BY c; +} {/USING INDEX t1ba/} + +do_execsql_test indexexpr1-240 { + SELECT id, substr(a,b,3), '|' FROM t1 ORDER BY 2; +} {1 In_ | 2 and | 3 and | 6 d_w | 4 he_ | 5 l_t |} +do_execsql_test indexexpr1-241 { + CREATE INDEX t1abx ON t1(substr(a,b,3)); + SELECT id FROM t1 WHERE substr(a,b,3)<='and' ORDER BY +id; +} {1 2 3} +do_execsql_test indexexpr1-241eqp { + EXPLAIN QUERY PLAN + SELECT id FROM t1 WHERE substr(a,b,3)<='and' ORDER BY +id; +} {/USING INDEX t1abx/} +do_execsql_test indexexpr1-242 { + SELECT id FROM t1 WHERE +substr(a,b,3)<='and' ORDER BY +id; +} {1 2 3} +do_execsql_test indexexpr1-250 { + SELECT id FROM t1 WHERE substr(a,b,3) IN ('and','l_t','xyz') + ORDER BY +id; +} {2 3 5} +do_execsql_test indexexpr1-250eqp { + EXPLAIN QUERY PLAN + SELECT id FROM t1 WHERE substr(a,b,3) IN ('and','l_t','xyz') + ORDER BY +id; +} {/USING INDEX t1abx/} + +do_execsql_test indexexpr1-260 { + ALTER TABLE t1 ADD COLUMN d; + UPDATE t1 SET d=length(a); + CREATE INDEX t1a2 ON t1(SUBSTR(a, 27, 3)) WHERE d>=29; + SELECT id, b, c FROM t1 + WHERE substr(a,27,3)=='ord' AND d>=29; +} {1 1 1} +do_execsql_test indexexpr1-260eqp { + EXPLAIN QUERY PLAN + SELECT id, b, c FROM t1 + WHERE substr(a,27,3)=='ord' AND d>=29; +} {/USING INDEX t1a2/} + + +do_catchsql_test indexexpr1-300 { + CREATE TABLE t2(a,b,c); + CREATE INDEX t2x1 ON t2(a,b+random()); +} {1 {non-deterministic functions prohibited in index expressions}} +do_catchsql_test indexexpr1-301 { + CREATE INDEX t2x1 ON t2(a+julianday('now')); +} {1 {non-deterministic functions prohibited in index expressions}} +do_catchsql_test indexexpr1-310 { + CREATE INDEX t2x2 ON t2(a,b+(SELECT 15)); +} {1 {subqueries prohibited in index expressions}} +do_catchsql_test indexexpr1-320 { + CREATE TABLE e1(x,y,UNIQUE(y,substr(x,1,5))); +} {1 {expressions prohibited in PRIMARY KEY and UNIQUE constraints}} +do_catchsql_test indexexpr1-330 { + CREATE TABLE e1(x,y,PRIMARY KEY(y,substr(x,1,5))); +} {1 {expressions prohibited in PRIMARY KEY and UNIQUE constraints}} +do_catchsql_test indexexpr1-331 { + CREATE TABLE e1(x,y,PRIMARY KEY(y,substr(x,1,5))) WITHOUT ROWID; +} {1 {expressions prohibited in PRIMARY KEY and UNIQUE constraints}} +do_catchsql_test indexexpr1-340 { + CREATE TABLE e1(x,y,FOREIGN KEY(substr(y,1,5)) REFERENCES t1); +} {1 {near "(": syntax error}} + +do_execsql_test indexexpr1-400 { + CREATE TABLE t3(a,b,c); + WITH RECURSIVE c(x) AS (VALUES(1) UNION SELECT x+1 FROM c WHERE x<30) + INSERT INTO t3(a,b,c) + SELECT x, printf('ab%04xyz',x), random() FROM c; + CREATE UNIQUE INDEX t3abc ON t3(CAST(a AS text), b, substr(c,1,3)); + SELECT a FROM t3 WHERE CAST(a AS text)<='10' ORDER BY +a; +} {1 10} +do_catchsql_test indexexpr1-410 { + INSERT INTO t3 SELECT * FROM t3 WHERE rowid=10; +} {1 {UNIQUE constraint failed: index 't3abc'}} + +do_execsql_test indexexpr1-500 { + CREATE TABLE t5(a); + CREATE TABLE cnt(x); + WITH RECURSIVE + c(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM c WHERE x<5) + INSERT INTO cnt(x) SELECT x FROM c; + INSERT INTO t5(a) SELECT printf('abc%03dxyz',x) FROM cnt; + CREATE INDEX t5ax ON t5( substr(a,4,3) ); +} {} +do_execsql_test indexexpr1-510 { + -- The use of the "k" alias in the WHERE clause is technically + -- illegal, but SQLite allows it for historical reasons. In this + -- test and the next, verify that "k" can be used by the t5ax index + SELECT substr(a,4,3) AS k FROM cnt, t5 WHERE k=printf('%03d',x); +} {001 002 003 004 005} +do_execsql_test indexexpr1-510eqp { + EXPLAIN QUERY PLAN + SELECT substr(a,4,3) AS k FROM cnt, t5 WHERE k=printf('%03d',x); +} {/USING INDEX t5ax/} + + +finish_test Index: test/json101.test ================================================================== --- test/json101.test +++ test/json101.test @@ -14,13 +14,25 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl load_static_extension db json -do_execsql_test json1-1.1 { +do_execsql_test json1-1.1.00 { SELECT json_array(1,2.5,null,'hello'); } {[1,2.5,null,"hello"]} +do_execsql_test json1-1.1.01 { + SELECT json_array(1,'{"abc":2.5,"def":null,"ghi":hello}',99); + -- the second term goes in as a string: +} {[1,"{\\"abc\\":2.5,\\"def\\":null,\\"ghi\\":hello}",99]} +do_execsql_test json1-1.1.02 { + SELECT json_array(1,json('{"abc":2.5,"def":null,"ghi":"hello"}'),99); + -- the second term goes in as JSON +} {[1,{"abc":2.5,"def":null,"ghi":"hello"},99]} +do_execsql_test json1-1.1.03 { + SELECT json_array(1,json_object('abc',2.5,'def',null,'ghi','hello'),99); + -- the second term goes in as JSON +} {[1,{"abc":2.5,"def":null,"ghi":"hello"},99]} do_execsql_test json1-1.2 { SELECT hex(json_array('String "\ Test')); } {5B22537472696E67205C225C5C2054657374225D} do_catchsql_test json1-1.3 { SELECT json_array(1,2,x'abcd',3); @@ -52,17 +64,17 @@ do_execsql_test json1-3.1 { SELECT json_replace('{"a":1,"b":2}','$.a','[3,4,5]'); } {{{"a":"[3,4,5]","b":2}}} do_execsql_test json1-3.2 { - SELECT json_replace('{"a":1,"b":2}','$$.a','[3,4,5]'); + SELECT json_replace('{"a":1,"b":2}','$.a',json('[3,4,5]')); } {{{"a":[3,4,5],"b":2}}} do_execsql_test json1-3.3 { SELECT json_type(json_set('{"a":1,"b":2}','$.b','{"x":3,"y":4}'),'$.b'); } {text} do_execsql_test json1-3.4 { - SELECT json_type(json_set('{"a":1,"b":2}','$$.b','{"x":3,"y":4}'),'$.b'); + SELECT json_type(json_set('{"a":1,"b":2}','$.b',json('{"x":3,"y":4}')),'$.b'); } {object} # Per rfc7159, any JSON value is allowed at the top level, and whitespace # is permitting before and/or after that value. # ADDED test/json102.test Index: test/json102.test ================================================================== --- /dev/null +++ test/json102.test @@ -0,0 +1,281 @@ +# 2015-08-12 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements tests for JSON SQL functions extension to the +# SQLite library. +# +# This file contains tests automatically generated from the json1 +# documentation. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +load_static_extension db json +do_execsql_test json102-100 { + SELECT json_object('ex','[52,3.14159]'); +} {{{"ex":"[52,3.14159]"}}} +do_execsql_test json102-110 { + SELECT json_object('ex',json('[52,3.14159]')); +} {{{"ex":[52,3.14159]}}} +do_execsql_test json102-120 { + SELECT json_object('ex',json_array(52,3.14159)); +} {{{"ex":[52,3.14159]}}} +do_execsql_test json102-130 { + SELECT json(' { "this" : "is", "a": [ "test" ] } '); +} {{{"this":"is","a":["test"]}}} +do_execsql_test json102-140 { + SELECT json_array(1,2,'3',4); +} {{[1,2,"3",4]}} +do_execsql_test json102-150 { + SELECT json_array('[1,2]'); +} {{["[1,2]"]}} +do_execsql_test json102-160 { + SELECT json_array(json_array(1,2)); +} {{[[1,2]]}} +do_execsql_test json102-170 { + SELECT json_array(1,null,'3','[4,5]','{"six":7.7}'); +} {{[1,null,"3","[4,5]","{\"six\":7.7}"]}} +do_execsql_test json102-180 { + SELECT json_array(1,null,'3',json('[4,5]'),json('{"six":7.7}')); +} {{[1,null,"3",[4,5],{"six":7.7}]}} +do_execsql_test json102-190 { + SELECT json_array_length('[1,2,3,4]'); +} {{4}} +do_execsql_test json102-200 { + SELECT json_array_length('[1,2,3,4]', '$'); +} {{4}} +do_execsql_test json102-210 { + SELECT json_array_length('[1,2,3,4]', '$[2]'); +} {{0}} +do_execsql_test json102-220 { + SELECT json_array_length('{"one":[1,2,3]}'); +} {{0}} +do_execsql_test json102-230 { + SELECT json_array_length('{"one":[1,2,3]}', '$.one'); +} {{3}} +do_execsql_test json102-240 { + SELECT json_array_length('{"one":[1,2,3]}', '$.two'); +} {{}} +do_execsql_test json102-250 { + SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$'); +} {{{"a":2,"c":[4,5,{"f":7}]}}} +do_execsql_test json102-260 { + SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.c'); +} {{[4,5,{"f":7}]}} +do_execsql_test json102-270 { + SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.c[2]'); +} {{{"f":7}}} +do_execsql_test json102-280 { + SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.c[2].f'); +} {{7}} +do_execsql_test json102-290 { + SELECT json_extract('{"a":2,"c":[4,5],"f":7}','$.c','$.a'); +} {{[[4,5],2]}} +do_execsql_test json102-300 { + SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.x'); +} {{}} +do_execsql_test json102-310 { + SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.x', '$.a'); +} {{[null,2]}} +do_execsql_test json102-320 { + SELECT json_insert('{"a":2,"c":4}', '$.a', 99); +} {{{"a":2,"c":4}}} +do_execsql_test json102-330 { + SELECT json_insert('{"a":2,"c":4}', '$.e', 99); +} {{{"a":2,"c":4,"e":99}}} +do_execsql_test json102-340 { + SELECT json_replace('{"a":2,"c":4}', '$.a', 99); +} {{{"a":99,"c":4}}} +do_execsql_test json102-350 { + SELECT json_replace('{"a":2,"c":4}', '$.e', 99); +} {{{"a":2,"c":4}}} +do_execsql_test json102-360 { + SELECT json_set('{"a":2,"c":4}', '$.a', 99); +} {{{"a":99,"c":4}}} +do_execsql_test json102-370 { + SELECT json_set('{"a":2,"c":4}', '$.e', 99); +} {{{"a":2,"c":4,"e":99}}} +do_execsql_test json102-380 { + SELECT json_set('{"a":2,"c":4}', '$.c', '[97,96]'); +} {{{"a":2,"c":"[97,96]"}}} +do_execsql_test json102-390 { + SELECT json_set('{"a":2,"c":4}', '$.c', json('[97,96]')); +} {{{"a":2,"c":[97,96]}}} +do_execsql_test json102-400 { + SELECT json_set('{"a":2,"c":4}', '$.c', json_array(97,96)); +} {{{"a":2,"c":[97,96]}}} +do_execsql_test json102-410 { + SELECT json_object('a',2,'c',4); +} {{{"a":2,"c":4}}} +do_execsql_test json102-420 { + SELECT json_object('a',2,'c','{e:5}'); +} {{{"a":2,"c":"{e:5}"}}} +do_execsql_test json102-430 { + SELECT json_object('a',2,'c',json_object('e',5)); +} {{{"a":2,"c":{"e":5}}}} +do_execsql_test json102-440 { + SELECT json_remove('[0,1,2,3,4]','$[2]'); +} {{[0,1,3,4]}} +do_execsql_test json102-450 { + SELECT json_remove('[0,1,2,3,4]','$[2]','$[0]'); +} {{[1,3,4]}} +do_execsql_test json102-460 { + SELECT json_remove('[0,1,2,3,4]','$[0]','$[2]'); +} {{[1,2,4]}} +do_execsql_test json102-470 { + SELECT json_remove('{"x":25,"y":42}'); +} {{{"x":25,"y":42}}} +do_execsql_test json102-480 { + SELECT json_remove('{"x":25,"y":42}','$.z'); +} {{{"x":25,"y":42}}} +do_execsql_test json102-490 { + SELECT json_remove('{"x":25,"y":42}','$.y'); +} {{{"x":25}}} +do_execsql_test json102-500 { + SELECT json_remove('{"x":25,"y":42}','$'); +} {{}} +do_execsql_test json102-510 { + SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}'); +} {{object}} +do_execsql_test json102-520 { + SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$'); +} {{object}} +do_execsql_test json102-530 { + SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a'); +} {{array}} +do_execsql_test json102-540 { + SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[0]'); +} {{integer}} +do_execsql_test json102-550 { + SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[1]'); +} {{real}} +do_execsql_test json102-560 { + SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[2]'); +} {{true}} +do_execsql_test json102-570 { + SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[3]'); +} {{false}} +do_execsql_test json102-580 { + SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[4]'); +} {{null}} +do_execsql_test json102-590 { + SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[5]'); +} {{text}} +do_execsql_test json102-600 { + SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[6]'); +} {{}} +do_execsql_test json102-610 { + SELECT json_valid(char(123)||'"x":35'||char(125)); +} {{1}} +do_execsql_test json102-620 { + SELECT json_valid(char(123)||'"x":35'); +} {{0}} + +ifcapable vtab { +do_execsql_test json102-1000 { + CREATE TABLE user(name,phone); + INSERT INTO user(name,phone) VALUES + ('Alice','["919-555-2345","804-555-3621"]'), + ('Bob','["201-555-8872"]'), + ('Cindy','["704-555-9983"]'), + ('Dave','["336-555-8421","704-555-4321","803-911-4421"]'); + SELECT DISTINCT user.name + FROM user, json_each(user.phone) + WHERE json_each.value LIKE '704-%' + ORDER BY 1; +} {Cindy Dave} + +do_execsql_test json102-1010 { + UPDATE user + SET phone=json_extract(phone,'$[0]') + WHERE json_array_length(phone)<2; + SELECT name, substr(phone,1,5) FROM user ORDER BY name; +} {Alice {["919} Bob 201-5 Cindy 704-5 Dave {["336}} +do_execsql_test json102-1011 { + SELECT name FROM user WHERE phone LIKE '704-%' + UNION + SELECT user.name + FROM user, json_each(user.phone) + WHERE json_valid(user.phone) + AND json_each.value LIKE '704-%'; +} {Cindy Dave} + +do_execsql_test json102-1100 { + CREATE TABLE big(json JSON); + INSERT INTO big(json) VALUES('{ + "id":123, + "stuff":[1,2,3,4], + "partlist":[ + {"uuid":"bb108722-572e-11e5-9320-7f3b63a4ca74"}, + {"uuid":"c690dc14-572e-11e5-95f9-dfc8861fd535"}, + {"subassembly":[ + {"uuid":"6fa5181e-5721-11e5-a04e-57f3d7b32808"} + ]} + ] + }'); + INSERT INTO big(json) VALUES('{ + "id":456, + "stuff":["hello","world","xyzzy"], + "partlist":[ + {"uuid":false}, + {"uuid":"c690dc14-572e-11e5-95f9-dfc8861fd535"} + ] + }'); +} {} +set correct_answer [list \ + 1 {$.id} 123 \ + 1 {$.stuff[0]} 1 \ + 1 {$.stuff[1]} 2 \ + 1 {$.stuff[2]} 3 \ + 1 {$.stuff[3]} 4 \ + 1 {$.partlist[0].uuid} bb108722-572e-11e5-9320-7f3b63a4ca74 \ + 1 {$.partlist[1].uuid} c690dc14-572e-11e5-95f9-dfc8861fd535 \ + 1 {$.partlist[2].subassembly[0].uuid} 6fa5181e-5721-11e5-a04e-57f3d7b32808 \ + 2 {$.id} 456 \ + 2 {$.stuff[0]} hello \ + 2 {$.stuff[1]} world \ + 2 {$.stuff[2]} xyzzy \ + 2 {$.partlist[0].uuid} 0 \ + 2 {$.partlist[1].uuid} c690dc14-572e-11e5-95f9-dfc8861fd535] +do_execsql_test json102-1110 { + SELECT big.rowid, fullkey, value + FROM big, json_tree(big.json) + WHERE json_tree.type NOT IN ('object','array') + ORDER BY +big.rowid, +json_tree.id +} $correct_answer +do_execsql_test json102-1120 { + SELECT big.rowid, fullkey, atom + FROM big, json_tree(big.json) + WHERE atom IS NOT NULL + ORDER BY +big.rowid, +json_tree.id +} $correct_answer + +do_execsql_test json102-1130 { + SELECT DISTINCT json_extract(big.json,'$.id') + FROM big, json_tree(big.json,'$.partlist') + WHERE json_tree.key='uuid' + AND json_tree.value='6fa5181e-5721-11e5-a04e-57f3d7b32808'; +} {123} +do_execsql_test json102-1131 { + SELECT DISTINCT json_extract(big.json,'$.id') + FROM big, json_tree(big.json,'$') + WHERE json_tree.key='uuid' + AND json_tree.value='6fa5181e-5721-11e5-a04e-57f3d7b32808'; +} {123} +do_execsql_test json102-1132 { + SELECT DISTINCT json_extract(big.json,'$.id') + FROM big, json_tree(big.json) + WHERE json_tree.key='uuid' + AND json_tree.value='6fa5181e-5721-11e5-a04e-57f3d7b32808'; +} {123} +} ;# end ifcapable vtab + +finish_test Index: test/pragma.test ================================================================== --- test/pragma.test +++ test/pragma.test @@ -81,12 +81,12 @@ db close delete_file test.db test.db-journal delete_file test3.db test3.db-journal sqlite3 db test.db; set DB [sqlite3_connection_pointer db] -# EVIDENCE-OF: R-24197-42751 PRAGMA database.cache_size; PRAGMA -# database.cache_size = pages; PRAGMA database.cache_size = -kibibytes; +# EVIDENCE-OF: R-13861-56665 PRAGMA schema.cache_size; PRAGMA +# schema.cache_size = pages; PRAGMA schema.cache_size = -kibibytes; # Query or change the suggested maximum number of database disk pages # that SQLite will hold in memory at once per open database file. # ifcapable pager_pragmas { set DFLT_CACHE_SZ [db one {PRAGMA default_cache_size}] @@ -695,12 +695,12 @@ capture_pragma db out {PRAGMA index_xinfo(t3i1)} db eval {SELECT seqno, cid, name FROM out ORDER BY seqno} } {0 0 a 1 1 b 2 -1 {}} -# EVIDENCE-OF: R-62725-03366 PRAGMA database.index_info(index-name); -# This pragma returns one row for each key column in the named index. +# EVIDENCE-OF: R-29448-60346 PRAGMA schema.index_info(index-name); This +# pragma returns one row for each key column in the named index. # # (The first column of output from PRAGMA index_info is...) # EVIDENCE-OF: R-34186-52914 The rank of the column within the index. (0 # means left-most.) # @@ -782,13 +782,13 @@ ] } ;# ifcapable schema_pragmas # Miscellaneous tests # ifcapable schema_pragmas { -# EVIDENCE-OF: R-63500-32024 PRAGMA database.index_list(table-name); -# This pragma returns one row for each index associated with the given -# table. +# EVIDENCE-OF: R-64103-17776 PRAGMA schema.index_list(table-name); This +# pragma returns one row for each index associated with the given table. +# do_test pragma-7.1.1 { # Make sure a pragma knows to read the schema if it needs to db close sqlite3 db test.db capture_pragma db out "PRAGMA index_list(t3)" @@ -1377,12 +1377,12 @@ ifcapable pager_pragmas { db close forcedelete test.db sqlite3 db test.db - # EVIDENCE-OF: R-13905-26312 PRAGMA database.page_count; Return the - # total number of pages in the database file. + # EVIDENCE-OF: R-15672-33611 PRAGMA schema.page_count; Return the total + # number of pages in the database file. # do_test pragma-14.1 { execsql { pragma auto_vacuum = 0 } execsql { pragma page_count; pragma main.page_count } } {0 0} @@ -1920,12 +1920,12 @@ } capture_pragma db2 out {PRAGMA index_info(i2)} db2 eval {SELECT cid, name, '|' FROM out ORDER BY seqno} } {2 c | 3 d | 1 b |} -# EVIDENCE-OF: R-44874-46325 PRAGMA database.index_xinfo(index-name); -# This pragma returns information about every column in an index. +# EVIDENCE-OF: R-56143-29319 PRAGMA schema.index_xinfo(index-name); This +# pragma returns information about every column in an index. # # EVIDENCE-OF: R-45970-35618 Unlike this index_info pragma, this pragma # returns information about every column in the index, not just the key # columns. # @@ -1964,13 +1964,12 @@ } {0 2 c 0 BINARY 1 1 3 d 0 BINARY 1 2 1 b 0 BINARY 1 3 -1 {} 0 BINARY 0} do_test 23.2d { db2 eval {PRAGMA index_xinfo(i2x)} } {0 3 d 0 nocase 1 1 2 c 1 BINARY 1 2 -1 {} 0 BINARY 0} -# EVIDENCE-OF: R-63500-32024 PRAGMA database.index_list(table-name); -# This pragma returns one row for each index associated with the given -# table. +# EVIDENCE-OF: R-64103-17776 PRAGMA schema.index_list(table-name); This +# pragma returns one row for each index associated with the given table. # # (The first column of output from PRAGMA index_list is...) # EVIDENCE-OF: R-02753-24748 A sequence number assigned to each index # for internal tracking purposes. # Index: test/pragma2.test ================================================================== --- test/pragma2.test +++ test/pragma2.test @@ -40,11 +40,11 @@ delete_file test3.db test3.db-journal sqlite3 db test.db; set DB [sqlite3_connection_pointer db] db eval {PRAGMA auto_vacuum=0} -# EVIDENCE-OF: R-17887-14874 PRAGMA database.freelist_count; Return the +# EVIDENCE-OF: R-11211-21323 PRAGMA schema.freelist_count; Return the # number of unused pages in the database file. # do_test pragma2-1.1 { execsql { PRAGMA freelist_count; Index: test/releasetest.tcl ================================================================== --- test/releasetest.tcl +++ test/releasetest.tcl @@ -290,11 +290,11 @@ # set LOG [open releasetest-out.txt w] proc PUTS {args} { if {[llength $args]==2} { puts [lindex $args 0] [lindex $args 1] - puts [lindex $args 0] $::LOG [lindex $args 1] + puts $::LOG [lindex $args 1] } else { puts [lindex $args 0] puts $::LOG [lindex $args 0] } } @@ -708,12 +708,13 @@ set hr [expr {$elapsetime/3600}] set min [expr {($elapsetime/60)%60}] set sec [expr {$elapsetime%60}] set etime [format (%02d:%02d:%02d) $hr $min $sec] PUTS [string repeat * 79] + incr ::NERRCASE $::NERR PUTS "$::NERRCASE failures out of $::NTESTCASE tests in $etime" if {$::SQLITE_VERSION ne ""} { PUTS "SQLite $::SQLITE_VERSION" } } main $argv Index: test/rowid.test ================================================================== --- test/rowid.test +++ test/rowid.test @@ -142,10 +142,12 @@ set sql "UPDATE t1 SET x=3 WHERE _rowid_==$x2rowid(3)" execsql $sql execsql {SELECT x FROM t1 ORDER BY x} } {1 3 5 7 9} +if 0 { # With the index-on-expressions enhancement, creating + # an index on ROWID has become possible. # We cannot index by ROWID # do_test rowid-2.9 { set v [catch {execsql {CREATE INDEX idxt1 ON t1(rowid)}} msg] lappend v $msg @@ -160,10 +162,11 @@ } {1 {table t1 has no column named oid}} do_test rowid-2.12 { set v [catch {execsql {CREATE INDEX idxt1 ON t1(x, rowid)}} msg] lappend v $msg } {1 {table t1 has no column named rowid}} +} # Columns defined in the CREATE statement override the buildin ROWID # column names. # do_test rowid-3.1 { ADDED test/subtype1.test Index: test/subtype1.test ================================================================== --- /dev/null +++ test/subtype1.test @@ -0,0 +1,31 @@ +# 2015-09-10 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements tests for sqlite3_value_subtype() and +# sqlite3_result_subtype() interfaces. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +do_execsql_test subtype1-100 { + SELECT test_getsubtype('hello'); +} {0} +do_execsql_test subtype1-110 { + SELECT test_getsubtype(test_setsubtype('hello',123)); +} {123} +do_execsql_test subtype1-120 { + SELECT typeof(test_setsubtype('hello',123)); +} {text} +do_execsql_test subtype1-130 { + SELECT test_setsubtype('hello',123); +} {hello} + +finish_test Index: test/tabfunc01.test ================================================================== --- test/tabfunc01.test +++ test/tabfunc01.test @@ -67,7 +67,20 @@ do_execsql_test tabfunc01-3.1 { SELECT DISTINCT value FROM generate_series(1,x), t1 ORDER BY 1; } {1 2 3} +# Eponymous virtual table exists in the "main" schema only +# +do_execsql_test tabfunc01-4.1 { + SELECT * FROM main.generate_series(1,4) +} {1 2 3 4} +do_catchsql_test tabfunc01-4.2 { + SELECT * FROM temp.generate_series(1,4) +} {1 {no such table: temp.generate_series}} +do_catchsql_test tabfunc01-4.3 { + ATTACH ':memory:' AS aux1; + CREATE TABLE aux1.t1(a,b,c); + SELECT * FROM aux1.generate_series(1,4) +} {1 {no such table: aux1.generate_series}} finish_test Index: test/table.test ================================================================== --- test/table.test +++ test/table.test @@ -820,8 +820,20 @@ } {1 {string or blob too big}} do_execsql_test table-18.2 { COMMIT; PRAGMA integrity_check; } {ok} + +# 2015-09-09 +# Ticket [https://www.sqlite.org/src/info/acd12990885d9276] +# "CREATE TABLE ... AS SELECT ... FROM sqlite_master" fails because the row +# in the sqlite_master table for the next table is initially populated +# with a NULL instead of a record created by OP_Record. +# +do_execsql_test table-19.1 { + CREATE TABLE t19 AS SELECT * FROM sqlite_master; + SELECT name FROM t19 ORDER BY name; +} {{} savepoint t10 t11 t12 t13 t16 t2 t3 t3\"xyz t4\"abc t7 t8 t9 tablet8 test1 weird} + finish_test Index: test/where.test ================================================================== --- test/where.test +++ test/where.test @@ -40,12 +40,12 @@ INSERT INTO t2 SELECT 101-w, x, $maxy+1-y, y FROM t1; " } execsql { - CREATE INDEX i1w ON t1(w); - CREATE INDEX i1xy ON t1(x,y); + CREATE INDEX i1w ON t1("w"); -- Verify quoted identifier names + CREATE INDEX i1xy ON t1(`x`,'y' ASC); -- Old MySQL compatibility CREATE INDEX i2p ON t2(p); CREATE INDEX i2r ON t2(r); CREATE INDEX i2qs ON t2(q, s); } } {} Index: test/where4.test ================================================================== --- test/where4.test +++ test/where4.test @@ -134,11 +134,11 @@ execsql { CREATE TABLE t2(a); INSERT INTO t2 VALUES(1); INSERT INTO t2 VALUES(2); INSERT INTO t2 VALUES(3); - CREATE TABLE t3(x,y,UNIQUE(x,y)); + CREATE TABLE t3(x,y,UNIQUE("x",'y' ASC)); -- Goofy syntax allowed INSERT INTO t3 VALUES(1,11); INSERT INTO t3 VALUES(2,NULL); SELECT * FROM t2 LEFT JOIN t3 ON a=x WHERE +y IS NULL; } @@ -198,11 +198,12 @@ # Ticket #2273. Problems with IN operators and NULLs. # ifcapable subquery { do_test where4-5.1 { execsql { - CREATE TABLE t4(x,y,z,PRIMARY KEY(x,y)); + -- Allow the 'x' syntax for backwards compatibility + CREATE TABLE t4(x,y,z,PRIMARY KEY('x' ASC, "y" ASC)); } execsql { SELECT * FROM t2 LEFT JOIN t4 b1 LEFT JOIN t4 b2 ON b2.x=b1.x AND b2.y IN (b1.y); @@ -302,6 +303,5 @@ finish_test - Index: tool/lemon.c ================================================================== --- tool/lemon.c +++ tool/lemon.c @@ -53,11 +53,11 @@ /* ** Compilers are starting to complain about the use of sprintf() and strcpy(), ** saying they are unsafe. So we define our own versions of those routines too. ** ** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and -** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). +** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). ** The third is a helper routine for vsnprintf() that adds texts to the end of a ** buffer, making sure the buffer is always zero-terminated. ** ** The string formatter is a minimal subset of stdlib sprintf() supporting only ** a few simply conversions: @@ -314,11 +314,12 @@ SSCONFLICT, /* A shift/shift conflict */ SRCONFLICT, /* Was a reduce, but part of a conflict */ RRCONFLICT, /* Was a reduce, but part of a conflict */ SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ - NOT_USED /* Deleted by compression */ + NOT_USED, /* Deleted by compression */ + SHIFTREDUCE /* Shift first, then reduce */ }; /* Every shift or reduce operation is stored as one of the following */ struct action { struct symbol *sp; /* The look-ahead symbol */ @@ -338,11 +339,13 @@ struct config *cfp; /* All configurations in this set */ int statenum; /* Sequential number for this state */ struct action *ap; /* Array of actions for this state */ int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ - int iDflt; /* Default action */ + int iDfltReduce; /* Default action is to REDUCE by this rule */ + struct rule *pDfltReduce;/* The default REDUCE rule. */ + int autoReduce; /* True if this is an auto-reduce state */ }; #define NO_OFFSET (-2147483647) /* A followset propagation link indicates that the contents of one ** configuration followset should be propagated to another whenever @@ -358,10 +361,11 @@ ** of as begin global variables in the program.) */ struct lemon { struct state **sorted; /* Table of states sorted by state number */ struct rule *rule; /* List of all rules */ int nstate; /* Number of states */ + int nxstate; /* nstate with tail degenerate states removed */ int nrule; /* Number of rules */ int nsymbol; /* Number of terminal and nonterminal symbols */ int nterminal; /* Number of terminal symbols */ struct symbol **symbols; /* Sorted array of pointers to symbols */ int errorcnt; /* Number of errors */ @@ -383,11 +387,12 @@ char *vardest; /* Code for the default non-terminal destructor */ char *filename; /* Name of the input file */ char *outname; /* Name of the current output file */ char *tokenprefix; /* A prefix added to token names in the .h file */ int nconflict; /* Number of parsing conflicts */ - int tablesize; /* Size of the parse tables */ + int nactiontab; /* Number of entries in the yy_action[] table */ + int tablesize; /* Total table size of all tables in bytes */ int basisflag; /* Print only basis configurations */ int has_fallback; /* True if any %fallback is seen in the grammar */ int nolinenosflag; /* True if #line statements should not be printed */ char *argv0; /* Name of the program */ }; @@ -481,11 +486,11 @@ int rc; rc = ap1->sp->index - ap2->sp->index; if( rc==0 ){ rc = (int)ap1->type - (int)ap2->type; } - if( rc==0 && ap1->type==REDUCE ){ + if( rc==0 && (ap1->type==REDUCE || ap1->type==SHIFTREDUCE) ){ rc = ap1->x.rp->index - ap2->x.rp->index; } if( rc==0 ){ rc = (int) (ap2 - ap1); } @@ -1373,18 +1378,20 @@ return; } /* Sort the configuration list */ void Configlist_sort(){ - current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp); + current = (struct config*)msort((char*)current,(char**)&(current->next), + Configcmp); currentend = 0; return; } /* Sort the basis configuration list */ void Configlist_sortbasis(){ - basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp); + basis = (struct config*)msort((char*)current,(char**)&(current->bp), + Configcmp); basisend = 0; return; } /* Return a pointer to the head of the configuration list and @@ -1477,10 +1484,22 @@ if( user_templatename==0 ){ memory_error(); } lemon_strcpy(user_templatename, z); } + +/* forward reference */ +static const char *minimum_size_type(int lwr, int upr, int *pnByte); + +/* Print a single line of the "Parser Stats" output +*/ +static void stats_line(const char *zLabel, int iValue){ + int nLabel = lemonStrlen(zLabel); + printf(" %s%.*s %5d\n", zLabel, + 35-nLabel, "................................", + iValue); +} /* The main program. Parse the command line and do it... */ int main(int argc, char **argv) { static int version = 0; @@ -1609,14 +1628,19 @@ ** omitted if the "-m" option is used because makeheaders will ** generate the file for us.) */ if( !mhflag ) ReportHeader(&lem); } if( statistics ){ - printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n", - lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule); - printf(" %d states, %d parser table entries, %d conflicts\n", - lem.nstate, lem.tablesize, lem.nconflict); + printf("Parser statistics:\n"); + stats_line("terminal symbols", lem.nterminal); + stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); + stats_line("total symbols", lem.nsymbol); + stats_line("rules", lem.nrule); + stats_line("states", lem.nxstate); + stats_line("conflicts", lem.nconflict); + stats_line("action table entries", lem.nactiontab); + stats_line("total table size (bytes)", lem.tablesize); } if( lem.nconflict > 0 ){ fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); } @@ -1871,11 +1895,12 @@ case OPT_DBL: case OPT_FDBL: dv = strtod(cp,&end); if( *end ){ if( err ){ - fprintf(err,"%sillegal character in floating-point argument.\n",emsg); + fprintf(err, + "%sillegal character in floating-point argument.\n",emsg); errline(i,(int)((char*)end-(char*)argv[i]),err); } errcnt++; } break; @@ -2937,19 +2962,18 @@ /* if( rp->code ) printf("\n %s",rp->code); */ printf("\n"); } } -void ConfigPrint(FILE *fp, struct config *cfp) -{ - struct rule *rp; +/* Print a single rule. +*/ +void RulePrint(FILE *fp, struct rule *rp, int iCursor){ struct symbol *sp; int i, j; - rp = cfp->rp; fprintf(fp,"%s ::=",rp->lhs->name); for(i=0; i<=rp->nrhs; i++){ - if( i==cfp->dot ) fprintf(fp," *"); + if( i==iCursor ) fprintf(fp," *"); if( i==rp->nrhs ) break; sp = rp->rhs[i]; if( sp->type==MULTITERMINAL ){ fprintf(fp," %s", sp->subsym[0]->name); for(j=1; jnsubsym; j++){ @@ -2958,10 +2982,16 @@ }else{ fprintf(fp," %s", sp->name); } } } + +/* Print the rule for a configuration. +*/ +void ConfigPrint(FILE *fp, struct config *cfp){ + RulePrint(fp, cfp->rp, cfp->dot); +} /* #define TEST */ #if 0 /* Print a set */ PRIVATE void SetPrint(out,set,lemp) @@ -2998,45 +3028,60 @@ #endif /* Print an action to the given file descriptor. Return FALSE if ** nothing was actually printed. */ -int PrintAction(struct action *ap, FILE *fp, int indent){ +int PrintAction( + struct action *ap, /* The action to print */ + FILE *fp, /* Print the action here */ + int indent /* Indent by this amount */ +){ int result = 1; switch( ap->type ){ - case SHIFT: - fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->statenum); + case SHIFT: { + struct state *stp = ap->x.stp; + fprintf(fp,"%*s shift %-7d",indent,ap->sp->name,stp->statenum); + break; + } + case REDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s reduce %-7d",indent,ap->sp->name,rp->index); + RulePrint(fp, rp, -1); break; - case REDUCE: - fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index); + } + case SHIFTREDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s shift-reduce %-7d",indent,ap->sp->name,rp->index); + RulePrint(fp, rp, -1); break; + } case ACCEPT: fprintf(fp,"%*s accept",indent,ap->sp->name); break; case ERROR: fprintf(fp,"%*s error",indent,ap->sp->name); break; case SRCONFLICT: case RRCONFLICT: - fprintf(fp,"%*s reduce %-3d ** Parsing conflict **", + fprintf(fp,"%*s reduce %-7d ** Parsing conflict **", indent,ap->sp->name,ap->x.rp->index); break; case SSCONFLICT: - fprintf(fp,"%*s shift %-3d ** Parsing conflict **", + fprintf(fp,"%*s shift %-7d ** Parsing conflict **", indent,ap->sp->name,ap->x.stp->statenum); break; case SH_RESOLVED: if( showPrecedenceConflict ){ - fprintf(fp,"%*s shift %-3d -- dropped by precedence", + fprintf(fp,"%*s shift %-7d -- dropped by precedence", indent,ap->sp->name,ap->x.stp->statenum); }else{ result = 0; } break; case RD_RESOLVED: if( showPrecedenceConflict ){ - fprintf(fp,"%*s reduce %-3d -- dropped by precedence", + fprintf(fp,"%*s reduce %-7d -- dropped by precedence", indent,ap->sp->name,ap->x.rp->index); }else{ result = 0; } break; @@ -3045,11 +3090,11 @@ break; } return result; } -/* Generate the "y.output" log file */ +/* Generate the "*.out" log file */ void ReportOutput(struct lemon *lemp) { int i; struct state *stp; struct config *cfp; @@ -3056,11 +3101,11 @@ struct action *ap; FILE *fp; fp = file_open(lemp,".out","wb"); if( fp==0 ) return; - for(i=0; instate; i++){ + for(i=0; inxstate; i++){ stp = lemp->sorted[i]; fprintf(fp,"State %d:\n",stp->statenum); if( lemp->basisflag ) cfp=stp->bp; else cfp=stp->cfp; while( cfp ){ @@ -3164,14 +3209,15 @@ */ PRIVATE int compute_action(struct lemon *lemp, struct action *ap) { int act; switch( ap->type ){ - case SHIFT: act = ap->x.stp->statenum; break; - case REDUCE: act = ap->x.rp->index + lemp->nstate; break; - case ERROR: act = lemp->nstate + lemp->nrule; break; - case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break; + case SHIFT: act = ap->x.stp->statenum; break; + case SHIFTREDUCE: act = ap->x.rp->index + lemp->nstate; break; + case REDUCE: act = ap->x.rp->index + lemp->nstate+lemp->nrule; break; + case ERROR: act = lemp->nstate + lemp->nrule*2; break; + case ACCEPT: act = lemp->nstate + lemp->nrule*2 + 1; break; default: act = -1; break; } return act; } @@ -3226,11 +3272,12 @@ lemp->errorcnt++; return 0; } in = fopen(user_templatename,"rb"); if( in==0 ){ - fprintf(stderr,"Can't open the template file \"%s\".\n",user_templatename); + fprintf(stderr,"Can't open the template file \"%s\".\n", + user_templatename); lemp->errorcnt++; return 0; } return in; } @@ -3311,11 +3358,14 @@ if( cp==0 ) return; fprintf(out,"{\n"); (*lineno)++; }else if( sp->destructor ){ cp = sp->destructor; fprintf(out,"{\n"); (*lineno)++; - if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,sp->destLineno,lemp->filename); } + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,sp->destLineno,lemp->filename); + } }else if( lemp->vardest ){ cp = lemp->vardest; if( cp==0 ) return; fprintf(out,"{\n"); (*lineno)++; }else{ @@ -3508,17 +3558,23 @@ ){ const char *cp; /* Generate code to do the reduce action */ if( rp->code ){ - if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,rp->line,lemp->filename); } + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,rp->line,lemp->filename); + } fprintf(out,"{%s",rp->code); for(cp=rp->code; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } /* End loop */ fprintf(out,"}\n"); (*lineno)++; - if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); } + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,*lineno,lemp->outname); + } } /* End if( rp->code ) */ return; } @@ -3645,28 +3701,36 @@ *plineno = lineno; } /* ** Return the name of a C datatype able to represent values between -** lwr and upr, inclusive. +** lwr and upr, inclusive. If pnByte!=NULL then also write the sizeof +** for that type (1, 2, or 4) into *pnByte. */ -static const char *minimum_size_type(int lwr, int upr){ +static const char *minimum_size_type(int lwr, int upr, int *pnByte){ + const char *zType = "int"; + int nByte = 4; if( lwr>=0 ){ if( upr<=255 ){ - return "unsigned char"; + zType = "unsigned char"; + nByte = 1; }else if( upr<65535 ){ - return "unsigned short int"; + zType = "unsigned short int"; + nByte = 2; }else{ - return "unsigned int"; + zType = "unsigned int"; + nByte = 4; } }else if( lwr>=-127 && upr<=127 ){ - return "signed char"; + zType = "signed char"; + nByte = 1; }else if( lwr>=-32767 && upr<32767 ){ - return "short"; - }else{ - return "int"; + zType = "short"; + nByte = 2; } + if( pnByte ) *pnByte = nByte; + return zType; } /* ** Each state contains a set of token transaction and a set of ** nonterminal transactions. Each of these sets makes an instance @@ -3687,11 +3751,11 @@ struct axset *p1 = (struct axset*)a; struct axset *p2 = (struct axset*)b; int c; c = p2->nAction - p1->nAction; if( c==0 ){ - c = p2->iOrder - p1->iOrder; + c = p1->iOrder - p2->iOrder; } assert( c!=0 || p1==p2 ); return c; } @@ -3726,11 +3790,13 @@ int lineno; struct state *stp; struct action *ap; struct rule *rp; struct acttab *pActtab; - int i, j, n; + int i, j, n, sz; + int szActionType; /* sizeof(YYACTIONTYPE) */ + int szCodeType; /* sizeof(YYCODETYPE) */ const char *name; int mnTknOfst, mxTknOfst; int mnNtOfst, mxNtOfst; struct axset *ax; @@ -3767,14 +3833,14 @@ } tplt_xfer(lemp->name,in,out,&lineno); /* Generate the defines */ fprintf(out,"#define YYCODETYPE %s\n", - minimum_size_type(0, lemp->nsymbol+1)); lineno++; + minimum_size_type(0, lemp->nsymbol+1, &szCodeType)); lineno++; fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++; fprintf(out,"#define YYACTIONTYPE %s\n", - minimum_size_type(0, lemp->nstate+lemp->nrule+5)); lineno++; + minimum_size_type(0,lemp->nstate+lemp->nrule*2+5,&szActionType)); lineno++; if( lemp->wildcard ){ fprintf(out,"#define YYWILDCARD %d\n", lemp->wildcard->index); lineno++; } print_stack_union(out,lemp,&lineno,mhflag); @@ -3806,40 +3872,28 @@ fprintf(out,"#define %sARG_STORE\n",name); lineno++; } if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } - fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++; - fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; if( lemp->errsym->useCnt ){ - fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; - fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; + fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; + fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; } if( lemp->has_fallback ){ fprintf(out,"#define YYFALLBACK 1\n"); lineno++; } - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate the action table and its associates: - ** - ** yy_action[] A single table containing all actions. - ** yy_lookahead[] A table containing the lookahead for each entry in - ** yy_action. Used to detect hash collisions. - ** yy_shift_ofst[] For each state, the offset into yy_action for - ** shifting terminals. - ** yy_reduce_ofst[] For each state, the offset into yy_action for - ** shifting non-terminals after a reduce. - ** yy_default[] Default action for each state. + + /* Compute the action table, but do not output it yet. The action + ** table must be computed before generating the YYNSTATE macro because + ** we need to know how many states can be eliminated. */ - - /* Compute the actions on all states and count them up */ - ax = (struct axset *) calloc(lemp->nstate*2, sizeof(ax[0])); + ax = (struct axset *) calloc(lemp->nxstate*2, sizeof(ax[0])); if( ax==0 ){ fprintf(stderr,"malloc failed\n"); exit(1); } - for(i=0; instate; i++){ + for(i=0; inxstate; i++){ stp = lemp->sorted[i]; ax[i*2].stp = stp; ax[i*2].isTkn = 1; ax[i*2].nAction = stp->nTknAct; ax[i*2+1].stp = stp; @@ -3846,19 +3900,16 @@ ax[i*2+1].isTkn = 0; ax[i*2+1].nAction = stp->nNtAct; } mxTknOfst = mnTknOfst = 0; mxNtOfst = mnNtOfst = 0; - - /* Compute the action table. In order to try to keep the size of the - ** action table to a minimum, the heuristic of placing the largest action - ** sets first is used. - */ - for(i=0; instate*2; i++) ax[i].iOrder = i; - qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare); + /* In an effort to minimize the action table size, use the heuristic + ** of placing the largest action sets first */ + for(i=0; inxstate*2; i++) ax[i].iOrder = i; + qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); pActtab = acttab_alloc(); - for(i=0; instate*2 && ax[i].nAction>0; i++){ + for(i=0; inxstate*2 && ax[i].nAction>0; i++){ stp = ax[i].stp; if( ax[i].isTkn ){ for(ap=stp->ap; ap; ap=ap->next){ int action; if( ap->sp->index>=lemp->nterminal ) continue; @@ -3880,15 +3931,54 @@ } stp->iNtOfst = acttab_insert(pActtab); if( stp->iNtOfstiNtOfst; if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; } +#if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ + { int jj, nn; + for(jj=nn=0; jjnAction; jj++){ + if( pActtab->aAction[jj].action<0 ) nn++; + } + printf("%4d: State %3d %s n: %2d size: %5d freespace: %d\n", + i, stp->statenum, ax[i].isTkn ? "Token" : "Var ", + ax[i].nAction, pActtab->nAction, nn); + } +#endif } free(ax); + + /* Finish rendering the constants now that the action table has + ** been computed */ + fprintf(out,"#define YYNSTATE %d\n",lemp->nxstate); lineno++; + fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; + fprintf(out,"#define YY_MAX_SHIFT %d\n",lemp->nxstate-1); lineno++; + fprintf(out,"#define YY_MIN_SHIFTREDUCE %d\n",lemp->nstate); lineno++; + i = lemp->nstate + lemp->nrule; + fprintf(out,"#define YY_MAX_SHIFTREDUCE %d\n", i-1); lineno++; + fprintf(out,"#define YY_MIN_REDUCE %d\n", i); lineno++; + i = lemp->nstate + lemp->nrule*2; + fprintf(out,"#define YY_MAX_REDUCE %d\n", i-1); lineno++; + fprintf(out,"#define YY_ERROR_ACTION %d\n", i); lineno++; + fprintf(out,"#define YY_ACCEPT_ACTION %d\n", i+1); lineno++; + fprintf(out,"#define YY_NO_ACTION %d\n", i+2); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Now output the action table and its associates: + ** + ** yy_action[] A single table containing all actions. + ** yy_lookahead[] A table containing the lookahead for each entry in + ** yy_action. Used to detect hash collisions. + ** yy_shift_ofst[] For each state, the offset into yy_action for + ** shifting terminals. + ** yy_reduce_ofst[] For each state, the offset into yy_action for + ** shifting non-terminals after a reduce. + ** yy_default[] Default action for each state. + */ /* Output the yy_action table */ - n = acttab_size(pActtab); + lemp->nactiontab = n = acttab_size(pActtab); + lemp->tablesize += n*szActionType; fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; for(i=j=0; instate + lemp->nrule + 2; @@ -3902,10 +3992,11 @@ } } fprintf(out, "};\n"); lineno++; /* Output the yy_lookahead table */ + lemp->tablesize += n*szCodeType; fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; for(i=j=0; insymbol; if( j==0 ) fprintf(out," /* %5d */ ", i); @@ -3919,17 +4010,18 @@ } fprintf(out, "};\n"); lineno++; /* Output the yy_shift_ofst[] table */ fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++; - n = lemp->nstate; + n = lemp->nxstate; while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; fprintf(out, "static const %s yy_shift_ofst[] = {\n", - minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++; + minimum_size_type(mnTknOfst-1, mxTknOfst, &sz)); lineno++; + lemp->tablesize += n*sz; for(i=j=0; isorted[i]; ofst = stp->iTknOfst; if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1; @@ -3944,17 +4036,18 @@ } fprintf(out, "};\n"); lineno++; /* Output the yy_reduce_ofst[] table */ fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; - n = lemp->nstate; + n = lemp->nxstate; while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++; fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; fprintf(out, "static const %s yy_reduce_ofst[] = {\n", - minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++; + minimum_size_type(mnNtOfst-1, mxNtOfst, &sz)); lineno++; + lemp->tablesize += n*sz; for(i=j=0; isorted[i]; ofst = stp->iNtOfst; if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; @@ -3969,15 +4062,16 @@ } fprintf(out, "};\n"); lineno++; /* Output the default action table */ fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; - n = lemp->nstate; + n = lemp->nxstate; + lemp->tablesize += n*szActionType; for(i=j=0; isorted[i]; if( j==0 ) fprintf(out," /* %5d */ ", i); - fprintf(out, " %4d,", stp->iDflt); + fprintf(out, " %4d,", stp->iDfltReduce+lemp->nstate+lemp->nrule); if( j==9 || i==n-1 ){ fprintf(out, "\n"); lineno++; j = 0; }else{ j++; @@ -3989,10 +4083,11 @@ /* Generate the table of fallback tokens. */ if( lemp->has_fallback ){ int mx = lemp->nterminal - 1; while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } + lemp->tablesize += (mx+1)*szCodeType; for(i=0; i<=mx; i++){ struct symbol *p = lemp->symbols[i]; if( p->fallback==0 ){ fprintf(out, " 0, /* %10s => nothing */\n", p->name); }else{ @@ -4253,10 +4348,36 @@ ap->sp = Symbol_new("{default}"); for(ap=ap->next; ap; ap=ap->next){ if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; } stp->ap = Action_sort(stp->ap); + + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT ) break; + if( ap->type==REDUCE && ap->x.rp!=rbest ) break; + } + if( ap==0 ){ + stp->autoReduce = 1; + stp->pDfltReduce = rbest; + } + } + + /* Make a second pass over all states and actions. Convert + ** every action that is a SHIFT to an autoReduce state into + ** a SHIFTREDUCE action. + */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(ap=stp->ap; ap; ap=ap->next){ + struct state *pNextState; + if( ap->type!=SHIFT ) continue; + pNextState = ap->x.stp; + if( pNextState->autoReduce && pNextState->pDfltReduce!=0 ){ + ap->type = SHIFTREDUCE; + ap->x.rp = pNextState->pDfltReduce; + } + } } } /* @@ -4293,29 +4414,35 @@ struct action *ap; for(i=0; instate; i++){ stp = lemp->sorted[i]; stp->nTknAct = stp->nNtAct = 0; - stp->iDflt = lemp->nstate + lemp->nrule; + stp->iDfltReduce = lemp->nrule; /* Init dflt action to "syntax error" */ stp->iTknOfst = NO_OFFSET; stp->iNtOfst = NO_OFFSET; for(ap=stp->ap; ap; ap=ap->next){ - if( compute_action(lemp,ap)>=0 ){ + int iAction = compute_action(lemp,ap); + if( iAction>=0 ){ if( ap->sp->indexnterminal ){ stp->nTknAct++; }else if( ap->sp->indexnsymbol ){ stp->nNtAct++; }else{ - stp->iDflt = compute_action(lemp, ap); + assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp ); + stp->iDfltReduce = iAction - lemp->nstate - lemp->nrule; } } } } qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), stateResortCompare); for(i=0; instate; i++){ lemp->sorted[i]->statenum = i; + } + lemp->nxstate = lemp->nstate; + while( lemp->nxstate>1 && lemp->sorted[lemp->nxstate-1]->autoReduce ){ + lemp->nxstate--; } } /***************** From the file "set.c" ************************************/ Index: tool/lempar.c ================================================================== --- tool/lempar.c +++ tool/lempar.c @@ -48,19 +48,23 @@ ** zero the stack is dynamically sized using realloc() ** ParseARG_SDECL A static variable declaration for the %extra_argument ** ParseARG_PDECL A parameter declaration for the %extra_argument ** ParseARG_STORE Code to store %extra_argument into yypParser ** ParseARG_FETCH Code to extract %extra_argument from yypParser -** YYNSTATE the combined number of states. -** YYNRULE the number of rules in the grammar ** YYERRORSYMBOL is the code number of the error symbol. If not ** defined, then do no error processing. +** YYNSTATE the combined number of states. +** YYNRULE the number of rules in the grammar +** YY_MAX_SHIFT Maximum value for shift actions +** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions +** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions +** YY_MIN_REDUCE Maximum value for reduce actions +** YY_ERROR_ACTION The yy_action[] code for syntax error +** YY_ACCEPT_ACTION The yy_action[] code for accept +** YY_NO_ACTION The yy_action[] code for no-op */ %% -#define YY_NO_ACTION (YYNSTATE+YYNRULE+2) -#define YY_ACCEPT_ACTION (YYNSTATE+YYNRULE+1) -#define YY_ERROR_ACTION (YYNSTATE+YYNRULE) /* The yyzerominor constant is used to initialize instances of ** YYMINORTYPE objects to zero. */ static const YYMINORTYPE yyzerominor = { 0 }; @@ -83,20 +87,24 @@ ** action integer. ** ** Suppose the action integer is N. Then the action is determined as ** follows ** -** 0 <= N < YYNSTATE Shift N. That is, push the lookahead +** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead ** token onto the stack and goto state N. ** -** YYNSTATE <= N < YYNSTATE+YYNRULE Reduce by rule N-YYNSTATE. +** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then +** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE. +** +** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE +** and YY_MAX_REDUCE + +** N == YY_ERROR_ACTION A syntax error has occurred. ** -** N == YYNSTATE+YYNRULE A syntax error has occurred. +** N == YY_ACCEPT_ACTION The parser accepts its input. ** -** N == YYNSTATE+YYNRULE+1 The parser accepts its input. -** -** N == YYNSTATE+YYNRULE+2 No such action. Denotes unused +** N == YY_NO_ACTION No such action. Denotes unused ** slots in the yy_action[] table. ** ** The action table is constructed as a single large table named yy_action[]. ** Given state S and lookahead X, the action is computed as ** @@ -151,13 +159,17 @@ ** (In other words, the "major" token.) ** ** + The semantic value stored at this level of the stack. This is ** the information used by the action routines in the grammar. ** It is sometimes called the "minor" token. +** +** After the "shift" half of a SHIFTREDUCE action, the stateno field +** actually contains the reduce action for the second half of the +** SHIFTREDUCE. */ struct yyStackEntry { - YYACTIONTYPE stateno; /* The state-number */ + YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */ YYCODETYPE major; /* The major token value. This is the code ** number for the token at this stack level */ YYMINORTYPE minor; /* The user-supplied minor token value. This ** is the value of the token */ }; @@ -382,15 +394,15 @@ yyParser *pParser, /* The parser */ YYCODETYPE iLookAhead /* The look-ahead token */ ){ int i; int stateno = pParser->yystack[pParser->yyidx].stateno; - - if( stateno>YY_SHIFT_COUNT - || (i = yy_shift_ofst[stateno])==YY_SHIFT_USE_DFLT ){ - return yy_default[stateno]; - } + + if( stateno>=YY_MIN_REDUCE ) return stateno; + assert( stateno <= YY_SHIFT_COUNT ); + i = yy_shift_ofst[stateno]; + if( i==YY_SHIFT_USE_DFLT ) return yy_default[stateno]; assert( iLookAhead!=YYNOCODE ); i += iLookAhead; if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ if( iLookAhead>0 ){ #ifdef YYFALLBACK @@ -487,11 +499,33 @@ %% ParseARG_STORE; /* Suppress warning about unused %extra_argument var */ } /* -** Perform a shift action. +** Print tracing information for a SHIFT action +*/ +#ifndef NDEBUG +static void yyTraceShift(yyParser *yypParser, int yyNewState){ + if( yyTraceFILE ){ + int i; + if( yyNewStateyyidx; i++) + fprintf(yyTraceFILE," %s",yyTokenName[yypParser->yystack[i].major]); + fprintf(yyTraceFILE,"\n"); + }else{ + fprintf(yyTraceFILE,"%sShift *\n",yyTracePrompt); + } + } +} +#else +# define yyTraceShift(X,Y) +#endif + +/* +** Perform a shift action. Return the number of errors. */ static void yy_shift( yyParser *yypParser, /* The parser to be shifted */ int yyNewState, /* The new state to shift in */ int yyMajor, /* The major token to shift in */ @@ -520,20 +554,11 @@ #endif yytos = &yypParser->yystack[yypParser->yyidx]; yytos->stateno = (YYACTIONTYPE)yyNewState; yytos->major = (YYCODETYPE)yyMajor; yytos->minor = *yypMinor; -#ifndef NDEBUG - if( yyTraceFILE && yypParser->yyidx>0 ){ - int i; - fprintf(yyTraceFILE,"%sShift %d\n",yyTracePrompt,yyNewState); - fprintf(yyTraceFILE,"%sStack:",yyTracePrompt); - for(i=1; i<=yypParser->yyidx; i++) - fprintf(yyTraceFILE," %s",yyTokenName[yypParser->yystack[i].major]); - fprintf(yyTraceFILE,"\n"); - } -#endif + yyTraceShift(yypParser, yyNewState); } /* The following table contains information about every rule that ** is used during the reduce. */ @@ -562,12 +587,13 @@ ParseARG_FETCH; yymsp = &yypParser->yystack[yypParser->yyidx]; #ifndef NDEBUG if( yyTraceFILE && yyruleno>=0 && yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){ - fprintf(yyTraceFILE, "%sReduce [%s].\n", yyTracePrompt, - yyRuleName[yyruleno]); + yysize = yyRuleInfo[yyruleno].nrhs; + fprintf(yyTraceFILE, "%sReduce [%s] -> state %d.\n", yyTracePrompt, + yyRuleName[yyruleno], yymsp[-yysize].stateno); } #endif /* NDEBUG */ /* Silence complaints from purify about yygotominor being uninitialized ** in some cases when it is copied into the stack after the following @@ -600,29 +626,28 @@ }; yygoto = yyRuleInfo[yyruleno].lhs; yysize = yyRuleInfo[yyruleno].nrhs; yypParser->yyidx -= yysize; yyact = yy_find_reduce_action(yymsp[-yysize].stateno,(YYCODETYPE)yygoto); - if( yyact < YYNSTATE ){ -#ifdef NDEBUG - /* If we are not debugging and the reduce action popped at least + if( yyact <= YY_MAX_SHIFTREDUCE ){ + if( yyact>YY_MAX_SHIFT ) yyact += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; + /* If the reduce action popped at least ** one element off the stack, then we can push the new element back ** onto the stack here, and skip the stack overflow test in yy_shift(). ** That gives a significant speed improvement. */ if( yysize ){ yypParser->yyidx++; yymsp -= yysize-1; yymsp->stateno = (YYACTIONTYPE)yyact; yymsp->major = (YYCODETYPE)yygoto; yymsp->minor = yygotominor; - }else -#endif - { + yyTraceShift(yypParser, yyact); + }else{ yy_shift(yypParser,yyact,yygoto,&yygotominor); } }else{ - assert( yyact == YYNSTATE + YYNRULE + 1 ); + assert( yyact == YY_ACCEPT_ACTION ); yy_accept(yypParser); } } /* @@ -738,17 +763,17 @@ } #endif do{ yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor); - if( yyact YY_MAX_SHIFT ) yyact += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; yy_shift(yypParser,yyact,yymajor,&yyminorunion); yypParser->yyerrcnt--; yymajor = YYNOCODE; - }else if( yyact < YYNSTATE + YYNRULE ){ - yy_reduce(yypParser,yyact-YYNSTATE); + }else if( yyact <= YY_MAX_REDUCE ){ + yy_reduce(yypParser,yyact-YY_MIN_REDUCE); }else{ assert( yyact == YY_ERROR_ACTION ); #ifdef YYERRORSYMBOL int yymx; #endif @@ -794,11 +819,11 @@ while( yypParser->yyidx >= 0 && yymx != YYERRORSYMBOL && (yyact = yy_find_reduce_action( yypParser->yystack[yypParser->yyidx].stateno, - YYERRORSYMBOL)) >= YYNSTATE + YYERRORSYMBOL)) >= YY_MIN_REDUCE ){ yy_pop_parser_stack(yypParser); } if( yypParser->yyidx < 0 || yymajor==0 ){ yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); @@ -844,7 +869,12 @@ } yymajor = YYNOCODE; #endif } }while( yymajor!=YYNOCODE && yypParser->yyidx>=0 ); +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sReturn\n",yyTracePrompt); + } +#endif return; } Index: tool/spaceanal.tcl ================================================================== --- tool/spaceanal.tcl +++ tool/spaceanal.tcl @@ -24,11 +24,23 @@ # Get the name of the database to analyze # proc usage {} { set argv0 [file rootname [file tail [info nameofexecutable]]] - puts stderr "Usage: $argv0 \[--pageinfo] \[--stats] database-name" + puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename" + puts stderr { +Analyze the SQLite3 database file specified by the "database-filename" +argument and output a report detailing size and storage efficiency +information for the database and its constituent tables and indexes. + +Options: + + --stats Output SQL text that creates a new database containing + statistics about the database that was analyzed + + --pageinfo Show how each page of the database-file is used +} exit 1 } set file_to_analyze {} set flags(-pageinfo) 0 set flags(-stats) 0