Index: doc/lemon.html ================================================================== --- doc/lemon.html +++ doc/lemon.html @@ -20,10 +20,30 @@ in long-running programs such as graphical user interfaces or embedded controllers.

This document is an introduction to the Lemon parser generator.

+ +

Security Note

+ +

The language parser code created by Lemon is very robust and +is well-suited for use in internet-facing applications that need to +safely process maliciously crafted inputs. + +

The "lemon.exe" command-line tool itself works great when given a valid +input grammar file and almost always gives helpful +error messages for malformed inputs. However, it is possible for +a malicious user to craft a grammar file that will cause +lemon.exe to crash. +We do not see this as a problem, as lemon.exe is not intended to be used +with hostile inputs. +To summarize:

+ +

Theory of Operation

The main goal of Lemon is to translate a context free grammar (CFG) for a particular language into C code that implements a parser for Index: ext/misc/series.c ================================================================== --- ext/misc/series.c +++ ext/misc/series.c @@ -31,11 +31,11 @@ ** HOW IT WORKS ** ** The generate_series "function" is really a virtual table with the ** following schema: ** -** CREATE FUNCTION generate_series( +** CREATE TABLE generate_series( ** value, ** start HIDDEN, ** stop HIDDEN, ** step HIDDEN ** ); Index: src/build.c ================================================================== --- src/build.c +++ src/build.c @@ -937,11 +937,15 @@ } pTable->zName = zName; pTable->iPKey = -1; pTable->pSchema = db->aDb[iDb].pSchema; pTable->nTabRef = 1; +#ifdef SQLITE_DEFAULT_ROWEST + pTable->nRowLogEst = sqlite3LogEst(SQLITE_DEFAULT_ROWEST); +#else pTable->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); +#endif assert( pParse->pNewTable==0 ); pParse->pNewTable = pTable; /* If this is the magic sqlite_sequence table used by autoincrement, ** then record a pointer to this table in the main database structure Index: src/delete.c ================================================================== --- src/delete.c +++ src/delete.c @@ -457,11 +457,11 @@ sqlite3VdbeAddOp4(v, OP_MakeRecord, iPk, nPk, iKey, sqlite3IndexAffinityStr(pParse->db, pPk), nPk); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iEphCur, iKey, iPk, nPk); }else{ /* Add the rowid of the row to be deleted to the RowSet */ - nKey = 1; /* OP_Seek always uses a single rowid */ + nKey = 1; /* OP_DeferredSeek always uses a single rowid */ sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, iKey); } } /* If this DELETE cannot use the ONEPASS strategy, this is the Index: src/insert.c ================================================================== --- src/insert.c +++ src/insert.c @@ -522,10 +522,11 @@ db = pParse->db; if( pParse->nErr || db->mallocFailed ){ goto insert_cleanup; } + dest.iSDParm = 
0; /* Suppress a harmless compiler warning */ /* If the Select object is really just a simple VALUES() list with a ** single row (the common case) then keep that one row of values ** and discard the other (unused) parts of the pSelect object */ Index: src/shell.c ================================================================== --- src/shell.c +++ src/shell.c @@ -506,10 +506,22 @@ static int strlen30(const char *z){ const char *z2 = z; while( *z2 ){ z2++; } return 0x3fffffff & (int)(z2 - z); } + +/* +** Return the length of a string in characters. Multibyte UTF8 characters +** count as a single character. +*/ +static int strlenChar(const char *z){ + int n = 0; + while( *z ){ + if( (0xc0&*(z++))!=0x80 ) n++; + } + return n; +} /* ** This routine reads a line of text from FILE in, stores ** the text in memory obtained from malloc() and returns a pointer ** to the text. NULL is returned at end of file, or if malloc() @@ -1915,13 +1927,13 @@ w = colWidth[i]; }else{ w = 0; } if( w==0 ){ - w = strlen30(azCol[i] ? azCol[i] : ""); + w = strlenChar(azCol[i] ? azCol[i] : ""); if( w<10 ) w = 10; - n = strlen30(azArg && azArg[i] ? azArg[i] : p->nullValue); + n = strlenChar(azArg && azArg[i] ? 
azArg[i] : p->nullValue); if( wactualWidth) ){ p->actualWidth[i] = w; } @@ -1952,12 +1964,12 @@ if( iactualWidth) ){ w = p->actualWidth[i]; }else{ w = 10; } - if( p->cMode==MODE_Explain && azArg[i] && strlen30(azArg[i])>w ){ - w = strlen30(azArg[i]); + if( p->cMode==MODE_Explain && azArg[i] && strlenChar(azArg[i])>w ){ + w = strlenChar(azArg[i]); } if( i==1 && p->aiIndent && p->pStmt ){ if( p->iIndentnIndent ){ utf8_printf(p->out, "%*.s", p->aiIndent[p->iIndent], ""); } Index: src/test_fs.c ================================================================== --- src/test_fs.c +++ src/test_fs.c @@ -543,10 +543,11 @@ if( zQuery[i]=='/' ) nDir = i; } zDir = zQuery; } } + if( nDir==0 ) nDir = 1; sqlite3_bind_text(pCsr->pStmt, 1, zDir, nDir, SQLITE_TRANSIENT); sqlite3_bind_text(pCsr->pStmt, 2, zRoot, nRoot, SQLITE_TRANSIENT); sqlite3_bind_text(pCsr->pStmt, 3, zPrefix, nPrefix, SQLITE_TRANSIENT); Index: src/vdbe.c ================================================================== --- src/vdbe.c +++ src/vdbe.c @@ -2495,11 +2495,13 @@ Mem *pReg; /* PseudoTable input register */ pC = p->apCsr[pOp->p1]; p2 = pOp->p2; - /* If the cursor cache is stale, bring it up-to-date */ + /* If the cursor cache is stale (meaning it is not currently pointing at + ** the correct row) then bring it up-to-date by doing the necessary + ** B-Tree seek. */ rc = sqlite3VdbeCursorMoveto(&pC, &p2); if( rc ) goto abort_due_to_error; assert( pOp->p3>0 && pOp->p3<=(p->nMem+1 - p->nCursor) ); pDest = &aMem[pOp->p3]; @@ -5263,12 +5265,12 @@ pC->cacheStatus = CACHE_STALE; pC->seekResult = 0; break; } -/* Opcode: Seek P1 * P3 P4 * -** Synopsis: Move P3 to P1.rowid +/* Opcode: DeferredSeek P1 * P3 P4 * +** Synopsis: Move P3 to P1.rowid if needed ** ** P1 is an open index cursor and P3 is a cursor on the corresponding ** table. This opcode does a deferred seek of the P3 table cursor ** to the row that corresponds to the current row of P1. 
** @@ -5291,15 +5293,15 @@ ** the end of the index key pointed to by cursor P1. This integer should be ** the rowid of the table entry to which this index entry points. ** ** See also: Rowid, MakeRecord. */ -case OP_Seek: -case OP_IdxRowid: { /* out2 */ - VdbeCursor *pC; /* The P1 index cursor */ - VdbeCursor *pTabCur; /* The P2 table cursor (OP_Seek only) */ - i64 rowid; /* Rowid that P1 current points to */ +case OP_DeferredSeek: +case OP_IdxRowid: { /* out2 */ + VdbeCursor *pC; /* The P1 index cursor */ + VdbeCursor *pTabCur; /* The P3 table cursor (OP_DeferredSeek only) */ + i64 rowid; /* Rowid that P1 currently points to */ assert( pOp->p1>=0 && pOp->p1nCursor ); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); assert( pC->eCurType==CURTYPE_BTREE ); @@ -5321,11 +5323,11 @@ rowid = 0; /* Not needed. Only used to silence a warning. */ rc = sqlite3VdbeIdxRowid(db, pC->uc.pCursor, &rowid); if( rc!=SQLITE_OK ){ goto abort_due_to_error; } - if( pOp->opcode==OP_Seek ){ + if( pOp->opcode==OP_DeferredSeek ){ assert( pOp->p3>=0 && pOp->p3nCursor ); pTabCur = p->apCsr[pOp->p3]; assert( pTabCur!=0 ); assert( pTabCur->eCurType==CURTYPE_BTREE ); assert( pTabCur->uc.pCursor!=0 ); Index: src/wherecode.c ================================================================== --- src/wherecode.c +++ src/wherecode.c @@ -964,14 +964,14 @@ ** function generates code to do a deferred seek of cursor iCur to the ** rowid stored in register iRowid. ** ** Normally, this is just: ** -** OP_Seek $iCur $iRowid +** OP_DeferredSeek $iCur $iRowid ** ** However, if the scan currently being coded is a branch of an OR-loop and -** the statement currently being coded is a SELECT, then P3 of the OP_Seek +** the statement currently being coded is a SELECT, then P3 of OP_DeferredSeek ** is set to iIdxCur and P4 is set to point to an array of integers ** containing one entry for each column of the table cursor iCur is open ** on. 
For each table column, if the column is the i'th column of the ** index, then the corresponding array entry is set to (i+1). If the column ** does not appear in the index at all, the array entry is set to 0. @@ -986,11 +986,11 @@ Vdbe *v = pParse->pVdbe; /* Vdbe to generate code within */ assert( iIdxCur>0 ); assert( pIdx->aiColumn[pIdx->nColumn-1]==-1 ); - sqlite3VdbeAddOp3(v, OP_Seek, iIdxCur, 0, iCur); + sqlite3VdbeAddOp3(v, OP_DeferredSeek, iIdxCur, 0, iCur); if( (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) && DbMaskAllZero(sqlite3ParseToplevel(pParse)->writeMask) ){ int i; Table *pTab = pIdx->pTable; Index: test/kvtest.c ================================================================== --- test/kvtest.c +++ test/kvtest.c @@ -69,18 +69,25 @@ " BLOBs each of size M bytes. The page size of the new database\n" " file will be X. Additional options:\n" "\n" " --variance V Randomly vary M by plus or minus V\n" "\n" -" kvtest export DBFILE DIRECTORY\n" +" kvtest export DBFILE DIRECTORY [--tree]\n" "\n" " Export all the blobs in the kv table of DBFILE into separate\n" -" files in DIRECTORY.\n" +" files in DIRECTORY. DIRECTORY is created if it does not previously\n" +" exist. If the --tree option is used, then the blobs are written\n" +" into a hierarchy of directories, using names like 00/00/00,\n" +" 00/00/01, 00/00/02, and so forth. Without the --tree option, all\n" +" files are in the top-level directory with names like 000000, 000001,\n" +" 000002, and so forth.\n" +"\n" +" kvtest stat DBFILE [options]\n" "\n" -" kvtest stat DBFILE\n" +" Display summary information about DBFILE. Options:\n" "\n" -" Display summary information about DBFILE\n" +" --vacuum Run VACUUM on the database file\n" "\n" " kvtest run DBFILE [options]\n" "\n" " Run a performance test. DBFILE can be either the name of a\n" " database or a directory containing sample files. 
Options:\n" @@ -88,16 +95,22 @@ " --asc Read blobs in ascending order\n" " --blob-api Use the BLOB API\n" " --cache-size N Database cache size\n" " --count N Read N blobs\n" " --desc Read blobs in descending order\n" +" --fsync Synchronous file writes\n" +" --integrity-check Run \"PRAGMA integrity_check\" after test\n" " --max-id N Maximum blob key to use\n" " --mmap N Mmap as much as N bytes of DBFILE\n" +" --multitrans Each read or write in its own transaction\n" +" --nocheckpoint Omit the checkpoint on WAL mode writes\n" +" --nosync Set \"PRAGMA synchronous=OFF\"\n" " --jmode MODE Set MODE journal mode prior to starting\n" " --random Read blobs in a random order\n" " --start N Start reading with this blob key\n" " --stats Output operating stats before exiting\n" +" --update Do an overwrite test\n" ; /* Reference resources used */ #include #include @@ -109,17 +122,43 @@ #ifndef _WIN32 # include #else /* Provide Windows equivalent for the needed parts of unistd.h */ +# include # include # define R_OK 2 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) # define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) # define access _access #endif +#include + +/* +** The following macros are used to cast pointers to integers and +** integers to pointers. The way you do this varies from one compiler +** to the next, so we have developed the following set of #if statements +** to generate appropriate macros for a wide range of compilers. +** +** The correct "ANSI" way to do this is to use the intptr_t type. +** Unfortunately, that typedef is not available on all compilers, or +** if it is available, it requires an #include of specific headers +** that vary from one machine to the next. +** +** Ticket #3860: The llvm-gcc-4.2 compiler from Apple chokes on +** the ((void*)&((char*)0)[X]) construct. But MSVC chokes on ((void*)(X)). +** So we have to define the macros in different ways depending on the +** compiler. 
+*/ +#if defined(__PTRDIFF_TYPE__) /* This case should work for GCC */ +# define SQLITE_INT_TO_PTR(X) ((void*)(__PTRDIFF_TYPE__)(X)) +# define SQLITE_PTR_TO_INT(X) ((sqlite3_int64)(__PTRDIFF_TYPE__)(X)) +#else +# define SQLITE_INT_TO_PTR(X) ((void*)(intptr_t)(X)) +# define SQLITE_PTR_TO_INT(X) ((sqlite3_int64)(intptr_t)(X)) +#endif /* ** Show the help text and quit. */ static void showHelp(void){ @@ -199,27 +238,45 @@ /* ** Check the filesystem object zPath. Determine what it is: ** -** PATH_DIR A directory +** PATH_DIR A single directory holding many files +** PATH_TREE A directory hierarchy with files at the leaves ** PATH_DB An SQLite database ** PATH_NEXIST Does not exist ** PATH_OTHER Something else +** +** PATH_DIR means all of the separate files are grouped together +** into a single directory with names like 000000, 000001, 000002, and +** so forth. PATH_TREE means there is a hierarchy of directories so +** that no single directory has too many entries. The files have names +** like 00/00/00, 00/00/01, 00/00/02 and so forth. The decision between +** PATH_DIR and PATH_TREE is determined by the presence of a subdirectory +** named "00" at the top-level. 
*/ #define PATH_DIR 1 -#define PATH_DB 2 +#define PATH_TREE 2 +#define PATH_DB 3 #define PATH_NEXIST 0 #define PATH_OTHER 99 static int pathType(const char *zPath){ struct stat x; int rc; if( access(zPath,R_OK) ) return PATH_NEXIST; memset(&x, 0, sizeof(x)); rc = stat(zPath, &x); if( rc<0 ) return PATH_OTHER; - if( S_ISDIR(x.st_mode) ) return PATH_DIR; + if( S_ISDIR(x.st_mode) ){ + char *zLayer1 = sqlite3_mprintf("%s/00", zPath); + memset(&x, 0, sizeof(x)); + rc = stat(zLayer1, &x); + sqlite3_free(zLayer1); + if( rc<0 ) return PATH_DIR; + if( S_ISDIR(x.st_mode) ) return PATH_TREE; + return PATH_DIR; + } if( (x.st_size%512)==0 ) return PATH_DB; return PATH_OTHER; } /* @@ -326,24 +383,34 @@ char *zDb; int i, rc; sqlite3 *db; char *zSql; sqlite3_stmt *pStmt; + int doVacuum = 0; assert( strcmp(argv[1],"stat")==0 ); assert( argc>=3 ); zDb = argv[2]; for(i=3; i=3 ); + if( argc<4 ) fatalError("Usage: kvtest export DATABASE DIRECTORY [OPTIONS]"); zDb = argv[2]; - if( argc!=4 ) fatalError("Usage: kvtest export DATABASE DIRECTORY"); zDir = argv[3]; - if( pathType(zDir)!=PATH_DIR ){ + kvtest_mkdir(zDir); + for(i=4; i=3 ); @@ -658,15 +835,45 @@ if( eType==PATH_NEXIST ) fatalError("object does not exist: \"%s\"", zDb); for(i=3; iiMax ) iKey = 1; @@ -833,17 +1076,37 @@ if( pStmt ) sqlite3_finalize(pStmt); if( pBlob ) sqlite3_blob_close(pBlob); if( bStats ){ display_stats(db, 0); } - if( db ) sqlite3_close(db); + if( db ){ + if( !doMultiTrans ) sqlite3_exec(db, "COMMIT", 0, 0, 0); + if( !noCheckpoint ){ + sqlite3_close(db); + db = 0; + } + } tmElapsed = timeOfDay() - tmStart; + if( db && noCheckpoint ){ + sqlite3_close(db); + db = 0; + } if( nExtra ){ printf("%d cycles due to %d misses\n", nCount, nExtra); } if( eType==PATH_DB ){ printf("SQLite version: %s\n", sqlite3_libversion()); + if( doIntegrityCk ){ + sqlite3_open(zDb, &db); + sqlite3_prepare_v2(db, "PRAGMA integrity_check", -1, &pStmt, 0); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + printf("integrity-check: %s\n", 
sqlite3_column_text(pStmt, 0)); + } + sqlite3_finalize(pStmt); + sqlite3_close(db); + db = 0; + } } printf("--count %d --max-id %d", nCount-nExtra, iMax); switch( eOrder ){ case ORDER_RANDOM: printf(" --random\n"); break; case ORDER_DESC: printf(" --desc\n"); break; @@ -850,15 +1113,21 @@ default: printf(" --asc\n"); break; } if( eType==PATH_DB ){ printf("--cache-size %d --jmode %s\n", iCache, zJMode); printf("--mmap %d%s\n", mmapSize, bBlobApi ? " --blob-api" : ""); + if( noSync ) printf("--nosync\n"); } if( iPagesize ) printf("Database page size: %d\n", iPagesize); printf("Total elapsed time: %.3f\n", tmElapsed/1000.0); - printf("Microseconds per BLOB read: %.3f\n", tmElapsed*1000.0/nCount); - printf("Content read rate: %.1f MB/s\n", nTotal/(1000.0*tmElapsed)); + if( isUpdateTest ){ + printf("Microseconds per BLOB write: %.3f\n", tmElapsed*1000.0/nCount); + printf("Content write rate: %.1f MB/s\n", nTotal/(1000.0*tmElapsed)); + }else{ + printf("Microseconds per BLOB read: %.3f\n", tmElapsed*1000.0/nCount); + printf("Content read rate: %.1f MB/s\n", nTotal/(1000.0*tmElapsed)); + } return 0; } int main(int argc, char **argv){ Index: test/vtabH.test ================================================================== --- test/vtabH.test +++ test/vtabH.test @@ -214,28 +214,32 @@ close $fd } } {} set pwd [pwd] - do_execsql_test 3.5 { - SELECT path, size FROM fstree WHERE path GLOB $pwd || '/subdir/*' ORDER BY 1 - } [list \ - "$pwd/subdir/x1.txt" 143 \ - "$pwd/subdir/x2.txt" 153 \ - ] - do_execsql_test 3.6 { - SELECT path, size FROM fstree WHERE path LIKE $pwd || '/subdir/%' ORDER BY 1 - } [list \ - "$pwd/subdir/x1.txt" 143 \ - "$pwd/subdir/x2.txt" 153 \ - ] - do_execsql_test 3.7 { - SELECT sum(size) FROM fstree WHERE path LIKE $pwd || '/subdir/%' - } 296 - do_execsql_test 3.8 { - SELECT size FROM fstree WHERE path = $pwd || '/subdir/x1.txt' - } 143 + if {![string match {*[_%]*} $pwd]} { + do_execsql_test 3.5 { + SELECT path, size FROM fstree + WHERE path GLOB $pwd || 
'/subdir/*' ORDER BY 1 + } [list \ + "$pwd/subdir/x1.txt" 143 \ + "$pwd/subdir/x2.txt" 153 \ + ] + do_execsql_test 3.6 { + SELECT path, size FROM fstree + WHERE path LIKE $pwd || '/subdir/%' ORDER BY 1 + } [list \ + "$pwd/subdir/x1.txt" 143 \ + "$pwd/subdir/x2.txt" 153 \ + ] + do_execsql_test 3.7 { + SELECT sum(size) FROM fstree WHERE path LIKE $pwd || '/subdir/%' + } 296 + do_execsql_test 3.8 { + SELECT size FROM fstree WHERE path = $pwd || '/subdir/x1.txt' + } 143 + } } finish_test