Index: ext/expert/expert.c ================================================================== --- ext/expert/expert.c +++ ext/expert/expert.c @@ -34,10 +34,11 @@ fprintf(stderr, "\n"); fprintf(stderr, "Options are:\n"); fprintf(stderr, " -sql SQL (analyze SQL statements passed as argument)\n"); fprintf(stderr, " -file FILE (read SQL statements from file FILE)\n"); fprintf(stderr, " -verbose LEVEL (integer verbosity level. default 1)\n"); + fprintf(stderr, " -sample PERCENT (percent of db to sample. default 100)\n"); exit(-1); } static int readSqlFromFile(sqlite3expert *p, const char *zFile, char **pzErr){ FILE *in = fopen(zFile, "rb"); @@ -95,14 +96,21 @@ if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-file", nArg) ){ if( ++i==(argc-1) ) option_requires_argument("-file"); rc = readSqlFromFile(p, argv[i], &zErr); } - else if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-sql", nArg) ){ + else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sql", nArg) ){ if( ++i==(argc-1) ) option_requires_argument("-sql"); rc = sqlite3_expert_sql(p, argv[i], &zErr); } + + else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sample", nArg) ){ + int iSample; + if( ++i==(argc-1) ) option_requires_argument("-sample"); + iSample = option_integer_arg(argv[i]); + sqlite3_expert_config(p, EXPERT_CONFIG_SAMPLE, iSample); + } else if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-verbose", nArg) ){ if( ++i==(argc-1) ) option_requires_argument("-verbose"); iVerbose = option_integer_arg(argv[i]); } Index: ext/expert/sqlite3expert.c ================================================================== --- ext/expert/sqlite3expert.c +++ ext/expert/sqlite3expert.c @@ -25,10 +25,13 @@ typedef struct IdxConstraint IdxConstraint; typedef struct IdxScan IdxScan; typedef struct IdxStatement IdxStatement; typedef struct IdxTable IdxTable; typedef struct IdxWrite IdxWrite; + +#define UNIQUE_TABLE_NAME "t592690916721053953805701627921227776" + /* ** A single constraint. Equivalent to either "col = ?" or "col < ?" (or ** any other type of single-ended range constraint on a column). ** @@ -125,10 +128,11 @@ /* ** sqlite3expert object. */ struct sqlite3expert { + int iSample; /* Percentage of tables to sample for stat1 */ sqlite3 *db; /* User database */ sqlite3 *dbm; /* In-memory db for this analysis */ sqlite3 *dbv; /* Vtab schema for this analysis */ IdxTable *pTable; /* List of all IdxTable objects */ IdxScan *pScan; /* List of scan objects */ @@ -1078,12 +1082,12 @@ static int idxProcessOneTrigger( sqlite3expert *p, IdxWrite *pWrite, char **pzErr ){ - static const char *zInt = "t592690916721053953805701627921227776"; - static const char *zDrop = "DROP TABLE t592690916721053953805701627921227776"; + static const char *zInt = UNIQUE_TABLE_NAME; + static const char *zDrop = "DROP TABLE " UNIQUE_TABLE_NAME; IdxTable *pTab = pWrite->pTab; const char *zTab = pTab->zName; const char *zSql = "SELECT 'CREATE TEMP' || substr(sql, 7) FROM sqlite_master " "WHERE tbl_name = %Q AND type IN ('table', 'trigger') " @@ -1232,10 +1236,42 @@ } } idxFinalize(&rc, pSchema); return rc; } + +struct IdxSampleCtx { + int iTarget; + double target; /* Target nRet/nRow value */ + double nRow; /* Number of rows seen */ + double nRet; /* Number of rows returned */ +}; + +static void idxSampleFunc( + sqlite3_context *pCtx, + int argc, + sqlite3_value **argv +){ + struct IdxSampleCtx *p = (struct IdxSampleCtx*)sqlite3_user_data(pCtx); + int bRet; + + assert( argc==0 ); + if( p->nRow==0.0 ){ + bRet = 1; + }else{ + bRet = (p->nRet / p->nRow) <= p->target; + if( bRet==0 ){ + unsigned short rnd; + sqlite3_randomness(2, (void*)&rnd); + bRet = ((int)rnd % 100) <= p->iTarget; + } + } + + sqlite3_result_int(pCtx, bRet); + p->nRow += 1.0; + p->nRet += (double)bRet; +} struct IdxRemCtx { int nSlot; struct IdxRemSlot { int eType; /* SQLITE_NULL, INTEGER, REAL, TEXT, BLOB */ @@ -1357,10 +1393,12 @@ int nCol = 0; int i; sqlite3_stmt *pQuery = 0; int *aStat = 0; int rc = SQLITE_OK; + + assert( p->iSample>0 ); /* Formulate the query text */ sqlite3_bind_text(pIndexXInfo, 1, zIdx, -1, SQLITE_STATIC); while( SQLITE_OK==rc && SQLITE_ROW==sqlite3_step(pIndexXInfo) ){ const char *zComma = zCols==0 ? "" : ", "; @@ -1370,13 +1408,19 @@ "%sx.%Q IS rem(%d, x.%Q) COLLATE %s", zComma, zName, nCol, zName, zColl ); zOrder = idxAppendText(&rc, zOrder, "%s%d", zComma, ++nCol); } if( rc==SQLITE_OK ){ - zQuery = sqlite3_mprintf( - "SELECT %s FROM %Q x ORDER BY %s", zCols, zTab, zOrder - ); + if( p->iSample==100 ){ + zQuery = sqlite3_mprintf( + "SELECT %s FROM %Q x ORDER BY %s", zCols, zTab, zOrder + ); + }else{ + zQuery = sqlite3_mprintf( + "SELECT %s FROM temp."UNIQUE_TABLE_NAME" x ORDER BY %s", zCols, zOrder + ); + } } sqlite3_free(zCols); sqlite3_free(zOrder); /* Formulate the query text */ @@ -1430,10 +1474,29 @@ sqlite3_free(aStat); idxFinalize(&rc, pQuery); return rc; } + +static int idxBuildSampleTable(sqlite3expert *p, const char *zTab){ + int rc; + char *zSql; + + rc = sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp."UNIQUE_TABLE_NAME,0,0,0); + if( rc!=SQLITE_OK ) return rc; + + zSql = sqlite3_mprintf( + "CREATE TABLE temp." UNIQUE_TABLE_NAME + " AS SELECT * FROM %Q WHERE sample()" + , zTab + ); + if( zSql==0 ) return SQLITE_NOMEM; + rc = sqlite3_exec(p->db, zSql, 0, 0, 0); + sqlite3_free(zSql); + + return rc; +} /* ** This function is called as part of sqlite3_expert_analyze(). Candidate ** indexes have already been created in database sqlite3expert.dbm, this ** function populates sqlite_stat1 table in the same database. @@ -1442,23 +1505,28 @@ */ static int idxPopulateStat1(sqlite3expert *p, char **pzErr){ int rc = SQLITE_OK; int nMax =0; struct IdxRemCtx *pCtx = 0; + struct IdxSampleCtx samplectx; int i; + i64 iPrev = -100000; sqlite3_stmt *pAllIndex = 0; sqlite3_stmt *pIndexXInfo = 0; sqlite3_stmt *pWrite = 0; - const char *zAllIndex = - "SELECT s.name, l.name FROM " + const char *zAllIndex = + "SELECT s.rowid, s.name, l.name FROM " " sqlite_master AS s, " " pragma_index_list(s.name) AS l " "WHERE s.type = 'table'"; const char *zIndexXInfo = "SELECT name, coll FROM pragma_index_xinfo(?) WHERE key"; const char *zWrite = "INSERT INTO sqlite_stat1 VALUES(?, ?, ?)"; + + /* If iSample==0, no sqlite_stat1 data is required. */ + if( p->iSample==0 ) return SQLITE_OK; rc = idxLargestIndex(p->dbm, &nMax, pzErr); if( nMax<=0 || rc!=SQLITE_OK ) return rc; rc = sqlite3_exec(p->dbm, "ANALYZE; PRAGMA writable_schema=1", 0, 0, 0); @@ -1471,10 +1539,15 @@ if( rc==SQLITE_OK ){ rc = sqlite3_create_function( p->db, "rem", 2, SQLITE_UTF8, (void*)pCtx, idxRemFunc, 0, 0 ); } + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function( + p->db, "sample", 0, SQLITE_UTF8, (void*)&samplectx, idxSampleFunc, 0, 0 + ); + } if( rc==SQLITE_OK ){ pCtx->nSlot = nMax+1; rc = idxPrepareStmt(p->dbm, &pAllIndex, pzErr, zAllIndex); } @@ -1484,13 +1557,28 @@ if( rc==SQLITE_OK ){ rc = idxPrepareStmt(p->dbm, &pWrite, pzErr, zWrite); } while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pAllIndex) ){ - const char *zTab = (const char*)sqlite3_column_text(pAllIndex, 0); - const char *zIdx = (const char*)sqlite3_column_text(pAllIndex, 1); + i64 iRowid = sqlite3_column_int64(pAllIndex, 0); + const char *zTab = (const char*)sqlite3_column_text(pAllIndex, 1); + const char *zIdx = (const char*)sqlite3_column_text(pAllIndex, 2); + if( p->iSample<100 && iPrev!=iRowid ){ + samplectx.target = (double)p->iSample / 100.0; + samplectx.iTarget = p->iSample; + samplectx.nRow = 0.0; + samplectx.nRet = 0.0; + rc = idxBuildSampleTable(p, zTab); + if( rc!=SQLITE_OK ) break; + } rc = idxPopulateOneStat1(p, pIndexXInfo, pWrite, zTab, zIdx, pzErr); + iPrev = iRowid; + } + if( p->iSample<100 ){ + rc = sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp." UNIQUE_TABLE_NAME, + 0,0,0 + ); } idxFinalize(&rc, pAllIndex); idxFinalize(&rc, pIndexXInfo); idxFinalize(&rc, pWrite); @@ -1501,10 +1589,12 @@ sqlite3_free(pCtx); if( rc==SQLITE_OK ){ rc = sqlite3_exec(p->dbm, "ANALYZE sqlite_master", 0, 0, 0); } + + sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp."UNIQUE_TABLE_NAME,0,0,0); return rc; } /* ** Allocate a new sqlite3expert object. @@ -1521,10 +1611,11 @@ ** collect information regarding the WHERE, ORDER BY and other clauses ** of the user's query. */ if( rc==SQLITE_OK ){ pNew->db = db; + pNew->iSample = 100; rc = sqlite3_open(":memory:", &pNew->dbv); } if( rc==SQLITE_OK ){ rc = sqlite3_open(":memory:", &pNew->dbm); if( rc==SQLITE_OK ){ @@ -1562,10 +1653,34 @@ sqlite3_expert_destroy(pNew); pNew = 0; } return pNew; } + +/* +** Configure an sqlite3expert object. +*/ +int sqlite3_expert_config(sqlite3expert *p, int op, ...){ + int rc = SQLITE_OK; + va_list ap; + va_start(ap, op); + switch( op ){ + case EXPERT_CONFIG_SAMPLE: { + int iVal = va_arg(ap, int); + if( iVal<0 ) iVal = 0; + if( iVal>100 ) iVal = 100; + p->iSample = iVal; + break; + } + default: + rc = SQLITE_NOTFOUND; + break; + } + + va_end(ap); + return rc; +} /* ** Add an SQL statement to the analysis. */ int sqlite3_expert_sql( Index: ext/expert/sqlite3expert.h ================================================================== --- ext/expert/sqlite3expert.h +++ ext/expert/sqlite3expert.h @@ -25,10 +25,42 @@ ** of the caller to eventually free the error message buffer using ** sqlite3_free(). */ sqlite3expert *sqlite3_expert_new(sqlite3 *db, char **pzErr); +/* +** Configure an sqlite3expert object. +** +** EXPERT_CONFIG_SAMPLE: +** By default, sqlite3_expert_analyze() generates sqlite_stat1 data for +** each candidate index. This involves scanning and sorting the entire +** contents of each user database table once for each candidate index +** associated with the table. For large databases, this can be +** prohibitively slow. This option allows the sqlite3expert object to +** be configured so that sqlite_stat1 data is instead generated based on a +** subset of each table, or so that no sqlite_stat1 data is used at all. +** +** A single integer argument is passed to this option. If the value is less +** than or equal to zero, then no sqlite_stat1 data is generated or used by +** the analysis - indexes are recommended based on the database schema only. +** Or, if the value is 100 or greater, complete sqlite_stat1 data is +** generated for each candidate index (this is the default). Finally, if the +** value falls between 0 and 100, then it represents the percentage of user +** table rows that should be considered when generating sqlite_stat1 data. +** +** Examples: +** +** // Do not generate any sqlite_stat1 data +** sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 0); +** +** // Generate sqlite_stat1 data based on 10% of the rows in each table. +** sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 10); +*/ +int sqlite3_expert_config(sqlite3expert *p, int op, ...); + +#define EXPERT_CONFIG_SAMPLE 1 /* int */ + /* ** Specify zero or more SQL statements to be included in the analysis. ** ** Buffer zSql must contain zero or more complete SQL statements. This ** function parses all statements contained in the buffer and adds them @@ -51,10 +83,11 @@ int sqlite3_expert_sql( sqlite3expert *p, /* From a successful sqlite3_expert_new() */ const char *zSql, /* SQL statement(s) to add */ char **pzErr /* OUT: Error message (if any) */ ); + /* ** This function is called after the sqlite3expert object has been configured ** with all SQL statements using sqlite3_expert_sql() to actually perform ** the analysis. Once this function has been called, it is not possible to