Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add an option to generate stat1 data based on a subset of the user database table contents to sqlite3_expert. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | schemalint |
Files: | files | file ages | folders |
SHA3-256: |
c69c3e21db6e141f7e24226c6432f2ed |
User & Date: | dan 2017-04-20 09:54:04 |
Context
2017-04-20
| ||
16:18 | Avoid adding INTEGER PRIMARY KEY columns to candidate indexes. check-in: 4577fea5 user: dan tags: schemalint | |
16:08 | Avoid creating a temp table in the user database in the sqlite3_expert code. Trouble is, this makes sampling for stat1 data much slower. check-in: c62e3582 user: dan tags: schemalint-failure | |
09:54 | Add an option to generate stat1 data based on a subset of the user database table contents to sqlite3_expert. check-in: c69c3e21 user: dan tags: schemalint | |
2017-04-18
| ||
20:10 | Have sqlite3_expert_analyze() populate the sqlite_stat1 table before running queries through the planner for the second time. check-in: a157fcfd user: dan tags: schemalint | |
Changes
Changes to ext/expert/expert.c.
32 32 fprintf(stderr, "\n"); 33 33 fprintf(stderr, "Usage %s ?OPTIONS? DATABASE\n", argv[0]); 34 34 fprintf(stderr, "\n"); 35 35 fprintf(stderr, "Options are:\n"); 36 36 fprintf(stderr, " -sql SQL (analyze SQL statements passed as argument)\n"); 37 37 fprintf(stderr, " -file FILE (read SQL statements from file FILE)\n"); 38 38 fprintf(stderr, " -verbose LEVEL (integer verbosity level. default 1)\n"); 39 + fprintf(stderr, " -sample PERCENT (percent of db to sample. default 100)\n"); 39 40 exit(-1); 40 41 } 41 42 42 43 static int readSqlFromFile(sqlite3expert *p, const char *zFile, char **pzErr){ 43 44 FILE *in = fopen(zFile, "rb"); 44 45 long nIn; 45 46 size_t nRead; ................................................................................ 93 94 char *zArg = argv[i]; 94 95 int nArg = strlen(zArg); 95 96 if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-file", nArg) ){ 96 97 if( ++i==(argc-1) ) option_requires_argument("-file"); 97 98 rc = readSqlFromFile(p, argv[i], &zErr); 98 99 } 99 100 100 - else if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-sql", nArg) ){ 101 + else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sql", nArg) ){ 101 102 if( ++i==(argc-1) ) option_requires_argument("-sql"); 102 103 rc = sqlite3_expert_sql(p, argv[i], &zErr); 103 104 } 105 + 106 + else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sample", nArg) ){ 107 + int iSample; 108 + if( ++i==(argc-1) ) option_requires_argument("-sample"); 109 + iSample = option_integer_arg(argv[i]); 110 + sqlite3_expert_config(p, EXPERT_CONFIG_SAMPLE, iSample); 111 + } 104 112 105 113 else if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-verbose", nArg) ){ 106 114 if( ++i==(argc-1) ) option_requires_argument("-verbose"); 107 115 iVerbose = option_integer_arg(argv[i]); 108 116 } 109 117 110 118 else{
Changes to ext/expert/sqlite3expert.c.
23 23 24 24 typedef struct IdxColumn IdxColumn; 25 25 typedef struct IdxConstraint IdxConstraint; 26 26 typedef struct IdxScan IdxScan; 27 27 typedef struct IdxStatement IdxStatement; 28 28 typedef struct IdxTable IdxTable; 29 29 typedef struct IdxWrite IdxWrite; 30 + 31 +#define UNIQUE_TABLE_NAME "t592690916721053953805701627921227776" 32 + 30 33 31 34 /* 32 35 ** A single constraint. Equivalent to either "col = ?" or "col < ?" (or 33 36 ** any other type of single-ended range constraint on a column). 34 37 ** 35 38 ** pLink: 36 39 ** Used to temporarily link IdxConstraint objects into lists while ................................................................................ 123 126 IdxHashEntry *aHash[IDX_HASH_SIZE]; 124 127 }; 125 128 126 129 /* 127 130 ** sqlite3expert object. 128 131 */ 129 132 struct sqlite3expert { 133 + int iSample; /* Percentage of tables to sample for stat1 */ 130 134 sqlite3 *db; /* User database */ 131 135 sqlite3 *dbm; /* In-memory db for this analysis */ 132 136 sqlite3 *dbv; /* Vtab schema for this analysis */ 133 137 IdxTable *pTable; /* List of all IdxTable objects */ 134 138 IdxScan *pScan; /* List of scan objects */ 135 139 IdxWrite *pWrite; /* List of write objects */ 136 140 IdxStatement *pStatement; /* List of IdxStatement objects */ ................................................................................ 1076 1080 } 1077 1081 1078 1082 static int idxProcessOneTrigger( 1079 1083 sqlite3expert *p, 1080 1084 IdxWrite *pWrite, 1081 1085 char **pzErr 1082 1086 ){ 1083 - static const char *zInt = "t592690916721053953805701627921227776"; 1084 - static const char *zDrop = "DROP TABLE t592690916721053953805701627921227776"; 1087 + static const char *zInt = UNIQUE_TABLE_NAME; 1088 + static const char *zDrop = "DROP TABLE " UNIQUE_TABLE_NAME; 1085 1089 IdxTable *pTab = pWrite->pTab; 1086 1090 const char *zTab = pTab->zName; 1087 1091 const char *zSql = 1088 1092 "SELECT 'CREATE TEMP' || substr(sql, 7) FROM sqlite_master " 1089 1093 "WHERE tbl_name = %Q AND type IN ('table', 'trigger') " 1090 1094 "ORDER BY type;"; 1091 1095 sqlite3_stmt *pSelect = 0; ................................................................................ 1230 1234 sqlite3_free(zOuter); 1231 1235 } 1232 1236 } 1233 1237 } 1234 1238 idxFinalize(&rc, pSchema); 1235 1239 return rc; 1236 1240 } 1241 + 1242 +struct IdxSampleCtx { 1243 + int iTarget; 1244 + double target; /* Target nRet/nRow value */ 1245 + double nRow; /* Number of rows seen */ 1246 + double nRet; /* Number of rows returned */ 1247 +}; 1248 + 1249 +static void idxSampleFunc( 1250 + sqlite3_context *pCtx, 1251 + int argc, 1252 + sqlite3_value **argv 1253 +){ 1254 + struct IdxSampleCtx *p = (struct IdxSampleCtx*)sqlite3_user_data(pCtx); 1255 + int bRet; 1256 + 1257 + assert( argc==0 ); 1258 + if( p->nRow==0.0 ){ 1259 + bRet = 1; 1260 + }else{ 1261 + bRet = (p->nRet / p->nRow) <= p->target; 1262 + if( bRet==0 ){ 1263 + unsigned short rnd; 1264 + sqlite3_randomness(2, (void*)&rnd); 1265 + bRet = ((int)rnd % 100) <= p->iTarget; 1266 + } 1267 + } 1268 + 1269 + sqlite3_result_int(pCtx, bRet); 1270 + p->nRow += 1.0; 1271 + p->nRet += (double)bRet; 1272 +} 1237 1273 1238 1274 struct IdxRemCtx { 1239 1275 int nSlot; 1240 1276 struct IdxRemSlot { 1241 1277 int eType; /* SQLITE_NULL, INTEGER, REAL, TEXT, BLOB */ 1242 1278 i64 iVal; /* SQLITE_INTEGER value */ 1243 1279 double rVal; /* SQLITE_FLOAT value */ ................................................................................ 1355 1391 char *zOrder = 0; 1356 1392 char *zQuery = 0; 1357 1393 int nCol = 0; 1358 1394 int i; 1359 1395 sqlite3_stmt *pQuery = 0; 1360 1396 int *aStat = 0; 1361 1397 int rc = SQLITE_OK; 1398 + 1399 + assert( p->iSample>0 ); 1362 1400 1363 1401 /* Formulate the query text */ 1364 1402 sqlite3_bind_text(pIndexXInfo, 1, zIdx, -1, SQLITE_STATIC); 1365 1403 while( SQLITE_OK==rc && SQLITE_ROW==sqlite3_step(pIndexXInfo) ){ 1366 1404 const char *zComma = zCols==0 ? "" : ", "; 1367 1405 const char *zName = (const char*)sqlite3_column_text(pIndexXInfo, 0); 1368 1406 const char *zColl = (const char*)sqlite3_column_text(pIndexXInfo, 1); 1369 1407 zCols = idxAppendText(&rc, zCols, 1370 1408 "%sx.%Q IS rem(%d, x.%Q) COLLATE %s", zComma, zName, nCol, zName, zColl 1371 1409 ); 1372 1410 zOrder = idxAppendText(&rc, zOrder, "%s%d", zComma, ++nCol); 1373 1411 } 1374 1412 if( rc==SQLITE_OK ){ 1375 - zQuery = sqlite3_mprintf( 1376 - "SELECT %s FROM %Q x ORDER BY %s", zCols, zTab, zOrder 1377 - ); 1413 + if( p->iSample==100 ){ 1414 + zQuery = sqlite3_mprintf( 1415 + "SELECT %s FROM %Q x ORDER BY %s", zCols, zTab, zOrder 1416 + ); 1417 + }else{ 1418 + zQuery = sqlite3_mprintf( 1419 + "SELECT %s FROM temp."UNIQUE_TABLE_NAME" x ORDER BY %s", zCols, zOrder 1420 + ); 1421 + } 1378 1422 } 1379 1423 sqlite3_free(zCols); 1380 1424 sqlite3_free(zOrder); 1381 1425 1382 1426 /* Formulate the query text */ 1383 1427 if( rc==SQLITE_OK ){ 1384 1428 rc = idxPrepareStmt(p->db, &pQuery, pzErr, zQuery); ................................................................................ 1428 1472 } 1429 1473 } 1430 1474 sqlite3_free(aStat); 1431 1475 idxFinalize(&rc, pQuery); 1432 1476 1433 1477 return rc; 1434 1478 } 1479 + 1480 +static int idxBuildSampleTable(sqlite3expert *p, const char *zTab){ 1481 + int rc; 1482 + char *zSql; 1483 + 1484 + rc = sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp."UNIQUE_TABLE_NAME,0,0,0); 1485 + if( rc!=SQLITE_OK ) return rc; 1486 + 1487 + zSql = sqlite3_mprintf( 1488 + "CREATE TABLE temp." UNIQUE_TABLE_NAME 1489 + " AS SELECT * FROM %Q WHERE sample()" 1490 + , zTab 1491 + ); 1492 + if( zSql==0 ) return SQLITE_NOMEM; 1493 + rc = sqlite3_exec(p->db, zSql, 0, 0, 0); 1494 + sqlite3_free(zSql); 1495 + 1496 + return rc; 1497 +} 1435 1498 1436 1499 /* 1437 1500 ** This function is called as part of sqlite3_expert_analyze(). Candidate 1438 1501 ** indexes have already been created in database sqlite3expert.dbm, this 1439 1502 ** function populates sqlite_stat1 table in the same database. 1440 1503 ** 1441 1504 ** The stat1 data is generated by querying the 1442 1505 */ 1443 1506 static int idxPopulateStat1(sqlite3expert *p, char **pzErr){ 1444 1507 int rc = SQLITE_OK; 1445 1508 int nMax =0; 1446 1509 struct IdxRemCtx *pCtx = 0; 1510 + struct IdxSampleCtx samplectx; 1447 1511 int i; 1512 + i64 iPrev = -100000; 1448 1513 sqlite3_stmt *pAllIndex = 0; 1449 1514 sqlite3_stmt *pIndexXInfo = 0; 1450 1515 sqlite3_stmt *pWrite = 0; 1451 1516 1452 - const char *zAllIndex = 1453 - "SELECT s.name, l.name FROM " 1517 + const char *zAllIndex = 1518 + "SELECT s.rowid, s.name, l.name FROM " 1454 1519 " sqlite_master AS s, " 1455 1520 " pragma_index_list(s.name) AS l " 1456 1521 "WHERE s.type = 'table'"; 1457 1522 const char *zIndexXInfo = 1458 1523 "SELECT name, coll FROM pragma_index_xinfo(?) WHERE key"; 1459 1524 const char *zWrite = "INSERT INTO sqlite_stat1 VALUES(?, ?, ?)"; 1525 + 1526 + /* If iSample==0, no sqlite_stat1 data is required. */ 1527 + if( p->iSample==0 ) return SQLITE_OK; 1460 1528 1461 1529 rc = idxLargestIndex(p->dbm, &nMax, pzErr); 1462 1530 if( nMax<=0 || rc!=SQLITE_OK ) return rc; 1463 1531 1464 1532 rc = sqlite3_exec(p->dbm, "ANALYZE; PRAGMA writable_schema=1", 0, 0, 0); 1465 1533 1466 1534 if( rc==SQLITE_OK ){ ................................................................................ 1469 1537 } 1470 1538 1471 1539 if( rc==SQLITE_OK ){ 1472 1540 rc = sqlite3_create_function( 1473 1541 p->db, "rem", 2, SQLITE_UTF8, (void*)pCtx, idxRemFunc, 0, 0 1474 1542 ); 1475 1543 } 1544 + if( rc==SQLITE_OK ){ 1545 + rc = sqlite3_create_function( 1546 + p->db, "sample", 0, SQLITE_UTF8, (void*)&samplectx, idxSampleFunc, 0, 0 1547 + ); 1548 + } 1476 1549 1477 1550 if( rc==SQLITE_OK ){ 1478 1551 pCtx->nSlot = nMax+1; 1479 1552 rc = idxPrepareStmt(p->dbm, &pAllIndex, pzErr, zAllIndex); 1480 1553 } 1481 1554 if( rc==SQLITE_OK ){ 1482 1555 rc = idxPrepareStmt(p->dbm, &pIndexXInfo, pzErr, zIndexXInfo); 1483 1556 } 1484 1557 if( rc==SQLITE_OK ){ 1485 1558 rc = idxPrepareStmt(p->dbm, &pWrite, pzErr, zWrite); 1486 1559 } 1487 1560 1488 1561 while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pAllIndex) ){ 1489 - const char *zTab = (const char*)sqlite3_column_text(pAllIndex, 0); 1490 - const char *zIdx = (const char*)sqlite3_column_text(pAllIndex, 1); 1562 + i64 iRowid = sqlite3_column_int64(pAllIndex, 0); 1563 + const char *zTab = (const char*)sqlite3_column_text(pAllIndex, 1); 1564 + const char *zIdx = (const char*)sqlite3_column_text(pAllIndex, 2); 1565 + if( p->iSample<100 && iPrev!=iRowid ){ 1566 + samplectx.target = (double)p->iSample / 100.0; 1567 + samplectx.iTarget = p->iSample; 1568 + samplectx.nRow = 0.0; 1569 + samplectx.nRet = 0.0; 1570 + rc = idxBuildSampleTable(p, zTab); 1571 + if( rc!=SQLITE_OK ) break; 1572 + } 1491 1573 rc = idxPopulateOneStat1(p, pIndexXInfo, pWrite, zTab, zIdx, pzErr); 1574 + iPrev = iRowid; 1575 + } 1576 + if( p->iSample<100 ){ 1577 + rc = sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp." UNIQUE_TABLE_NAME, 1578 + 0,0,0 1579 + ); 1492 1580 } 1493 1581 1494 1582 idxFinalize(&rc, pAllIndex); 1495 1583 idxFinalize(&rc, pIndexXInfo); 1496 1584 idxFinalize(&rc, pWrite); 1497 1585 1498 1586 for(i=0; i<pCtx->nSlot; i++){ ................................................................................ 1499 1587 sqlite3_free(pCtx->aSlot[i].z); 1500 1588 } 1501 1589 sqlite3_free(pCtx); 1502 1590 1503 1591 if( rc==SQLITE_OK ){ 1504 1592 rc = sqlite3_exec(p->dbm, "ANALYZE sqlite_master", 0, 0, 0); 1505 1593 } 1594 + 1595 + sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp."UNIQUE_TABLE_NAME,0,0,0); 1506 1596 return rc; 1507 1597 } 1508 1598 1509 1599 /* 1510 1600 ** Allocate a new sqlite3expert object. 1511 1601 */ 1512 1602 sqlite3expert *sqlite3_expert_new(sqlite3 *db, char **pzErrmsg){ ................................................................................ 1519 1609 ** will contain a virtual table corresponding to each real table in 1520 1610 ** the user database schema, and a copy of each view. It is used to 1521 1611 ** collect information regarding the WHERE, ORDER BY and other clauses 1522 1612 ** of the user's query. 1523 1613 */ 1524 1614 if( rc==SQLITE_OK ){ 1525 1615 pNew->db = db; 1616 + pNew->iSample = 100; 1526 1617 rc = sqlite3_open(":memory:", &pNew->dbv); 1527 1618 } 1528 1619 if( rc==SQLITE_OK ){ 1529 1620 rc = sqlite3_open(":memory:", &pNew->dbm); 1530 1621 if( rc==SQLITE_OK ){ 1531 1622 sqlite3_db_config(pNew->dbm, SQLITE_DBCONFIG_FULL_EQP, 1, (int*)0); 1532 1623 } ................................................................................ 1560 1651 ** return the new sqlite3expert handle. */ 1561 1652 if( rc!=SQLITE_OK ){ 1562 1653 sqlite3_expert_destroy(pNew); 1563 1654 pNew = 0; 1564 1655 } 1565 1656 return pNew; 1566 1657 } 1658 + 1659 +/* 1660 +** Configure an sqlite3expert object. 1661 +*/ 1662 +int sqlite3_expert_config(sqlite3expert *p, int op, ...){ 1663 + int rc = SQLITE_OK; 1664 + va_list ap; 1665 + va_start(ap, op); 1666 + switch( op ){ 1667 + case EXPERT_CONFIG_SAMPLE: { 1668 + int iVal = va_arg(ap, int); 1669 + if( iVal<0 ) iVal = 0; 1670 + if( iVal>100 ) iVal = 100; 1671 + p->iSample = iVal; 1672 + break; 1673 + } 1674 + default: 1675 + rc = SQLITE_NOTFOUND; 1676 + break; 1677 + } 1678 + 1679 + va_end(ap); 1680 + return rc; 1681 +} 1567 1682 1568 1683 /* 1569 1684 ** Add an SQL statement to the analysis. 1570 1685 */ 1571 1686 int sqlite3_expert_sql( 1572 1687 sqlite3expert *p, /* From sqlite3_expert_new() */ 1573 1688 const char *zSql, /* SQL statement to add */
Changes to ext/expert/sqlite3expert.h.
23 23 ** to NULL. Or, if an error occurs, NULL is returned and (*pzErr) set to 24 24 ** an English-language error message. In this case it is the responsibility 25 25 ** of the caller to eventually free the error message buffer using 26 26 ** sqlite3_free(). 27 27 */ 28 28 sqlite3expert *sqlite3_expert_new(sqlite3 *db, char **pzErr); 29 29 30 +/* 31 +** Configure an sqlite3expert object. 32 +** 33 +** EXPERT_CONFIG_SAMPLE: 34 +** By default, sqlite3_expert_analyze() generates sqlite_stat1 data for 35 +** each candidate index. This involves scanning and sorting the entire 36 +** contents of each user database table once for each candidate index 37 +** associated with the table. For large databases, this can be 38 +** prohibitively slow. This option allows the sqlite3expert object to 39 +** be configured so that sqlite_stat1 data is instead generated based on a 40 +** subset of each table, or so that no sqlite_stat1 data is used at all. 41 +** 42 +** A single integer argument is passed to this option. If the value is less 43 +** than or equal to zero, then no sqlite_stat1 data is generated or used by 44 +** the analysis - indexes are recommended based on the database schema only. 45 +** Or, if the value is 100 or greater, complete sqlite_stat1 data is 46 +** generated for each candidate index (this is the default). Finally, if the 47 +** value falls between 0 and 100, then it represents the percentage of user 48 +** table rows that should be considered when generating sqlite_stat1 data. 49 +** 50 +** Examples: 51 +** 52 +** // Do not generate any sqlite_stat1 data 53 +** sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 0); 54 +** 55 +** // Generate sqlite_stat1 data based on 10% of the rows in each table. 56 +** sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 10); 57 +*/ 58 +int sqlite3_expert_config(sqlite3expert *p, int op, ...); 59 + 60 +#define EXPERT_CONFIG_SAMPLE 1 /* int */ 61 + 30 62 /* 31 63 ** Specify zero or more SQL statements to be included in the analysis. 32 64 ** 33 65 ** Buffer zSql must contain zero or more complete SQL statements. This 34 66 ** function parses all statements contained in the buffer and adds them 35 67 ** to the internal list of statements to analyze. If successful, SQLITE_OK 36 68 ** is returned and (*pzErr) set to NULL. Or, if an error occurs - for example ................................................................................ 49 81 ** immediately and no statements are added to the analysis. 50 82 */ 51 83 int sqlite3_expert_sql( 52 84 sqlite3expert *p, /* From a successful sqlite3_expert_new() */ 53 85 const char *zSql, /* SQL statement(s) to add */ 54 86 char **pzErr /* OUT: Error message (if any) */ 55 87 ); 88 + 56 89 57 90 /* 58 91 ** This function is called after the sqlite3expert object has been configured 59 92 ** with all SQL statements using sqlite3_expert_sql() to actually perform 60 93 ** the analysis. Once this function has been called, it is not possible to 61 94 ** add further SQL statements to the analysis. 62 95 **