/ Check-in [c69c3e21]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add an option to generate stat1 data based on a subset of the user database table contents to sqlite3_expert.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | schemalint
Files: files | file ages | folders
SHA3-256: c69c3e21db6e141f7e24226c6432f2ed31fe5f177bd23781915871f8600ee56a
User & Date: dan 2017-04-20 09:54:04
Context
2017-04-20
16:18
Avoid adding INTEGER PRIMARY KEY columns to candidate indexes. check-in: 4577fea5 user: dan tags: schemalint
16:08
Avoid creating a temp table in the user database in the sqlite3_expert code. Trouble is, this makes sampling for stat1 data much slower. check-in: c62e3582 user: dan tags: schemalint-failure
09:54
Add an option to generate stat1 data based on a subset of the user database table contents to sqlite3_expert. check-in: c69c3e21 user: dan tags: schemalint
2017-04-18
20:10
Have sqlite3_expert_analyze() populate the sqlite_stat1 table before running queries through the planner for the second time. check-in: a157fcfd user: dan tags: schemalint
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/expert/expert.c.

    32     32     fprintf(stderr, "\n");
    33     33     fprintf(stderr, "Usage %s ?OPTIONS? DATABASE\n", argv[0]);
    34     34     fprintf(stderr, "\n");
    35     35     fprintf(stderr, "Options are:\n");
    36     36     fprintf(stderr, "  -sql SQL   (analyze SQL statements passed as argument)\n");
    37     37     fprintf(stderr, "  -file FILE (read SQL statements from file FILE)\n");
    38     38     fprintf(stderr, "  -verbose LEVEL (integer verbosity level. default 1)\n");
           39  +  fprintf(stderr, "  -sample PERCENT (percent of db to sample. default 100)\n");
    39     40     exit(-1);
    40     41   }
    41     42   
    42     43   static int readSqlFromFile(sqlite3expert *p, const char *zFile, char **pzErr){
    43     44     FILE *in = fopen(zFile, "rb");
    44     45     long nIn;
    45     46     size_t nRead;
................................................................................
    93     94         char *zArg = argv[i];
    94     95         int nArg = strlen(zArg);
    95     96         if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-file", nArg) ){
    96     97           if( ++i==(argc-1) ) option_requires_argument("-file");
    97     98           rc = readSqlFromFile(p, argv[i], &zErr);
    98     99         }
    99    100   
   100         -      else if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-sql", nArg) ){
          101  +      else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sql", nArg) ){
   101    102           if( ++i==(argc-1) ) option_requires_argument("-sql");
   102    103           rc = sqlite3_expert_sql(p, argv[i], &zErr);
   103    104         }
          105  +
          106  +      else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sample", nArg) ){
          107  +        int iSample;
          108  +        if( ++i==(argc-1) ) option_requires_argument("-sample");
          109  +        iSample = option_integer_arg(argv[i]);
          110  +        sqlite3_expert_config(p, EXPERT_CONFIG_SAMPLE, iSample);
          111  +      }
   104    112   
   105    113         else if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-verbose", nArg) ){
   106    114           if( ++i==(argc-1) ) option_requires_argument("-verbose");
   107    115           iVerbose = option_integer_arg(argv[i]);
   108    116         }
   109    117   
   110    118         else{

Changes to ext/expert/sqlite3expert.c.

    23     23   
    24     24   typedef struct IdxColumn IdxColumn;
    25     25   typedef struct IdxConstraint IdxConstraint;
    26     26   typedef struct IdxScan IdxScan;
    27     27   typedef struct IdxStatement IdxStatement;
    28     28   typedef struct IdxTable IdxTable;
    29     29   typedef struct IdxWrite IdxWrite;
           30  +
           31  +#define UNIQUE_TABLE_NAME "t592690916721053953805701627921227776"
           32  +
    30     33   
    31     34   /*
    32     35   ** A single constraint. Equivalent to either "col = ?" or "col < ?" (or
    33     36   ** any other type of single-ended range constraint on a column).
    34     37   **
    35     38   ** pLink:
    36     39   **   Used to temporarily link IdxConstraint objects into lists while
................................................................................
   123    126     IdxHashEntry *aHash[IDX_HASH_SIZE];
   124    127   };
   125    128   
   126    129   /*
   127    130   ** sqlite3expert object.
   128    131   */
   129    132   struct sqlite3expert {
          133  +  int iSample;                    /* Percentage of tables to sample for stat1 */
   130    134     sqlite3 *db;                    /* User database */
   131    135     sqlite3 *dbm;                   /* In-memory db for this analysis */
   132    136     sqlite3 *dbv;                   /* Vtab schema for this analysis */
   133    137     IdxTable *pTable;               /* List of all IdxTable objects */
   134    138     IdxScan *pScan;                 /* List of scan objects */
   135    139     IdxWrite *pWrite;               /* List of write objects */
   136    140     IdxStatement *pStatement;       /* List of IdxStatement objects */
................................................................................
  1076   1080   }
  1077   1081   
  1078   1082   static int idxProcessOneTrigger(
  1079   1083     sqlite3expert *p, 
  1080   1084     IdxWrite *pWrite, 
  1081   1085     char **pzErr
  1082   1086   ){
  1083         -  static const char *zInt = "t592690916721053953805701627921227776";
  1084         -  static const char *zDrop = "DROP TABLE t592690916721053953805701627921227776";
         1087  +  static const char *zInt = UNIQUE_TABLE_NAME;
         1088  +  static const char *zDrop = "DROP TABLE " UNIQUE_TABLE_NAME;
  1085   1089     IdxTable *pTab = pWrite->pTab;
  1086   1090     const char *zTab = pTab->zName;
  1087   1091     const char *zSql = 
  1088   1092       "SELECT 'CREATE TEMP' || substr(sql, 7) FROM sqlite_master "
  1089   1093       "WHERE tbl_name = %Q AND type IN ('table', 'trigger') "
  1090   1094       "ORDER BY type;";
  1091   1095     sqlite3_stmt *pSelect = 0;
................................................................................
  1230   1234           sqlite3_free(zOuter);
  1231   1235         }
  1232   1236       }
  1233   1237     }
  1234   1238     idxFinalize(&rc, pSchema);
  1235   1239     return rc;
  1236   1240   }
         1241  +
         1242  +struct IdxSampleCtx {
         1243  +  int iTarget;
         1244  +  double target;                  /* Target nRet/nRow value */
         1245  +  double nRow;                    /* Number of rows seen */
         1246  +  double nRet;                    /* Number of rows returned */
         1247  +};
         1248  +
         1249  +static void idxSampleFunc(
         1250  +  sqlite3_context *pCtx,
         1251  +  int argc,
         1252  +  sqlite3_value **argv
         1253  +){
         1254  +  struct IdxSampleCtx *p = (struct IdxSampleCtx*)sqlite3_user_data(pCtx);
         1255  +  int bRet;
         1256  +
         1257  +  assert( argc==0 );
         1258  +  if( p->nRow==0.0 ){
         1259  +    bRet = 1;
         1260  +  }else{
         1261  +    bRet = (p->nRet / p->nRow) <= p->target;
         1262  +    if( bRet==0 ){
         1263  +      unsigned short rnd;
         1264  +      sqlite3_randomness(2, (void*)&rnd);
         1265  +      bRet = ((int)rnd % 100) <= p->iTarget;
         1266  +    }
         1267  +  }
         1268  +
         1269  +  sqlite3_result_int(pCtx, bRet);
         1270  +  p->nRow += 1.0;
         1271  +  p->nRet += (double)bRet;
         1272  +}
  1237   1273   
  1238   1274   struct IdxRemCtx {
  1239   1275     int nSlot;
  1240   1276     struct IdxRemSlot {
  1241   1277       int eType;                    /* SQLITE_NULL, INTEGER, REAL, TEXT, BLOB */
  1242   1278       i64 iVal;                     /* SQLITE_INTEGER value */
  1243   1279       double rVal;                  /* SQLITE_FLOAT value */
................................................................................
  1355   1391     char *zOrder = 0;
  1356   1392     char *zQuery = 0;
  1357   1393     int nCol = 0;
  1358   1394     int i;
  1359   1395     sqlite3_stmt *pQuery = 0;
  1360   1396     int *aStat = 0;
  1361   1397     int rc = SQLITE_OK;
         1398  +
         1399  +  assert( p->iSample>0 );
  1362   1400   
  1363   1401     /* Formulate the query text */
  1364   1402     sqlite3_bind_text(pIndexXInfo, 1, zIdx, -1, SQLITE_STATIC);
  1365   1403     while( SQLITE_OK==rc && SQLITE_ROW==sqlite3_step(pIndexXInfo) ){
  1366   1404       const char *zComma = zCols==0 ? "" : ", ";
  1367   1405       const char *zName = (const char*)sqlite3_column_text(pIndexXInfo, 0);
  1368   1406       const char *zColl = (const char*)sqlite3_column_text(pIndexXInfo, 1);
  1369   1407       zCols = idxAppendText(&rc, zCols, 
  1370   1408           "%sx.%Q IS rem(%d, x.%Q) COLLATE %s", zComma, zName, nCol, zName, zColl
  1371   1409       );
  1372   1410       zOrder = idxAppendText(&rc, zOrder, "%s%d", zComma, ++nCol);
  1373   1411     }
  1374   1412     if( rc==SQLITE_OK ){
  1375         -    zQuery = sqlite3_mprintf(
  1376         -        "SELECT %s FROM %Q x ORDER BY %s", zCols, zTab, zOrder
  1377         -    );
         1413  +    if( p->iSample==100 ){
         1414  +      zQuery = sqlite3_mprintf(
         1415  +          "SELECT %s FROM %Q x ORDER BY %s", zCols, zTab, zOrder
         1416  +      );
         1417  +    }else{
         1418  +      zQuery = sqlite3_mprintf(
         1419  +          "SELECT %s FROM temp."UNIQUE_TABLE_NAME" x ORDER BY %s", zCols, zOrder
         1420  +      );
         1421  +    }
  1378   1422     }
  1379   1423     sqlite3_free(zCols);
  1380   1424     sqlite3_free(zOrder);
  1381   1425   
  1382   1426     /* Formulate the query text */
  1383   1427     if( rc==SQLITE_OK ){
  1384   1428       rc = idxPrepareStmt(p->db, &pQuery, pzErr, zQuery);
................................................................................
  1428   1472       }
  1429   1473     }
  1430   1474     sqlite3_free(aStat);
  1431   1475     idxFinalize(&rc, pQuery);
  1432   1476   
  1433   1477     return rc;
  1434   1478   }
         1479  +
         1480  +static int idxBuildSampleTable(sqlite3expert *p, const char *zTab){
         1481  +  int rc;
         1482  +  char *zSql;
         1483  +
         1484  +  rc = sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp."UNIQUE_TABLE_NAME,0,0,0);
         1485  +  if( rc!=SQLITE_OK ) return rc;
         1486  +
         1487  +  zSql = sqlite3_mprintf(
         1488  +      "CREATE TABLE temp." UNIQUE_TABLE_NAME 
         1489  +      "  AS SELECT * FROM %Q WHERE sample()"
         1490  +      , zTab
         1491  +  );
         1492  +  if( zSql==0 ) return SQLITE_NOMEM;
         1493  +  rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
         1494  +  sqlite3_free(zSql);
         1495  +
         1496  +  return rc;
         1497  +}
  1435   1498   
  1436   1499   /*
  1437   1500   ** This function is called as part of sqlite3_expert_analyze(). Candidate
  1438   1501   ** indexes have already been created in database sqlite3expert.dbm, this
  1439   1502   ** function populates sqlite_stat1 table in the same database.
  1440   1503   **
  1441   1504   ** The stat1 data is generated by querying the 
  1442   1505   */
  1443   1506   static int idxPopulateStat1(sqlite3expert *p, char **pzErr){
  1444   1507     int rc = SQLITE_OK;
  1445   1508     int nMax =0;
  1446   1509     struct IdxRemCtx *pCtx = 0;
         1510  +  struct IdxSampleCtx samplectx; 
  1447   1511     int i;
         1512  +  i64 iPrev = -100000;
  1448   1513     sqlite3_stmt *pAllIndex = 0;
  1449   1514     sqlite3_stmt *pIndexXInfo = 0;
  1450   1515     sqlite3_stmt *pWrite = 0;
  1451   1516   
  1452         -  const char *zAllIndex = 
  1453         -    "SELECT s.name, l.name FROM "
         1517  +  const char *zAllIndex =
         1518  +    "SELECT s.rowid, s.name, l.name FROM "
  1454   1519       "  sqlite_master AS s, "
  1455   1520       "  pragma_index_list(s.name) AS l "
  1456   1521       "WHERE s.type = 'table'";
  1457   1522     const char *zIndexXInfo = 
  1458   1523       "SELECT name, coll FROM pragma_index_xinfo(?) WHERE key";
  1459   1524     const char *zWrite = "INSERT INTO sqlite_stat1 VALUES(?, ?, ?)";
         1525  +
         1526  +  /* If iSample==0, no sqlite_stat1 data is required. */
         1527  +  if( p->iSample==0 ) return SQLITE_OK;
  1460   1528   
  1461   1529     rc = idxLargestIndex(p->dbm, &nMax, pzErr);
  1462   1530     if( nMax<=0 || rc!=SQLITE_OK ) return rc;
  1463   1531   
  1464   1532     rc = sqlite3_exec(p->dbm, "ANALYZE; PRAGMA writable_schema=1", 0, 0, 0);
  1465   1533   
  1466   1534     if( rc==SQLITE_OK ){
................................................................................
  1469   1537     }
  1470   1538   
  1471   1539     if( rc==SQLITE_OK ){
  1472   1540       rc = sqlite3_create_function(
  1473   1541           p->db, "rem", 2, SQLITE_UTF8, (void*)pCtx, idxRemFunc, 0, 0
  1474   1542       );
  1475   1543     }
         1544  +  if( rc==SQLITE_OK ){
         1545  +    rc = sqlite3_create_function(
         1546  +        p->db, "sample", 0, SQLITE_UTF8, (void*)&samplectx, idxSampleFunc, 0, 0
         1547  +    );
         1548  +  }
  1476   1549   
  1477   1550     if( rc==SQLITE_OK ){
  1478   1551       pCtx->nSlot = nMax+1;
  1479   1552       rc = idxPrepareStmt(p->dbm, &pAllIndex, pzErr, zAllIndex);
  1480   1553     }
  1481   1554     if( rc==SQLITE_OK ){
  1482   1555       rc = idxPrepareStmt(p->dbm, &pIndexXInfo, pzErr, zIndexXInfo);
  1483   1556     }
  1484   1557     if( rc==SQLITE_OK ){
  1485   1558       rc = idxPrepareStmt(p->dbm, &pWrite, pzErr, zWrite);
  1486   1559     }
  1487   1560   
  1488   1561     while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pAllIndex) ){
  1489         -    const char *zTab = (const char*)sqlite3_column_text(pAllIndex, 0);
  1490         -    const char *zIdx = (const char*)sqlite3_column_text(pAllIndex, 1);
         1562  +    i64 iRowid = sqlite3_column_int64(pAllIndex, 0);
         1563  +    const char *zTab = (const char*)sqlite3_column_text(pAllIndex, 1);
         1564  +    const char *zIdx = (const char*)sqlite3_column_text(pAllIndex, 2);
         1565  +    if( p->iSample<100 && iPrev!=iRowid ){
         1566  +      samplectx.target = (double)p->iSample / 100.0;
         1567  +      samplectx.iTarget = p->iSample;
         1568  +      samplectx.nRow = 0.0;
         1569  +      samplectx.nRet = 0.0;
         1570  +      rc = idxBuildSampleTable(p, zTab);
         1571  +      if( rc!=SQLITE_OK ) break;
         1572  +    }
  1491   1573       rc = idxPopulateOneStat1(p, pIndexXInfo, pWrite, zTab, zIdx, pzErr);
         1574  +    iPrev = iRowid;
         1575  +  }
         1576  +  if( p->iSample<100 ){
         1577  +    rc = sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp." UNIQUE_TABLE_NAME,
         1578  +        0,0,0
         1579  +    );
  1492   1580     }
  1493   1581   
  1494   1582     idxFinalize(&rc, pAllIndex);
  1495   1583     idxFinalize(&rc, pIndexXInfo);
  1496   1584     idxFinalize(&rc, pWrite);
  1497   1585   
  1498   1586     for(i=0; i<pCtx->nSlot; i++){
................................................................................
  1499   1587       sqlite3_free(pCtx->aSlot[i].z);
  1500   1588     }
  1501   1589     sqlite3_free(pCtx);
  1502   1590   
  1503   1591     if( rc==SQLITE_OK ){
  1504   1592       rc = sqlite3_exec(p->dbm, "ANALYZE sqlite_master", 0, 0, 0);
  1505   1593     }
         1594  +
         1595  +  sqlite3_exec(p->db, "DROP TABLE IF EXISTS temp."UNIQUE_TABLE_NAME,0,0,0);
  1506   1596     return rc;
  1507   1597   }
  1508   1598   
  1509   1599   /*
  1510   1600   ** Allocate a new sqlite3expert object.
  1511   1601   */
  1512   1602   sqlite3expert *sqlite3_expert_new(sqlite3 *db, char **pzErrmsg){
................................................................................
  1519   1609     ** will contain a virtual table corresponding to each real table in
  1520   1610     ** the user database schema, and a copy of each view. It is used to
  1521   1611     ** collect information regarding the WHERE, ORDER BY and other clauses
  1522   1612     ** of the user's query.
  1523   1613     */
  1524   1614     if( rc==SQLITE_OK ){
  1525   1615       pNew->db = db;
         1616  +    pNew->iSample = 100;
  1526   1617       rc = sqlite3_open(":memory:", &pNew->dbv);
  1527   1618     }
  1528   1619     if( rc==SQLITE_OK ){
  1529   1620       rc = sqlite3_open(":memory:", &pNew->dbm);
  1530   1621       if( rc==SQLITE_OK ){
  1531   1622         sqlite3_db_config(pNew->dbm, SQLITE_DBCONFIG_FULL_EQP, 1, (int*)0);
  1532   1623       }
................................................................................
  1560   1651     ** return the new sqlite3expert handle.  */
  1561   1652     if( rc!=SQLITE_OK ){
  1562   1653       sqlite3_expert_destroy(pNew);
  1563   1654       pNew = 0;
  1564   1655     }
  1565   1656     return pNew;
  1566   1657   }
         1658  +
         1659  +/*
         1660  +** Configure an sqlite3expert object.
         1661  +*/
         1662  +int sqlite3_expert_config(sqlite3expert *p, int op, ...){
         1663  +  int rc = SQLITE_OK;
         1664  +  va_list ap;
         1665  +  va_start(ap, op);
         1666  +  switch( op ){
         1667  +    case EXPERT_CONFIG_SAMPLE: {
         1668  +      int iVal = va_arg(ap, int);
         1669  +      if( iVal<0 ) iVal = 0;
         1670  +      if( iVal>100 ) iVal = 100;
         1671  +      p->iSample = iVal;
         1672  +      break;
         1673  +    }
         1674  +    default:
         1675  +      rc = SQLITE_NOTFOUND;
         1676  +      break;
         1677  +  }
         1678  +
         1679  +  va_end(ap);
         1680  +  return rc;
         1681  +}
  1567   1682   
  1568   1683   /*
  1569   1684   ** Add an SQL statement to the analysis.
  1570   1685   */
  1571   1686   int sqlite3_expert_sql(
  1572   1687     sqlite3expert *p,               /* From sqlite3_expert_new() */
  1573   1688     const char *zSql,               /* SQL statement to add */

Changes to ext/expert/sqlite3expert.h.

    23     23   ** to NULL. Or, if an error occurs, NULL is returned and (*pzErr) set to
    24     24   ** an English-language error message. In this case it is the responsibility
    25     25   ** of the caller to eventually free the error message buffer using
    26     26   ** sqlite3_free().
    27     27   */
    28     28   sqlite3expert *sqlite3_expert_new(sqlite3 *db, char **pzErr);
    29     29   
           30  +/*
           31  +** Configure an sqlite3expert object.
           32  +**
           33  +** EXPERT_CONFIG_SAMPLE:
           34  +**   By default, sqlite3_expert_analyze() generates sqlite_stat1 data for
           35  +**   each candidate index. This involves scanning and sorting the entire
           36  +**   contents of each user database table once for each candidate index
           37  +**   associated with the table. For large databases, this can be 
           38  +**   prohibitively slow. This option allows the sqlite3expert object to
           39  +**   be configured so that sqlite_stat1 data is instead generated based on a
           40  +**   subset of each table, or so that no sqlite_stat1 data is used at all.
           41  +**
           42  +**   A single integer argument is passed to this option. If the value is less
           43  +**   than or equal to zero, then no sqlite_stat1 data is generated or used by
           44  +**   the analysis - indexes are recommended based on the database schema only.
           45  +**   Or, if the value is 100 or greater, complete sqlite_stat1 data is
           46  +**   generated for each candidate index (this is the default). Finally, if the
           47  +**   value falls between 0 and 100, then it represents the percentage of user
           48  +**   table rows that should be considered when generating sqlite_stat1 data.
           49  +**
           50  +**   Examples:
           51  +**
           52  +**     // Do not generate any sqlite_stat1 data
           53  +**     sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 0);
           54  +**
           55  +**     // Generate sqlite_stat1 data based on 10% of the rows in each table.
           56  +**     sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 10);
           57  +*/
           58  +int sqlite3_expert_config(sqlite3expert *p, int op, ...);
           59  +
           60  +#define EXPERT_CONFIG_SAMPLE 1    /* int */
           61  +
    30     62   /*
    31     63   ** Specify zero or more SQL statements to be included in the analysis.
    32     64   **
    33     65   ** Buffer zSql must contain zero or more complete SQL statements. This
    34     66   ** function parses all statements contained in the buffer and adds them
    35     67   ** to the internal list of statements to analyze. If successful, SQLITE_OK
    36     68   ** is returned and (*pzErr) set to NULL. Or, if an error occurs - for example
................................................................................
    49     81   ** immediately and no statements are added to the analysis.
    50     82   */
    51     83   int sqlite3_expert_sql(
    52     84     sqlite3expert *p,               /* From a successful sqlite3_expert_new() */
    53     85     const char *zSql,               /* SQL statement(s) to add */
    54     86     char **pzErr                    /* OUT: Error message (if any) */
    55     87   );
           88  +
    56     89   
    57     90   /*
    58     91   ** This function is called after the sqlite3expert object has been configured
    59     92   ** with all SQL statements using sqlite3_expert_sql() to actually perform
    60     93   ** the analysis. Once this function has been called, it is not possible to
    61     94   ** add further SQL statements to the analysis.
    62     95   **