/ Check-in [7b83581a]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:First attempt at a utility program to compute sqlite_stat1 without doing a full table scan.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | est_count_pragma
Files: files | file ages | folders
SHA1: 7b83581a43384fe81dc319482e03be0df45ab25d
User & Date: drh 2016-10-25 13:57:40
Context
2016-10-25
17:28
Merge recent trunk changes, and especially the PRAGMA index_info enhancement which is needed on this branch. check-in: c3570e46 user: drh tags: est_count_pragma
13:57
First attempt at a utility program to compute sqlite_stat1 without doing a full table scan. check-in: 7b83581a user: drh tags: est_count_pragma
2016-10-21
18:01
When reading from an index, the shared-cache lock must be on the corresponding table. check-in: 04fe12b5 user: drh tags: est_count_pragma
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to main.mk.

   492    492   sqldiff$(EXE):	$(TOP)/tool/sqldiff.c sqlite3.c sqlite3.h
   493    493   	$(TCCX) -o sqldiff$(EXE) -DSQLITE_THREADSAFE=0 \
   494    494   		$(TOP)/tool/sqldiff.c sqlite3.c $(TLIBS) $(THREADLIB)
   495    495   
   496    496   dbhash$(EXE):	$(TOP)/tool/dbhash.c sqlite3.c sqlite3.h
   497    497   	$(TCCX) -o dbhash$(EXE) -DSQLITE_THREADSAFE=0 \
   498    498   		$(TOP)/tool/dbhash.c sqlite3.c $(TLIBS) $(THREADLIB)
          499  +
          500  +faststat1$(EXE):	$(TOP)/tool/faststat1.c sqlite3.c sqlite3.h
          501  +	$(TCCX) -o faststat1$(EXE) -DSQLITE_THREADSAFE=0 \
          502  +		$(TOP)/tool/faststat1.c sqlite3.c $(TLIBS) $(THREADLIB)
   499    503   
   500    504   scrub$(EXE):	$(TOP)/ext/misc/scrub.c sqlite3.o
   501    505   	$(TCC) -I. -DSCRUB_STANDALONE -o scrub$(EXE) $(TOP)/ext/misc/scrub.c sqlite3.o $(THREADLIB)
   502    506   
   503    507   srcck1$(EXE):	$(TOP)/tool/srcck1.c
   504    508   	$(BCC) -o srcck1$(EXE) $(TOP)/tool/srcck1.c
   505    509   

Added tool/faststat1.c.

            1  +/*
            2  +** 2016-10-24
            3  +**
            4  +** The author disclaims copyright to this source code.  In place of
            5  +** a legal notice, here is a blessing:
            6  +**
            7  +**    May you do good and not evil.
            8  +**    May you find forgiveness for yourself and forgive others.
            9  +**    May you share freely, never taking more than you give.
           10  +**
           11  +*************************************************************************
           12  +**
           13  +** This is a utility program that uses the est_count and btree_sample
           14  +** pragmas to try to approximate the content of the sqlite_stat1 table
           15  +** without doing a full table scan.
           16  +**
           17  +** To compile, simply link against SQLite.
           18  +**
           19  +** See the showHelp() routine below for a brief description of how to
           20  +** run the utility.
           21  +*/
           22  +#include <stdio.h>
           23  +#include <stdlib.h>
           24  +#include <stdarg.h>
           25  +#include <ctype.h>
           26  +#include <string.h>
           27  +#include <assert.h>
           28  +#include "sqlite3.h"
           29  +
           30  +/*
           31  +** All global variables are gathered into the "g" singleton.
           32  +*/
           33  +struct GlobalVars {
           34  +  const char *zArgv0;       /* Name of program */
           35  +  unsigned fDebug;          /* Debug flags */
           36  +  sqlite3 *db;              /* The database connection */
           37  +} g;
           38  +
           39  +/*
           40  +** Allowed values for g.fDebug
           41  +*/
           42  +#define DEBUG_NONE          0
           43  +
           44  +  
           45  +/*
           46  +** Print an error resulting from faulting command-line arguments and
           47  +** abort the program.
           48  +*/
           49  +static void cmdlineError(const char *zFormat, ...){
           50  +  va_list ap;
           51  +  fprintf(stderr, "%s: ", g.zArgv0);
           52  +  va_start(ap, zFormat);
           53  +  vfprintf(stderr, zFormat, ap);
           54  +  va_end(ap);
           55  +  fprintf(stderr, "\n\"%s --help\" for more help\n", g.zArgv0);
           56  +  exit(1);
           57  +}
           58  +
           59  +/*
           60  +** Print an error message for an error that occurs at runtime, then
           61  +** abort the program.
           62  +*/
           63  +static void runtimeError(const char *zFormat, ...){
           64  +  va_list ap;
           65  +  fprintf(stderr, "%s: ", g.zArgv0);
           66  +  va_start(ap, zFormat);
           67  +  vfprintf(stderr, zFormat, ap);
           68  +  va_end(ap);
           69  +  fprintf(stderr, "\n");
           70  +  exit(1);
           71  +}
           72  +
           73  +/*
           74  +** Prepare a new SQL statement.  Print an error and abort if anything
           75  +** goes wrong.
           76  +*/
           77  +static sqlite3_stmt *db_vprepare(const char *zFormat, va_list ap){
           78  +  char *zSql;
           79  +  int rc;
           80  +  sqlite3_stmt *pStmt;
           81  +
           82  +  zSql = sqlite3_vmprintf(zFormat, ap);
           83  +  if( zSql==0 ) runtimeError("out of memory");
           84  +  rc = sqlite3_prepare_v2(g.db, zSql, -1, &pStmt, 0);
           85  +  if( rc ){
           86  +    runtimeError("SQL statement error: %s\n\"%s\"", sqlite3_errmsg(g.db),
           87  +                 zSql);
           88  +  }
           89  +  sqlite3_free(zSql);
           90  +  return pStmt;
           91  +}
           92  +static sqlite3_stmt *db_prepare(const char *zFormat, ...){
           93  +  va_list ap;
           94  +  sqlite3_stmt *pStmt;
           95  +  va_start(ap, zFormat);
           96  +  pStmt = db_vprepare(zFormat, ap);
           97  +  va_end(ap);
           98  +  return pStmt;
           99  +}
          100  +
          101  +/*
          102  +** Estimate the number of rows in the given table or index.
          103  +*/
          104  +static sqlite3_int64 estEntryCount(const char *zTabIdx){
          105  +  double sum = 0.0;
          106  +  int i;
          107  +  int n = 0;
          108  +  sqlite3_stmt *pStmt;
          109  +# define N_CNT_SAMPLE 10
          110  +  for(i=0; i<=N_CNT_SAMPLE; i++){
          111  +    pStmt = db_prepare("PRAGMA est_count(\"%w\",%g)", 
          112  +                       zTabIdx, ((double)i)/(double)(N_CNT_SAMPLE));
          113  +    if( sqlite3_step(pStmt)==SQLITE_ROW ){
          114  +      sum += sqlite3_column_double(pStmt, 0);
          115  +      n++;
          116  +    }
          117  +    sqlite3_finalize(pStmt);
          118  +  }
          119  +  return n==0 ? 0 : (sqlite3_int64)(sum/n);
          120  +}
          121  +
          122  +/*
          123  +** Stat1 for a table.
          124  +*/
          125  +static void analyzeTable(const char *zTab){
          126  +  sqlite3_int64 n = estEntryCount(zTab);
          127  +  sqlite3_stmt *pStmt;
          128  +  if( n==0 ){
          129  +    printf("-- empty table: %s\n", zTab);
          130  +    return;
          131  +  }
          132  +  pStmt = db_prepare(
          133  +     "INSERT INTO temp.est_stat1(tbl,idx,stat)"
          134  +     "VALUES(\"%w\",NULL,'%lld')", zTab, n
          135  +  );
          136  +  sqlite3_step(pStmt);
          137  +  sqlite3_finalize(pStmt);
          138  +}
          139  +
          140  +/*
          141  +** Compare the i-th column of pStmt against pValue.  Return true if they
          142  +** are different.
          143  +*/
          144  +static int columnNotEqual(sqlite3_stmt *pStmt, int i, sqlite3_value *pValue){
          145  +  int n1, n2, n;
          146  +  if( sqlite3_column_type(pStmt,i)!=sqlite3_value_type(pValue) ) return 1;
          147  +  switch( sqlite3_column_type(pStmt,i) ){
          148  +    case SQLITE_NULL:
          149  +      return 0;  /* Nulls compare equal to one another in this context */
          150  +
          151  +    case SQLITE_INTEGER:
          152  +      return sqlite3_column_int64(pStmt,i)!=sqlite3_value_int64(pValue);
          153  +
          154  +    case SQLITE_FLOAT:
          155  +      return sqlite3_column_double(pStmt,i)!=sqlite3_value_double(pValue);
          156  +
          157  +    case SQLITE_BLOB:
          158  +      n1 = sqlite3_column_bytes(pStmt,i);
          159  +      n2 = sqlite3_value_bytes(pValue);
          160  +      n = n1<n2 ? n1 : n2;
          161  +      if( memcmp(sqlite3_column_blob(pStmt,i), sqlite3_value_blob(pValue),n) ){
          162  +        return 1;
          163  +      }
          164  +      return n1!=n2;
          165  +
          166  +    case SQLITE_TEXT:
          167  +      n1 = sqlite3_column_bytes(pStmt,i);
          168  +      n2 = sqlite3_value_bytes(pValue);
          169  +      n = n1<n2 ? n1 : n2;
          170  +      if( memcmp(sqlite3_column_text(pStmt,i), sqlite3_value_text(pValue),n) ){
          171  +        return 1;
          172  +      }
          173  +      return n1!=n2;
          174  + 
          175  +  }
          176  +  return 1;
          177  +}
          178  +
          179  +/*
          180  +** Stat1 for an index;
          181  +*/
          182  +static void analyzeIndex(const char *zTab, const char *zIdx){
          183  +  sqlite3_int64 n = estEntryCount(zIdx);
          184  +  sqlite3_stmt *pStmt;
          185  +  sqlite3_uint64 *aCnt;
          186  +  sqlite3_value **apValue;
          187  +  int nCol = 0;
          188  +  int nByte;
          189  +  int i, j, k;
          190  +  int iLimit;
          191  +  int nRow = 0;
          192  +  char *zRes;
          193  +  int szRes;
          194  +  int rc;
          195  +
          196  +# define N_SPAN  5
          197  +  if( n==0 ) return;
          198  +  pStmt = db_prepare("PRAGMA index_xinfo=\"%w\"", zIdx);
          199  +  while( sqlite3_step(pStmt)==SQLITE_ROW ){
          200  +    const char *zColl = (const char*)sqlite3_column_text(pStmt,4);
          201  +    if( sqlite3_stricmp(zColl,"binary")!=0 ){
          202  +      printf("-- cannot analyze index \"%s\" because column \"%s\" uses"
          203  +             " collating sequence \"%s\".\n",
          204  +             zIdx, sqlite3_column_text(pStmt, 2), zColl);
          205  +      sqlite3_finalize(pStmt);
          206  +      return;
          207  +    }
          208  +    if( sqlite3_column_int(pStmt, 5)==0 ) break;
          209  +    nCol++;
          210  +  }
          211  +  sqlite3_finalize(pStmt);
          212  +  nByte = (sizeof(aCnt[0]) + sizeof(apValue[0]))*nCol + 30*(nCol+1);
          213  +  aCnt = sqlite3_malloc( nByte );
          214  +  if( aCnt==0 ){
          215  +    runtimeError("out of memory");
          216  +  }
          217  +  memset(aCnt, 0, nByte);
          218  +  apValue = (sqlite3_value**)&aCnt[nCol];
          219  +  zRes = (char*)&apValue[nCol];
          220  +  szRes = 30*(nCol+1);
          221  +
          222  +  iLimit = n>10000 ? 100 : 20000;
          223  +  pStmt = db_prepare("PRAGMA btree_sample(\"%w\",0.0,%lld)",
          224  +                     zIdx, n*2);
          225  +  for(i=0; i<N_SPAN; i++){
          226  +    k = 0;
          227  +    while( k<iLimit && (rc = sqlite3_step(pStmt))==SQLITE_ROW ){
          228  +      int iFirst;
          229  +      for(iFirst=0; iFirst<nCol; iFirst++){
          230  +        if( apValue[iFirst]==0 ) break;
          231  +        if( columnNotEqual(pStmt, iFirst, apValue[iFirst]) ) break;
          232  +      }
          233  +      for(j=iFirst; j<nCol; j++){
          234  +        aCnt[j]++;
          235  +        sqlite3_value_free(apValue[j]);
          236  +        apValue[j] = sqlite3_value_dup(sqlite3_column_value(pStmt,j));
          237  +      }
          238  +      nRow++;
          239  +      k++;
          240  +    }
          241  +    sqlite3_finalize(pStmt);
          242  +    if( rc!=SQLITE_ROW || i==N_SPAN-1 ) break;
          243  +    pStmt = db_prepare("PRAGMA btree_sample(\"%w\",%g,%lld)",
          244  +                       zIdx, ((double)i)/(double)N_SPAN, n*2);
          245  +  }  
          246  +  for(j=0; j<nCol; j++) sqlite3_value_free(apValue[j]);
          247  +  sqlite3_snprintf(szRes, zRes, "%lld", n);
          248  +  k = (int)strlen(zRes);
          249  +  for(j=0; j<nCol; j++){
          250  +    sqlite3_snprintf(szRes-k, zRes+k, " %d", nRow/aCnt[j]);
          251  +    k += (int)strlen(zRes+k);
          252  +  }
          253  +  pStmt = db_prepare(
          254  +     "INSERT INTO temp.est_stat1(tbl,idx,stat)"
          255  +     "VALUES(\"%w\",\"%w\",'%s')", zTab, zIdx, zRes
          256  +  );
          257  +  sqlite3_step(pStmt);
          258  +  sqlite3_finalize(pStmt);
          259  +}
          260  +
          261  +/*
          262  +** Print the sqlite3_value X as an SQL literal.
          263  +*/
          264  +static void printQuoted(FILE *out, sqlite3_value *X){
          265  +  switch( sqlite3_value_type(X) ){
          266  +    case SQLITE_FLOAT: {
          267  +      double r1;
          268  +      char zBuf[50];
          269  +      r1 = sqlite3_value_double(X);
          270  +      sqlite3_snprintf(sizeof(zBuf), zBuf, "%!.15g", r1);
          271  +      fprintf(out, "%s", zBuf);
          272  +      break;
          273  +    }
          274  +    case SQLITE_INTEGER: {
          275  +      fprintf(out, "%lld", sqlite3_value_int64(X));
          276  +      break;
          277  +    }
          278  +    case SQLITE_BLOB: {
          279  +      const unsigned char *zBlob = sqlite3_value_blob(X);
          280  +      int nBlob = sqlite3_value_bytes(X);
          281  +      if( zBlob ){
          282  +        int i;
          283  +        fprintf(out, "x'");
          284  +        for(i=0; i<nBlob; i++){
          285  +          fprintf(out, "%02x", zBlob[i]);
          286  +        }
          287  +        fprintf(out, "'");
          288  +      }else{
          289  +        /* Could be an OOM, could be a zero-byte blob */
          290  +        fprintf(out, "X''");
          291  +      }
          292  +      break;
          293  +    }
          294  +    case SQLITE_TEXT: {
          295  +      const unsigned char *zArg = sqlite3_value_text(X);
          296  +      int i, j;
          297  +
          298  +      if( zArg==0 ){
          299  +        fprintf(out, "NULL");
          300  +      }else{
          301  +        fprintf(out, "'");
          302  +        for(i=j=0; zArg[i]; i++){
          303  +          if( zArg[i]=='\'' ){
          304  +            fprintf(out, "%.*s'", i-j+1, &zArg[j]);
          305  +            j = i+1;
          306  +          }
          307  +        }
          308  +        fprintf(out, "%s'", &zArg[j]);
          309  +      }
          310  +      break;
          311  +    }
          312  +    case SQLITE_NULL: {
          313  +      fprintf(out, "NULL");
          314  +      break;
          315  +    }
          316  +  }
          317  +}
          318  +
          319  +/*
          320  +** Output SQL that will recreate the aux.zTab table.
          321  +*/
          322  +static void dump_table(const char *zTab, const char *zAlias){
          323  +  int i;                    /* Loop counter */
          324  +  int nCol;                 /* Number of result columns */
          325  +  sqlite3_stmt *pStmt;      /* SQL statement */
          326  +  const char *zSep;         /* Separator string */
          327  +
          328  +  pStmt = db_prepare("SELECT * FROM %s", zTab);
          329  +  nCol = sqlite3_column_count(pStmt);
          330  +  while( SQLITE_ROW==sqlite3_step(pStmt) ){
          331  +    printf("INSERT INTO %s VALUES", zAlias);
          332  +    zSep = "(";
          333  +    for(i=0; i<nCol; i++){
          334  +      fprintf(stdout, "%s",zSep);
          335  +      printQuoted(stdout, sqlite3_column_value(pStmt,i));
          336  +      zSep = ",";
          337  +    }
          338  +    fprintf(stdout, ");\n");
          339  +  }
          340  +  sqlite3_finalize(pStmt);
          341  +}
          342  +
          343  +
          344  +/*
          345  +** Print sketchy documentation for this utility program
          346  +*/
          347  +static void showHelp(void){
          348  +  printf("Usage: %s [options] DBFILE\n", g.zArgv0);
          349  +  printf(
          350  +"Generate an approximate sqlite_stat1 table for the database in the DBFILE\n"
          351  +"file. Write the result to standard output.\n"
          352  +"Options:\n"
          353  +"  (none yet....)\n"
          354  +  );
          355  +}
          356  +
          357  +int main(int argc, char **argv){
          358  +  const char *zDb = 0;
          359  +  int i;
          360  +  int rc;
          361  +  char *zErrMsg = 0;
          362  +  sqlite3_stmt *pStmt;
          363  +
          364  +  g.zArgv0 = argv[0];
          365  +  sqlite3_config(SQLITE_CONFIG_SINGLETHREAD);
          366  +  for(i=1; i<argc; i++){
          367  +    const char *z = argv[i];
          368  +    if( z[0]=='-' ){
          369  +      z++;
          370  +      if( z[0]=='-' ) z++;
          371  +      if( strcmp(z,"debug")==0 ){
          372  +        if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
          373  +        g.fDebug = strtol(argv[++i], 0, 0);
          374  +      }else
          375  +      if( strcmp(z,"help")==0 ){
          376  +        showHelp();
          377  +        return 0;
          378  +      }else
          379  +      {
          380  +        cmdlineError("unknown option: %s", argv[i]);
          381  +      }
          382  +    }else if( zDb==0 ){
          383  +      zDb = argv[i];
          384  +    }else{
          385  +      cmdlineError("unknown argument: %s", argv[i]);
          386  +    }
          387  +  }
          388  +  if( zDb==0 ){
          389  +    cmdlineError("database filename required");
          390  +  }
          391  +  rc = sqlite3_open(zDb, &g.db);
          392  +  if( rc ){
          393  +    cmdlineError("cannot open database file \"%s\"", zDb);
          394  +  }
          395  +  rc = sqlite3_exec(g.db, "SELECT * FROM sqlite_master", 0, 0, &zErrMsg);
          396  +  if( rc || zErrMsg ){
          397  +    cmdlineError("\"%s\" does not appear to be a valid SQLite database", zDb);
          398  +  }
          399  +  rc = sqlite3_exec(g.db, "CREATE TEMP TABLE est_stat1(tbl,idx,stat);",
          400  +                    0, 0, &zErrMsg);
          401  +  if( rc || zErrMsg ){
          402  +    cmdlineError("Cannot CREATE TEMP TABLE");
          403  +  }
          404  +  pStmt = db_prepare("SELECT type, name, tbl_name FROM sqlite_master"
          405  +                     " WHERE type IN ('table','index')"
          406  +                     "   AND rootpage>0"
          407  +                     "   AND (type='index' OR name NOT LIKE 'sqlite_%%')"
          408  +                     " ORDER BY tbl_name, type DESC, name");
          409  +  while( sqlite3_step(pStmt)==SQLITE_ROW ){
          410  +    const char *zType = (const char*)sqlite3_column_text(pStmt, 0);
          411  +    const char *zName = (const char*)sqlite3_column_text(pStmt, 1);
          412  +    const char *zTblName = (const char*)sqlite3_column_text(pStmt, 2);
          413  +    if( zType[0]=='t' ){
          414  +      analyzeTable(zName);
          415  +    }else{
          416  +      analyzeIndex(zTblName, zName);
          417  +    }
          418  +  }
          419  +  sqlite3_finalize(pStmt);
          420  +  dump_table("temp.est_stat1","sqlite_stat1");
          421  +  sqlite3_close(g.db);
          422  +  return 0;
          423  +}