/ Check-in [63f2ee22]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Refactor the space-analysis script for readability. (CVS 2081)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 63f2ee22e20ed7e520fd9230acc5c6db43b69d13
User & Date: danielk1977 2004-11-09 07:42:11
Context
2004-11-09
12:44
Port the "DEFAULT CURRENT_TIME" etc. functionality from an earlier fork of sqlite. (CVS 2082) check-in: 0d27c8ff user: danielk1977 tags: trunk
07:42
Refactor the space-analysis script for readability. (CVS 2081) check-in: 63f2ee22 user: danielk1977 tags: trunk
2004-11-08
16:15
Update the space-analyzer tool to support auto-vacuum databases. (CVS 2080) check-in: 1cb80866 user: danielk1977 tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to tool/spaceanal.tcl.

    25     25   }
    26     26   
    27     27   # Open the database
    28     28   #
    29     29   sqlite3 db [lindex $argv 0]
    30     30   set DB [btree_open [lindex $argv 0] 1000 0]
    31     31   
    32         -# In-memory database for collecting statistics
           32  +# In-memory database for collecting statistics. This script loops through
           33  +# the tables and indices in the database being analyzed, adding a row for each
           34  +# to an in-memory database (for which the schema is shown below). It then
           35  +# queries the in-memory db to produce the space-analysis report.
    33     36   #
    34     37   sqlite3 mem :memory:
    35     38   set tabledef\
    36     39   {CREATE TABLE space_used(
    37     40      name clob,        -- Name of a table or index in the database file
    38     41      tblname clob,     -- Name of associated table
    39     42      is_index boolean, -- TRUE if it is an index, false for a table
................................................................................
    48     51      ovfl_pages int,   -- Number of overflow pages used
    49     52      int_unused int,   -- Number of unused bytes on interior pages
    50     53      leaf_unused int,  -- Number of unused bytes on primary pages
    51     54      ovfl_unused int   -- Number of unused bytes on overflow pages
    52     55   );}
    53     56   mem eval $tabledef
    54     57   
    55         -# This query will be used to find the root page number for every table
    56         -# in the database.
           58  +# Quote a string for use in an SQL query. Examples:
    57     59   #
    58         -set sql {
    59         -  SELECT name, rootpage 
    60         -    FROM sqlite_master WHERE type='table'
    61         -  UNION ALL
    62         -  SELECT 'sqlite_master', 1
    63         -  ORDER BY 1
    64         -}
    65         -
    66         -# Quote a string for SQL
           60  +# [quote {hello world}]   == {'hello world'}
           61  +# [quote {hello world's}] == {'hello world''s'}
    67     62   #
    68         -proc quote txt {
           63  +proc quote {txt} {
    69     64     regsub -all ' $txt '' q
    70     65     return '$q'
    71     66   }
    72     67   
    73         -# Analyze every table in the database, one at a time.
           68  +# This proc is a wrapper around the btree_cursor_info command. The
           69  +# second argument is an open btree cursor returned by [btree_cursor].
           70  +# The first argument is the name of an array variable that exists in
           71  +# the scope of the caller. If the third argument is non-zero, then
           72  +# info is returned for the page that lies $up entries upwards in the
           73  +# tree-structure. (i.e. $up==1 returns the parent page, $up==2 the 
           74  +# grandparent etc.)
           75  +#
           76  +# The following entries in that array are filled in with information retrieved
           77  +# using [btree_cursor_info]:
           78  +#
           79  +#   $arrayvar(page_no)             =  The page number
           80  +#   $arrayvar(entry_no)            =  The entry number
           81  +#   $arrayvar(page_entries)        =  Total number of entries on this page
           82  +#   $arrayvar(cell_size)           =  Cell size (local payload + header)
           83  +#   $arrayvar(page_freebytes)      =  Number of free bytes on this page
           84  +#   $arrayvar(page_freeblocks)     =  Number of free blocks on the page
           85  +#   $arrayvar(payload_bytes)       =  Total payload size (local + overflow)
           86  +#   $arrayvar(header_bytes)        =  Header size in bytes
           87  +#   $arrayvar(local_payload_bytes) =  Local payload size
           88  +#   $arrayvar(parent)              =  Parent page number
           89  +# 
           90  +proc cursor_info {arrayvar csr {up 0}} {
           91  +  upvar $arrayvar a
           92  +  foreach [list a(page_no) \
           93  +                a(entry_no) \
           94  +                a(page_entries) \
           95  +                a(cell_size) \
           96  +                a(page_freebytes) \
           97  +                a(page_freeblocks) \
           98  +                a(payload_bytes) \
           99  +                a(header_bytes) \
          100  +                a(local_payload_bytes) \
          101  +                a(parent) ] [btree_cursor_info $csr $up] {}
          102  +}
          103  +
          104  +# Determine the page-size of the database. This global variable is used
          105  +# throughout the script.
    74    106   #
    75    107   set pageSize [db eval {PRAGMA page_size}]
          108  +
          109  +# Analyze every table in the database, one at a time.
          110  +#
          111  +# The following query returns the name and root-page of each table in the
          112  +# database, including the sqlite_master table.
          113  +#
          114  +set sql {
          115  +  SELECT name, rootpage FROM sqlite_master WHERE type='table'
          116  +  UNION ALL
          117  +  SELECT 'sqlite_master', 1
          118  +  ORDER BY 1
          119  +}
    76    120   foreach {name rootpage} [db eval $sql] {
    77    121     puts stderr "Analyzing table $name..."
    78         -  set cursor [btree_cursor $DB $rootpage 0]
    79         -  set go [btree_first $cursor]
    80         -  catch {unset seen}
          122  +
          123  +  # Code below traverses the table being analyzed (table name $name), using the
          124  +  # btree cursor $cursor. Statistics related to table $name are accumulated in
          125  +  # the following variables:
          126  +  #
    81    127     set total_payload 0        ;# Payload space used by all entries
    82    128     set total_ovfl 0           ;# Payload space on overflow pages
    83    129     set unused_int 0           ;# Unused space on interior nodes
    84    130     set unused_leaf 0          ;# Unused space on leaf nodes
    85    131     set unused_ovfl 0          ;# Unused space on overflow pages
    86    132     set cnt_ovfl 0             ;# Number of entries that use overflows
    87    133     set cnt_leaf_entry 0       ;# Number of leaf entries
    88    134     set cnt_int_entry 0        ;# Number of interor entries
    89    135     set mx_payload 0           ;# Maximum payload size
    90    136     set ovfl_pages 0           ;# Number of overflow pages used
    91    137     set leaf_pages 0           ;# Number of leaf pages
    92    138     set int_pages 0            ;# Number of interior pages
    93         -  while {$go==0} {
          139  +
          140  +  # As the btree is traversed, the array variable $seen($pgno) is set to 1
          141  +  # the first time page $pgno is encountered.
          142  +  #
          143  +  catch {unset seen}
          144  +
          145  +  # The following loop runs once for each entry in table $name. The table
          146  +  # is traversed using the btree cursor stored in variable $csr
          147  +  #
          148  +  set csr [btree_cursor $DB $rootpage 0]
          149  +  for {btree_first $csr} {![btree_eof $csr]} {btree_next $csr} {
    94    150       incr cnt_leaf_entry
    95         -    set stat [btree_cursor_info $cursor]
    96         -    set payload [lindex $stat 6]
    97         -    if {$payload>$mx_payload} {set mx_payload $payload}
    98         -    incr total_payload $payload
    99         -    set local [lindex $stat 8]    
   100         -    set ovfl [expr {$payload-$local}]
          151  +
          152  +    # Retrieve information about the entry the btree-cursor points to into
          153  +    # the array variable $ci (cursor info).
          154  +    #
          155  +    cursor_info ci $csr
          156  +
          157  +    # Check if the payload of this entry is greater than the current 
          158  +    # $mx_payload statistic for the table. Also increase the $total_payload
          159  +    # statistic.
          160  +    #
          161  +    if {$ci(payload_bytes)>$mx_payload} {set mx_payload $ci(payload_bytes)}
          162  +    incr total_payload $ci(payload_bytes)
          163  +
          164  +    # If this entry uses overflow pages, then update the $cnt_ovfl, 
          165  +    # $total_ovfl, $ovfl_pages and $unused_ovfl statistics.
          166  +    #
          167  +    set ovfl [expr {$ci(payload_bytes)-$ci(local_payload_bytes)}]
   101    168       if {$ovfl} {
   102    169         incr cnt_ovfl
   103    170         incr total_ovfl $ovfl
   104    171         set n [expr {int(ceil($ovfl/($pageSize-4.0)))}]
   105    172         incr ovfl_pages $n
   106    173         incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}]
   107    174       }
   108         -    set pgno [lindex $stat 0]
   109         -    if {![info exists seen($pgno)]} {
   110         -      set seen($pgno) 1
          175  +
          176  +    # If this is the first table entry analyzed for the page, then update
          177  +    # the page-related statistics $leaf_pages and $unused_leaf. Also, if
          178  +    # this page has a parent page that has not been analyzed, retrieve
          179  +    # info for the parent and update statistics for it too.
          180  +    #
          181  +    if {![info exists seen($ci(page_no))]} {
          182  +      set seen($ci(page_no)) 1
   111    183         incr leaf_pages
   112         -      incr unused_leaf [lindex $stat 4]
   113         -      set parent [lindex $stat 9]
   114         -      set up 0
   115         -      while {$parent!=0 && ![info exists seen($parent)]} {
   116         -        incr up
   117         -        set stat [btree_cursor_info $cursor $up]
   118         -        set seen($parent) 1
          184  +      incr unused_leaf $ci(page_freebytes)
          185  +
          186  +      # Now check if the page has a parent that has not been analyzed. If
          187  +      # so, update the $int_pages, $cnt_int_entry and $unused_int statistics
          188  +      # accordingly. Then check if the parent page has a parent that has
          189  +      # not yet been analyzed etc.
          190  +      #
          191  +      # set parent $ci(parent_page_no)
          192  +      for {set up 1} \
          193  +          {$ci(parent)!=0 && ![info exists seen($ci(parent))]} {incr up} \
          194  +      {
          195  +        # Mark the parent as seen.
          196  +        #
          197  +        set seen($ci(parent)) 1
          198  +
          199  +        # Retrieve info for the parent and update statistics.
          200  +        cursor_info ci $csr $up
   119    201           incr int_pages
   120         -        incr cnt_int_entry [lindex $stat 2]
   121         -        incr unused_int [lindex $stat 4]
   122         -        set parent [lindex $stat 9]
          202  +        incr cnt_int_entry $ci(page_entries)
          203  +        incr unused_int $ci(page_freebytes)
   123    204         }
   124    205       }
   125         -    set go [btree_next $cursor]
   126    206     }
   127         -  btree_close_cursor $cursor
          207  +  btree_close_cursor $csr
          208  +
          209  +  # Handle the special case where a table contains no data. In this case
          210  +  # all statistics are zero, except for the number of leaf pages (1) and
          211  +  # the unused bytes on leaf pages ($pageSize - 8).
          212  +  #
          213  +  # An exception to the above is the sqlite_master table. If it is empty
          214  +  # then all statistics are zero except for the number of leaf pages (1),
          215  +  # and the number of unused bytes on leaf pages ($pageSize - 112).
          216  +  #
   128    217     if {[llength [array names seen]]==0} {
   129    218       set leaf_pages 1
   130         -    set unused_leaf [expr {$pageSize-8}]
   131         -  } elseif {$rootpage==1 && ![info exists seen(1)]} {
   132         -    incr int_pages
   133         -    incr unused_int [expr {$pageSize-112}]
          219  +    if {$rootpage==1} {
          220  +      set unused_leaf [expr {$pageSize-112}]
          221  +    } else {
          222  +      set unused_leaf [expr {$pageSize-8}]
          223  +    }
   134    224     }
          225  +
          226  +  # Insert the statistics for the table analyzed into the in-memory database.
          227  +  #
   135    228     set sql "INSERT INTO space_used VALUES("
   136    229     append sql [quote $name]
   137    230     append sql ",[quote $name]"
   138    231     append sql ",0"
   139    232     append sql ",[expr {$cnt_leaf_entry+$cnt_int_entry}]"
   140    233     append sql ",$cnt_leaf_entry"
   141    234     append sql ",$total_payload"
................................................................................
   148    241     append sql ",$unused_int"
   149    242     append sql ",$unused_leaf"
   150    243     append sql ",$unused_ovfl"
   151    244     append sql );
   152    245     mem eval $sql
   153    246   }
   154    247   
   155         -# This query will be used to find the root page number for every index
   156         -# in the database.
          248  +# Analyze every index in the database, one at a time.
          249  +#
          250  +# The query below returns the name, associated table and root-page number 
          251  +# for every index in the database.
   157    252   #
   158    253   set sql {
   159         -  SELECT name, tbl_name, rootpage 
   160         -    FROM sqlite_master WHERE type='index'
          254  +  SELECT name, tbl_name, rootpage FROM sqlite_master WHERE type='index'
   161    255     ORDER BY 2, 1
   162    256   }
   163         -
   164         -# Analyze every index in the database, one at a time.
   165         -#
   166         -set pageSize [db eval {PRAGMA page_size}]
   167    257   foreach {name tbl_name rootpage} [db eval $sql] {
   168    258     puts stderr "Analyzing index $name of table $tbl_name..."
   169         -  set cursor [btree_cursor $DB $rootpage 0]
   170         -  set go [btree_first $cursor]
   171         -  catch {unset seen}
          259  +
          260  +  # Code below traverses the index being analyzed (index name $name), using the
          261  +  # btree cursor $cursor. Statistics related to index $name are accumulated in
          262  +  # the following variables:
          263  +  #
   172    264     set total_payload 0        ;# Payload space used by all entries
   173    265     set total_ovfl 0           ;# Payload space on overflow pages
   174    266     set unused_leaf 0          ;# Unused space on leaf nodes
   175    267     set unused_ovfl 0          ;# Unused space on overflow pages
   176    268     set cnt_ovfl 0             ;# Number of entries that use overflows
   177    269     set cnt_leaf_entry 0       ;# Number of leaf entries
   178    270     set mx_payload 0           ;# Maximum payload size
   179    271     set ovfl_pages 0           ;# Number of overflow pages used
   180    272     set leaf_pages 0           ;# Number of leaf pages
   181         -  while {$go==0} {
          273  +
          274  +  # As the btree is traversed, the array variable $seen($pgno) is set to 1
          275  +  # the first time page $pgno is encountered.
          276  +  #
          277  +  catch {unset seen}
          278  +
          279  +  # The following loop runs once for each entry in index $name. The index
          280  +  # is traversed using the btree cursor stored in variable $csr
          281  +  #
          282  +  set csr [btree_cursor $DB $rootpage 0]
          283  +  for {btree_first $csr} {![btree_eof $csr]} {btree_next $csr} {
   182    284       incr cnt_leaf_entry
   183         -    set stat [btree_cursor_info $cursor]
   184         -    set payload [btree_keysize $cursor]
          285  +
          286  +    # Retrieve information about the entry the btree-cursor points to into
          287  +    # the array variable $ci (cursor info).
          288  +    #
          289  +    cursor_info ci $csr
          290  +
          291  +    # Check if the payload of this entry is greater than the current 
          292  +    # $mx_payload statistic for the table. Also increase the $total_payload
          293  +    # statistic.
          294  +    #
          295  +    set payload [btree_keysize $csr]
   185    296       if {$payload>$mx_payload} {set mx_payload $payload}
   186    297       incr total_payload $payload
   187         -    set local [lindex $stat 8]    
   188         -    set ovfl [expr {$payload-$local}]
          298  +
          299  +    # If this entry uses overflow pages, then update the $cnt_ovfl, 
          300  +    # $total_ovfl, $ovfl_pages and $unused_ovfl statistics.
          301  +    #
          302  +    set ovfl [expr {$payload-$ci(local_payload_bytes)}]
   189    303       if {$ovfl} {
   190    304         incr cnt_ovfl
   191    305         incr total_ovfl $ovfl
   192    306         set n [expr {int(ceil($ovfl/($pageSize-4.0)))}]
   193    307         incr ovfl_pages $n
   194    308         incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}]
   195    309       }
   196         -    set pgno [lindex $stat 0]
   197         -    if {![info exists seen($pgno)]} {
   198         -      set seen($pgno) 1
          310  +
          311  +    # If this is the first table entry analyzed for the page, then update
          312  +    # the page-related statistics $leaf_pages and $unused_leaf.
          313  +    #
          314  +    if {![info exists seen($ci(page_no))]} {
          315  +      set seen($ci(page_no)) 1
   199    316         incr leaf_pages
   200         -      incr unused_leaf [lindex $stat 4]
          317  +      incr unused_leaf $ci(page_freebytes)
   201    318       }
   202         -    set go [btree_next $cursor]
   203    319     }
   204         -  btree_close_cursor $cursor
          320  +  btree_close_cursor $csr
          321  +
          322  +  # Handle the special case where a index contains no data. In this case
          323  +  # all statistics are zero, except for the number of leaf pages (1) and
          324  +  # the unused bytes on leaf pages ($pageSize - 8).
          325  +  #
   205    326     if {[llength [array names seen]]==0} {
   206    327       set leaf_pages 1
   207    328       set unused_leaf [expr {$pageSize-8}]
   208    329     }
          330  +
          331  +  # Insert the statistics for the index analyzed into the in-memory database.
          332  +  #
   209    333     set sql "INSERT INTO space_used VALUES("
   210    334     append sql [quote $name]
   211    335     append sql ",[quote $tbl_name]"
   212    336     append sql ",1"
   213    337     append sql ",$cnt_leaf_entry"
   214    338     append sql ",$cnt_leaf_entry"
   215    339     append sql ",$total_payload"
................................................................................
   255    379     }
   256    380   }
   257    381   
   258    382   # Generate a subreport that covers some subset of the database.
   259    383   # the $where clause determines which subset to analyze.
   260    384   #
   261    385   proc subreport {title where} {
   262         -  global pageSize
   263         -  set hit 0
          386  +  global pageSize file_pgcnt
          387  +
          388  +  # Query the in-memory database for the sum of various statistics 
          389  +  # for the subset of tables/indices identified by the WHERE clause in
          390  +  # $where. Note that even if the WHERE clause matches no rows, the
          391  +  # following query returns exactly one row (because it is an aggregate).
          392  +  #
          393  +  # The results of the query are stored directly by SQLite into local 
          394  +  # variables (i.e. $nentry, $nleaf etc.).
          395  +  #
   264    396     mem eval "
   265    397       SELECT
   266    398         sum(nentry) AS nentry,
   267    399         sum(leaf_entries) AS nleaf,
   268    400         sum(payload) AS payload,
   269    401         sum(ovfl_payload) AS ovfl_payload,
   270    402         max(mx_payload) AS mx_payload,
................................................................................
   271    403         sum(ovfl_cnt) as ovfl_cnt,
   272    404         sum(leaf_pages) AS leaf_pages,
   273    405         sum(int_pages) AS int_pages,
   274    406         sum(ovfl_pages) AS ovfl_pages,
   275    407         sum(leaf_unused) AS leaf_unused,
   276    408         sum(int_unused) AS int_unused,
   277    409         sum(ovfl_unused) AS ovfl_unused
   278         -    FROM space_used WHERE $where" {} {set hit 1}
   279         -  if {!$hit} {return 0}
          410  +    FROM space_used WHERE $where" {} {}
          411  +
          412  +  # Output the sub-report title, nicely decorated with * characters.
          413  +  #
   280    414     puts ""
   281    415     set len [string length $title]
   282         -  incr len 5
   283         -  set stars "***********************************"
   284         -  append stars $stars
   285         -  set stars [string range $stars $len end]
          416  +  set stars [string repeat * [expr 65-$len]]
   286    417     puts "*** $title $stars"
   287    418     puts ""
          419  +
          420  +  # Calculate statistics and store the results in TCL variables, as follows:
          421  +  #
          422  +  # total_pages: Database pages consumed.
          423  +  # total_pages_percent: Pages consumed as a percentage of the file.
          424  +  # storage: Bytes consumed.
          425  +  # payload_percent: Payload bytes used as a percentage of $storage.
          426  +  # total_unused: Unused bytes on pages.
          427  +  # avg_payload: Average payload per btree entry.
          428  +  # avg_fanout: Average fanout for internal pages.
          429  +  # avg_unused: Average unused bytes per btree entry.
          430  +  # ovfl_cnt_percent: Percentage of btree entries that use overflow pages.
          431  +  #
   288    432     set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
   289         -  statline "Percentage of total database" [percent $total_pages $::file_pgcnt]
   290         -  statline "Number of entries" $nleaf
   291         -  set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
          433  +  set total_pages_percent [percent $total_pages $file_pgcnt]
   292    434     set storage [expr {$total_pages*$pageSize}]
   293         -  statline "Bytes of storage consumed" $storage
   294         -  statline "Bytes of payload" $payload \
   295         -       [percent $payload $storage {of storage consumed}]
   296         -  statline "Average payload per entry" [expr {$nleaf>0?$payload/$nleaf:0}]
   297         -  set avgunused [expr {$nleaf>0?$total_unused/$nleaf:0}]
   298         -  statline "Average unused bytes per entry" $avgunused
   299         -  set nint [expr {$nentry-$nleaf}]
          435  +  set payload_percent [percent $payload $storage {of storage consumed}]
          436  +  set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
          437  +  set avg_payload [expr {$nleaf>0?$payload/$nleaf:0}]
          438  +  set avg_unused [expr {$nleaf>0?$total_unused/$nleaf:0}]
   300    439     if {$int_pages>0} {
   301         -    statline "Average fanout" [format %.2f [expr {($nint+0.0)/$int_pages}]]
          440  +    # TODO: Is this formula correct?
          441  +    set avg_fanout [format %.2f [expr double($nentry-$nleaf)/$int_pages]]
          442  +  }
          443  +  set ovfl_cnt_percent [percent $ovfl_cnt $nleaf {of all entries}]
          444  +
          445  +  # Print out the sub-report statistics.
          446  +  #
          447  +  statline {Percentage of total database} $total_pages_percent
          448  +  statline {Number of entries} $nleaf
          449  +  statline {Bytes of storage consumed} $storage
          450  +  statline {Bytes of payload} $payload $payload_percent
          451  +  statline {Average payload per entry} $avg_payload
          452  +  statline {Average unused bytes per entry} $avg_unused
          453  +  if {[info exists avg_fanout]} {
          454  +    statline {Average fanout} $avg_fanout
   302    455     }
   303         -  statline "Maximum payload per entry" $mx_payload
   304         -  statline "Entries that use overflow" $ovfl_cnt \
   305         -        [percent $ovfl_cnt $nleaf {of all entries}]
          456  +  statline {Maximum payload per entry} $mx_payload
          457  +  statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
   306    458     if {$int_pages>0} {
   307         -    statline "Index pages used" $int_pages
          459  +    statline {Index pages used} $int_pages
   308    460     }
   309         -  statline "Primary pages used" $leaf_pages
   310         -  statline "Overflow pages used" $ovfl_pages
   311         -  statline "Total pages used" $total_pages
          461  +  statline {Primary pages used} $leaf_pages
          462  +  statline {Overflow pages used} $ovfl_pages
          463  +  statline {Total pages used} $total_pages
   312    464     if {$int_unused>0} {
   313         -    statline "Unused bytes on index pages" $int_unused \
          465  +    set int_unused_percent \
   314    466            [percent $int_unused [expr {$int_pages*$pageSize}] {of index space}]
          467  +    statline "Unused bytes on index pages" $int_unused $int_unused_percent
   315    468     }
   316    469     statline "Unused bytes on primary pages" $leaf_unused \
   317    470        [percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}]
   318    471     statline "Unused bytes on overflow pages" $ovfl_unused \
   319    472        [percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}]
   320    473     statline "Unused bytes on all pages" $total_unused \
   321    474                  [percent $total_unused $storage {of all space}]
................................................................................
   323    476   }
   324    477   
   325    478   # Calculate the overhead in pages caused by auto-vacuum. 
   326    479   #
   327    480   # This procedure calculates and returns the number of pages used by the 
   328    481   # auto-vacuum 'pointer-map'. If the database does not support auto-vacuum,
   329    482   # then 0 is returned. The two arguments are the size of the database file in
   330         -# bytes and the page size used by the database (also in bytes).
          483  +# pages and the page size used by the database (in bytes).
   331    484   proc autovacuum_overhead {filePages pageSize} {
   332    485   
   333    486     # Read the value of meta 4. If non-zero, then the database supports
   334    487     # auto-vacuum. It would be possible to use "PRAGMA auto_vacuum" instead,
   335    488     # but that would not work if the SQLITE_OMIT_PRAGMA macro was defined
   336    489     # when the library was built.
   337    490     set meta4 [lindex [btree_get_meta $::DB] 4]
................................................................................
   348    501     # is the second page of the file overall.
   349    502     set ptrsPerPage [expr double($pageSize/5)]
   350    503   
   351    504     # Return the number of pointer map pages in the database.
   352    505     return [expr int(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))]
   353    506   }
   354    507   
   355         -# Output summary statistics:
   356         -#
   357         -puts "/** Disk-Space Utilization Report For $file_to_analyze"
   358         -puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]"
   359         -puts ""
   360    508   
   361         -# Variables:
          509  +# Calculate the summary statistics for the database and store the results
          510  +# in TCL variables. They are output below. Variables are as follows:
   362    511   #
   363    512   # pageSize:      Size of each page in bytes.
   364    513   # file_bytes:    File size in bytes.
   365    514   # file_pgcnt:    Number of pages in the file.
   366    515   # file_pgcnt2:   Number of pages in the file (calculated).
   367    516   # av_pgcnt:      Pages consumed by the auto-vacuum pointer-map.
   368    517   # av_percent:    Percentage of the file consumed by auto-vacuum pointer-map.
................................................................................
   372    521   # free_pgcnt2:   Free pages in the file according to the file header.
   373    522   # free_percent:  Percentage of file consumed by free pages (calculated).
   374    523   # free_percent2: Percentage of file consumed by free pages (header).
   375    524   # ntable:        Number of tables in the db.
   376    525   # nindex:        Number of indices in the db.
   377    526   # nautoindex:    Number of indices created automatically.
   378    527   # nmanindex:     Number of indices created manually.
   379         -# user_payload:
          528  +# user_payload:  Number of bytes of payload in table btrees 
          529  +#                (not including sqlite_master)
          530  +# user_percent:  $user_payload as a percentage of total file size.
   380    531   
   381    532   set file_bytes  [file size $file_to_analyze]
   382    533   set file_pgcnt  [expr {$file_bytes/$pageSize}]
   383    534   
   384    535   set av_pgcnt    [autovacuum_overhead $file_pgcnt $pageSize]
   385    536   set av_percent  [percent $av_pgcnt $file_pgcnt]
   386    537   
   387         -set q {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}
   388         -set inuse_pgcnt   [expr [mem eval $q]]
          538  +set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}
          539  +set inuse_pgcnt   [expr [mem eval $sql]]
   389    540   set inuse_percent [percent $inuse_pgcnt $file_pgcnt]
   390    541   
   391    542   set free_pgcnt    [expr $file_pgcnt-$inuse_pgcnt-$av_pgcnt]
   392    543   set free_percent  [percent $free_pgcnt $file_pgcnt]
   393    544   set free_pgcnt2   [lindex [btree_get_meta $DB] 0]
   394    545   set free_percent2 [percent $free_pgcnt2 $file_pgcnt]
   395    546   
   396    547   set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}]
   397    548   
   398    549   set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
   399    550   set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
   400         -set q {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'}
   401         -set nautoindex [db eval $q]
          551  +set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'}
          552  +set nautoindex [db eval $sql]
   402    553   set nmanindex [expr {$nindex-$nautoindex}]
   403    554   
   404    555   # set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
   405    556   set user_payload [mem one {SELECT sum(payload) FROM space_used
   406    557        WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}]
   407         -set user_payload_percent [percent $user_payload $file_bytes]
          558  +set user_percent [percent $user_payload $file_bytes]
   408    559   
          560  +# Output the summary statistics calculated above.
          561  +#
          562  +puts "/** Disk-Space Utilization Report For $file_to_analyze"
          563  +puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]"
          564  +puts ""
   409    565   statline {Page size in bytes} $pageSize
   410    566   statline {Pages in the whole file (measured)} $file_pgcnt
   411    567   statline {Pages in the whole file (calculated)} $file_pgcnt2
   412    568   statline {Pages that store data} $inuse_pgcnt $inuse_percent
   413    569   statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2
   414    570   statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent
   415    571   statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent
   416    572   statline {Number of tables in the database} $ntable
   417    573   statline {Number of indices} $nindex
   418    574   statline {Number of named indices} $nmanindex
   419    575   statline {Automatically generated indices} $nautoindex
   420    576   statline {Size of the file in bytes} $file_bytes
   421         -statline {Bytes of user payload stored} $user_payload $user_payload_percent
          577  +statline {Bytes of user payload stored} $user_payload $user_percent
   422    578   
   423    579   # Output table rankings
   424    580   #
   425    581   puts ""
   426    582   puts "*** Page counts for all tables with their indices ********************"
   427    583   puts ""
   428    584   mem eval {SELECT tblname, count(*) AS cnt, 
................................................................................
   597    753   Unused bytes on all pages
   598    754   
   599    755       The total number of bytes of unused space on all primary and overflow 
   600    756       pages.  The percentage at the right is the number of unused bytes 
   601    757       divided by the total number of bytes.
   602    758   }
   603    759   
   604         -# Output the database
          760  +# Output a dump of the in-memory database. This can be used for more
          761  +# complex offline analysis.
   605    762   #
   606    763   puts "**********************************************************************"
   607    764   puts "The entire text of this report can be sourced into any SQL database"
   608    765   puts "engine for further analysis.  All of the text above is an SQL comment."
   609    766   puts "The data used to generate this report follows:"
   610    767   puts "*/"
   611    768   puts "BEGIN;"