/ Check-in [fa5ed532]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Update the sqlite3_analyzer utility program to provide more details about the compression performance of ZIPVFS database files.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:fa5ed53296d45a5f963d20fb361555c432e0f31b
User & Date: drh 2011-10-05 18:18:13
Context
2011-10-05
19:46
Add the --pageinfo and --stats options to the sqlite3_analyzer utility. check-in: baa80c7b user: drh tags: trunk
18:18
Update the sqlite3_analyzer utility program to provide more details about the compression performance of ZIPVFS database files. check-in: fa5ed532 user: drh tags: trunk
17:36
Fix test code related to reporting the size of overflow pages in zipvfs databases. check-in: ad7c9eed user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to tool/spaceanal.tcl.

1
2
3
4
5
6
7
8
9
10
11
12
13
14


15
16
17
18















19
20
21
22
23
24
25
26
27

28
29
30
31
32
33
34
..
81
82
83
84
85
86
87


88
89
90
91
92
93
94
...
102
103
104
105
106
107
108







109
110
111
112
113
114
115
...
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
...
275
276
277
278
279
280
281

282
283
284
285
286
287
288
...
415
416
417
418
419
420
421





422

423
424
425
426
427
428
429
430
431
432
433


















434
435
436
437
438
439
440
# Run this TCL script using "testfixture" in order get a report that shows
# how much disk space is used by a particular data to actually store data
# versus how much space is unused.
#

if {[catch {
if {![info exists argv0]} {
  set argv0 [file rootname [file tail [info nameofexecutable]]]
}

# Get the name of the database to analyze
#
#set argv $argv0
if {![info exists argv] || [llength $argv]!=1} {


  puts stderr "Usage: $argv0 database-name"
  exit 1
}
set file_to_analyze [lindex $argv 0]















if {![file exists $file_to_analyze]} {
  puts stderr "No such file: $file_to_analyze"
  exit 1
}
if {![file readable $file_to_analyze]} {
  puts stderr "File is not readable: $file_to_analyze"
  exit 1
}
if {[file size $file_to_analyze]<512} {

  puts stderr "Empty or malformed database: $file_to_analyze"
  exit 1
}

# Open the database
#
sqlite3 db $file_to_analyze
................................................................................
  return [expr {$pagetype == "internal" && $is_index==0}]
}

db func isleaf isleaf
db func isinternal isinternal
db func isoverflow isoverflow



set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 }
foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] {

  set is_index [expr {$name!=$tblname}]
  db eval {
    SELECT 
      sum(ncell) AS nentry,
................................................................................
      sum(isoverflow(pagetype, $is_index)) AS ovfl_pages,
      sum(isinternal(pagetype, $is_index) * unused) AS int_unused,
      sum(isleaf(pagetype, $is_index) * unused) AS leaf_unused,
      sum(isoverflow(pagetype, $is_index) * unused) AS ovfl_unused,
      sum(pgsize) AS compressed_size
    FROM temp.dbstat WHERE name = $name
  } break








  # Column 'gap_cnt' is set to the number of non-contiguous entries in the
  # list of pages visited if the b-tree structure is traversed in a top-down
  # fashion (each node visited before its child-tree is passed). Any overflow
  # chains present are traversed from start to finish before any child-tree
  # is.
  #
................................................................................
  return [format %.2f [expr double($num)/double($denom)]]
}

# Generate a subreport that covers some subset of the database.
# the $where clause determines which subset to analyze.
#
proc subreport {title where} {
  global pageSize file_pgcnt

  # Query the in-memory database for the sum of various statistics 
  # for the subset of tables/indices identified by the WHERE clause in
  # $where. Note that even if the WHERE clause matches no rows, the
  # following query returns exactly one row (because it is an aggregate).
  #
  # The results of the query are stored directly by SQLite into local 
................................................................................

  # Print out the sub-report statistics.
  #
  statline {Percentage of total database} $total_pages_percent
  statline {Number of entries} $nleaf
  statline {Bytes of storage consumed} $storage
  if {$compressed_size!=$storage} {

    set pct [expr {$compressed_size*100.0/$storage}]
    set pct [format {%5.1f%%} $pct]
    statline {Bytes used after compression} $compressed_size $pct
  }
  statline {Bytes of payload} $payload $payload_percent
  statline {Average payload per entry} $avg_payload
  statline {Average unused bytes per entry} $avg_unused
................................................................................
statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2
statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent
statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent
statline {Number of tables in the database} $ntable
statline {Number of indices} $nindex
statline {Number of named indices} $nmanindex
statline {Automatically generated indices} $nautoindex





statline {Size of the file in bytes} $file_bytes

statline {Bytes of user payload stored} $user_payload $user_percent

# Output table rankings
#
puts ""
puts "*** Page counts for all tables with their indices ********************"
puts ""
mem eval {SELECT tblname, count(*) AS cnt, 
              int(sum(int_pages+leaf_pages+ovfl_pages)) AS size
          FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} {
  statline [string toupper $tblname] $size [percent $size $file_pgcnt]


















}

# Output subreports
#
if {$nindex>0} {
  subreport {All tables and indices} 1
}






<
<
<
<


<
<
>
>



|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>








|
>







 







>
>







 







>
>
>
>
>
>
>







 







|







 







>







 







>
>
>
>
>
|
>











>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1
2
3
4
5
6




7
8


9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
..
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
...
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
...
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
...
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
...
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
# Run this TCL script using "testfixture" in order get a report that shows
# how much disk space is used by a particular data to actually store data
# versus how much space is unused.
#

if {[catch {




# Get the name of the database to analyze
#


proc usage {} {
  set argv0 [file rootname [file tail [info nameofexecutable]]]
  puts stderr "Usage: $argv0 database-name"
  exit 1
}
set file_to_analyze {}
set flags(-pageinfo) 0
append argv {}
foreach arg $argv {
  if {[regexp {^-+pageinfo$} $arg]} {
    set flags(-pageinfo) 1
  } elseif {[regexp {^-} $arg]} {
    puts stderr "Unknown option: $arg"
    usage
  } elseif {$file_to_analyze!=""} {
    usage
  } else {
    set file_to_analyze $arg
  }
}
if {$file_to_analyze==""} usage
if {![file exists $file_to_analyze]} {
  puts stderr "No such file: $file_to_analyze"
  exit 1
}
if {![file readable $file_to_analyze]} {
  puts stderr "File is not readable: $file_to_analyze"
  exit 1
}
set true_file_size [file size $file_to_analyze]
if {$true_file_size<512} {
  puts stderr "Empty or malformed database: $file_to_analyze"
  exit 1
}

# Open the database
#
sqlite3 db $file_to_analyze
................................................................................
  return [expr {$pagetype == "internal" && $is_index==0}]
}

db func isleaf isleaf
db func isinternal isinternal
db func isoverflow isoverflow

set isCompressed 0
set compressOverhead 0
set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 }
foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] {

  set is_index [expr {$name!=$tblname}]
  db eval {
    SELECT 
      sum(ncell) AS nentry,
................................................................................
      sum(isoverflow(pagetype, $is_index)) AS ovfl_pages,
      sum(isinternal(pagetype, $is_index) * unused) AS int_unused,
      sum(isleaf(pagetype, $is_index) * unused) AS leaf_unused,
      sum(isoverflow(pagetype, $is_index) * unused) AS ovfl_unused,
      sum(pgsize) AS compressed_size
    FROM temp.dbstat WHERE name = $name
  } break

  set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
  set storage [expr {$total_pages*$pageSize}]
  if {!$isCompressed && $storage>$compressed_size} {
    set isCompressed 1
    set compressOverhead 14
  }

  # Column 'gap_cnt' is set to the number of non-contiguous entries in the
  # list of pages visited if the b-tree structure is traversed in a top-down
  # fashion (each node visited before its child-tree is passed). Any overflow
  # chains present are traversed from start to finish before any child-tree
  # is.
  #
................................................................................
  return [format %.2f [expr double($num)/double($denom)]]
}

# Generate a subreport that covers some subset of the database.
# the $where clause determines which subset to analyze.
#
proc subreport {title where} {
  global pageSize file_pgcnt compressOverhead

  # Query the in-memory database for the sum of various statistics 
  # for the subset of tables/indices identified by the WHERE clause in
  # $where. Note that even if the WHERE clause matches no rows, the
  # following query returns exactly one row (because it is an aggregate).
  #
  # The results of the query are stored directly by SQLite into local 
................................................................................

  # Print out the sub-report statistics.
  #
  statline {Percentage of total database} $total_pages_percent
  statline {Number of entries} $nleaf
  statline {Bytes of storage consumed} $storage
  if {$compressed_size!=$storage} {
    set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}]
    set pct [expr {$compressed_size*100.0/$storage}]
    set pct [format {%5.1f%%} $pct]
    statline {Bytes used after compression} $compressed_size $pct
  }
  statline {Bytes of payload} $payload $payload_percent
  statline {Average payload per entry} $avg_payload
  statline {Average unused bytes per entry} $avg_unused
................................................................................
statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2
statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent
statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent
statline {Number of tables in the database} $ntable
statline {Number of indices} $nindex
statline {Number of named indices} $nmanindex
statline {Automatically generated indices} $nautoindex
if {$isCompressed} {
  statline {Size of uncompressed content in bytes} $file_bytes
  set efficiency [percent $true_file_size $file_bytes]
  statline {Size of compressed file on disk} $true_file_size $efficiency
} else {
  statline {Size of the file in bytes} $file_bytes
}
statline {Bytes of user payload stored} $user_payload $user_percent

# Output table rankings
#
puts ""
puts "*** Page counts for all tables with their indices ********************"
puts ""
mem eval {SELECT tblname, count(*) AS cnt, 
              int(sum(int_pages+leaf_pages+ovfl_pages)) AS size
          FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} {
  statline [string toupper $tblname] $size [percent $size $file_pgcnt]
}
if {$isCompressed} {
  puts ""
  puts "*** Bytes of disk space used after compression ***********************"
  puts ""
  set csum 0
  mem eval {SELECT tblname,
                  int(sum(compressed_size)) +
                         $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages)
                        AS csize
          FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} {
    incr csum $csize
    statline [string toupper $tblname] $csize [percent $csize $true_file_size]
  }
  set overhead [expr {$true_file_size - $csum}]
  if {$overhead>0} {
    statline {Header and free space} $overhead [percent $overhead $true_file_size]
  }
}

# Output subreports
#
if {$nindex>0} {
  subreport {All tables and indices} 1
}