From f28e81d03e28d6260d3cb89d13ef31e0cd9a3888 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 12 Sep 2005 15:54:23 +1000 Subject: W#2776 Utility for calculating storage requirements for NDB Initial ndb_size.pl Connects to an existing MySQL database and produces a XHTML document containing an estimation of storage requirements. ndb/tools/ndb_size.pl: Initial checkin of ndb_size.pl ndb/tools/ndb_size.tmpl: Initial checkin of XHTML template file for ndb_size.pl --- ndb/tools/ndb_size.pl | 260 ++++++++++++++++++++++++++++++++++++++++++++++++ ndb/tools/ndb_size.tmpl | 175 ++++++++++++++++++++++++++++++++ 2 files changed, 435 insertions(+) create mode 100644 ndb/tools/ndb_size.pl create mode 100644 ndb/tools/ndb_size.tmpl (limited to 'ndb') diff --git a/ndb/tools/ndb_size.pl b/ndb/tools/ndb_size.pl new file mode 100644 index 00000000000..92ecf5ca784 --- /dev/null +++ b/ndb/tools/ndb_size.pl @@ -0,0 +1,260 @@ +#!/usr/bin/perl -w + +use strict; + +use DBI; +use POSIX; +use HTML::Template; + +# MySQL Cluster size estimator +# ---------------------------- +# +# (C)2005 MySQL AB +# +# +# The purpose of this tool is to work out storage requirements +# from an existing MySQL database. +# +# This involves connecting to a mysql server and throwing a bunch +# of queries at it. +# +# We currently estimate sizes for: 4.1, 5.0 and 5.1 to various amounts +# of accurracy. +# +# There is no warranty. +# +# BUGS +# ---- +# - enum/set is 0 byte storage! Woah - efficient! +# - some float stores come out weird (when there's a comma e.g. 'float(4,1)') +# - no disk data values +# - computes the storage requirements of views (and probably MERGE) +# - ignores character sets. + +my $template = HTML::Template->new(filename => 'ndb_size.tmpl', + die_on_bad_params => 0); + +my $dbh; + +{ + my $database= $ARGV[0]; + my $hostname= $ARGV[1]; + my $port= $ARGV[2]; + my $user= $ARGV[3]; + my $password= $ARGV[4]; + my $dsn = "DBI:mysql:database=$database;host=$hostname;port=$port"; + $dbh= DBI->connect($dsn, $user, $password); + $template->param(db => $database); + $template->param(dsn => $dsn); +} + +my @releases = ({rel=>'4.1'},{rel=>'5.0'},{rel=>'5.1'}); +$template->param(releases => \@releases); + +my $tables = $dbh->selectall_arrayref("show tables"); + +my @table_size; + +sub align { + my($to,@unaligned) = @_; + my @aligned; + foreach my $x (@unaligned) { + push @aligned, $to * POSIX::floor(($x+$to-1)/$to); + } + return @aligned; +} + +foreach(@{$tables}) +{ + my $table= @{$_}[0]; + my @columns; + my $info= $dbh->selectall_hashref("describe $table","Field"); + my @count = $dbh->selectrow_array("select count(*) from $table"); + my %columnsize; # used for index calculations + + # We now work out the DataMemory usage + + # sizes for 4.1, 5.0, 5.1 + my @totalsize= (0,0,0); + + foreach(keys %$info) + { + my @realsize = (0,0,0); + my $type; + my $size; + my $name= $_; + + if($$info{$_}{Type} =~ /^(.*?)\((\d+)\)/) + { + $type= $1; + $size= $2; + } + else + { + $type= $$info{$_}{Type}; + } + + if($type =~ /tinyint/) + {@realsize=(1,1,1)} + elsif($type =~ /smallint/) + {@realsize=(2,2,2)} + elsif($type =~ /mediumint/) + {@realsize=(3,3,3)} + elsif($type =~ /bigint/) + {@realsize=(8,8,8)} + elsif($type =~ /int/) + {@realsize=(4,4,4)} + elsif($type =~ /float/) + { + if($size<=24) + {@realsize=(4,4,4)} + else + {@realsize=(8,8,8)} + } + elsif($type =~ /double/ || $type =~ /real/) + {@realsize=(8,8,8)} + elsif($type =~ /bit/) + { + my $a=($size+7)/8; + @realsize = ($a,$a,$a); + } + elsif($type =~ /datetime/) + {@realsize=(8,8,8)} + elsif($type =~ /timestamp/) + {@realsize=(4,4,4)} + elsif($type =~ /date/ || $type =~ /time/) + {@realsize=(3,3,3)} + elsif($type =~ /year/) + {@realsize=(1,1,1)} + elsif($type =~ /varchar/ || $type =~ /varbinary/) + { + my $fixed= 1+$size; + my @dynamic=$dbh->selectrow_array("select avg(length($name)) from $table"); + $dynamic[0]=0 if !$dynamic[0]; + @realsize= ($fixed,$fixed,ceil($dynamic[0])); + } + elsif($type =~ /binary/ || $type =~ /char/) + {@realsize=($size,$size,$size)} + elsif($type =~ /text/ || $type =~ /blob/) + {@realsize=(256,256,1)} # FIXME check if 5.1 is correct + + @realsize= align(4,@realsize); + + $totalsize[$_]+=$realsize[$_] foreach 0..$#totalsize; + + my @realout; + push @realout,{val=>$_} foreach @realsize; + + push @columns, { + name=>$name, + type=>$type, + size=>$size, + key=>$$info{$_}{Key}, + datamemory=>\@realout, + }; + + $columnsize{$name}= \@realsize; # used for index calculations + } + + # And now... the IndexMemory usage. + # + # Firstly, we assemble some information about the indexes. + # We use SHOW INDEX instead of using INFORMATION_SCHEMA so + # we can still connect to pre-5.0 mysqlds. + my %indexes; + { + my $sth= $dbh->prepare("show index from $table"); + $sth->execute; + while(my $i = $sth->fetchrow_hashref) + { + $indexes{${%$i}{Key_name}}= { + type=>${%$i}{Index_type}, + unique=>!${%$i}{Non_unique}, + comment=>${%$i}{Comment}, + } if !defined($indexes{${%$i}{Key_name}}); + + $indexes{${%$i}{Key_name}}{columns}[${%$i}{Seq_in_index}-1]= + ${%$i}{Column_name}; + } + } + + if(!defined($indexes{PRIMARY})) { + $indexes{PRIMARY}= { + type=>'BTREE', + unique=>1, + comment=>'Hidden pkey created by NDB', + columns=>['HIDDEN_NDB_PKEY'], + }; + push @columns, { + name=>'HIDDEN_NDB_PKEY', + type=>'bigint', + size=>8, + key=>'PRI', + datamemory=>[{val=>8},{val=>8},{val=>8}], + }; + $columnsize{'HIDDEN_NDB_PKEY'}= [8,8,8]; + } + + my @IndexDataMemory= ({val=>0},{val=>0},{val=>0}); + my @RowIndexMemory= ({val=>0},{val=>0},{val=>0}); + + my @indexes; + foreach my $index (keys %indexes) { + my $im41= 25; + $im41+=$columnsize{$_}[0] foreach @{$indexes{$index}{columns}}; + my @im = ({val=>$im41},{val=>25},{val=>25}); + my @dm = ({val=>10},{val=>10},{val=>10}); + push @indexes, { + name=>$index, + type=>$indexes{$index}{type}, + columns=>join(',',@{$indexes{$index}{columns}}), + indexmemory=>\@im, + datamemory=>\@dm, + }; + $IndexDataMemory[$_]{val}+=$dm[$_]{val} foreach 0..2; + $RowIndexMemory[$_]{val}+=$im[$_]{val} foreach 0..2; + } + + # total size + 16 bytes overhead + my @TotalDataMemory; + $TotalDataMemory[$_]{val}=$IndexDataMemory[$_]{val}+$totalsize[$_]+16 foreach 0..2; + + my @RowDataMemory; + push @RowDataMemory,{val=>$_} foreach @totalsize; + + my @RowPerPage; + push @RowPerPage,{val=>(floor((32768-128)/$TotalDataMemory[$_]{val}))} foreach 0..$#TotalDataMemory; + + my @RowPerIndexPage; + push @RowPerIndexPage,{val=>(floor(8192/$RowIndexMemory[$_]{val}))} foreach 0..$#TotalDataMemory; + + my @DataMemory; + push @DataMemory,{val=>ceil(($count[0]/($RowPerPage[$_]{val})))*32} foreach 0..$#RowPerPage; + + my @IndexMemory; + push @IndexMemory,{val=>ceil(($count[0]/($RowPerIndexPage[$_]{val})))*8} foreach 0..$#RowPerPage; + + my $count= $count[0]; + my @counts; + $counts[$_]{val}= $count foreach 0..$#releases; + + push @table_size, { + table=>$table, + indexes=>\@indexes, + columns=>\@columns, + count=>\@counts, + RowDataMemory=>\@RowDataMemory, + releases=>\@releases, + IndexDataMemory=>\@IndexDataMemory, + TotalDataMemory=>\@TotalDataMemory, + RowPerPage=>\@RowPerPage, + DataMemory=>\@DataMemory, + RowIndexMemory=>\@RowIndexMemory, + RowPerIndexPage=>\@RowPerIndexPage, + IndexMemory=>\@IndexMemory, + + }; +} + +$template->param(tables => \@table_size); +print $template->output; diff --git a/ndb/tools/ndb_size.tmpl b/ndb/tools/ndb_size.tmpl new file mode 100644 index 00000000000..d83d5d2c6af --- /dev/null +++ b/ndb/tools/ndb_size.tmpl @@ -0,0 +1,175 @@ + + + + + + MySQL Cluster size estimate for <TMPL_VAR NAME="db" ESCAPE="HTML"> + + + +

MySQL Cluster analysis for

+

This is an automated analysis of the database for migration into MySQL Cluster. No warranty is made to the accuracy of the information.

+ +

This information should be valid for MySQL 4.1

+ + + +
+ + +

+ + + + + + + + + + + + + + + + + + + + + +
ColumnTypeSizeKey NDB Size
+ +

 

+ +

Indexes

+ +

We assume that indexes are ORDERED (not created USING HASH). If order is not required, 10 bytes of data memory can be saved per row if the index is created USING HASH

+ + + + + + + + + + + + + + + + + + + + + + + + + +
IndexTypeColumns IdxMem DatMem
+ +

DataMemory Usage

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 
Row Overhead16
Column DataMemory/Row
Index DataMemory/Row
Total DataMemory/Row
Rows per 32kb page
Current number of rows
Total DataMemory (kb)
+ +

IndexMemory Usage

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 
IndexMemory/Row
Rows per 8kb page
Current number of rows
Total IndexMemory (kb)
+ +
+
+ +

This is the output of ndb_size.pl.

+ + + -- cgit v1.2.1 From da5ffc732cae26712a504874e4c4f3f60309d4a4 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 14 Sep 2005 16:28:19 +1000 Subject: WL#2779 ndb_size.pl Bug reported by Adam Dixon - table name needs quoting. ndb/tools/ndb_size.pl: Always quote the table and database names when sending to cluster. --- ndb/tools/ndb_size.pl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'ndb') diff --git a/ndb/tools/ndb_size.pl b/ndb/tools/ndb_size.pl index 92ecf5ca784..64a20423636 100644 --- a/ndb/tools/ndb_size.pl +++ b/ndb/tools/ndb_size.pl @@ -68,8 +68,9 @@ foreach(@{$tables}) { my $table= @{$_}[0]; my @columns; - my $info= $dbh->selectall_hashref("describe $table","Field"); - my @count = $dbh->selectrow_array("select count(*) from $table"); + my $info= $dbh->selectall_hashref("describe ".$dbh->quote($table),"Field"); + my @count = $dbh->selectrow_array("select count(*) from " + .$dbh->quote($table)); my %columnsize; # used for index calculations # We now work out the DataMemory usage @@ -129,7 +130,9 @@ foreach(@{$tables}) elsif($type =~ /varchar/ || $type =~ /varbinary/) { my $fixed= 1+$size; - my @dynamic=$dbh->selectrow_array("select avg(length($name)) from $table"); + my @dynamic=$dbh->selectrow_array("select avg(length(" + .$dbh->quote($name) + .")) from ".$dbh->quote($table)); $dynamic[0]=0 if !$dynamic[0]; @realsize= ($fixed,$fixed,ceil($dynamic[0])); } @@ -163,7 +166,7 @@ foreach(@{$tables}) # we can still connect to pre-5.0 mysqlds. my %indexes; { - my $sth= $dbh->prepare("show index from $table"); + my $sth= $dbh->prepare("show index from "$dbh->quote($table)); $sth->execute; while(my $i = $sth->fetchrow_hashref) { -- cgit v1.2.1