From 7c48e67cf07ee41bfde7139a62bb232bd23a4a48 Mon Sep 17 00:00:00 2001 From: Lorry Tar Creator Date: Wed, 6 Jun 2012 16:41:29 +0000 Subject: Imported from /srv/lorry/lorry-area/perl-dbi-tarball/DBI-1.622.tar.gz. --- lib/DBI/ProfileData.pm | 737 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 737 insertions(+) create mode 100644 lib/DBI/ProfileData.pm (limited to 'lib/DBI/ProfileData.pm') diff --git a/lib/DBI/ProfileData.pm b/lib/DBI/ProfileData.pm new file mode 100644 index 0000000..b2db087 --- /dev/null +++ b/lib/DBI/ProfileData.pm @@ -0,0 +1,737 @@ +package DBI::ProfileData; +use strict; + +=head1 NAME + +DBI::ProfileData - manipulate DBI::ProfileDumper data dumps + +=head1 SYNOPSIS + +The easiest way to use this module is through the dbiprof frontend +(see L for details): + + dbiprof --number 15 --sort count + +This module can also be used to roll your own profile analysis: + + # load data from dbi.prof + $prof = DBI::ProfileData->new(File => "dbi.prof"); + + # get a count of the records (unique paths) in the data set + $count = $prof->count(); + + # sort by longest overall time + $prof->sort(field => "longest"); + + # sort by longest overall time, least to greatest + $prof->sort(field => "longest", reverse => 1); + + # exclude records with key2 eq 'disconnect' + $prof->exclude(key2 => 'disconnect'); + + # exclude records with key1 matching /^UPDATE/i + $prof->exclude(key1 => qr/^UPDATE/i); + + # remove all records except those where key1 matches /^SELECT/i + $prof->match(key1 => qr/^SELECT/i); + + # produce a formatted report with the given number of items + $report = $prof->report(number => 10); + + # clone the profile data set + $clone = $prof->clone(); + + # get access to hash of header values + $header = $prof->header(); + + # get access to sorted array of nodes + $nodes = $prof->nodes(); + + # format a single node in the same style as report() + $text = $prof->format($nodes->[0]); + + # get access to Data hash in DBI::Profile format + $Data = $prof->Data(); + +=head1 DESCRIPTION + +This module offers the ability to read, manipulate and format +DBI::ProfileDumper profile data. + +Conceptually, a profile consists of a series of records, or nodes, +each of each has a set of statistics and set of keys. Each record +must have a unique set of keys, but there is no requirement that every +record have the same number of keys. + +=head1 METHODS + +The following methods are supported by DBI::ProfileData objects. + +=cut + + +our $VERSION = sprintf("2.%06d", q$Revision: 10007 $ =~ /(\d+)/o); + +use Carp qw(croak); +use Symbol; +use Fcntl qw(:flock); + +use DBI::Profile qw(dbi_profile_merge); + +# some constants for use with node data arrays +sub COUNT () { 0 }; +sub TOTAL () { 1 }; +sub FIRST () { 2 }; +sub SHORTEST () { 3 }; +sub LONGEST () { 4 }; +sub FIRST_AT () { 5 }; +sub LAST_AT () { 6 }; +sub PATH () { 7 }; + + +my $HAS_FLOCK = (defined $ENV{DBI_PROFILE_FLOCK}) + ? $ENV{DBI_PROFILE_FLOCK} + : do { local $@; eval { flock STDOUT, 0; 1 } }; + + +=head2 $prof = DBI::ProfileData->new(File => "dbi.prof") + +=head2 $prof = DBI::ProfileData->new(File => "dbi.prof", Filter => sub { ... }) + +=head2 $prof = DBI::ProfileData->new(Files => [ "dbi.prof.1", "dbi.prof.2" ]) + +Creates a a new DBI::ProfileData object. Takes either a single file +through the File option or a list of Files in an array ref. If +multiple files are specified then the header data from the first file +is used. + +=head3 Files + +Reference to an array of file names to read. + +=head3 File + +Name of file to read. Takes precedence over C. + +=head3 DeleteFiles + +If true, the files are deleted after being read. + +Actually the files are renamed with a C.deleteme> suffix before being read, +and then, after reading all the files, they're all deleted together. + +The files are locked while being read which, combined with the rename, makes it +safe to 'consume' files that are still being generated by L. + +=head3 Filter + +The C parameter can be used to supply a code reference that can +manipulate the profile data as it is being read. This is most useful for +editing SQL statements so that slightly different statements in the raw data +will be merged and aggregated in the loaded data. For example: + + Filter => sub { + my ($path_ref, $data_ref) = @_; + s/foo = '.*?'/foo = '...'/ for @$path_ref; + } + +Here's an example that performs some normalization on the SQL. It converts all +numbers to C and all quoted strings to C. It can also convert digits to +N within names. Finally, it summarizes long "IN (...)" clauses. + +It's aggressive and simplistic, but it's often sufficient, and serves as an +example that you can tailor to suit your own needs: + + Filter => sub { + my ($path_ref, $data_ref) = @_; + local $_ = $path_ref->[0]; # whichever element contains the SQL Statement + s/\b\d+\b/N/g; # 42 -> N + s/\b0x[0-9A-Fa-f]+\b/N/g; # 0xFE -> N + s/'.*?'/'S'/g; # single quoted strings (doesn't handle escapes) + s/".*?"/"S"/g; # double quoted strings (doesn't handle escapes) + # convert names like log_20001231 into log_NNNNNNNN, controlled by $opt{n} + s/([a-z_]+)(\d{$opt{n},})/$1.('N' x length($2))/ieg if $opt{n}; + # abbreviate massive "in (...)" statements and similar + s!(([NS],){100,})!sprintf("$2,{repeated %d times}",length($1)/2)!eg; + } + +It's often better to perform this kinds of normalization in the DBI while the +data is being collected, to avoid too much memory being used by storing profile +data for many different SQL statement. See L. + +=cut + +sub new { + my $pkg = shift; + my $self = { + Files => [ "dbi.prof" ], + Filter => undef, + DeleteFiles => 0, + LockFile => $HAS_FLOCK, + _header => {}, + _nodes => [], + _node_lookup => {}, + _sort => 'none', + @_ + }; + bless $self, $pkg; + + # File (singular) overrides Files (plural) + $self->{Files} = [ $self->{File} ] if exists $self->{File}; + + $self->_read_files(); + return $self; +} + +# read files into _header and _nodes +sub _read_files { + my $self = shift; + my $files = $self->{Files}; + my $read_header = 0; + my @files_to_delete; + + my $fh = gensym; + foreach (@$files) { + my $filename = $_; + + if ($self->{DeleteFiles}) { + my $newfilename = $filename . ".deleteme"; + if ($^O eq 'VMS') { + # VMS default filesystem can only have one period + $newfilename = $filename . 'deleteme'; + } + # will clobber an existing $newfilename + rename($filename, $newfilename) + or croak "Can't rename($filename, $newfilename): $!"; + # On a versioned filesystem we want old versions to be removed + 1 while (unlink $filename); + $filename = $newfilename; + } + + open($fh, "<", $filename) + or croak("Unable to read profile file '$filename': $!"); + + # lock the file in case it's still being written to + # (we'll be foced to wait till the write is complete) + flock($fh, LOCK_SH) if $self->{LockFile}; + + if (-s $fh) { # not empty + $self->_read_header($fh, $filename, $read_header ? 0 : 1); + $read_header = 1; + $self->_read_body($fh, $filename); + } + close($fh); # and release lock + + push @files_to_delete, $filename + if $self->{DeleteFiles}; + } + for (@files_to_delete){ + # for versioned file systems + 1 while (unlink $_); + if(-e $_){ + warn "Can't delete '$_': $!"; + } + } + + # discard node_lookup now that all files are read + delete $self->{_node_lookup}; +} + +# read the header from the given $fh named $filename. Discards the +# data unless $keep. +sub _read_header { + my ($self, $fh, $filename, $keep) = @_; + + # get profiler module id + my $first = <$fh>; + chomp $first; + $self->{_profiler} = $first if $keep; + + # collect variables from the header + local $_; + while (<$fh>) { + chomp; + last unless length $_; + /^(\S+)\s*=\s*(.*)/ + or croak("Syntax error in header in $filename line $.: $_"); + # XXX should compare new with existing (from previous file) + # and warn if they differ (diferent program or path) + $self->{_header}{$1} = unescape_key($2) if $keep; + } +} + + +sub unescape_key { # inverse of escape_key() in DBI::ProfileDumper + local $_ = shift; + s/(?{_nodes}; + my $lookup = $self->{_node_lookup}; + my $filter = $self->{Filter}; + + # build up node array + my @path = (""); + my (@data, $path_key); + local $_; + while (<$fh>) { + chomp; + if (/^\+\s+(\d+)\s?(.*)/) { + # it's a key + my ($key, $index) = ($2, $1 - 1); + + $#path = $index; # truncate path to new length + $path[$index] = unescape_key($key); # place new key at end + + } + elsif (s/^=\s+//) { + # it's data - file in the node array with the path in index 0 + # (the optional minus is to make it more robust against systems + # with unstable high-res clocks - typically due to poor NTP config + # of kernel SMP behaviour, i.e. min time may be -0.000008)) + + @data = split / /, $_; + + # corrupt data? + croak("Invalid number of fields in $filename line $.: $_") + unless @data == 7; + croak("Invalid leaf node characters $filename line $.: $_") + unless m/^[-+ 0-9eE\.]+$/; + + # hook to enable pre-processing of the data - such as mangling SQL + # so that slightly different statements get treated as the same + # and so merged in the results + $filter->(\@path, \@data) if $filter; + + # elements of @path can't have NULLs in them, so this + # forms a unique string per @path. If there's some way I + # can get this without arbitrarily stripping out a + # character I'd be happy to hear it! + $path_key = join("\0",@path); + + # look for previous entry + if (exists $lookup->{$path_key}) { + # merge in the new data + dbi_profile_merge($nodes->[$lookup->{$path_key}], \@data); + } else { + # insert a new node - nodes are arrays with data in 0-6 + # and path data after that + push(@$nodes, [ @data, @path ]); + + # record node in %seen + $lookup->{$path_key} = $#$nodes; + } + } + else { + croak("Invalid line type syntax error in $filename line $.: $_"); + } + } +} + + + +=head2 $copy = $prof->clone(); + +Clone a profile data set creating a new object. + +=cut + +sub clone { + my $self = shift; + + # start with a simple copy + my $clone = bless { %$self }, ref($self); + + # deep copy nodes + $clone->{_nodes} = [ map { [ @$_ ] } @{$self->{_nodes}} ]; + + # deep copy header + $clone->{_header} = { %{$self->{_header}} }; + + return $clone; +} + +=head2 $header = $prof->header(); + +Returns a reference to a hash of header values. These are the key +value pairs included in the header section of the DBI::ProfileDumper +data format. For example: + + $header = { + Path => [ '!Statement', '!MethodName' ], + Program => 't/42profile_data.t', + }; + +Note that modifying this hash will modify the header data stored +inside the profile object. + +=cut + +sub header { shift->{_header} } + + +=head2 $nodes = $prof->nodes() + +Returns a reference the sorted nodes array. Each element in the array +is a single record in the data set. The first seven elements are the +same as the elements provided by DBI::Profile. After that each key is +in a separate element. For example: + + $nodes = [ + [ + 2, # 0, count + 0.0312958955764771, # 1, total duration + 0.000490069389343262, # 2, first duration + 0.000176072120666504, # 3, shortest duration + 0.00140702724456787, # 4, longest duration + 1023115819.83019, # 5, time of first event + 1023115819.86576, # 6, time of last event + 'SELECT foo FROM bar' # 7, key1 + 'execute' # 8, key2 + # 6+N, keyN + ], + # ... + ]; + +Note that modifying this array will modify the node data stored inside +the profile object. + +=cut + +sub nodes { shift->{_nodes} } + + +=head2 $count = $prof->count() + +Returns the number of items in the profile data set. + +=cut + +sub count { scalar @{shift->{_nodes}} } + + +=head2 $prof->sort(field => "field") + +=head2 $prof->sort(field => "field", reverse => 1) + +Sorts data by the given field. Available fields are: + + longest + total + count + shortest + +The default sort is greatest to smallest, which is the opposite of the +normal Perl meaning. This, however, matches the expected behavior of +the dbiprof frontend. + +=cut + + +# sorts data by one of the available fields +{ + my %FIELDS = ( + longest => LONGEST, + total => TOTAL, + count => COUNT, + shortest => SHORTEST, + key1 => PATH+0, + key2 => PATH+1, + key3 => PATH+2, + ); + sub sort { + my $self = shift; + my $nodes = $self->{_nodes}; + my %opt = @_; + + croak("Missing required field option.") unless $opt{field}; + + my $index = $FIELDS{$opt{field}}; + + croak("Unrecognized sort field '$opt{field}'.") + unless defined $index; + + # sort over index + if ($opt{reverse}) { + @$nodes = sort { + $a->[$index] <=> $b->[$index] + } @$nodes; + } else { + @$nodes = sort { + $b->[$index] <=> $a->[$index] + } @$nodes; + } + + # remember how we're sorted + $self->{_sort} = $opt{field}; + + return $self; + } +} + + +=head2 $count = $prof->exclude(key2 => "disconnect") + +=head2 $count = $prof->exclude(key2 => "disconnect", case_sensitive => 1) + +=head2 $count = $prof->exclude(key1 => qr/^SELECT/i) + +Removes records from the data set that match the given string or +regular expression. This method modifies the data in a permanent +fashion - use clone() first to maintain the original data after +exclude(). Returns the number of nodes left in the profile data set. + +=cut + +sub exclude { + my $self = shift; + my $nodes = $self->{_nodes}; + my %opt = @_; + + # find key index number + my ($index, $val); + foreach (keys %opt) { + if (/^key(\d+)$/) { + $index = PATH + $1 - 1; + $val = $opt{$_}; + last; + } + } + croak("Missing required keyN option.") unless $index; + + if (UNIVERSAL::isa($val,"Regexp")) { + # regex match + @$nodes = grep { + $#$_ < $index or $_->[$index] !~ /$val/ + } @$nodes; + } else { + if ($opt{case_sensitive}) { + @$nodes = grep { + $#$_ < $index or $_->[$index] ne $val; + } @$nodes; + } else { + $val = lc $val; + @$nodes = grep { + $#$_ < $index or lc($_->[$index]) ne $val; + } @$nodes; + } + } + + return scalar @$nodes; +} + + +=head2 $count = $prof->match(key2 => "disconnect") + +=head2 $count = $prof->match(key2 => "disconnect", case_sensitive => 1) + +=head2 $count = $prof->match(key1 => qr/^SELECT/i) + +Removes records from the data set that do not match the given string +or regular expression. This method modifies the data in a permanent +fashion - use clone() first to maintain the original data after +match(). Returns the number of nodes left in the profile data set. + +=cut + +sub match { + my $self = shift; + my $nodes = $self->{_nodes}; + my %opt = @_; + + # find key index number + my ($index, $val); + foreach (keys %opt) { + if (/^key(\d+)$/) { + $index = PATH + $1 - 1; + $val = $opt{$_}; + last; + } + } + croak("Missing required keyN option.") unless $index; + + if (UNIVERSAL::isa($val,"Regexp")) { + # regex match + @$nodes = grep { + $#$_ >= $index and $_->[$index] =~ /$val/ + } @$nodes; + } else { + if ($opt{case_sensitive}) { + @$nodes = grep { + $#$_ >= $index and $_->[$index] eq $val; + } @$nodes; + } else { + $val = lc $val; + @$nodes = grep { + $#$_ >= $index and lc($_->[$index]) eq $val; + } @$nodes; + } + } + + return scalar @$nodes; +} + + +=head2 $Data = $prof->Data() + +Returns the same Data hash structure as seen in DBI::Profile. This +structure is not sorted. The nodes() structure probably makes more +sense for most analysis. + +=cut + +sub Data { + my $self = shift; + my (%Data, @data, $ptr); + + foreach my $node (@{$self->{_nodes}}) { + # traverse to key location + $ptr = \%Data; + foreach my $key (@{$node}[PATH .. $#$node - 1]) { + $ptr->{$key} = {} unless exists $ptr->{$key}; + $ptr = $ptr->{$key}; + } + + # slice out node data + $ptr->{$node->[-1]} = [ @{$node}[0 .. 6] ]; + } + + return \%Data; +} + + +=head2 $text = $prof->format($nodes->[0]) + +Formats a single node into a human-readable block of text. + +=cut + +sub format { + my ($self, $node) = @_; + my $format; + + # setup keys + my $keys = ""; + for (my $i = PATH; $i <= $#$node; $i++) { + my $key = $node->[$i]; + + # remove leading and trailing space + $key =~ s/^\s+//; + $key =~ s/\s+$//; + + # if key has newlines or is long take special precautions + if (length($key) > 72 or $key =~ /\n/) { + $keys .= " Key " . ($i - PATH + 1) . " :\n\n$key\n\n"; + } else { + $keys .= " Key " . ($i - PATH + 1) . " : $key\n"; + } + } + + # nodes with multiple runs get the long entry format, nodes with + # just one run get a single count. + if ($node->[COUNT] > 1) { + $format = <[TOTAL] / $node->[COUNT]) . $keys; + } else { + $format = <report(number => 10) + +Produces a report with the given number of items. + +=cut + +sub report { + my $self = shift; + my $nodes = $self->{_nodes}; + my %opt = @_; + + croak("Missing required number option") unless exists $opt{number}; + + $opt{number} = @$nodes if @$nodes < $opt{number}; + + my $report = $self->_report_header($opt{number}); + for (0 .. $opt{number} - 1) { + $report .= sprintf("#" x 5 . "[ %d ]". "#" x 59 . "\n", + $_ + 1); + $report .= $self->format($nodes->[$_]); + $report .= "\n"; + } + return $report; +} + +# format the header for report() +sub _report_header { + my ($self, $number) = @_; + my $nodes = $self->{_nodes}; + my $node_count = @$nodes; + + # find total runtime and method count + my ($time, $count) = (0,0); + foreach my $node (@$nodes) { + $time += $node->[TOTAL]; + $count += $node->[COUNT]; + } + + my $header = <{_profiler}) + +END + + # output header fields + while (my ($key, $value) = each %{$self->{_header}}) { + $header .= sprintf(" %-13s : %s\n", $key, $value); + } + + # output summary data fields + $header .= sprintf(<{_sort}, $count, $time); + Total Records : %d (showing %d, sorted by %s) + Total Count : %d + Total Runtime : %3.6f seconds + +END + + return $header; +} + + +1; + +__END__ + +=head1 AUTHOR + +Sam Tregar + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2002 Sam Tregar + +This program is free software; you can redistribute it and/or modify +it under the same terms as Perl 5 itself. + +=cut -- cgit v1.2.1