summaryrefslogtreecommitdiff
path: root/lib/Dist/Metadata.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Dist/Metadata.pm')
-rw-r--r--lib/Dist/Metadata.pm677
1 files changed, 677 insertions, 0 deletions
diff --git a/lib/Dist/Metadata.pm b/lib/Dist/Metadata.pm
new file mode 100644
index 0000000..a05414e
--- /dev/null
+++ b/lib/Dist/Metadata.pm
@@ -0,0 +1,677 @@
+# vim: set ts=2 sts=2 sw=2 expandtab smarttab:
+#
+# This file is part of Dist-Metadata
+#
+# This software is copyright (c) 2011 by Randy Stauner.
+#
+# This is free software; you can redistribute it and/or modify it under
+# the same terms as the Perl 5 programming language system itself.
+#
+use strict;
+use warnings;
+
+package Dist::Metadata;
+# git description: v0.925-17-g08a6891
+
+our $AUTHORITY = 'cpan:RWSTAUNER';
+# ABSTRACT: Information about a perl module distribution
+$Dist::Metadata::VERSION = '0.926';
+use Carp qw(croak carp);
+use CPAN::Meta 2.1 ();
+use List::Util qw(first); # core in perl v5.7.3
+
+# Placeholder used as the default for required metadata fields
+# (see 'abstract' and 'name' in default_metadata):
+# something that is obviously not a real value.
+# The empty prototype declares that the sub takes no arguments.
+sub UNKNOWN () { '- unknown -' } # constant
+
+
+# Constructor.
+# Accepts either a single hashref or a flat list of key/value pairs.
+# The 'determine_packages' option defaults to true unless overridden.
+# Croaks unless at least one of the dist format options
+# (dist, file, dir, struct) holds a true value.
+sub new {
+  my ($class, @args) = @_;
+
+  # a lone argument is taken to be a hashref of options
+  my $self = {
+    determine_packages => 1,
+    ( @args == 1 ? %{ $args[0] } : @args ),
+  };
+
+  my @formats = qw( dist file dir struct );
+  my $given   = first { $self->{$_} } @formats;
+
+  if( !$given ){
+    my $choices = join(', ', map { "'$_'" } @formats);
+    croak(qq[A dist must be specified (one of ] . $choices . ')');
+  }
+
+  return bless $self, $class;
+}
+
+
+# Returns the dist object, building it on first use and caching it
+# in $self->{dist}.  A true value already stored there is returned as is.
+# Otherwise the constructor options are checked in this order:
+# 'struct', 'dir', 'file'; the matching Dist::Metadata::* class is
+# loaded on demand.  Croaks if none of those options is set.
+sub dist {
+  my ($self) = @_;
+
+  # reuse whatever is already stored
+  return $self->{dist} if $self->{dist};
+
+  my $built;
+  if( my $struct = $self->{struct} ){
+    require Dist::Metadata::Struct;
+    $built = Dist::Metadata::Struct->new(%$struct);
+  }
+  elsif( my $dir = $self->{dir} ){
+    require Dist::Metadata::Dir;
+    $built = Dist::Metadata::Dir->new(dir => $dir);
+  }
+  elsif( my $file = $self->{file} ){
+    require Dist::Metadata::Archive;
+    $built = Dist::Metadata::Archive->new(file => $file);
+  }
+  else {
+    # new() checks for one and dies without so we shouldn't get here
+    croak q[No dist format parameters found!];
+  }
+
+  return $self->{dist} = $built;
+}
+
+
+# Returns a fresh hashref of default metadata values.
+# Works when invoked on either a class name or an instance.
+sub default_metadata {
+  my ($self) = @_;
+
+  my $class     = ref($self) || $self;
+  my $generator = $class . ' version ' . ( $self->VERSION || 0 );
+
+  # Ignore the same directories as PAUSE (https://github.com/andk/pause/blob/master/lib/PAUSE/dist.pm#L758):
+  #   "t"      - libraries in ./t are test libraries
+  #   "xt"     - libraries in ./xt are author test libraries
+  #   "inc"    - libraries in ./inc are usually install libraries
+  #   "local"  - somebody shipped their carton setup
+  #   "perl5"  - somebody shipped their local::lib
+  #   "fatlib" - somebody shipped their fatpack lib
+  my @skip_dirs = qw( inc t xt local perl5 fatlib );
+
+  my %meta = (
+    # required
+    abstract       => UNKNOWN,
+    author         => [],
+    dynamic_config => 0,
+    generated_by   => $generator,
+    license        => ['unknown'], # this 'unknown' comes from CPAN::Meta::Spec
+    'meta-spec'    => {
+      version => '2',
+      url     => 'http://search.cpan.org/perldoc?CPAN::Meta::Spec',
+    },
+    name           => UNKNOWN,
+
+    # strictly speaking, release_status is also required but
+    # CPAN::Meta will figure it out based on the version number. if
+    # we were to set it explicitly, then we would first need to
+    # examine the version number for '_' or 'TRIAL' or 'RC' etc.
+
+    version        => 0,
+
+    # optional
+    no_index       => {
+      directory => [ @skip_dirs ],
+    },
+    # provides => { package => { file => $file, version => $version } }
+  );
+
+  return \%meta;
+}
+
+
+# Builds the metadata structure (a plain hashref) for this dist.
+# Starts from default_metadata(), then applies 'name' and 'version'
+# as reported by the dist object (when defined), and finally lets any
+# same-named values stored on $self override both.
+sub determine_metadata {
+  my ($self) = @_;
+
+  my $dist = $self->dist;
+  my $meta = $self->default_metadata;
+
+  # A value determined by the dist beats the default,
+  # but undef won't validate so it is skipped.
+  for my $key (qw(name version)) {
+    my $parsed = $dist->$key;
+    next unless defined $parsed;
+    $meta->{$key} = $parsed;
+  }
+
+  # Passed in values take priority; only fields that are already
+  # present in $meta are considered.
+  my @overrides = grep { exists $self->{$_} } keys %$meta;
+  @{$meta}{@overrides} = @{$self}{@overrides};
+
+  return $meta;
+}
+
+
+# Determine the packages provided by the dist.
+# $meta supplies should_index_file() and should_index_package();
+# when it is not passed, one is built from determine_metadata().
+# Returns a hashref keyed by package name (empty if no files qualify).
+sub determine_packages {
+  # meta must be passed to avoid infinite loop
+  my ( $self, $meta ) = @_;
+  # if not passed in, use defaults (we just want the 'no_index' property)
+  $meta ||= $self->meta_from_struct( $self->determine_metadata );
+
+  my $dist = $self->dist;
+
+  # should_index_file() expects unix paths
+  my @indexable;
+  foreach my $file ( $dist->perl_files ){
+    my $unix_path = $dist->path_classify_file($file)->as_foreign('Unix')->stringify;
+    push @indexable, $file
+      if $meta->should_index_file($unix_path);
+  }
+
+  # TODO: should we limit packages to lib/ if it exists?
+  # my @lib = grep { m#^lib/# } @files; @files = @lib if @lib;
+
+  return {} unless @indexable;
+
+  my $packages = $dist->determine_packages(@indexable);
+
+  foreach my $name ( keys %$packages ) {
+
+    # Packages that should not be indexed are dropped outright
+    my $keep = $meta->should_index_package($name);
+
+    if( $keep && !$self->{include_inner_packages} ){
+      # PAUSE only considers packages that match the basename of the
+      # containing file. For example, file Foo.pm may only contain a
+      # package that matches /\bFoo$/. This is what PAUSE calls a
+      # "simile". All other packages in the file will be ignored.
+
+      # capture file basename (without the extension)
+      my ($stem) = ($packages->{$name}->{file} =~ m!([^/]+)\.pm(?:\.PL)?$!);
+
+      # keep only if the file matched and the package ends with the basename
+      $keep = $stem && $name =~ m{\b\Q$stem\E$};
+    }
+
+    delete $packages->{$name}
+      unless $keep;
+  }
+
+  return $packages;
+}
+
+
+# Load the metadata for the dist and return it as a CPAN::Meta object.
+#
+# A META.json file is preferred, then META.yml / META.yaml; if neither
+# is present the object is built from determine_metadata().
+# The 'no_index' directories from determine_metadata() are always
+# prepended to the loaded 'no_index' directory list (without duplicates).
+# If the result has no (or an empty) 'provides' and the
+# 'determine_packages' option is true, 'provides' is filled in from
+# determine_packages().
+sub load_meta {
+  my ($self) = @_;
+
+  my $dist = $self->dist;
+  my @files = $dist->list_files;
+  my ( $meta, $metafile );
+  my $default_meta = $self->determine_metadata;
+
+  # prefer json file (spec v2)
+  if ( $metafile = first { m#^META\.json$# } @files ) {
+    $meta = CPAN::Meta->load_json_string( $dist->file_content($metafile) );
+  }
+  # fall back to yaml file (spec v1)
+  elsif ( $metafile = first { m#^META\.ya?ml$# } @files ) {
+    $meta = CPAN::Meta->load_yaml_string( $dist->file_content($metafile) );
+  }
+  # no META file found in dist
+  else {
+    $meta = $self->meta_from_struct( $default_meta );
+  }
+
+  {
+    # always include (never index) the default no_index dirs
+    my $dir = ($meta->{no_index} ||= {})->{directory} ||= [];
+    my %seen = map { ($_ => 1) } @$dir;
+
+    # A 'no_index' value passed to the constructor replaces the default
+    # one wholesale (see determine_metadata) and may lack a 'directory'
+    # list, so tolerate a missing value (and a lone scalar) instead of
+    # dereferencing undef under strict refs.
+    my $defaults = $default_meta->{no_index}->{directory};
+    $defaults = []            if !defined $defaults;
+    $defaults = [ $defaults ] if ref($defaults) ne 'ARRAY';
+
+    unshift @$dir,
+      grep { !$seen{$_}++ }
+        @$defaults;
+  }
+
+  # Something has to be indexed, so if META has no (or empty) 'provides'
+  # attempt to determine packages unless specifically configured not to
+  if ( !keys %{ $meta->provides || {} } && $self->{determine_packages} ) {
+    # respect api/encapsulation
+    my $struct = $meta->as_struct;
+    $struct->{provides} = $self->determine_packages($meta);
+    $meta = $self->meta_from_struct($struct);
+  }
+
+  return $meta;
+}
+
+
+# Returns the metadata object, calling load_meta() on first use and
+# caching the result in $self->{meta}.
+sub meta {
+  my ($self) = @_;
+
+  $self->{meta} = $self->load_meta
+    unless $self->{meta};
+
+  return $self->{meta};
+}
+
+
+# Passes the given metadata hashref to CPAN::Meta->create
+# (with the lazy_validation option enabled) and returns the result.
+sub meta_from_struct {
+  my ($self, $struct) = @_;
+
+  my %options = ( lazy_validation => 1 );
+
+  return CPAN::Meta->create( $struct, \%options );
+}
+
+
+# Returns a hashref mapping each package name to its version.
+# Operates on the 'provides' hashref passed as the first argument,
+# or on $self->provides when no argument is given.
+sub package_versions {
+  my ($self, @args) = @_;
+
+  my $provides = @args ? $args[0] : $self->provides; # || {}
+
+  my %version_of;
+  foreach my $package ( keys %$provides ){
+    $version_of{$package} = $provides->{$package}{version};
+  }
+
+  return \%version_of;
+}
+
+
+# Returns a hashref of per-package data (a copy, one level deep, of
+# the 'provides' data) optionally augmented with file checksums.
+#
+# Options (hashref):
+#   provides - hashref to use instead of $self->provides
+#   checksum - checksum type, or arrayref of types ('digest' is
+#              consulted when 'checksum' is not set); each type
+#              becomes a key in the per-package hashref, with the
+#              value obtained from the dist's file_checksum()
+sub module_info {
+  my ($self, $opts) = @_;
+  my $source = $opts->{provides} || $self->provides;
+
+  my $requested  = $opts->{checksum} || $opts->{digest} || [];
+  my @algorithms = ref($requested) eq 'ARRAY' ? @$requested : ( $requested );
+
+  my %info;
+  my %digest_for; # file => { type => checksum }; avoids recomputing per file
+  foreach my $package ( keys %$source ){
+    my %entry = %{ $source->{ $package } }; # break reference
+
+    foreach my $type ( @algorithms ){
+      $entry{ $type } =
+        $digest_for{ $entry{file} }->{ $type } ||=
+          $self->dist->file_checksum($entry{file}, $type);
+    }
+
+    # TODO: $opts->{callback}->($self, $mod, $data, sub { $self->dist->file_content($data->{file}) });
+
+    $info{ $package } = \%entry;
+  }
+
+  return \%info;
+}
+
+
+# Generate thin accessors (name, provides, version) that simply call
+# the same-named method on the object returned by meta().
+foreach my $accessor ( qw( name provides version ) ){
+  no strict 'refs'; ## no critic (NoStrict)
+  *{$accessor} = sub {
+    my ($self) = @_;
+    return $self->meta->$accessor;
+  };
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding UTF-8
+
+=for :stopwords Randy Stauner ACKNOWLEDGEMENTS TODO dist dists dir unix checksum checksums
+David Jeffrey Ryan Sawyer Steinbrunner Thalhammer X cpan testmatrix url
+annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata
+placeholders metacpan
+
+=head1 NAME
+
+Dist::Metadata - Information about a perl module distribution
+
+=head1 VERSION
+
+version 0.926
+
+=head1 SYNOPSIS
+
+ my $dist = Dist::Metadata->new(file => $path_to_archive);
+
+ my $description = sprintf "Dist %s (%s)", $dist->name, $dist->version;
+
+ my $provides = $dist->package_versions;
+ while( my ($package, $version) = each %$provides ){
+ print "$description includes $package $version\n";
+ }
+
+=head1 DESCRIPTION
+
+This module provides an easy interface for getting various metadata
+about a Perl module distribution.
+
+It takes care of the common logic of:
+
+=over 4
+
+=item *
+
+reading a tar file (L<Archive::Tar>)
+
+=item *
+
+finding and reading the correct META file if the distribution contains one (L<CPAN::Meta>)
+
+=item *
+
+and determining some of the metadata if there is no META file (L<Module::Metadata>, L<CPAN::DistnameInfo>)
+
+=back
+
+This is mostly a wrapper around L<CPAN::Meta> providing an easy interface
+to find and load the meta file from a F<tar.gz> file.
+A dist can also be represented by a directory or merely a structure of data.
+
+If the dist does not contain a meta file
+the module will attempt to determine some of that data from the dist.
+
+B<NOTE>: This interface is still being defined.
+Please submit any suggestions or concerns.
+
+=head1 METHODS
+
+=head2 new
+
+ Dist::Metadata->new(file => $path);
+
+A dist can be represented by
+a tar file,
+a directory,
+or a data structure.
+
+The format will be determined by the presence of the following options
+(checked in this order):
+
+=over 4
+
+=item *
+
+C<struct> - hash of data to build a mock dist; See L<Dist::Metadata::Struct>.
+
+=item *
+
+C<dir> - path to the root directory of a dist
+
+=item *
+
+C<file> - the path to a F<.tar.gz> file
+
+=back
+
+You can also slyly pass in your own object as a C<dist> parameter
+in which case this module will just use that.
+This can be useful if you need to use your own subclass
+(perhaps while developing a new format).
+
+Other options that can be specified:
+
+=over 4
+
+=item *
+
+C<name> - dist name
+
+=item *
+
+C<version> - dist version
+
+=item *
+
+C<determine_packages> - boolean to indicate whether the dist should be searched
+for packages when no META file is found or the META file has no (or an empty)
+C<provides> section. Defaults to true.
+
+=item *
+
+C<include_inner_packages> - When determining provided packages
+the default behavior is to only include packages that match the name
+of the file that defines them (like C<Foo::Bar> matches C<*/Bar.pm>).
+This way only modules that can be loaded (via C<use> or C<require>)
+will be returned (and "inner" packages will be ignored).
+This mimics the behavior of PAUSE.
+Set this to true to include any "inner" packages provided by the dist
+(that are not otherwise excluded by another mechanism (such as C<no_index>)).
+
+=back
+
+=head2 dist
+
+Returns the dist object (subclass of L<Dist::Metadata::Dist>).
+
+=head2 default_metadata
+
+Returns a hashref of default values
+used to initialize a L<CPAN::Meta> object
+when a META file is not found.
+Called from L</determine_metadata>.
+
+=head2 determine_metadata
+
+Examine the dist and try to determine metadata.
+Returns a hashref which can be passed to L<CPAN::Meta/new>.
+This is used when the dist does not contain a META file.
+
+=head2 determine_packages
+
+ my $provides = $dm->determine_packages($meta);
+
+Attempt to determine packages provided by the dist.
+This is used when the META file does not include a C<provides>
+section and C<determine_packages> is not set to false in the constructor.
+
+If a L<CPAN::Meta> object is not provided a default one will be used.
+Files contained in the dist and packages found therein will be checked against
+the meta object's C<no_index> attribute
+(see L<CPAN::Meta/should_index_file>
+and L<CPAN::Meta/should_index_package>).
+By default this ignores any files found in
+F<inc/>,
+F<t/>,
+F<xt/>,
+F<local/>,
+F<perl5/>,
+or F<fatlib/>
+directories.
+
+=head2 load_meta
+
+Loads the metadata from the L</dist>.
+
+=head2 meta
+
+Returns the L<CPAN::Meta> instance in use.
+
+=head2 meta_from_struct
+
+ $meta = $dm->meta_from_struct(\%struct);
+
+Passes the provided C<\%struct> to L<CPAN::Meta/create>
+and returns the result.
+
+=head2 package_versions
+
+ $pv = $dm->package_versions();
+ # { 'Package::Name' => '1.0', 'Module::2' => '2.1' }
+
+Returns a simplified version of C<provides>:
+a hashref with package names as keys and versions as values.
+
+This can also be called as a class method
+which will operate on a passed in hashref.
+
+ $pv = Dist::Metadata->package_versions(\%provides);
+
+=head2 module_info
+
+Returns a hashref of metadata for each of the packages provided by this dist.
+
+The hashref starts with the same data as L</provides>
+but additional data can be added to the output by specifying options in a hashref:
+
+=over 4
+
+=item C<checksum>
+
+Use the specified algorithm to compute a hex digest of the file.
+The type you specify will be the key in the returned hashref.
+You can use an arrayref to specify more than one type.
+
+ $dm->module_info({checksum => ['sha256', 'md5']});
+ # returns:
+ {
+ 'Mod::Name' => {
+ file => 'lib/Mod/Name.pm',
+ version => '0.1',
+ md5 => '258e88dcbd3cd44d8e7ab43f6ecb6af0',
+ sha256 => 'f22136124cd3e1d65a48487cecf310771b2fd1e83dc032e3d19724160ac0ff71',
+ },
+ }
+
+See L<Dist::Metadata::Dist/file_checksum> for more information.
+
+=item C<provides>
+
+The default is to start with the hashref returned from L</provides>
+but you can pass in an alternate hashref using this key.
+
+=back
+
+Other options may be added in the future.
+
+=head1 INHERITED METHODS
+
+The following methods are available on this object
+and simply call the corresponding method on the L<CPAN::Meta> object.
+
+=over 4
+
+=item *
+
+X<name> name
+
+=item *
+
+X<provides> provides
+
+=item *
+
+X<version> version
+
+=back
+
+=for Pod::Coverage name version provides
+UNKNOWN
+
+=for test_synopsis my $path_to_archive;
+
+=head1 TODO
+
+=over 4
+
+=item *
+
+More tests
+
+=item *
+
+C<trust_meta> option (to allow setting it to false)
+
+=item *
+
+Guess main module from dist name if no packages can be found
+
+=item *
+
+Determine abstract?
+
+=item *
+
+Add change log info (L<CPAN::Changes>)?
+
+=item *
+
+Subclass as C<CPAN::Dist::Metadata> just so that it has C<CPAN> in the name?
+
+=item *
+
+Use L<File::Find::Rule::Perl>?
+
+=back
+
+=head1 SEE ALSO
+
+=head2 Dependencies
+
+=over 4
+
+=item *
+
+L<CPAN::Meta>
+
+=item *
+
+L<Module::Metadata>
+
+=item *
+
+L<CPAN::DistnameInfo>
+
+=back
+
+=head2 Related Modules
+
+=over 4
+
+=item *
+
+L<MyCPAN::Indexer>
+
+=item *
+
+L<CPAN::ParseDistribution>
+
+=back
+
+=head1 SUPPORT
+
+=head2 Perldoc
+
+You can find documentation for this module with the perldoc command.
+
+ perldoc Dist::Metadata
+
+=head2 Websites
+
+The following websites have more information about this module, and may be of help to you. As always,
+in addition to those websites please use your favorite search engine to discover more resources.
+
+=over 4
+
+=item *
+
+MetaCPAN
+
+A modern, open-source CPAN search engine, useful to view POD in HTML format.
+
+L<http://metacpan.org/release/Dist-Metadata>
+
+=back
+
+=head2 Bugs / Feature Requests
+
+Please report any bugs or feature requests by email to C<bug-dist-metadata at rt.cpan.org>, or through
+the web interface at L<https://rt.cpan.org/Public/Bug/Report.html?Queue=Dist-Metadata>. You will be automatically notified of any
+progress on the request by the system.
+
+=head2 Source Code
+
+
+L<https://github.com/rwstauner/Dist-Metadata>
+
+ git clone https://github.com/rwstauner/Dist-Metadata.git
+
+=head1 AUTHOR
+
+Randy Stauner <rwstauner@cpan.org>
+
+=head1 CONTRIBUTORS
+
+=for stopwords David Steinbrunner Jeffrey Ryan Thalhammer Sawyer X
+
+=over 4
+
+=item *
+
+David Steinbrunner <dsteinbrunner@pobox.com>
+
+=item *
+
+Jeffrey Ryan Thalhammer <thaljef@cpan.org>
+
+=item *
+
+Sawyer X <xsawyerx@cpan.org>
+
+=back
+
+=head1 COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2011 by Randy Stauner.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+=cut