summaryrefslogtreecommitdiff
path: root/Porting/checkpodencoding.pl
diff options
context:
space:
mode:
authorÆvar Arnfjörð Bjarmason <avar@cpan.org>2010-03-30 14:09:24 +0000
committerJesse Vincent <jesse@bestpractical.com>2010-05-08 16:35:35 -0400
commit49781f4a0073e7152ff1359411cc1790db050942 (patch)
treef4cbc1b500eac522b28853ab647ae042f24611e2 /Porting/checkpodencoding.pl
parentbdbefedf6ca6c8253d0fccc6b9d99d7ae86dcd96 (diff)
downloadperl-49781f4a0073e7152ff1359411cc1790db050942.tar.gz
Add =encoding utf8 to all core non-ASCII POD files + test script
It's now possible to run `perl Porting/checkpodencoding.pl` to check if the core contains any naughty POD that uses non-ASCII without declaring an encoding. With this patch all the POD in core (except POD tests we're ignoring) has a correct =encoding directive. The script also flags problems in lib/* and cpan/*, which aren't being fixed as part of this commit.
Diffstat (limited to 'Porting/checkpodencoding.pl')
-rw-r--r--Porting/checkpodencoding.pl69
1 files changed, 69 insertions, 0 deletions
diff --git a/Porting/checkpodencoding.pl b/Porting/checkpodencoding.pl
new file mode 100644
index 0000000000..a2d12df494
--- /dev/null
+++ b/Porting/checkpodencoding.pl
@@ -0,0 +1,69 @@
+#!/usr/bin/env perl
+use 5.010;
+use open qw< :encoding(utf8) :std >;
+use autodie;
+use strict;
+use File::Find;
+use Encode::Guess;
+
+# Check if POD files contain non-ASCII without specifying
+# =encoding. Run it as:
+
+## perl Porting/checkpodencoding.pl
+
+# Walk everything below the current directory, handing each entry to
+# finder(). no_chdir is set, so File::Find does not change directory
+# while traversing.
+my %find_options = (
+    no_chdir => 1,
+    wanted   => \&finder,
+);
+find(\%find_options, '.');
+
+# File::Find callback: report POD lines in the current file that contain
+# non-ASCII characters when the file never declares an =encoding.
+#
+# The path is read from $_. Anything that is not a plain file, anything
+# perl's -B test considers binary, and the Pod-Simple test corpus are
+# skipped. Scanning stops at the first =encoding directive found inside
+# POD. Otherwise each offending line is remembered and, at the end,
+# printed to STDOUT under a "<file>:" header together with its line
+# number and a guessed encoding. Always returns nothing.
+sub finder {
+    my $file = $_;
+
+    # Only plain files are worth opening: this avoids dying (autodie) on
+    # dangling symlinks and blocking on FIFOs.
+    return unless -f $file;
+    return if -B $file;
+
+    # Test cases. A plain return is used here; "next" inside this sub
+    # would only work by escaping into File::Find's own loop.
+    return if $file =~ m[Pod-Simple/t];
+
+    open my $fh, '<', $file;
+
+    #say STDERR "Checking $file";
+
+    my ($in_pod, $has_encoding, @non_ascii);
+
+    LINE: while (my $line = <$fh>) {
+        chomp $line;
+
+        # Any POD command line switches POD mode on. This includes =cut
+        # itself, which is switched off again at the bottom of the loop.
+        $in_pod = 1 if $line =~ /^=[a-z]+/;
+
+        if ($in_pod) {
+            if ($line =~ /^=encoding \S+/) {
+                # An encoding is declared, so nothing needs reporting.
+                $has_encoding = 1;
+                last LINE;
+            } elsif ($line =~ /[^[:ascii:]]/) {
+                # guess_encoding() returns an encoding object on success
+                # and a plain error string otherwise. Use the ->name
+                # accessor rather than poking at the object's internals.
+                my $guess = guess_encoding($line);
+                push @non_ascii, {
+                    num      => $.,
+                    line     => $line,
+                    encoding => (ref $guess ? $guess->name . '?' : 'unknown!'),
+                };
+            }
+        }
+
+        $in_pod = 0 if $line =~ /^=cut/;
+    }
+
+    close $fh;
+
+    return if $has_encoding or not @non_ascii;
+
+    say "$file:";
+    for my $hit (@non_ascii) {
+        say " $hit->{num} ($hit->{encoding}): $hit->{line}";
+    }
+
+    return;
+}