diff options
author | Karl Williamson <khw@cpan.org> | 2014-06-17 12:07:51 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2014-06-17 20:21:38 -0600 |
commit | 010b53a6755090e2e0d9ab5ab14e4d5d344707d7 (patch) | |
tree | 4e0334957d604abb53662a7df51142a79822fc99 | |
parent | 46973bb268abc7b71092cab1591882d604bae0ce (diff) | |
download | perl-010b53a6755090e2e0d9ab5ab14e4d5d344707d7.tar.gz |
lib/locale.t: Add debugging subroutine
This prints out a string unambiguously, both well and ill-formed UTF-8.
The next commit will use it.
-rw-r--r-- | lib/locale.t | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/lib/locale.t b/lib/locale.t index 76f5fe6572..70766f207f 100644 --- a/lib/locale.t +++ b/lib/locale.t @@ -805,6 +805,64 @@ sub disp_chars { return $output; } +sub disp_str ($) { + my $string = shift; + + # Displays the string unambiguously. ASCII printables are always output + # as-is, though perhaps separated by blanks from other characters. If + # entirely printable ASCII, just returns the string. Otherwise if valid + # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it + # outputs hex for each non-ASCII-printable byte. + + return $string if $string =~ / ^ [[:print:]]* $/xa; + + my $result = ""; + my $prev_was_punct = 1; # Beginning is considered punct + if (utf8::valid($string) && utf8::is_utf8($string)) { + use charnames (); + foreach my $char (split "", $string) { + + # Keep punctuation adjacent to other characters; otherwise + # separate them with a blank + if ($char =~ /[[:punct:]]/a) { + $result .= $char; + $prev_was_punct = 1; + } + elsif ($char =~ /[[:print:]]/a) { + $result .= " " unless $prev_was_punct; + $result .= $char; + $prev_was_punct = 0; + } + else { + $result .= " " unless $prev_was_punct; + $result .= charnames::viacode(ord $char); + $prev_was_punct = 0; + } + } + } + else { + use bytes; + foreach my $char (split "", $string) { + if ($char =~ /[[:punct:]]/a) { + $result .= $char; + $prev_was_punct = 1; + } + elsif ($char =~ /[[:print:]]/a) { + $result .= " " unless $prev_was_punct; + $result .= $char; + $prev_was_punct = 0; + } + else { + $result .= " " unless $prev_was_punct; + $result .= sprintf("%02X", ord $char); + $prev_was_punct = 0; + } + } + } + + return $result; +} + sub report_result { my ($Locale, $i, $pass_fail, $message) = @_; $message //= ""; |