summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2014-06-17 12:07:51 -0600
committerKarl Williamson <khw@cpan.org>2014-06-17 20:21:38 -0600
commit010b53a6755090e2e0d9ab5ab14e4d5d344707d7 (patch)
tree4e0334957d604abb53662a7df51142a79822fc99
parent46973bb268abc7b71092cab1591882d604bae0ce (diff)
downloadperl-010b53a6755090e2e0d9ab5ab14e4d5d344707d7.tar.gz
lib/locale.t: Add debugging subroutine
This prints out a string unambiguously, both well and ill-formed UTF-8. The next commit will use it.
-rw-r--r--lib/locale.t58
1 files changed, 58 insertions, 0 deletions
diff --git a/lib/locale.t b/lib/locale.t
index 76f5fe6572..70766f207f 100644
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -805,6 +805,64 @@ sub disp_chars {
return $output;
}
+sub disp_str ($) {
+ my $string = shift;
+
+ # Displays the string unambiguously. ASCII printables are always output
+ # as-is, though perhaps separated by blanks from other characters. If
+ # entirely printable ASCII, just returns the string. Otherwise if valid
+ # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it
+ # outputs hex for each non-ASCII-printable byte.
+
+ return $string if $string =~ / ^ [[:print:]]* $/xa;
+
+ my $result = "";
+ my $prev_was_punct = 1; # Beginning is considered punct
+ if (utf8::valid($string) && utf8::is_utf8($string)) {
+ use charnames ();
+ foreach my $char (split "", $string) {
+
+ # Keep punctuation adjacent to other characters; otherwise
+ # separate them with a blank
+ if ($char =~ /[[:punct:]]/a) {
+ $result .= $char;
+ $prev_was_punct = 1;
+ }
+ elsif ($char =~ /[[:print:]]/a) {
+ $result .= " " unless $prev_was_punct;
+ $result .= $char;
+ $prev_was_punct = 0;
+ }
+ else {
+ $result .= " " unless $prev_was_punct;
+ $result .= charnames::viacode(ord $char);
+ $prev_was_punct = 0;
+ }
+ }
+ }
+ else {
+ use bytes;
+ foreach my $char (split "", $string) {
+ if ($char =~ /[[:punct:]]/a) {
+ $result .= $char;
+ $prev_was_punct = 1;
+ }
+ elsif ($char =~ /[[:print:]]/a) {
+ $result .= " " unless $prev_was_punct;
+ $result .= $char;
+ $prev_was_punct = 0;
+ }
+ else {
+ $result .= " " unless $prev_was_punct;
+ $result .= sprintf("%02X", ord $char);
+ $prev_was_punct = 0;
+ }
+ }
+ }
+
+ return $result;
+}
+
sub report_result {
my ($Locale, $i, $pass_fail, $message) = @_;
$message //= "";