lib/locale.t: Add debugging subroutine

This prints out a string unambiguously, both well and ill-formed UTF-8. The next commit will use it.
author: Karl Williamson <khw@cpan.org> 2014-06-17 12:07:51 -0600
committer: Karl Williamson <khw@cpan.org> 2014-06-17 20:21:38 -0600
commit: 010b53a6755090e2e0d9ab5ab14e4d5d344707d7 (patch)
tree: 4e0334957d604abb53662a7df51142a79822fc99
parent: 46973bb268abc7b71092cab1591882d604bae0ce (diff)
download: perl-010b53a6755090e2e0d9ab5ab14e4d5d344707d7.tar.gz
1 files changed, 58 insertions, 0 deletions
diff --git a/lib/locale.t b/lib/locale.t
index 76f5fe6572..70766f207f 100644
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -805,6 +805,64 @@ sub disp_chars {
     return $output;
 }
 
+sub disp_str ($) {
+    my $string = shift;
+
+    # Displays the string unambiguously.  ASCII printables are always output
+    # as-is, though perhaps separated by blanks from other characters.  If
+    # entirely printable ASCII, just returns the string.  Otherwise if valid
+    # UTF-8 it uses the character names for non-printable-ASCII.  Otherwise it
+    # outputs hex for each non-ASCII-printable byte.
+
+    return $string if $string =~ / ^ [[:print:]]* $/xa;
+
+    my $result = "";
+    my $prev_was_punct = 1; # Beginning is considered punct
+    if (utf8::valid($string) && utf8::is_utf8($string)) {
+        use charnames ();
+        foreach my $char (split "", $string) {
+
+            # Keep punctuation adjacent to other characters; otherwise
+            # separate them with a blank
+            if ($char =~ /[[:punct:]]/a) {
+                $result .= $char;
+                $prev_was_punct = 1;
+            }
+            elsif ($char =~ /[[:print:]]/a) {
+                $result .= "  " unless $prev_was_punct;
+                $result .= $char;
+                $prev_was_punct = 0;
+            }
+            else {
+                $result .= "  " unless $prev_was_punct;
+                $result .= charnames::viacode(ord $char);
+                $prev_was_punct = 0;
+            }
+        }
+    }
+    else {
+        use bytes;
+        foreach my $char (split "", $string) {
+            if ($char =~ /[[:punct:]]/a) {
+                $result .= $char;
+                $prev_was_punct = 1;
+            }
+            elsif ($char =~ /[[:print:]]/a) {
+                $result .= " " unless $prev_was_punct;
+                $result .= $char;
+                $prev_was_punct = 0;
+            }
+            else {
+                $result .= " " unless $prev_was_punct;
+                $result .= sprintf("%02X", ord $char);
+                $prev_was_punct = 0;
+            }
+        }
+    }
+
+    return $result;
+}
+
 sub report_result {
     my ($Locale, $i, $pass_fail, $message) = @_;
     $message //= "";
author	Karl Williamson <khw@cpan.org>	2014-06-17 12:07:51 -0600
committer	Karl Williamson <khw@cpan.org>	2014-06-17 20:21:38 -0600
commit	010b53a6755090e2e0d9ab5ab14e4d5d344707d7 (patch)
tree	4e0334957d604abb53662a7df51142a79822fc99
parent	46973bb268abc7b71092cab1591882d604bae0ce (diff)
download	perl-010b53a6755090e2e0d9ab5ab14e4d5d344707d7.tar.gz