summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2020-01-16 16:14:40 -0700
committer Karl Williamson <khw@cpan.org> 2020-01-23 15:46:56 -0700
commit daf6caf1ef25ff48f871fa1e53adcefc11bf1d08 (patch)
tree d82154f20ec7d73e8fae65ff09c56458f4c667a5 /utf8.c
parent 5e6ebb121e85847e480061b087a1c6fe206e6c3a (diff)
download perl-daf6caf1ef25ff48f871fa1e53adcefc11bf1d08.tar.gz
pv_uni_display: Use common fcn; \b mnemonic
This removes the (almost) duplicate code in this function to display mnemonics for control characters that have them. The reason the two pieces of code aren't precisely the same is that the other function also uses \b as a mnemonic for backspace. Using all possible mnemonics is desirable, so a flag is added for pv_uni_display to now use \b. This is now by default enabled in double-quoted strings, but not regex patterns (as \b there means something quite different except in character classes). B.pm is changed to expect \b.
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 48
1 files changed, 20 insertions, 28 deletions
diff --git a/utf8.c b/utf8.c
index 7b82985eac..a67c987620 100644
--- a/utf8.c
+++ b/utf8.c
@@ -4053,9 +4053,9 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
/*
=for apidoc pv_uni_display
-Build to the scalar C<dsv> a displayable version of the string C<spv>,
-length C<len>, the displayable version being at most C<pvlim> bytes long
-(if longer, the rest is truncated and C<"..."> will be appended).
+Build to the scalar C<dsv> a displayable version of the UTF-8 encoded string
+C<spv>, length C<len>, the displayable version being at most C<pvlim> bytes
+long (if longer, the rest is truncated and C<"..."> will be appended).
The C<flags> argument can have C<UNI_DISPLAY_ISPRINT> set to display
C<isPRINT()>able characters as themselves, C<UNI_DISPLAY_BACKSLASH>
@@ -4064,6 +4064,9 @@ to display the C<\\[nrfta\\]> as the backslashed versions (like C<"\n">)
C<UNI_DISPLAY_QQ> (and its alias C<UNI_DISPLAY_REGEX>) have both
C<UNI_DISPLAY_BACKSLASH> and C<UNI_DISPLAY_ISPRINT> turned on.
+Additionally, there is now C<UNI_DISPLAY_BACKSPACE> which allows C<\b> for a
+backspace, but only when C<UNI_DISPLAY_BACKSLASH> also is set.
+
The pointer to the PV of the C<dsv> is returned.
See also L</sv_uni_display>.
@@ -4082,10 +4085,7 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
SvUTF8_off(dsv);
for (s = (const char *)spv, e = s + len; s < e; s += UTF8SKIP(s)) {
UV u;
- /* This serves double duty as a flag and a character to print after
- a \ when flags & UNI_DISPLAY_BACKSLASH is true.
- */
- char ok = 0;
+ bool ok = 0;
if (pvlim && SvCUR(dsv) >= pvlim) {
truncated++;
@@ -4095,27 +4095,19 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
if (u < 256) {
const unsigned char c = (unsigned char)u & 0xFF;
if (flags & UNI_DISPLAY_BACKSLASH) {
- switch (c) {
- case '\n':
- ok = 'n'; break;
- case '\r':
- ok = 'r'; break;
- case '\t':
- ok = 't'; break;
- case '\f':
- ok = 'f'; break;
- case '\a':
- ok = 'a'; break;
- case '\\':
- ok = '\\'; break;
- default: break;
- }
- if (ok) {
- const char string = ok;
- sv_catpvs(dsv, "\\");
- sv_catpvn(dsv, &string, 1);
- }
- }
+ if ( isMNEMONIC_CNTRL(c)
+ && ( c != '\b'
+ || (flags & UNI_DISPLAY_BACKSPACE)))
+ {
+ const char * mnemonic = cntrl_to_mnemonic(c);
+ sv_catpvn(dsv, mnemonic, strlen(mnemonic));
+ ok = 1;
+ }
+ else if (c == '\\') {
+ sv_catpvs(dsv, "\\\\");
+ ok = 1;
+ }
+ }
/* isPRINT() is the locale-blind version. */
if (!ok && (flags & UNI_DISPLAY_ISPRINT) && isPRINT(c)) {
const char string = c;