summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@khw-desktop.(none)>2010-07-15 17:28:28 -0600
committerDavid Golden <dagolden@cpan.org>2010-07-17 21:50:48 -0400
commitf0a2b745ce6c03aec6412d79ce0b782f20eddce4 (patch)
treed1786b1a4a80f6b848dca1ab4eba6e3ffd5dc5d1
parent8e4698ef1ed0da722532bfcc769ba22fe85c4b47 (diff)
downloadperl-f0a2b745ce6c03aec6412d79ce0b782f20eddce4.tar.gz
Add \o{} escape
This commit adds the new construct \o{} to express a character constant by its octal ordinal value, along with ancillary tests and documentation. A function to handle this is added to util.c, and it is called from the 3 parsing places it could occur. The function is a candidate for in-lining, though I doubt that it will ever be used frequently.
-rw-r--r--embed.fnc1
-rw-r--r--embed.h2
-rw-r--r--global.sym1
-rw-r--r--pod/perl5133delta.pod15
-rw-r--r--pod/perldiag.pod14
-rw-r--r--pod/perlre.pod2
-rw-r--r--pod/perlrebackslash.pod83
-rw-r--r--pod/perlretut.pod9
-rw-r--r--proto.h8
-rw-r--r--regcomp.c37
-rw-r--r--t/lib/warnings/regcomp30
-rw-r--r--t/lib/warnings/toke23
-rw-r--r--t/op/qq.t5
-rw-r--r--t/re/re_tests6
-rw-r--r--toke.c14
-rw-r--r--util.c68
16 files changed, 284 insertions, 34 deletions
diff --git a/embed.fnc b/embed.fnc
index 8493dd7bc7..37c7f2b98e 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -639,6 +639,7 @@ p |OP* |localize |NN OP *o|I32 lex
ApdR |I32 |looks_like_number|NN SV *const sv
Apd |UV |grok_bin |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
EXpR |char |grok_bslash_c |const char source|const bool output_warning
+EXpR |char* |grok_bslash_o |NN const char* s|NN UV* uv|NN STRLEN* len|const bool output_warning
Apd |UV |grok_hex |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
Apd |int |grok_number |NN const char *pv|STRLEN len|NULLOK UV *valuep
ApdR |bool |grok_numeric_radix|NN const char **sp|NN const char *send
diff --git a/embed.h b/embed.h
index 8fb3cbe4bf..fffdedec1a 100644
--- a/embed.h
+++ b/embed.h
@@ -463,6 +463,7 @@
#define grok_bin Perl_grok_bin
#if defined(PERL_CORE) || defined(PERL_EXT)
#define grok_bslash_c Perl_grok_bslash_c
+#define grok_bslash_o Perl_grok_bslash_o
#endif
#define grok_hex Perl_grok_hex
#define grok_number Perl_grok_number
@@ -2909,6 +2910,7 @@
#define grok_bin(a,b,c,d) Perl_grok_bin(aTHX_ a,b,c,d)
#if defined(PERL_CORE) || defined(PERL_EXT)
#define grok_bslash_c(a,b) Perl_grok_bslash_c(aTHX_ a,b)
+#define grok_bslash_o(a,b,c,d) Perl_grok_bslash_o(aTHX_ a,b,c,d)
#endif
#define grok_hex(a,b,c,d) Perl_grok_hex(aTHX_ a,b,c,d)
#define grok_number(a,b,c) Perl_grok_number(aTHX_ a,b,c)
diff --git a/global.sym b/global.sym
index aa61a699a5..332381564d 100644
--- a/global.sym
+++ b/global.sym
@@ -283,6 +283,7 @@ Perl_vload_module
Perl_looks_like_number
Perl_grok_bin
Perl_grok_bslash_c
+Perl_grok_bslash_o
Perl_grok_hex
Perl_grok_number
Perl_grok_numeric_radix
diff --git a/pod/perl5133delta.pod b/pod/perl5133delta.pod
index 476427e025..d4db338fd9 100644
--- a/pod/perl5133delta.pod
+++ b/pod/perl5133delta.pod
@@ -28,6 +28,17 @@ here, but most should go in the L</Performance Enhancements> section.
[ List each enhancement as a =head2 entry ]
+=head2 \o{...}
+
+The escape sequence C<"\o"> in double-quotish contexts is now defined. It
+must be followed by braces enclosing an octal number of at least one digit. It
+means the character whose ordinal value is that octal number. This construct
+allows large octal ordinals beyond the current max of 0777 to be represented.
+It also allows you to specify a character in octal which can safely be
+concatenated with other regex snippets without danger of changing its meaning,
+and one which won't ever be confused with being a backreference to a regex
+capture group. See L<perlre/Capture groups>
+
=head2 C<\N{I<name>}> and C<charnames> enhancements
C<\N{}> and C<charnames::vianame> now know about the abbreviated character
@@ -84,7 +95,9 @@ anomalous behavior than their use in all other double-quotish contexts. Since
all double-quotish contexts have the same behavior, namely to be equivalent to
C<\x{100}> - C<\x{1FF}>, with no deprecation warning. Use of these values in the
command line option C<"-0"> retains the current meaning to slurp input files
-whole; previously, this was documented only for C<"-0777">.
+whole; previously, this was documented only for C<"-0777">. It is recommended,
+however, because of various ambiguities, to use the new L</\o{...}> construct
+to represent characters in octal.
=head1 Deprecations
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 9e8a28735c..9f9fe4b553 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -2510,6 +2510,10 @@ comment) between the C<\N> and the C<{> in a regex with the C</x> modifier.
This modifier does not change the requirement that the brace immediately follow
the C<\N>.
+=item Missing braces on \o{}
+
+(F) A C<\o> must be followed immediately by a C<{> in double-quotish context.
+
=item Missing comma after first argument to %s function
(F) While certain functions allow you to specify a filehandle or an
@@ -2978,6 +2982,11 @@ to UTC. If it's not, define the logical name
F<SYS$TIMEZONE_DIFFERENTIAL> to translate to the number of seconds which
need to be added to UTC to get local time.
+=item Non-octal character '%c'. Resolved as "%s"
+
+(W digit) In parsing an octal numeric constant, a character was unexpectedly
+encountered that isn't octal. The resulting value is as indicated.
+
=item Non-string passed as bitmask
(W misc) A number has been passed as a bitmask argument to select().
@@ -3020,6 +3029,11 @@ versions of Perl are likely to eliminate this arbitrary limitation. In
the meantime, try using scientific notation (e.g. "1e6" instead of
"1_000_000").
+=item Number with no digits
+
+(F) Perl was looking for a number but found nothing that looked like a number.
+This happens, for example with C<\o{}>, with no number between the braces.
+
=item Octal number in vector unsupported
(F) Numbers with a leading C<0> are not currently allowed in vectors.
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 5ea15a013a..2e00f0bc69 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -229,11 +229,11 @@ also work:
\f form feed (FF)
\a alarm (bell) (BEL)
\e escape (think troff) (ESC)
- \033 octal char (example: ESC)
\cK control char (example: VT)
\x{}, \x00 character whose ordinal is the given hexadecimal number
\N{name} named Unicode character
\N{U+263D} Unicode character (example: FIRST QUARTER MOON)
+ \o{}, \000 character whose ordinal is the given octal number
\l lowercase next char (think vi)
\u uppercase next char (think vi)
\L lowercase till \E (think vi)
diff --git a/pod/perlrebackslash.pod b/pod/perlrebackslash.pod
index 9d246bdc2e..d460f7f052 100644
--- a/pod/perlrebackslash.pod
+++ b/pod/perlrebackslash.pod
@@ -62,7 +62,7 @@ quoted constructs>.
Those not usable within a bracketed character class (like C<[\da-z]>) are marked
as C<Not in [].>
- \000 Octal escape sequence.
+ \000 Octal escape sequence. See also \o{}.
\1 Absolute backreference. Not in [].
\a Alarm or bell.
\A Beginning of string. Not in [].
@@ -86,6 +86,7 @@ as C<Not in [].>
\n (Logical) newline character.
\N Any character but newline. Experimental. Not in [].
\N{} Named or numbered (Unicode) character.
+ \o{} Octal escape sequence.
\p{}, \pP Character with the given Unicode property.
\P{}, \PP Character without the given Unicode property.
\Q Quotemeta till \E. Not in [].
@@ -207,33 +208,57 @@ match "as is".
=head3 Octal escapes
-Octal escapes consist of a backslash followed by three octal digits
-matching the code point of the character you want to use. (In some contexts,
-two or even one octal digits are also accepted, sometimes with a warning.) This
-allows for 512 characters (C<\000> up to C<\777>) that can be expressed this
-way. Enough in pre-Unicode days,
-but most Unicode characters cannot be escaped this way.
+There are two forms of octal escapes. Each is used to specify a character by
+its ordinal, specified in octal notation.
+
+One form, available starting in Perl 5.14 looks like C<\o{...}>, where the dots
+represent one or more octal digits. It can be used for any Unicode character.
+
+It was introduced to avoid the potential problems with the other form,
+available in all Perls. That form consists of a backslash followed by three
+octal digits. One problem with this form is that it can look exactly like an
+old-style backreference (see
+L</Disambiguation rules between old-style octal escapes and backreferences>
+below.) You can avoid this by making the first of the three digits always a
+zero, but that makes \077 the largest ordinal unambiguously specifiable by this
+form.
+
+In some contexts, a backslash followed by two or even one octal digits may be
+interpreted as an octal escape, sometimes with a warning, and because of some
+bugs, sometimes with surprising results. Also, if you are creating a regex
+out of smaller snippets concatentated together, and you use fewer than three
+digits, the beginning of one snippet may be interpreted as adding digits to the
+ending of the snippet before it. See L</Absolute referencing> for more
+discussion and examples of the snippet problem.
Note that a character that is expressed as an octal escape is considered
as a character without special meaning by the regex engine, and will match
"as is".
-=head4 Examples (assuming an ASCII platform)
+To summarize, the C<\o{}> form is always safe to use, and the other form is
+safe to use for ordinals up through \077 when you use exactly three digits to
+specify them.
- $str = "Perl";
- $str =~ /\120/; # Match, "\120" is "P".
- $str =~ /\120+/; # Match, "\120" is "P", it is repeated at least once
- $str =~ /P\053/; # No match, "\053" is "+" and taken literally.
+Mnemonic: I<0>ctal or I<o>ctal.
-=head4 Caveat
+=head4 Examples (assuming an ASCII platform)
-Octal escapes potentially clash with old-style backreferences (see L</Absolute
-referencing> below). They both consist of a backslash followed by numbers. So
-Perl has to use heuristics to determine whether it is a backreference or an
-octal escape. You can avoid ambiguity by using the C<\g> form for
-backreferences, and by beginning octal escapes with a "0". (Since octal
-escapes are 3 digits, this latter method works only up to C<\077>.) In the
-absence of C<\g>, Perl uses the following rules:
+ $str = "Perl";
+ $str =~ /\o{120}/; # Match, "\120" is "P".
+ $str =~ /\120/; # Same.
+ $str =~ /\o{120}+/; # Match, "\120" is "P", it's repeated at least once
+ $str =~ /\120+/; # Same.
+ $str =~ /P\053/; # No match, "\053" is "+" and taken literally.
+ /\o{23073}/ # Black foreground, white background smiling face.
+ /\o{4801234567}/ # Raises a warning, and yields chr(4)
+
+=head4 Disambiguation rules between old-style octal escapes and backreferences
+
+Octal escapes of the C<\000> form outside of bracketed character classes
+potentially clash with old-style backreferences. (see L</Absolute referencing>
+below). They both consist of a backslash followed by numbers. So Perl has to
+use heuristics to determine whether it is a backreference or an octal escape.
+Perl uses the following rules to disambiguate:
=over 4
@@ -258,18 +283,24 @@ the rest are matched as is.
$pat .= ")" x 999;
/^($pat)\1000$/; # Matches 'aa'; there are 1000 capture groups.
/^$pat\1000$/; # Matches 'a@0'; there are 999 capture groups
- # and \1000 is seen as \100 (a '@') and a '0'.
+ # and \1000 is seen as \100 (a '@') and a '0'
=back
+You can the force a backreference interpretation always by using the C<\g{...}>
+form. You can the force an octal interpretation always by using the C<\o{...}>
+form, or for numbers up through \077 (= 63 decimal), by using three digits,
+beginning with a "0".
+
=head3 Hexadecimal escapes
-Hexadecimal escapes start with C<\x> and are then either followed by a
-two digit hexadecimal number, or a hexadecimal number of arbitrary length
-surrounded by curly braces. The hexadecimal number is the code point of
-the character you want to express.
+Like octal escapes, there are two forms of hexadecimal escapes, but both start
+with the same thing, C<\x>. This is followed by either exactly two hexadecimal
+digits forming a number, or a hexadecimal number of arbitrary length surrounded
+by curly braces. The hexadecimal number is the code point of the character you
+want to express.
-Note that a character that is expressed as a hexadecimal escape is considered
+Note that a character that is expressed as one of these escapes is considered
as a character without special meaning by the regex engine, and will match
"as is".
diff --git a/pod/perlretut.pod b/pod/perlretut.pod
index eae266a407..f2187179c2 100644
--- a/pod/perlretut.pod
+++ b/pod/perlretut.pod
@@ -184,7 +184,8 @@ bytes. Here are some examples of escapes:
"1000\t2000" =~ m(0\t2) # matches
"1000\n2000" =~ /0\n20/ # matches
"1000\t2000" =~ /\000\t2/ # doesn't match, "0" ne "\000"
- "cat" =~ /\143\x61\x74/ # matches in ASCII, but a weird way to spell cat
+ "cat" =~ /\o{143}\x61\x74/ # matches in ASCII, but a weird way
+ # to spell cat
If you've been around Perl a while, all this talk of escape sequences
may seem familiar. Similar escape sequences are used in double-quoted
@@ -1876,9 +1877,9 @@ much about Perl's internal representation of strings. But they do need
to know 1) how to represent Unicode characters in a regexp and 2) that
a matching operation will treat the string to be searched as a sequence
of characters, not bytes. The answer to 1) is that Unicode characters
-greater than C<chr(255)> are represented using the C<\x{hex}> notation,
-because the \0 octal and \x hex (without curly braces) don't go further
-than 255.
+greater than C<chr(255)> are represented using the C<\x{hex}> notation, because
+\x hex (without curly braces) doesn't go further than 255. (Starting in Perl
+5.14) if you're an octal fan, you can also use C<\o{oct}>.
/\x{263a}/; # match a Unicode smiley face :)
diff --git a/proto.h b/proto.h
index 6a5110ea27..1fc11806e8 100644
--- a/proto.h
+++ b/proto.h
@@ -1633,6 +1633,14 @@ PERL_CALLCONV UV Perl_grok_bin(pTHX_ const char* start, STRLEN* len_p, I32* flag
PERL_CALLCONV char Perl_grok_bslash_c(pTHX_ const char source, const bool output_warning)
__attribute__warn_unused_result__;
+PERL_CALLCONV char* Perl_grok_bslash_o(pTHX_ const char* s, UV* uv, STRLEN* len, const bool output_warning)
+ __attribute__warn_unused_result__
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2)
+ __attribute__nonnull__(pTHX_3);
+#define PERL_ARGS_ASSERT_GROK_BSLASH_O \
+ assert(s); assert(uv); assert(len)
+
PERL_CALLCONV UV Perl_grok_hex(pTHX_ const char* start, STRLEN* len_p, I32* flags, NV *result)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
diff --git a/regcomp.c b/regcomp.c
index 72af569aae..74f996bcd6 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -7362,6 +7362,7 @@ tryagain:
register UV ender;
register char *p;
char *s;
+ char *error_msg;
STRLEN foldlen;
U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
@@ -7462,6 +7463,26 @@ tryagain:
ender = ASCII_TO_NATIVE('\007');
p++;
break;
+ case 'o':
+ {
+ STRLEN brace_len = len;
+ if ((error_msg = grok_bslash_o(p,
+ &ender,
+ &brace_len,
+ SIZE_ONLY))
+ != NULL)
+ {
+ vFAIL(error_msg);
+ }
+ p += brace_len;
+ if (PL_encoding && ender < 0x100) {
+ goto recode_encoding;
+ }
+ if (ender > 0xff) {
+ RExC_utf8 = 1;
+ }
+ break;
+ }
case 'x':
if (*++p == '{') {
char* const e = strchr(p, '}');
@@ -7971,6 +7992,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
parseit:
while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
+ char* error_msg;
charclassloop:
@@ -8077,6 +8099,21 @@ parseit:
case 'b': value = '\b'; break;
case 'e': value = ASCII_TO_NATIVE('\033');break;
case 'a': value = ASCII_TO_NATIVE('\007');break;
+ case 'o':
+ RExC_parse--; /* function expects to be pointed at the 'o' */
+ if ((error_msg = grok_bslash_o(RExC_parse,
+ &value,
+ &numlen,
+ SIZE_ONLY))
+ != NULL)
+ {
+ vFAIL(error_msg);
+ }
+ RExC_parse += numlen;
+ if (PL_encoding && value < 0x100) {
+ goto recode_encoding;
+ }
+ break;
case 'x':
if (*RExC_parse == '{') {
I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp
index f85aa440c4..3f80ccce72 100644
--- a/t/lib/warnings/regcomp
+++ b/t/lib/warnings/regcomp
@@ -207,3 +207,33 @@ Useless (?-c) - don't use /gc modifier in regex; marked by <-- HERE in m/(?o-c <
Useless (?o) - use /o modifier in regex; marked by <-- HERE in m/(?o <-- HERE gc)/ at - line 12.
Useless (?g) - use /g modifier in regex; marked by <-- HERE in m/(?og <-- HERE c)/ at - line 12.
Useless (?c) - use /gc modifier in regex; marked by <-- HERE in m/(?ogc <-- HERE )/ at - line 12.
+########
+# regcomp.c [S_regatom]
+$a = qr/\o{/;
+EXPECT
+Missing right brace on \o{ in regex; marked by <-- HERE in m/\ <-- HERE o{/ at - line 2.
+########
+# regcomp.c [S_regatom]
+$a = qr/\o/;
+EXPECT
+Missing braces on \o{} in regex; marked by <-- HERE in m/\ <-- HERE o/ at - line 2.
+########
+# regcomp.c [S_regatom]
+$a = qr/\o{}/;
+EXPECT
+Number with no digits in regex; marked by <-- HERE in m/\ <-- HERE o{}/ at - line 2.
+########
+# regcomp.c [S_regclass]
+$a = qr/[\o{]/;
+EXPECT
+Missing right brace on \o{ in regex; marked by <-- HERE in m/[\ <-- HERE o{]/ at - line 2.
+########
+# regcomp.c [S_regclass]
+$a = qr/[\o]/;
+EXPECT
+Missing braces on \o{} in regex; marked by <-- HERE in m/[\ <-- HERE o]/ at - line 2.
+########
+# regcomp.c [S_regclass]
+$a = qr/[\o{}]/;
+EXPECT
+Number with no digits in regex; marked by <-- HERE in m/[\ <-- HERE o{}]/ at - line 2.
diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke
index 4bb131f7b9..076270ce06 100644
--- a/t/lib/warnings/toke
+++ b/t/lib/warnings/toke
@@ -966,3 +966,26 @@ Use of := for an empty attribute list is deprecated at - line 36.
Use of := for an empty attribute list is deprecated at - line 38.
Use of := for an empty attribute list is deprecated at - line 41.
Use of := for an empty attribute list is deprecated at - line 42.
+########
+# toke.c
+use warnings 'syntax' ;
+my $a = "\o";
+my $a = "\o{";
+my $a = "\o{}";
+no warnings 'syntax' ;
+my $a = "\o";
+my $a = "\o{";
+my $a = "\o{}";
+EXPECT
+Missing braces on \o{} at - line 3, within string
+Missing right brace on \o{ at - line 4, within string
+Number with no digits at - line 5, within string
+BEGIN not safe after errors--compilation aborted at - line 6.
+########
+# toke.c
+use warnings 'digit' ;
+my $a = "\o{1238456}";
+no warnings 'digit' ;
+my $a = "\o{1238456}";
+EXPECT
+Non-octal character '8'. Resolved as "\o{123}" at - line 3.
diff --git a/t/op/qq.t b/t/op/qq.t
index 7b75b9b7ef..01366086b1 100644
--- a/t/op/qq.t
+++ b/t/op/qq.t
@@ -5,7 +5,7 @@ BEGIN {
@INC = '../lib';
}
-print q(1..26
+print q(1..29
);
# This is() function is written to avoid ""
@@ -64,6 +64,9 @@ is ("\x{10FFFD}", chr 1114109);
is ("\400", chr 0x100);
is ("\600", chr 0x180);
is ("\777", chr 0x1FF);
+is ("a\o{120}b", "a" . chr(0x50) . "b");
+is ("a\o{400}b", "a" . chr(0x100) . "b");
+is ("a\o{1000}b", "a" . chr(0x200) . "b");
# These kludged tests should change when we remove the temporary fatal error
# in util.c for "\c{". And, the warning there should probably not be
diff --git a/t/re/re_tests b/t/re/re_tests
index fc29fb6bd5..36a2f4cee5 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1460,5 +1460,11 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
[a\400] \x{100} y $& \x{100}
[b\600] \x{180} y $& \x{180}
[c\777] \x{1FF} y $& \x{1FF}
+\o{120} \x{50} y $& \x{50}
+\o{400} \x{100} y $& \x{100}
+\o{1000} \x{200} y $& \x{200}
+[a\o{120}] \x{50} y $& \x{50}
+[a\o{400}] \x{100} y $& \x{100}
+[a\o{1000}] \x{200} y $& \x{200}
# vim: softtabstop=0 noexpandtab
diff --git a/toke.c b/toke.c
index b7b33e87b1..75fb327148 100644
--- a/toke.c
+++ b/toke.c
@@ -2879,6 +2879,20 @@ S_scan_const(pTHX_ char *start)
}
goto NUM_ESCAPE_INSERT;
+ /* eg. \o{24} indicates the octal constant \024 */
+ case 'o':
+ {
+ STRLEN len;
+
+ char* error = grok_bslash_o(s, &uv, &len, 1);
+ s += len;
+ if (error) {
+ yyerror(error);
+ continue;
+ }
+ goto NUM_ESCAPE_INSERT;
+ }
+
/* eg. \x24 indicates the hex constant 0x24 */
case 'x':
++s;
diff --git a/util.c b/util.c
index b3b385e2cc..6fdc6534a9 100644
--- a/util.c
+++ b/util.c
@@ -3904,7 +3904,7 @@ Perl_report_evil_fh(pTHX_ const GV *gv, const IO *io, I32 op)
char
Perl_grok_bslash_c(pTHX_ const char source, const bool output_warning)
{
-
+
U8 result;
if (! isASCII(source)) {
@@ -3935,6 +3935,72 @@ Perl_grok_bslash_c(pTHX_ const char source, const bool output_warning)
return result;
}
+char *
+Perl_grok_bslash_o(pTHX_ const char *s, UV *uv, STRLEN *len, const bool output_warning)
+{
+
+/* Documentation to be supplied when interface nailed down finally
+ * This returns NULL on success, otherwise a pointer to an internal constant
+ * error message. On input:
+ * s points to a string that begins with o, and the previous character was
+ * a backslash.
+ * uv points to a UV that will hold the output value
+ * len will point to the next character in the string past the end of this
+ * construct
+ * output_warning says whether to output any warning messages, or suppress
+ * them
+ */
+ char* e;
+ STRLEN numbers_len;
+ I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+ | PERL_SCAN_DISALLOW_PREFIX
+ /* XXX Until the message is improved in grok_oct, handle errors
+ * ourselves */
+ | PERL_SCAN_SILENT_ILLDIGIT;
+
+ PERL_ARGS_ASSERT_GROK_BSLASH_O;
+
+
+ assert(*s == 'o');
+ s++;
+
+ if (*s != '{') {
+ *len = 1; /* Move past the o */
+ return "Missing braces on \\o{}";
+ }
+
+ e = strchr(s, '}');
+ if (!e) {
+ *len = 2; /* Move past the o{ */
+ return "Missing right brace on \\o{";
+ }
+
+ /* Return past the '}' no matter what is inside the braces */
+ *len = e - s + 2; /* 2 = 1 for the o + 1 for the '}' */
+
+ s++; /* Point to first digit */
+
+ numbers_len = e - s;
+ if (numbers_len == 0) {
+ return "Number with no digits";
+ }
+
+ *uv = NATIVE_TO_UNI(grok_oct(s, &numbers_len, &flags, NULL));
+ /* Note that if has non-octal, will ignore everything starting with that up
+ * to the '}' */
+
+ if (output_warning && numbers_len != (STRLEN) (e - s)) {
+ Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
+ /* diag_listed_as: Non-octal character '%c'. Resolved as "%s" */
+ "Non-octal character '%c'. Resolved as \"\\o{%.*s}\"",
+ *(s + numbers_len),
+ (int) numbers_len,
+ s);
+ }
+
+ return NULL;
+}
+
/* To workaround core dumps from the uninitialised tm_zone we get the
* system to give us a reasonable struct to copy. This fix means that
* strftime uses the tm_zone and tm_gmtoff values returned by