diff options
-rw-r--r-- | lib/charnames.pm | 16 | ||||
-rw-r--r-- | lib/utf8.pm | 5 | ||||
-rw-r--r-- | pod/perldiag.pod | 4 | ||||
-rw-r--r-- | pod/perlop.pod | 4 | ||||
-rw-r--r-- | pod/perlre.pod | 6 | ||||
-rw-r--r-- | regcomp.c | 2 | ||||
-rw-r--r-- | t/lib/charnames.t | 10 | ||||
-rw-r--r-- | toke.c | 6 |
8 files changed, 27 insertions, 26 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm index e407ff7c8a..bd97983abc 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -60,28 +60,28 @@ __END__ =head1 NAME -charnames - define character names for C<\C{named}> string literal escape. +charnames - define character names for C<\N{named}> string literal escape. =head1 SYNOPSIS use charnames ':full'; - print "\C{GREEK SMALL LETTER SIGMA} is called sigma.\n"; + print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n"; use charnames ':short'; - print "\C{greek:Sigma} is an upper-case sigma.\n"; + print "\N{greek:Sigma} is an upper-case sigma.\n"; use charnames qw(cyrillic greek); - print "\C{sigma} is Greek sigma, and \C{be} is Cyrillic b.\n"; + print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n"; =head1 DESCRIPTION Pragma C<use charnames> supports arguments C<:full>, C<:short> and script names. If C<:full> is present, for expansion of -C<\C{CHARNAME}}> string C<CHARNAME> is first looked in the list of +C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of standard Unicode names of chars. If C<:short> is present, and C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up as a letter in script C<SCRIPT>. If pragma C<use charnames> is used -with script name arguments, then for C<\C{CHARNAME}}> the name +with script name arguments, then for C<\N{CHARNAME}}> the name C<CHARNAME> is looked up as a letter in the given scripts (in the specified order). @@ -98,7 +98,7 @@ ignored. =head1 CUSTOM TRANSLATORS -The mechanism of translation is C<\C{...}> escapes is general and not +The mechanism of translation is C<\N{...}> escapes is general and not hardwired into F<charnames.pm>. A module can install custom translations (inside the scope which C<use>s the module) by the following magic incantation: @@ -111,7 +111,7 @@ following magic incantation: Here translator() is a subroutine which takes C<CHARNAME> as an argument, and returns text to insert into the string instead of the -C<\C{CHARNAME}> escape. Since the text to insert should be different +C<\N{CHARNAME}> escape. Since the text to insert should be different in C<utf8> mode and out of it, the function should check the current state of C<utf8>-flag as in diff --git a/lib/utf8.pm b/lib/utf8.pm index 8f650d9d00..5ddd4ba21a 100644 --- a/lib/utf8.pm +++ b/lib/utf8.pm @@ -71,8 +71,9 @@ attempt to canonicalize variable names for you.) =item * Regular expressions match characters instead of bytes. For instance, -"." matches a character instead of a byte. (However, the C<\O> pattern -is provided to force a match a single byte ("octet", hence C<\O>).) +"." matches a character instead of a byte. (However, the C<\C> pattern +is provided to force a match a single byte ("C<char>" in C, hence +C<\C>).) =item * diff --git a/pod/perldiag.pod b/pod/perldiag.pod index cd9583bc87..2634d837fc 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1725,9 +1725,9 @@ ended earlier on the current line. mentioned with the $ in Perl, unlike in the shells, where it can vary from one line to the next. -=item Missing %sbrace%s on \C{} +=item Missing %sbrace%s on \N{} -(F) Wrong syntax of character name literal C<\C{charname}> within +(F) Wrong syntax of character name literal C<\N{charname}> within double-quotish context. =item Missing comma after first argument to %s function diff --git a/pod/perlop.pod b/pod/perlop.pod index bd4ca1df7d..14ca6b5ec0 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -673,7 +673,7 @@ a transliteration, the first eleven of these sequences may be used. \x1b hex char (ESC) \x{263a} wide hex char (SMILEY) \c[ control char (ESC) - \C{name} named char + \N{name} named char \l lowercase next char \u uppercase next char @@ -684,7 +684,7 @@ a transliteration, the first eleven of these sequences may be used. If C<use locale> is in effect, the case map used by C<\l>, C<\L>, C<\u> and C<\U> is taken from the current locale. See L<perllocale>. For -documentation of C<\C{name}>, see L<charnames>. +documentation of C<\N{name}>, see L<charnames>. All systems use the virtual C<"\n"> to represent a line terminator, called a "newline". There is no such thing as an unvarying, physical diff --git a/pod/perlre.pod b/pod/perlre.pod index a1a118f371..76f345de35 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -149,7 +149,7 @@ also work: \x1B hex char \x{263a} wide hex char (Unicode SMILEY) \c[ control char - \C{name} named char + \N{name} named char \l lowercase next char (think vi) \u uppercase next char (think vi) \L lowercase till \E (think vi) @@ -159,7 +159,7 @@ also work: If C<use locale> is in effect, the case map used by C<\l>, C<\L>, C<\u> and C<\U> is taken from the current locale. See L<perllocale>. For -documentation of C<\C{name}>, see L<charnames>. +documentation of C<\N{name}>, see L<charnames>. You cannot include a literal C<$> or C<@> within a C<\Q> sequence. An unescaped C<$> or C<@> interpolates the corresponding variable, @@ -178,7 +178,7 @@ In addition, Perl defines the following: \PP Match non-P \X Match eXtended Unicode "combining character sequence", equivalent to C<(?:\PM\pM*)> - \O Match a single C char (octet) even under utf8. + \C Match a single C char (octet) even under utf8. A C<\w> matches a single alphanumeric character, not a whole word. Use C<\w+> to match a string of Perl-identifier characters (which isn't @@ -1780,7 +1780,7 @@ tryagain: PL_seen_zerolen++; /* Do not optimize RE away */ nextchar(); break; - case 'O': + case 'C': ret = reg_node(SANY); *flagp |= HASWIDTH|SIMPLE; nextchar(); diff --git a/t/lib/charnames.t b/t/lib/charnames.t index 860cc03c75..8d5c8db384 100644 --- a/t/lib/charnames.t +++ b/t/lib/charnames.t @@ -12,13 +12,13 @@ print "1..5\n"; use charnames ':full'; -print "not " unless "Here\C{EXCLAMATION MARK}?" eq 'Here!?'; +print "not " unless "Here\N{EXCLAMATION MARK}?" eq 'Here!?'; print "ok 1\n"; print "# \$res=$res \$\@='$@'\nnot " if $res = eval <<'EOE' use charnames ":full"; -"Here: \C{CYRILLIC SMALL LETTER BE}!"; +"Here: \N{CYRILLIC SMALL LETTER BE}!"; 1 EOE or $@ !~ /above 0xFF/; @@ -28,7 +28,7 @@ print "ok 2\n"; print "# \$res=$res \$\@='$@'\nnot " if $res = eval <<'EOE' use charnames 'cyrillic'; -"Here: \C{Be}!"; +"Here: \N{Be}!"; 1 EOE or $@ !~ /CYRILLIC CAPITAL LETTER BE.*above 0xFF/; @@ -42,12 +42,12 @@ $encoded_bet = "\327\221"; use charnames ':full'; use utf8; - print "not " unless "\C{CYRILLIC SMALL LETTER BE}" eq $encoded_be; + print "not " unless "\N{CYRILLIC SMALL LETTER BE}" eq $encoded_be; print "ok 4\n"; use charnames qw(cyrillic greek :short); - print "not " unless "\C{be},\C{alpha},\C{hebrew:bet}" + print "not " unless "\N{be},\N{alpha},\N{hebrew:bet}" eq "$encoded_be,$encoded_alpha,$encoded_bet"; print "ok 5\n"; } @@ -1151,7 +1151,7 @@ S_scan_const(pTHX_ char *start) : UTF; char *leaveit = /* set of acceptably-backslashed characters */ PL_lex_inpat - ? "\\.^$@AGZdDwWsSbBpPXO+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#" + ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#" : ""; while (s < send || dorange) { @@ -1372,8 +1372,8 @@ S_scan_const(pTHX_ char *start) } continue; - /* \C{latin small letter a} is a named character */ - case 'C': + /* \N{latin small letter a} is a named character */ + case 'N': ++s; if (*s == '{') { char* e = strchr(s, '}'); |