summaryrefslogtreecommitdiff
path: root/lib/charnames.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/charnames.pm')
-rw-r--r--lib/charnames.pm378
1 files changed, 322 insertions, 56 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm
index c8bd923ff6..adbda1ab78 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -2,10 +2,50 @@ package charnames;
use strict;
use warnings;
use File::Spec;
-our $VERSION = '1.21';
+our $VERSION = '1.22';
use bytes (); # for $bytes::hint_bits
+# Translate between Unicode character names and their code points.
+#
+# The official names with their code points are stored in a table in
+# lib/unicore/Name.pl which is read in as a large string (almost 3/4 Mb in
+# Unicode 6.0). Each code point/name combination is separated by a \n in the
+# string. (Some of the CJK and the Hangul syllable names are determined
+# instead algorithmically via subroutines also stored in Name.pl). Because of
+# the large size of this table, it isn't converted into hashes for faster
+# lookup.
+#
+# But, user defined aliases are stored in their own hashes, as are Perl
+# extensions to the official names. These are checked first before looking at
+# the official table.
+#
+# Basically, the table is grepped for the input code point (viacode()) or
+# name (the other functions), and the corresponding value on the same line is
+# returned. The grepping is done by turning the input into a regular
+# expression. Thus, the same table does double duty, used by both name and
+# code point lookup. (If we were to have hashes, we would need two, one for
+# each lookup direction.)
+#
+# For loose name matching, the logical thing would be to have a table
+# with all the ignorable characters squeezed out, and then grep it with the
+# similiarly-squeezed input name. (And this is in fact how the lookups are
+# done with the small Perl extension hashes.) But since we need to be able to
+# go from code point to official name, the original table would still need to
+# exist. Due to the large size of the table, it was decided to not read
+# another very large string into memory for a second table. Instead, the
+# regular expression of the input name is modified to have optional spaces and
+# dashes between characters. For example, in strict matching, the regular
+# expression would be:
+# qr/\tDIGIT ONE$/m
+# Under loose matching, the blank would be squeezed out, and the re would be:
+# qr/\tD[- ]?I[- ]?G[- ]?I[- ]?T[- ]?O[- ]?N[- ]?E$/m
+# which matches a blank or dash between any characters in the official table.
+#
+# This is also how script lookup is done. Basically the re looks like
+# qr/ (?:LATIN|GREEK|CYRILLIC) (?:SMALL )?LETTER $name/
+# where $name is the loose or strict regex for the remainder of the name.
+
# The hashes are stored as utf8 strings. This makes it easier to deal with
# sequences. I (khw) also tried making Name.pl utf8, but it slowed things
# down by a factor of 7. I then tried making Name.pl store the ut8
@@ -386,6 +426,25 @@ my %system_aliases = (
'ZWSP' => pack("U", 0x200B), # ZERO WIDTH SPACE
);
+# These are the aliases above that differ under :loose and :full matching
+# because the :full versions have blanks or hyphens in them.
+my %loose_system_aliases = (
+ 'LINEFEED' => pack("U", 0x0A),
+ 'FORMFEED' => pack("U", 0x0C),
+ 'CARRIAGERETURN' => pack("U", 0x0D),
+ 'NEXTLINE' => pack("U", 0x85),
+ 'SINGLESHIFT2' => pack("U", 0x8E),
+ 'SINGLESHIFT3' => pack("U", 0x8F),
+ 'PRIVATEUSE1' => pack("U", 0x91),
+ 'PRIVATEUSE2' => pack("U", 0x92),
+ 'STARTOFPROTECTEDAREA' => pack("U", 0x96),
+ 'ENDOFPROTECTEDAREA' => pack("U", 0x97),
+ 'PADDINGCHARACTER' => pack("U", 0x80),
+ 'HIGHOCTETPRESET' => pack("U", 0x81),
+ 'SINGLEGRAPHICCHARACTERINTRODUCER' => pack("U", 0x99),
+ 'BYTEORDERMARK' => pack("U", 0xFEFF),
+);
+
my %deprecated_aliases = (
# Pre-3.2 compatibility (only for the first 256 characters).
# Use of these gives deprecated message.
@@ -406,6 +465,26 @@ my %deprecated_aliases = (
'BELL' => pack("U", 0x07),
);
+my %loose_deprecated_aliases = (
+ 'HORIZONTALTABULATION' => pack("U", 0x09),
+ 'VERTICALTABULATION' => pack("U", 0x0B),
+ 'FILESEPARATOR' => pack("U", 0x1C),
+ 'GROUPSEPARATOR' => pack("U", 0x1D),
+ 'RECORDSEPARATOR' => pack("U", 0x1E),
+ 'UNITSEPARATOR' => pack("U", 0x1F),
+ 'HORIZONTALTABULATIONSET' => pack("U", 0x88),
+ 'HORIZONTALTABULATIONWITHJUSTIFICATION' => pack("U", 0x89),
+ 'PARTIALLINEDOWN' => pack("U", 0x8B),
+ 'PARTIALLINEUP' => pack("U", 0x8C),
+ 'VERTICALTABULATIONSET' => pack("U", 0x8A),
+ 'REVERSEINDEX' => pack("U", 0x8D),
+);
+
+# These are special cased in :loose matching, differing only in a medial
+# hyphen
+my $HANGUL_JUNGSEONG_O_E_utf8 = pack("U", 0x1180);
+my $HANGUL_JUNGSEONG_OE_utf8 = pack("U", 0x116C);
+
my $txt; # The table of official character names
@@ -436,6 +515,12 @@ my %full_names_cache; # Holds already-looked-up names, so don't have to
# decided not to do that for now, just because it's added complication,
# and because I'm just trying to maintain parity, not extend it.
+# Like %full_names_cache, but for use when :loose is in effect. There needs
+# to be two caches because :loose may not be in effect for a scope, and a
+# loose name could inappropriately be returned when only exact matching is
+# called for.
+my %loose_names_cache;
+
# Designed so that test decimal first, and then hex. Leading zeros
# imply non-decimal, as do non-[0-9]
my $decimal_qr = qr/^[1-9]\d*$/;
@@ -524,6 +609,7 @@ my %dummy_H = (
charnames_stringified_ords => "",
charnames_scripts => "",
charnames_full => 1,
+ charnames_loose => 0,
charnames_short => 0,
);
@@ -553,7 +639,8 @@ sub lookup_name ($$$) {
# If we didn't import anything (which happens with 'use charnames ()',
# substitute a dummy structure.
$hints_ref = \%dummy_H if ! defined $hints_ref
- || ! defined $hints_ref->{charnames_full};
+ || (! defined $hints_ref->{charnames_full}
+ && ! defined $hints_ref->{charnames_loose});
# At runtime, but currently not at compile time, $^H gets
# stringified, so un-stringify back to the original data structures.
@@ -567,9 +654,14 @@ sub lookup_name ($$$) {
$hints_ref->{charnames_stringified_ords};
$^H{charnames_scripts} = $hints_ref->{charnames_scripts};
$^H{charnames_full} = $hints_ref->{charnames_full};
+ $^H{charnames_loose} = $hints_ref->{charnames_loose};
$^H{charnames_short} = $hints_ref->{charnames_short};
}
+ my $loose = $^H{charnames_loose};
+ my $lookup_name; # Input name suitably modified for grepping for in the
+ # table
+
# User alias should be checked first or else can't override ours, and if we
# were to add any, could conflict with theirs.
if (exists $^H{charnames_ord_aliases}{$name}) {
@@ -577,26 +669,83 @@ sub lookup_name ($$$) {
}
elsif (exists $^H{charnames_name_aliases}{$name}) {
$name = $^H{charnames_name_aliases}{$name};
- $save_input = $name; # Cache the result for any error message
- }
- elsif (exists $system_aliases{$name}) {
- $utf8 = $system_aliases{$name};
+ $save_input = $lookup_name = $name; # Cache the result for any error
+ # message
+ # The aliases are documented to not match loosely, so change loose match
+ # into full.
+ if ($loose) {
+ $loose = 0;
+ $^H{charnames_full} = 1;
+ }
}
- elsif (exists $deprecated_aliases{$name}) {
- require warnings;
- warnings::warnif('deprecated', "Unicode character name \"$name\" is deprecated, use \"" . viacode(ord $deprecated_aliases{$name}) . "\" instead");
- $utf8 = $deprecated_aliases{$name};
+ else {
+
+ # Here, not a user alias. That means that loose matching may be in
+ # effect; will have to modify the input name.
+ $lookup_name = $name;
+ if ($loose) {
+ $lookup_name = uc $lookup_name;
+
+ # Squeeze out all underscores
+ $lookup_name =~ s/_//g;
+
+ # Remove all medial hyphens
+ $lookup_name =~ s/ (?<= \S ) - (?= \S )//gx;
+
+ # Squeeze out all spaces
+ $lookup_name =~ s/\s//g;
+ }
+
+ # Here, $lookup_name has been modified as necessary for looking in the
+ # hashes. Check the system alias files next. Most of these aliases are
+ # the same for both strict and loose matching. To save space, the ones
+ # which differ are in their own separate hash, which is checked if loose
+ # matching is selected and the regular match fails. To save time, the
+ # loose hashes could be expanded to include all aliases, and there would
+ # only have to be one check. But if someone specifies :loose, they are
+ # interested in convenience over speed, and the time for this second check
+ # is miniscule compared to the rest of the routine.
+ if (exists $system_aliases{$lookup_name}) {
+ $utf8 = $system_aliases{$lookup_name};
+ }
+ elsif ($loose && exists $loose_system_aliases{$lookup_name}) {
+ $utf8 = $loose_system_aliases{$lookup_name};
+ }
+ elsif (exists $deprecated_aliases{$lookup_name}) {
+ require warnings;
+ warnings::warnif('deprecated',
+ "Unicode character name \"$name\" is deprecated, use \""
+ . viacode(ord $deprecated_aliases{$lookup_name})
+ . "\" instead");
+ $utf8 = $deprecated_aliases{$lookup_name};
+ }
+ elsif ($loose && exists $loose_deprecated_aliases{$lookup_name}) {
+ require warnings;
+ warnings::warnif('deprecated',
+ "Unicode character name \"$name\" is deprecated, use \""
+ . viacode(ord $loose_deprecated_aliases{$lookup_name})
+ . "\" instead");
+ $utf8 = $loose_deprecated_aliases{$lookup_name};
+ }
}
- my @off;
+ my @off; # Offsets into table of pattern match begin and end
+ # If haven't found it yet...
if (! defined $utf8) {
# See if has looked this input up earlier.
- if ($^H{charnames_full} && exists $full_names_cache{$name}) {
+ if (! $loose && $^H{charnames_full} && exists $full_names_cache{$name}) {
$utf8 = $full_names_cache{$name};
}
- else {
+ elsif ($loose && exists $loose_names_cache{$name}) {
+ $utf8 = $loose_names_cache{$name};
+ }
+ else { # Here, must do a look-up
+
+ # If full or loose matching succeeded, points to where to cache the
+ # result
+ my $cache_ref;
## Suck in the code/name list as a big string.
## Lines look like:
@@ -608,9 +757,9 @@ sub lookup_name ($$$) {
## @off will hold the index into the code/name string of the start and
## end of the name as we find it.
- ## If :full, look for the name exactly; runtime implies full
- my $found_full_in_table = 0; # Tells us if can cache the result
- if ($^H{charnames_full}) {
+ ## If :loose, look for a loose match; if :full, look for the name
+ ## exactly
+
# See if the name is one which is algorithmically determinable.
# The subroutine is included in Name.pl. The table contained in
@@ -618,40 +767,89 @@ sub lookup_name ($$$) {
# for these before checking for the regular names has no
# noticeable impact on performance for the regular names, but
# the other way around slows down finding these immensely.
- # Algorithmically determinables are not placed in the cache (that
- # $found_full_in_table indicates) because that uses up memory,
- # and finding these again is fast.
- if (defined (my $ord = name_to_code_point_special($name))) {
- $utf8 = pack("U", $ord);
+ # Algorithmically determinables are not placed in the cache because
+ # that uses up memory, and finding these again is fast.
+ if (($loose || $^H{charnames_full})
+ && (defined (my $ord = name_to_code_point_special($lookup_name, $loose))))
+ {
+ $utf8 = pack("U", $ord);
+ }
+ else {
+
+ # Not algorithmically determinable; look up in the table. The name
+ # will be turned into a regex, so quote any meta characters.
+ $lookup_name = quotemeta $lookup_name;
+
+ if ($loose) {
+
+ # For loose matches, $lookup_name has already squeezed out the
+ # non-essential characters. We have to add in code to make the
+ # squeezed version match the non-squeezed equivalent in the table.
+ # The only remaining hyphens are ones that start or end a word in
+ # the original. They have been quoted in $lookup_name so they look
+ # like "\-". Change all other characters except the backslash
+ # quotes for any metacharacters, and the final character, so that
+ # e.g., COLON gets transformed into: /C[- ]?O[- ]?L[- ]?O[- ]?N/
+ $lookup_name =~ s/ (?! \\ -) # Don't do this to the \- sequence
+ ( [^-\\] ) # Nor the "-" within that sequence,
+ # nor the "\" that quotes metachars,
+ # but otherwise put the char into $1
+ (?=.) # And don't do it for the final char
+ /$1\[- \]?/gx; # And add an optional blank or
+ # '-' after each $1 char
+
+ # Those remaining hyphens were originally at the beginning or end of
+ # a word, so they can match either a blank before or after, but not
+ # both. (Keep in mind that they have been quoted, so are a '\-'
+ # sequence)
+ $lookup_name =~ s/\\ -/(?:- | -)/xg;
}
- else {
- # Not algorithmically determinable; look up in the table.
- if ($txt =~ /\t\Q$name\E$/m) {
- @off = ($-[0] + 1, $+[0]); # The 1 is for the tab
- $found_full_in_table = 1;
- }
+ # Do the lookup in the full table if asked for, and if succeeds
+ # save the offsets and set where to cache the result.
+ if (($loose || $^H{charnames_full}) && $txt =~ /\t$lookup_name$/m) {
+ @off = ($-[0] + 1, $+[0]); # The 1 is for the tab
+ $cache_ref = ($loose) ? \%loose_names_cache : \%full_names_cache;
}
- }
-
- # If we didn't get it above, keep looking
- if (! $found_full_in_table && ! defined $utf8) {
+ else {
+ # Here, didn't look for, or didn't find the name.
# If :short is allowed, see if input is like "greek:Sigma".
- my $scripts_trie;
+ # Keep in mind that $lookup_name has had the metas quoted.
+ my $scripts_trie = "";
+ my $name_has_uppercase;
if (($^H{charnames_short})
- && $name =~ /^ \s* (.+?) \s* : \s* (.+?) \s* $ /xs)
+ && $lookup_name =~ /^ (?: \\ \s)* # Quoted space
+ (.+?) # $1 = the script
+ (?: \\ \s)*
+ \\ : # Quoted colon
+ (?: \\ \s)*
+ (.+?) # $2 = the name
+ (?: \\ \s)* $
+ /xs)
{
- $scripts_trie = "\U\Q$1";
- $name = $2;
+ # Even in non-loose matching, the script traditionally has been
+ # case insensitve
+ $scripts_trie = "\U$1";
+ $lookup_name = $2;
+
+ # Use original name to find its input casing, but ignore the
+ # script part of that to make the determination.
+ $save_input = $name if ! defined $save_input;
+ $name =~ s/.*?://;
+ $name_has_uppercase = $name =~ /[[:upper:]]/;
}
else { # Otherwise look in allowed scripts
$scripts_trie = $^H{charnames_scripts};
+
+ # Use original name to find its input casing
+ $name_has_uppercase = $name =~ /[[:upper:]]/;
}
- my $case = $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";
- if ($txt !~
- /\t (?: $scripts_trie ) \ (?:$case\ )? LETTER \ \U\Q$name\E $/xm)
+ my $case = $name_has_uppercase ? "CAPITAL" : "SMALL";
+ if (! $scripts_trie
+ || $txt !~
+ /\t (?: $scripts_trie ) \ (?:$case\ )? LETTER \ \U$lookup_name $/xm)
{
# Here we still don't have it, give up.
return if $runtime;
@@ -664,9 +862,7 @@ sub lookup_name ($$$) {
# Here have found the input name in the table.
@off = ($-[0] + 1, $+[0]); # The 1 is for the tab
- }
-
- if (! defined $utf8) {
+ }
# Here, the input name has been found; we haven't set up the output,
# but we know where in the string
@@ -680,6 +876,16 @@ sub lookup_name ($$$) {
# also be a \n, so the statement works anyway.)
if (substr($txt, $off[0] - 7, 1) eq "\n") {
$utf8 = pack("U", CORE::hex substr($txt, $off[0] - 6, 5));
+
+ # Handle the single loose matching special case, in which two names
+ # differ only by a single medial hyphen. If the original had a
+ # hyphen (or more) in the right place, then it is that one.
+ $utf8 = $HANGUL_JUNGSEONG_O_E_utf8
+ if $loose
+ && $utf8 eq $HANGUL_JUNGSEONG_OE_utf8
+ && $name =~ m/O \s* - [-\s]* E/ix;
+ # Note that this wouldn't work if there were a 2nd
+ # OE in the name
}
else {
@@ -695,7 +901,9 @@ sub lookup_name ($$$) {
# Cache the input so as to not have to search the large table
# again, but only if it came from the one search that we cache.
- $full_names_cache{$name} = $utf8 if $found_full_in_table;
+ # (Haven't bothered with the pain of sorting out scoping issues for the
+ # scripts searches.)
+ $cache_ref->{$name} = $utf8 if defined $cache_ref;
}
}
@@ -784,7 +992,7 @@ sub import
next;
}
if ($alias =~ m{:(\w+)$}) {
- $1 eq "full" || $1 eq "short" and
+ $1 eq "full" || $1 eq "loose" || $1 eq "short" and
croak ":alias cannot use existing pragma :$1 (reversed order?)";
alias_file ($1) and $promote = 1;
next;
@@ -792,7 +1000,9 @@ sub import
alias_file ($alias);
next;
}
- if (substr($arg, 0, 1) eq ':' and ! ($arg eq ":full" || $arg eq ":short")) {
+ if (substr($arg, 0, 1) eq ':'
+ and ! ($arg eq ":full" || $arg eq ":short" || $arg eq ":loose"))
+ {
warn "unsupported special '$arg' in charnames";
next;
}
@@ -801,9 +1011,9 @@ sub import
@args == 0 && $promote and @args = (":full");
@h{@args} = (1) x @args;
- $^H{charnames_full} = delete $h{':full'} || 0; # Don't leave undefined,
- # as tested for in
- # lookup_names
+ # Don't leave these undefined as are tested for in lookup_names
+ $^H{charnames_full} = delete $h{':full'} || 0;
+ $^H{charnames_loose} = delete $h{':loose'} || 0;
$^H{charnames_short} = delete $h{':short'} || 0;
my @scripts = map { uc quotemeta } keys %h;
@@ -827,6 +1037,20 @@ sub import
$^H{charnames_stringified_ords} = join ",", %{$^H{charnames_ord_aliases}};
$^H{charnames_stringified_names} = join ",", %{$^H{charnames_name_aliases}};
$^H{charnames_stringified_inverse_ords} = join ",", %{$^H{charnames_inverse_ords}};
+
+ # Modify the input script names for loose name matching if that is also
+ # specified, similar to the way the base character name is prepared. They
+ # don't (currently, and hopefully never will) have dashes. These go into a
+ # regex, and have already been uppercased and quotemeta'd. Squeeze out all
+ # input underscores, blanks, and dashes. Then convert so will match a blank
+ # between any characters.
+ if ($^H{charnames_loose}) {
+ for (my $i = 0; $i < @scripts; $i++) {
+ $scripts[$i] =~ s/[_ -]//g;
+ $scripts[$i] =~ s/ ( [^\\] ) (?= . ) /$1\\ ?/gx;
+ }
+ }
+
$^H{charnames_scripts} = join "|", @scripts; # Stringifiy them as a trie
} # import
@@ -978,6 +1202,11 @@ charnames - access to Unicode character names and named character sequences; als
print "\N{LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW}",
" is an officially named sequence of two Unicode characters\n";
+ use charnames ':loose';
+ print "\N{Greek small-letter sigma}",
+ "can be used to ignore case, underscores, most blanks,"
+ "and when you aren't sure if the official name has hyphens\n";
+
use charnames ':short';
print "\N{greek:Sigma} is an upper-case sigma.\n";
@@ -1044,11 +1273,17 @@ Also, C<\N{I<...>}> can mean a regex quantifier instead of a character
name, when the I<...> is a number (or comma separated pair of numbers
(see L<perlreref/QUANTIFIERS>), and is not related to this pragma.
-The C<charnames> pragma supports arguments C<:full>, C<:short>, script
-names and L<customized aliases|/CUSTOM ALIASES>. If C<:full> is present, for
-expansion of
+The C<charnames> pragma supports arguments C<:full>, C<:loose>, C<:short>,
+script names and L<customized aliases|/CUSTOM ALIASES>.
+
+If C<:full> is present, for expansion of
C<\N{I<CHARNAME>}>, the string I<CHARNAME> is first looked up in the list of
-standard Unicode character names. If C<:short> is present, and
+standard Unicode character names.
+
+C<:loose> is a variant of C<:full> which allows I<CHARNAME> to be less
+precisely specified. Details are in L</LOOSE MATCHES>.
+
+If C<:short> is present, and
I<CHARNAME> has the form C<I<SCRIPT>:I<CNAME>>, then I<CNAME> is looked up
as a letter in script I<SCRIPT>, as described in the next paragraph.
Or, if C<use charnames> is used
@@ -1067,7 +1302,8 @@ this pragma looks in the table of standard Unicode names for the names
If I<CHARNAME> is all lowercase,
then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant
is ignored, and both I<CHARNAME> and I<SCRIPTNAME> are converted to all
-uppercase for look-up.
+uppercase for look-up. Other than that, both of them follow L<loose|/LOOSE
+MATCHES> rules if C<:loose> is also specified; strict otherwise.
Note that C<\N{...}> is compile-time; it's a special form of string
constant used inside double-quotish strings; this means that you cannot
@@ -1093,6 +1329,32 @@ Otherwise, any string that includes a C<\N{I<charname>}> or
C<S<\N{U+I<code point>}>> will automatically have Unicode semantics (see
L<perlunicode/Byte and Character Semantics>).
+=head1 LOOSE MATCHES
+
+By specifying C<:loose>, Unicode's L<loose character name
+matching|http://www.unicode.org/reports/tr44/Matching_Rules> rules are
+selected instead of the strict exact match used otherwise.
+That means that I<CHARNAME> doesn't have to be so precisely specified.
+Upper/lower case doesn't matter (except with scripts as mentioned above), nor
+do any underscores, and the only hyphens that matter are those at the
+beginning or end of a word in the name (with one exception: the hyphen in
+U+1180 C<HANGUL JUNGSEONG O-E> does matter).
+Also, blanks not adjacent to hyphens don't matter.
+The official Unicode names are quite variable as to where they use hyphens
+versus spaces to separate word-like units, and this option allows you to not
+have to care as much.
+The reason non-medial hyphens matter is because of cases like
+U+0F60 C<TIBETAN LETTER -A> versus U+0F68 C<TIBETAN LETTER A>.
+The hyphen here is significant, as is the space before it, and so both must be
+included.
+
+C<:loose> slows down look-ups by a factor of 2 to 3 versus
+C<:full>, but the trade-off may be worth it to you. Each individual look-up
+takes very little time, and the results are cached, so the speed difference
+would become a factor only in programs that do look-ups of many different
+spellings, and probably only when those look-ups are through vianame() and
+string_vianame(), since C<\N{...}> look-ups are done at compile time.
+
=head1 ALIASES
A few aliases have been defined for convenience; instead of having
@@ -1209,7 +1471,8 @@ other than an alphabetic character and from containing anything other
than alphanumerics, spaces, dashes, parentheses, and underscores.
Currently they must be ASCII.
-An alias can map to either an official Unicode character name or to a
+An alias can map to either an official Unicode character name (not a loose
+matched name) or to a
numeric code point (ordinal). The latter is useful for assigning names
to code points in Unicode private use areas such as U+E800 through
U+F8FF.
@@ -1251,6 +1514,9 @@ well, like
use charnames ":full", ":alias" => "pro";
+C<":loose"> has no effect with these. Input names must match exactly, using
+C<":full"> rules.
+
Also, both these methods currently allow only single characters to be named.
To name a sequence of characters, use a
L<custom translator|/CUSTOM TRANSLATORS> (described below).
@@ -1292,9 +1558,9 @@ SPACE", not "BYTE ORDER MARK".
This is a runtime equivalent to C<\N{...}>. I<name> can be any expression
that evaluates to a name accepted by C<\N{...}> under the L<C<:full>
option|/DESCRIPTION> to C<charnames>. In addition, any other options for the
-controlling C<"use charnames"> in the same scope apply, like any L<script
-list, C<:short> option|/DESCRIPTION>, or L<custom aliases|/CUSTOM ALIASES> you
-may have defined.
+controlling C<"use charnames"> in the same scope apply, like C<:loose> or any
+L<script list, C<:short> option|/DESCRIPTION>, or L<custom aliases|/CUSTOM
+ALIASES> you may have defined.
The only difference is that if the input name is unknown, C<string_vianame>
returns C<undef> instead of the REPLACEMENT CHARACTER and does not raise a