1 files changed, 322 insertions, 56 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm
index c8bd923ff6..adbda1ab78 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -2,10 +2,50 @@ package charnames;
 use strict;
 use warnings;
 use File::Spec;
-our $VERSION = '1.21';
+our $VERSION = '1.22';
 
 use bytes ();          # for $bytes::hint_bits
 
+# Translate between Unicode character names and their code points.
+#
+# The official names with their code points are stored in a table in
+# lib/unicore/Name.pl which is read in as a large string (almost 3/4 Mb in
+# Unicode 6.0).  Each code point/name combination is separated by a \n in the
+# string.  (Some of the CJK and the Hangul syllable names are determined
+# instead algorithmically via subroutines also stored in Name.pl).  Because of
+# the large size of this table, it isn't converted into hashes for faster
+# lookup.
+#
+# But, user defined aliases are stored in their own hashes, as are Perl
+# extensions to the official names.  These are checked first before looking at
+# the official table.
+#
+# Basically, the table is grepped for the input code point (viacode()) or
+# name (the other functions), and the corresponding value on the same line is
+# returned.  The grepping is done by turning the input into a regular
+# expression.  Thus, the same table does double duty, used by both name and
+# code point lookup.  (If we were to have hashes, we would need two, one for
+# each lookup direction.)
+#
+# For loose name matching, the logical thing would be to have a table
+# with all the ignorable characters squeezed out, and then grep it with the
+# similiarly-squeezed input name.  (And this is in fact how the lookups are
+# done with the small Perl extension hashes.)  But since we need to be able to
+# go from code point to official name, the original table would still need to
+# exist.  Due to the large size of the table, it was decided to not read
+# another very large string into memory for a second table.  Instead, the
+# regular expression of the input name is modified to have optional spaces and
+# dashes between characters.  For example, in strict matching, the regular
+# expression would be:
+#   qr/\tDIGIT ONE$/m
+# Under loose matching, the blank would be squeezed out, and the re would be:
+#   qr/\tD[- ]?I[- ]?G[- ]?I[- ]?T[- ]?O[- ]?N[- ]?E$/m
+# which matches a blank or dash between any characters in the official table.
+#
+# This is also how script lookup is done.  Basically the re looks like
+#   qr/ (?:LATIN|GREEK|CYRILLIC) (?:SMALL )?LETTER $name/
+# where $name is the loose or strict regex for the remainder of the name.
+
 # The hashes are stored as utf8 strings.  This makes it easier to deal with
 # sequences.  I (khw) also tried making Name.pl utf8, but it slowed things
 # down by a factor of 7.  I then tried making Name.pl store the ut8
@@ -386,6 +426,25 @@ my %system_aliases = (
     'ZWSP'  => pack("U", 0x200B), # ZERO WIDTH SPACE
 );
 
+# These are the aliases above that differ under :loose and :full matching
+# because the :full versions have blanks or hyphens in them.
+my %loose_system_aliases = (
+    'LINEFEED'                         => pack("U", 0x0A),
+    'FORMFEED'                         => pack("U", 0x0C),
+    'CARRIAGERETURN'                   => pack("U", 0x0D),
+    'NEXTLINE'                         => pack("U", 0x85),
+    'SINGLESHIFT2'                     => pack("U", 0x8E),
+    'SINGLESHIFT3'                     => pack("U", 0x8F),
+    'PRIVATEUSE1'                      => pack("U", 0x91),
+    'PRIVATEUSE2'                      => pack("U", 0x92),
+    'STARTOFPROTECTEDAREA'             => pack("U", 0x96),
+    'ENDOFPROTECTEDAREA'               => pack("U", 0x97),
+    'PADDINGCHARACTER'                 => pack("U", 0x80),
+    'HIGHOCTETPRESET'                  => pack("U", 0x81),
+    'SINGLEGRAPHICCHARACTERINTRODUCER' => pack("U", 0x99),
+    'BYTEORDERMARK'                    => pack("U", 0xFEFF),
+);
+
 my %deprecated_aliases = (
     # Pre-3.2 compatibility (only for the first 256 characters).
     # Use of these gives deprecated message.
@@ -406,6 +465,26 @@ my %deprecated_aliases = (
     'BELL'                    => pack("U", 0x07),
 );
 
+my %loose_deprecated_aliases = (
+    'HORIZONTALTABULATION'                  => pack("U", 0x09),
+    'VERTICALTABULATION'                    => pack("U", 0x0B),
+    'FILESEPARATOR'                         => pack("U", 0x1C),
+    'GROUPSEPARATOR'                        => pack("U", 0x1D),
+    'RECORDSEPARATOR'                       => pack("U", 0x1E),
+    'UNITSEPARATOR'                         => pack("U", 0x1F),
+    'HORIZONTALTABULATIONSET'               => pack("U", 0x88),
+    'HORIZONTALTABULATIONWITHJUSTIFICATION' => pack("U", 0x89),
+    'PARTIALLINEDOWN'                       => pack("U", 0x8B),
+    'PARTIALLINEUP'                         => pack("U", 0x8C),
+    'VERTICALTABULATIONSET'                 => pack("U", 0x8A),
+    'REVERSEINDEX'                          => pack("U", 0x8D),
+);
+
+# These are special cased in :loose matching, differing only in a medial
+# hyphen
+my $HANGUL_JUNGSEONG_O_E_utf8 = pack("U", 0x1180);
+my $HANGUL_JUNGSEONG_OE_utf8 = pack("U", 0x116C);
+
 
 my $txt;  # The table of official character names
 
@@ -436,6 +515,12 @@ my %full_names_cache; # Holds already-looked-up names, so don't have to
 # decided not to do that for now, just because it's added complication,
 # and because I'm just trying to maintain parity, not extend it.
 
+# Like %full_names_cache, but for use when :loose is in effect.  There needs
+# to be two caches because :loose may not be in effect for a scope, and a
+# loose name could inappropriately be returned when only exact matching is
+# called for.
+my %loose_names_cache;
+
 # Designed so that test decimal first, and then hex.  Leading zeros
 # imply non-decimal, as do non-[0-9]
 my $decimal_qr = qr/^[1-9]\d*$/;
@@ -524,6 +609,7 @@ my %dummy_H = (
                 charnames_stringified_ords => "",
                 charnames_scripts => "",
                 charnames_full => 1,
+                charnames_loose => 0,
                 charnames_short => 0,
               );
 
@@ -553,7 +639,8 @@ sub lookup_name ($$$) {
     # If we didn't import anything (which happens with 'use charnames ()',
     # substitute a dummy structure.
     $hints_ref = \%dummy_H if ! defined $hints_ref
-                              || ! defined $hints_ref->{charnames_full};
+                              || (! defined $hints_ref->{charnames_full}
+                                  && ! defined $hints_ref->{charnames_loose});
 
     # At runtime, but currently not at compile time, $^H gets
     # stringified, so un-stringify back to the original data structures.
@@ -567,9 +654,14 @@ sub lookup_name ($$$) {
                                       $hints_ref->{charnames_stringified_ords};
     $^H{charnames_scripts} = $hints_ref->{charnames_scripts};
     $^H{charnames_full} = $hints_ref->{charnames_full};
+    $^H{charnames_loose} = $hints_ref->{charnames_loose};
     $^H{charnames_short} = $hints_ref->{charnames_short};
   }
 
+  my $loose = $^H{charnames_loose};
+  my $lookup_name;  # Input name suitably modified for grepping for in the
+                    # table
+
   # User alias should be checked first or else can't override ours, and if we
   # were to add any, could conflict with theirs.
   if (exists $^H{charnames_ord_aliases}{$name}) {
@@ -577,26 +669,83 @@ sub lookup_name ($$$) {
   }
   elsif (exists $^H{charnames_name_aliases}{$name}) {
     $name = $^H{charnames_name_aliases}{$name};
-    $save_input = $name;  # Cache the result for any error message
-  }
-  elsif (exists $system_aliases{$name}) {
-    $utf8 = $system_aliases{$name};
+    $save_input = $lookup_name = $name;  # Cache the result for any error
+                                         # message
+    # The aliases are documented to not match loosely, so change loose match
+    # into full.
+    if ($loose) {
+      $loose = 0;
+      $^H{charnames_full} = 1;
+    }
   }
-  elsif (exists $deprecated_aliases{$name}) {
-    require warnings;
-    warnings::warnif('deprecated', "Unicode character name \"$name\" is deprecated, use \"" . viacode(ord $deprecated_aliases{$name}) . "\" instead");
-    $utf8 = $deprecated_aliases{$name};
+  else {
+
+    # Here, not a user alias.  That means that loose matching may be in
+    # effect; will have to modify the input name.
+    $lookup_name = $name;
+    if ($loose) {
+      $lookup_name = uc $lookup_name;
+
+      # Squeeze out all underscores
+      $lookup_name =~ s/_//g;
+
+      # Remove all medial hyphens
+      $lookup_name =~ s/ (?<= \S  ) - (?= \S  )//gx;
+
+      # Squeeze out all spaces
+      $lookup_name =~ s/\s//g;
+    }
+
+    # Here, $lookup_name has been modified as necessary for looking in the
+    # hashes.  Check the system alias files next.  Most of these aliases are
+    # the same for both strict and loose matching.  To save space, the ones
+    # which differ are in their own separate hash, which is checked if loose
+    # matching is selected and the regular match fails.  To save time, the
+    # loose hashes could be expanded to include all aliases, and there would
+    # only have to be one check.  But if someone specifies :loose, they are
+    # interested in convenience over speed, and the time for this second check
+    # is miniscule compared to the rest of the routine.
+    if (exists $system_aliases{$lookup_name}) {
+      $utf8 = $system_aliases{$lookup_name};
+    }
+    elsif ($loose && exists $loose_system_aliases{$lookup_name}) {
+      $utf8 = $loose_system_aliases{$lookup_name};
+    }
+    elsif (exists $deprecated_aliases{$lookup_name}) {
+      require warnings;
+      warnings::warnif('deprecated',
+                       "Unicode character name \"$name\" is deprecated, use \""
+                       . viacode(ord $deprecated_aliases{$lookup_name})
+                       . "\" instead");
+      $utf8 = $deprecated_aliases{$lookup_name};
+    }
+    elsif ($loose && exists $loose_deprecated_aliases{$lookup_name}) {
+      require warnings;
+      warnings::warnif('deprecated',
+                       "Unicode character name \"$name\" is deprecated, use \""
+                       . viacode(ord $loose_deprecated_aliases{$lookup_name})
+                       . "\" instead");
+      $utf8 = $loose_deprecated_aliases{$lookup_name};
+    }
   }
 
-  my @off;
+  my @off;  # Offsets into table of pattern match begin and end
 
+  # If haven't found it yet...
   if (! defined $utf8) {
 
     # See if has looked this input up earlier.
-    if ($^H{charnames_full} && exists $full_names_cache{$name}) {
+    if (! $loose && $^H{charnames_full} && exists $full_names_cache{$name}) {
       $utf8 = $full_names_cache{$name};
     }
-    else {
+    elsif ($loose && exists $loose_names_cache{$name}) {
+      $utf8 = $loose_names_cache{$name};
+    }
+    else { # Here, must do a look-up
+
+      # If full or loose matching succeeded, points to where to cache the
+      # result
+      my $cache_ref;
 
       ## Suck in the code/name list as a big string.
       ## Lines look like:
@@ -608,9 +757,9 @@ sub lookup_name ($$$) {
       ## @off will hold the index into the code/name string of the start and
       ## end of the name as we find it.
 
-      ## If :full, look for the name exactly; runtime implies full
-      my $found_full_in_table = 0;  # Tells us if can cache the result
-      if ($^H{charnames_full}) {
+      ## If :loose, look for a loose match; if :full, look for the name
+      ## exactly
+
 
         # See if the name is one which is algorithmically determinable.
         # The subroutine is included in Name.pl.  The table contained in
@@ -618,40 +767,89 @@ sub lookup_name ($$$) {
         # for these before checking for the regular names has no
         # noticeable impact on performance for the regular names, but
         # the other way around slows down finding these immensely.
-        # Algorithmically determinables are not placed in the cache (that
-        # $found_full_in_table indicates) because that uses up memory,
-        # and finding these again is fast.
-        if (defined (my $ord = name_to_code_point_special($name))) {
-          $utf8 = pack("U", $ord);
+        # Algorithmically determinables are not placed in the cache because
+        # that uses up memory, and finding these again is fast.
+      if (($loose || $^H{charnames_full})
+          && (defined (my $ord = name_to_code_point_special($lookup_name, $loose))))
+      {
+        $utf8 = pack("U", $ord);
+      }
+      else {
+
+        # Not algorithmically determinable; look up in the table.  The name
+        # will be turned into a regex, so quote any meta characters.
+        $lookup_name = quotemeta $lookup_name;
+
+        if ($loose) {
+
+          # For loose matches, $lookup_name has already squeezed out the
+          # non-essential characters.  We have to add in code to make the
+          # squeezed version match the non-squeezed equivalent in the table.
+          # The only remaining hyphens are ones that start or end a word in
+          # the original.  They have been quoted in $lookup_name so they look
+          # like "\-".  Change all other characters except the backslash
+          # quotes for any metacharacters, and the final character, so that
+          # e.g., COLON gets transformed into: /C[- ]?O[- ]?L[- ]?O[- ]?N/
+          $lookup_name =~ s/ (?! \\ -)    # Don't do this to the \- sequence
+                             ( [^-\\] )   # Nor the "-" within that sequence,
+                                          # nor the "\" that quotes metachars,
+                                          # but otherwise put the char into $1
+                             (?=.)        # And don't do it for the final char
+                           /$1\[- \]?/gx; # And add an optional blank or
+                                          # '-' after each $1 char
+
+          # Those remaining hyphens were originally at the beginning or end of
+          # a word, so they can match either a blank before or after, but not
+          # both.  (Keep in mind that they have been quoted, so are a '\-'
+          # sequence)
+          $lookup_name =~ s/\\ -/(?:- | -)/xg;
         }
-        else {
 
-          # Not algorithmically determinable; look up in the table.
-          if ($txt =~ /\t\Q$name\E$/m) {
-            @off = ($-[0] + 1, $+[0]);    # The 1 is for the tab
-            $found_full_in_table = 1;
-          }
+        # Do the lookup in the full table if asked for, and if succeeds
+        # save the offsets and set where to cache the result.
+        if (($loose || $^H{charnames_full}) && $txt =~ /\t$lookup_name$/m) {
+          @off = ($-[0] + 1, $+[0]);    # The 1 is for the tab
+          $cache_ref = ($loose) ? \%loose_names_cache : \%full_names_cache;
         }
-      }
-
-      # If we didn't get it above, keep looking
-      if (! $found_full_in_table && ! defined $utf8) {
+        else {
 
+        # Here, didn't look for, or didn't find the name.
         # If :short is allowed, see if input is like "greek:Sigma".
-        my $scripts_trie;
+        # Keep in mind that $lookup_name has had the metas quoted.
+        my $scripts_trie = "";
+        my $name_has_uppercase;
         if (($^H{charnames_short})
-            && $name =~ /^ \s* (.+?) \s* : \s* (.+?) \s* $ /xs)
+            && $lookup_name =~ /^ (?: \\ \s)*   # Quoted space
+                                  (.+?)         # $1 = the script
+                                  (?: \\ \s)*
+                                  \\ :          # Quoted colon
+                                  (?: \\ \s)*
+                                  (.+?)         # $2 = the name
+                                  (?: \\ \s)* $
+                                /xs)
         {
-            $scripts_trie = "\U\Q$1";
-            $name = $2;
+            # Even in non-loose matching, the script traditionally has been
+            # case insensitve
+            $scripts_trie = "\U$1";
+            $lookup_name = $2;
+
+            # Use original name to find its input casing, but ignore the
+            # script part of that to make the determination.
+            $save_input = $name if ! defined $save_input;
+            $name =~ s/.*?://;
+            $name_has_uppercase = $name =~ /[[:upper:]]/;
         }
         else { # Otherwise look in allowed scripts
             $scripts_trie = $^H{charnames_scripts};
+
+            # Use original name to find its input casing
+            $name_has_uppercase = $name =~ /[[:upper:]]/;
         }
 
-        my $case = $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";
-        if ($txt !~
-            /\t (?: $scripts_trie ) \ (?:$case\ )? LETTER \ \U\Q$name\E $/xm)
+        my $case = $name_has_uppercase ? "CAPITAL" : "SMALL";
+        if (! $scripts_trie
+            || $txt !~
+            /\t (?: $scripts_trie ) \ (?:$case\ )? LETTER \ \U$lookup_name $/xm)
         {
           # Here we still don't have it, give up.
           return if $runtime;
@@ -664,9 +862,7 @@ sub lookup_name ($$$) {
 
         # Here have found the input name in the table.
         @off = ($-[0] + 1, $+[0]);  # The 1 is for the tab
-      }
-
-      if (! defined $utf8) {
+        }
 
         # Here, the input name has been found; we haven't set up the output,
         # but we know where in the string
@@ -680,6 +876,16 @@ sub lookup_name ($$$) {
         # also be a \n, so the statement works anyway.)
         if (substr($txt, $off[0] - 7, 1) eq "\n") {
           $utf8 = pack("U", CORE::hex substr($txt, $off[0] - 6, 5));
+
+          # Handle the single loose matching special case, in which two names
+          # differ only by a single medial hyphen.  If the original had a
+          # hyphen (or more) in the right place, then it is that one.
+          $utf8 = $HANGUL_JUNGSEONG_O_E_utf8
+                  if $loose
+                     && $utf8 eq $HANGUL_JUNGSEONG_OE_utf8
+                     && $name =~ m/O \s* - [-\s]* E/ix;
+                     # Note that this wouldn't work if there were a 2nd
+                     # OE in the name
         }
         else {
 
@@ -695,7 +901,9 @@ sub lookup_name ($$$) {
 
       # Cache the input so as to not have to search the large table
       # again, but only if it came from the one search that we cache.
-      $full_names_cache{$name} = $utf8 if $found_full_in_table;
+      # (Haven't bothered with the pain of sorting out scoping issues for the
+      # scripts searches.)
+      $cache_ref->{$name} = $utf8 if defined $cache_ref;
     }
   }
 
@@ -784,7 +992,7 @@ sub import
         next;
       }
       if ($alias =~ m{:(\w+)$}) {
-        $1 eq "full" || $1 eq "short" and
+        $1 eq "full" || $1 eq "loose" || $1 eq "short" and
           croak ":alias cannot use existing pragma :$1 (reversed order?)";
         alias_file ($1) and $promote = 1;
         next;
@@ -792,7 +1000,9 @@ sub import
       alias_file ($alias);
       next;
     }
-    if (substr($arg, 0, 1) eq ':' and ! ($arg eq ":full" || $arg eq ":short")) {
+    if (substr($arg, 0, 1) eq ':'
+      and ! ($arg eq ":full" || $arg eq ":short" || $arg eq ":loose"))
+    {
       warn "unsupported special '$arg' in charnames";
       next;
     }
@@ -801,9 +1011,9 @@ sub import
   @args == 0 && $promote and @args = (":full");
   @h{@args} = (1) x @args;
 
-  $^H{charnames_full} = delete $h{':full'} || 0;  # Don't leave undefined,
-                                                  # as tested for in
-                                                  # lookup_names
+  # Don't leave these undefined as are tested for in lookup_names
+  $^H{charnames_full} = delete $h{':full'} || 0;
+  $^H{charnames_loose} = delete $h{':loose'} || 0;
   $^H{charnames_short} = delete $h{':short'} || 0;
   my @scripts = map { uc quotemeta } keys %h;
 
@@ -827,6 +1037,20 @@ sub import
   $^H{charnames_stringified_ords} = join ",", %{$^H{charnames_ord_aliases}};
   $^H{charnames_stringified_names} = join ",", %{$^H{charnames_name_aliases}};
   $^H{charnames_stringified_inverse_ords} = join ",", %{$^H{charnames_inverse_ords}};
+
+  # Modify the input script names for loose name matching if that is also
+  # specified, similar to the way the base character name is prepared.  They
+  # don't (currently, and hopefully never will) have dashes.  These go into a
+  # regex, and have already been uppercased and quotemeta'd.  Squeeze out all
+  # input underscores, blanks, and dashes.  Then convert so will match a blank
+  # between any characters.
+  if ($^H{charnames_loose}) {
+    for (my $i = 0; $i < @scripts; $i++) {
+      $scripts[$i] =~ s/[_ -]//g;
+      $scripts[$i] =~ s/ ( [^\\] ) (?= . ) /$1\\ ?/gx;
+    }
+  }
+
   $^H{charnames_scripts} = join "|", @scripts;  # Stringifiy them as a trie
 } # import
 
@@ -978,6 +1202,11 @@ charnames - access to Unicode character names and named character sequences; als
  print "\N{LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW}",
        " is an officially named sequence of two Unicode characters\n";
 
+ use charnames ':loose';
+ print "\N{Greek small-letter  sigma}",
+        "can be used to ignore case, underscores, most blanks,"
+        "and when you aren't sure if the official name has hyphens\n";
+
  use charnames ':short';
  print "\N{greek:Sigma} is an upper-case sigma.\n";
 
@@ -1044,11 +1273,17 @@ Also, C<\N{I<...>}> can mean a regex quantifier instead of a character
 name, when the I<...> is a number (or comma separated pair of numbers
 (see L<perlreref/QUANTIFIERS>), and is not related to this pragma.
 
-The C<charnames> pragma supports arguments C<:full>, C<:short>, script
-names and L<customized aliases|/CUSTOM ALIASES>.  If C<:full> is present, for
-expansion of
+The C<charnames> pragma supports arguments C<:full>, C<:loose>, C<:short>,
+script names and L<customized aliases|/CUSTOM ALIASES>.
+
+If C<:full> is present, for expansion of
 C<\N{I<CHARNAME>}>, the string I<CHARNAME> is first looked up in the list of
-standard Unicode character names.  If C<:short> is present, and
+standard Unicode character names.
+
+C<:loose> is a variant of C<:full> which allows I<CHARNAME> to be less
+precisely specified.  Details are in L</LOOSE MATCHES>.
+
+If C<:short> is present, and
 I<CHARNAME> has the form C<I<SCRIPT>:I<CNAME>>, then I<CNAME> is looked up
 as a letter in script I<SCRIPT>, as described in the next paragraph.
 Or, if C<use charnames> is used
@@ -1067,7 +1302,8 @@ this pragma looks in the table of standard Unicode names for the names
 If I<CHARNAME> is all lowercase,
 then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant
 is ignored, and both I<CHARNAME> and I<SCRIPTNAME> are converted to all
-uppercase for look-up.
+uppercase for look-up.  Other than that, both of them follow L<loose|/LOOSE
+MATCHES> rules if C<:loose> is also specified; strict otherwise.
 
 Note that C<\N{...}> is compile-time; it's a special form of string
 constant used inside double-quotish strings; this means that you cannot
@@ -1093,6 +1329,32 @@ Otherwise, any string that includes a C<\N{I<charname>}> or
 C<S<\N{U+I<code point>}>> will automatically have Unicode semantics (see
 L<perlunicode/Byte and Character Semantics>).
 
+=head1 LOOSE MATCHES
+
+By specifying C<:loose>, Unicode's L<loose character name
+matching|http://www.unicode.org/reports/tr44/Matching_Rules> rules are
+selected instead of the strict exact match used otherwise.
+That means that I<CHARNAME> doesn't have to be so precisely specified.
+Upper/lower case doesn't matter (except with scripts as mentioned above), nor
+do any underscores, and the only hyphens that matter are those at the
+beginning or end of a word in the name (with one exception:  the hyphen in
+U+1180 C<HANGUL JUNGSEONG O-E> does matter).
+Also, blanks not adjacent to hyphens don't matter.
+The official Unicode names are quite variable as to where they use hyphens
+versus spaces to separate word-like units, and this option allows you to not
+have to care as much.
+The reason non-medial hyphens matter is because of cases like
+U+0F60 C<TIBETAN LETTER -A> versus U+0F68 C<TIBETAN LETTER A>.
+The hyphen here is significant, as is the space before it, and so both must be
+included.
+
+C<:loose> slows down look-ups by a factor of 2 to 3 versus
+C<:full>, but the trade-off may be worth it to you.  Each individual look-up
+takes very little time, and the results are cached, so the speed difference
+would become a factor only in programs that do look-ups of many different
+spellings, and probably only when those look-ups are through vianame() and
+string_vianame(), since C<\N{...}> look-ups are done at compile time.
+
 =head1 ALIASES
 
 A few aliases have been defined for convenience; instead of having
@@ -1209,7 +1471,8 @@ other than an alphabetic character and from containing anything other
 than alphanumerics, spaces, dashes, parentheses, and underscores.
 Currently they must be ASCII.
 
-An alias can map to either an official Unicode character name or to a
+An alias can map to either an official Unicode character name (not a loose
+matched name) or to a
 numeric code point (ordinal).  The latter is useful for assigning names
 to code points in Unicode private use areas such as U+E800 through
 U+F8FF.
@@ -1251,6 +1514,9 @@ well, like
 
     use charnames ":full", ":alias" => "pro";
 
+C<":loose"> has no effect with these.  Input names must match exactly, using
+C<":full"> rules.
+
 Also, both these methods currently allow only single characters to be named.
 To name a sequence of characters, use a
 L<custom translator|/CUSTOM TRANSLATORS> (described below).
@@ -1292,9 +1558,9 @@ SPACE", not "BYTE ORDER MARK".
 This is a runtime equivalent to C<\N{...}>.  I<name> can be any expression
 that evaluates to a name accepted by C<\N{...}> under the L<C<:full>
 option|/DESCRIPTION> to C<charnames>.  In addition, any other options for the
-controlling C<"use charnames"> in the same scope apply, like any L<script
-list, C<:short> option|/DESCRIPTION>, or L<custom aliases|/CUSTOM ALIASES> you
-may have defined.
+controlling C<"use charnames"> in the same scope apply, like C<:loose> or any
+L<script list, C<:short> option|/DESCRIPTION>, or L<custom aliases|/CUSTOM
+ALIASES> you may have defined.
 
 The only difference is that if the input name is unknown, C<string_vianame>
 returns C<undef> instead of the REPLACEMENT CHARACTER and does not raise a