diff options
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | perl.h | 6 | ||||
-rw-r--r-- | pod/perllocale.pod | 2 | ||||
-rw-r--r-- | pod/perlop.pod | 8 | ||||
-rw-r--r-- | pod/perlre.pod | 7 | ||||
-rw-r--r-- | regcomp.c | 21 | ||||
-rwxr-xr-x | t/pragma/locale.t | 6 | ||||
-rw-r--r-- | toke.c | 25 |
8 files changed, 67 insertions, 9 deletions
@@ -1116,6 +1116,7 @@ t/op/tie.t See if tie/untie functions work t/op/tiearray.t See if tie for arrays works t/op/tiehandle.t See if tie for handles works t/op/time.t See if time functions work +t/op/tr.t See if tr works t/op/undef.t See if undef works t/op/universal.t See if UNIVERSAL class works t/op/unshift.t See if unshift works @@ -209,6 +209,12 @@ register struct op *op asm(stringify(OP_IN_REGISTER)); # define LIBERAL 1 #endif +#if 'A' == 65 && 'I' == 73 && 'J' == 74 && 'Z' == 90 +#define ASCIIish +#else +#undef ASCIIish +#endif + /* * The following contortions are brought to you on behalf of all the * standards, semi-standards, de facto standards, not-so-de-facto standards diff --git a/pod/perllocale.pod b/pod/perllocale.pod index 4401be2053..0a85c0eb11 100644 --- a/pod/perllocale.pod +++ b/pod/perllocale.pod @@ -710,7 +710,7 @@ case-mapping with C<\l>, C<\L>,C<\u> or C<\U>. =item B<In-memory formatting function> (sprintf()): -Result is tainted if "use locale" is in effect. +Result is tainted if C<use locale> is in effect. =item B<Output formatting functions> (printf() and write()): diff --git a/pod/perlop.pod b/pod/perlop.pod index 35f9e5f4f8..8e50ec36ea 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1149,6 +1149,7 @@ the number of characters replaced or deleted. If no string is specified via the =~ or !~ operator, the $_ string is transliterated. (The string specified with =~ must be a scalar variable, an array element, a hash element, or an assignment to one of those, i.e., an lvalue.) + A character range may be specified with a hyphen, so C<tr/A-J/0-9/> does the same replacement as C<tr/ACEGIBDFHJ/0246813579/>. For B<sed> devotees, C<y> is provided as a synonym for C<tr>. If the @@ -1156,6 +1157,13 @@ SEARCHLIST is delimited by bracketing quotes, the REPLACEMENTLIST has its own pair of quotes, which may or may not be bracketing quotes, e.g., C<tr[A-Z][a-z]> or C<tr(+\-*/)/ABCD/>. +Note also that the whole range idea is rather unportable between +character sets--and even within character sets they may cause results +you probably didn't expect. A sound principle is to use only ranges +that begin from and end at either alphabets of equal case (a-e, A-E), +or digits (0-4). Anything else is unsafe. If in doubt, spell out the +character sets in full. + Options: c Complement the SEARCHLIST. diff --git a/pod/perlre.pod b/pod/perlre.pod index 1b49ba4e7b..f696525155 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -735,6 +735,13 @@ following all specify the same class of three characters: C<[-az]>, C<[az-]>, and C<[a\-z]>. All are different from C<[a-z]>, which specifies a class containing twenty-six characters.) +Note also that the whole range idea is rather unportable between +character sets--and even within character sets they may cause results +you probably didn't expect. A sound principle is to use only ranges +that begin from and end at either alphabets of equal case ([a-e], +[A-E]), or digits ([0-9]). Anything else is unsafe. If in doubt, +spell out the character sets in full. + Characters may be specified using a metacharacter syntax much like that used in C: "\n" matches a newline, "\t" a tab, "\r" a carriage return, "\f" a form feed, etc. More generally, \I<nnn>, where I<nnn> is a string @@ -2245,9 +2245,24 @@ regclass(void) } } if (!SIZE_ONLY) { - for ( ; lastvalue <= value; lastvalue++) - ANYOF_SET(opnd, lastvalue); - } +#ifndef ASCIIish + if ((isLOWER(lastvalue) && isLOWER(value)) || + (isUPPER(lastvalue) && isUPPER(value))) { + if (isLOWER(lastvalue)) { + for (i = lastvalue; i <= value; i++) + if (isLOWER(i)) + ANYOF_SET(opnd, i); + } else { + for (i = lastvalue; i <= value; i++) + if (isUPPER(i)) + ANYOF_SET(opnd, i); + } + } + else +#endif + for ( ; lastvalue <= value; lastvalue++) + ANYOF_SET(opnd, lastvalue); + } lastvalue = value; } /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */ diff --git a/t/pragma/locale.t b/t/pragma/locale.t index 00baa6645e..7e3df8c3f1 100755 --- a/t/pragma/locale.t +++ b/t/pragma/locale.t @@ -23,6 +23,9 @@ eval { # and mingw32 uses said silly CRT $have_setlocale = 0 if $^O eq 'MSWin32' && $Config{cc} =~ /^(cl|gcc)/i; +# 103 (the last test) may fail but that is okay. +# (It indicates something broken in the environment, not Perl) +# Therefore .. only until 102, not 103. print "1..", ($have_setlocale ? 102 : 98), "\n"; use vars qw($a @@ -404,6 +407,7 @@ print "ok 101\n"; # Test for read-onlys. +print "# testing 102\n"; { no locale; $a = "qwerty"; @@ -419,7 +423,7 @@ print "ok 102\n"; # Thanks to Hallvard Furuseth <h.b.furuseth@usit.uio.no> # for inventing a way to test for ordering consistency # without requiring any particular order. -# ++$jhi;#@iki.fi +# <jhi@iki.fi> print "# testing 103\n"; { @@ -904,6 +904,7 @@ scan_const(char *start) /* expand a range A-Z to the full set of characters. AIE! */ if (dorange) { I32 i; /* current expanded character */ + I32 min; /* first character in range */ I32 max; /* last character in range */ i = d - SvPVX(sv); /* remember current offset */ @@ -911,10 +912,26 @@ scan_const(char *start) d = SvPVX(sv) + i; /* restore d after the grow potentially has changed the ptr */ d -= 2; /* eat the first char and the - */ - max = (U8)d[1]; /* last char in range */ - - for (i = (U8)*d; i <= max; i++) - *d++ = i; + min = (U8)*d; /* first char in range */ + max = (U8)d[1]; /* last char in range */ + +#ifndef ASCIIish + if ((isLOWER(min) && isLOWER(max)) || + (isUPPER(min) && isUPPER(max))) { + if (isLOWER(min)) { + for (i = min; i <= max; i++) + if (isLOWER(i)) + *d++ = i; + } else { + for (i = min; i <= max; i++) + if (isUPPER(i)) + *d++ = i; + } + } + else +#endif + for (i = min; i <= max; i++) + *d++ = i; /* mark the range as done, and continue */ dorange = FALSE; |