summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANIFEST1
-rw-r--r--perl.h6
-rw-r--r--pod/perllocale.pod2
-rw-r--r--pod/perlop.pod8
-rw-r--r--pod/perlre.pod7
-rw-r--r--regcomp.c21
-rwxr-xr-xt/pragma/locale.t6
-rw-r--r--toke.c25
8 files changed, 67 insertions, 9 deletions
diff --git a/MANIFEST b/MANIFEST
index 91147fda99..966cc80abe 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1116,6 +1116,7 @@ t/op/tie.t See if tie/untie functions work
t/op/tiearray.t See if tie for arrays works
t/op/tiehandle.t See if tie for handles works
t/op/time.t See if time functions work
+t/op/tr.t See if tr works
t/op/undef.t See if undef works
t/op/universal.t See if UNIVERSAL class works
t/op/unshift.t See if unshift works
diff --git a/perl.h b/perl.h
index 0f7fe6de94..c406f38187 100644
--- a/perl.h
+++ b/perl.h
@@ -209,6 +209,12 @@ register struct op *op asm(stringify(OP_IN_REGISTER));
# define LIBERAL 1
#endif
+#if 'A' == 65 && 'I' == 73 && 'J' == 74 && 'Z' == 90
+#define ASCIIish
+#else
+#undef ASCIIish
+#endif
+
/*
* The following contortions are brought to you on behalf of all the
* standards, semi-standards, de facto standards, not-so-de-facto standards
diff --git a/pod/perllocale.pod b/pod/perllocale.pod
index 4401be2053..0a85c0eb11 100644
--- a/pod/perllocale.pod
+++ b/pod/perllocale.pod
@@ -710,7 +710,7 @@ case-mapping with C<\l>, C<\L>,C<\u> or C<\U>.
=item B<In-memory formatting function> (sprintf()):
-Result is tainted if "use locale" is in effect.
+Result is tainted if C<use locale> is in effect.
=item B<Output formatting functions> (printf() and write()):
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 35f9e5f4f8..8e50ec36ea 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -1149,6 +1149,7 @@ the number of characters replaced or deleted. If no string is
specified via the =~ or !~ operator, the $_ string is transliterated. (The
string specified with =~ must be a scalar variable, an array element, a
hash element, or an assignment to one of those, i.e., an lvalue.)
+
A character range may be specified with a hyphen, so C<tr/A-J/0-9/>
does the same replacement as C<tr/ACEGIBDFHJ/0246813579/>.
For B<sed> devotees, C<y> is provided as a synonym for C<tr>. If the
@@ -1156,6 +1157,13 @@ SEARCHLIST is delimited by bracketing quotes, the REPLACEMENTLIST has
its own pair of quotes, which may or may not be bracketing quotes,
e.g., C<tr[A-Z][a-z]> or C<tr(+\-*/)/ABCD/>.
+Note also that the whole range idea is rather unportable between
+character sets--and even within character sets they may cause results
+you probably didn't expect. A sound principle is to use only ranges
+that begin from and end at either alphabets of equal case (a-e, A-E),
+or digits (0-4). Anything else is unsafe. If in doubt, spell out the
+character sets in full.
+
Options:
c Complement the SEARCHLIST.
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 1b49ba4e7b..f696525155 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -735,6 +735,13 @@ following all specify the same class of three characters: C<[-az]>,
C<[az-]>, and C<[a\-z]>. All are different from C<[a-z]>, which
specifies a class containing twenty-six characters.)
+Note also that the whole range idea is rather unportable between
+character sets--and even within character sets they may cause results
+you probably didn't expect. A sound principle is to use only ranges
+that begin from and end at either alphabets of equal case ([a-e],
+[A-E]), or digits ([0-9]). Anything else is unsafe. If in doubt,
+spell out the character sets in full.
+
Characters may be specified using a metacharacter syntax much like that
used in C: "\n" matches a newline, "\t" a tab, "\r" a carriage return,
"\f" a form feed, etc. More generally, \I<nnn>, where I<nnn> is a string
diff --git a/regcomp.c b/regcomp.c
index ef651fad83..8db8b8a58a 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2245,9 +2245,24 @@ regclass(void)
}
}
if (!SIZE_ONLY) {
- for ( ; lastvalue <= value; lastvalue++)
- ANYOF_SET(opnd, lastvalue);
- }
+#ifndef ASCIIish
+ if ((isLOWER(lastvalue) && isLOWER(value)) ||
+ (isUPPER(lastvalue) && isUPPER(value))) {
+ if (isLOWER(lastvalue)) {
+ for (i = lastvalue; i <= value; i++)
+ if (isLOWER(i))
+ ANYOF_SET(opnd, i);
+ } else {
+ for (i = lastvalue; i <= value; i++)
+ if (isUPPER(i))
+ ANYOF_SET(opnd, i);
+ }
+ }
+ else
+#endif
+ for ( ; lastvalue <= value; lastvalue++)
+ ANYOF_SET(opnd, lastvalue);
+ }
lastvalue = value;
}
/* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
diff --git a/t/pragma/locale.t b/t/pragma/locale.t
index 00baa6645e..7e3df8c3f1 100755
--- a/t/pragma/locale.t
+++ b/t/pragma/locale.t
@@ -23,6 +23,9 @@ eval {
# and mingw32 uses said silly CRT
$have_setlocale = 0 if $^O eq 'MSWin32' && $Config{cc} =~ /^(cl|gcc)/i;
+# 103 (the last test) may fail but that is okay.
+# (It indicates something broken in the environment, not Perl)
+# Therefore .. only until 102, not 103.
print "1..", ($have_setlocale ? 102 : 98), "\n";
use vars qw($a
@@ -404,6 +407,7 @@ print "ok 101\n";
# Test for read-onlys.
+print "# testing 102\n";
{
no locale;
$a = "qwerty";
@@ -419,7 +423,7 @@ print "ok 102\n";
# Thanks to Hallvard Furuseth <h.b.furuseth@usit.uio.no>
# for inventing a way to test for ordering consistency
# without requiring any particular order.
-# ++$jhi;#@iki.fi
+# <jhi@iki.fi>
print "# testing 103\n";
{
diff --git a/toke.c b/toke.c
index 2381be3cda..90fc7b8223 100644
--- a/toke.c
+++ b/toke.c
@@ -904,6 +904,7 @@ scan_const(char *start)
/* expand a range A-Z to the full set of characters. AIE! */
if (dorange) {
I32 i; /* current expanded character */
+ I32 min; /* first character in range */
I32 max; /* last character in range */
i = d - SvPVX(sv); /* remember current offset */
@@ -911,10 +912,26 @@ scan_const(char *start)
d = SvPVX(sv) + i; /* restore d after the grow potentially has changed the ptr */
d -= 2; /* eat the first char and the - */
- max = (U8)d[1]; /* last char in range */
-
- for (i = (U8)*d; i <= max; i++)
- *d++ = i;
+ min = (U8)*d; /* first char in range */
+ max = (U8)d[1]; /* last char in range */
+
+#ifndef ASCIIish
+ if ((isLOWER(min) && isLOWER(max)) ||
+ (isUPPER(min) && isUPPER(max))) {
+ if (isLOWER(min)) {
+ for (i = min; i <= max; i++)
+ if (isLOWER(i))
+ *d++ = i;
+ } else {
+ for (i = min; i <= max; i++)
+ if (isUPPER(i))
+ *d++ = i;
+ }
+ }
+ else
+#endif
+ for (i = min; i <= max; i++)
+ *d++ = i;
/* mark the range as done, and continue */
dorange = FALSE;