summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGurusamy Sarathy <gsar@cpan.org>1999-10-14 18:15:11 +0000
committerGurusamy Sarathy <gsar@cpan.org>1999-10-14 18:15:11 +0000
commitb969bfaafad703cf1922804c46bc20822388650b (patch)
tree83f991f4a4c42120940f15ebf5e39eb769ab1d9e
parent44d40e1ddd92ecf5fa8d8034ee60e61897f46c8d (diff)
parentc7a6a1a52721830cdeb59a9590209efc8a486204 (diff)
downloadperl-b969bfaafad703cf1922804c46bc20822388650b.tar.gz
integrate cfgperl contents into mainline
p4raw-id: //depot/perl@4377
-rw-r--r--MANIFEST1
-rw-r--r--ext/DB_File/hints/sco.pl2
-rw-r--r--hints/aix.sh32
-rw-r--r--hints/linux.sh9
-rw-r--r--hints/svr5.sh195
-rw-r--r--pod/perldelta.pod17
-rw-r--r--pod/perldiag.pod12
-rw-r--r--pod/perlop.pod2
-rw-r--r--pod/perlre.pod8
-rw-r--r--regcomp.c677
-rw-r--r--t/op/re_tests14
-rw-r--r--t/pragma/warn/regcomp68
12 files changed, 597 insertions, 440 deletions
diff --git a/MANIFEST b/MANIFEST
index 9669df8d34..aebd662964 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -207,6 +207,7 @@ ext/DB_File/DB_File_BS Berkeley DB extension mkbootstrap fodder
ext/DB_File/Makefile.PL Berkeley DB extension makefile writer
ext/DB_File/dbinfo Berkeley DB database version checker
ext/DB_File/hints/dynixptx.pl Hint for DB_File for named architecture
+ext/DB_File/hints/sco.pl Hint for DB_File for named architecture
ext/DB_File/typemap Berkeley DB extension interface types
ext/DB_File/version.c Berkeley DB extension interface version check
ext/Data/Dumper/Changes Data pretty printer, changelog
diff --git a/ext/DB_File/hints/sco.pl b/ext/DB_File/hints/sco.pl
new file mode 100644
index 0000000000..ff60440949
--- /dev/null
+++ b/ext/DB_File/hints/sco.pl
@@ -0,0 +1,2 @@
+# osr5 needs to explicitly link against libc to pull in some static symbols
+$self->{LIBS} = ['-ldb -lc'] if $Config{'osvers'} =~ '3\.2v5\.0\..' ;
diff --git a/hints/aix.sh b/hints/aix.sh
index bd1d859c0f..60ca22a74f 100644
--- a/hints/aix.sh
+++ b/hints/aix.sh
@@ -230,29 +230,15 @@ EOCBU
# terminateAndUnload() which work correctly with C++ statics while libc
# load() and unload() do not. See ext/DynaLoader/dl_aix.xs.
# The C-to-C_r switch is done by usethreads.cbu, if needed.
-if test -f /lib/libC.a; then
- case "$cc" in
- xlC*)
- # Cify libswanted for xlC.
- set `echo X "$libswanted "| sed -e 's/ c / C /'`
- shift
- libswanted="$*"
- # Cify lddlflags for xlC.
- set `echo X "$lddlflags "| sed -e 's/ -lc / -lC /'`
- shift
- lddlflags="$*"
- ;;
- *)
- # Cify libswanted for non-xlC.
- set `echo X "$libswanted "| sed -e 's/ c / c C /'`
- shift
- libswanted="$*"
- # Cify lddlflags for non-xlC.
- set `echo X "$lddlflags "| sed -e 's/ -lc / -lc -lC /'`
- shift
- lddlflags="$*"
- ;;
- esac
+if test -f /lib/libC.a -a X"$gccversion" = X; then
+ # Cify libswanted.
+ set `echo X "$libswanted "| sed -e 's/ c / C c /'`
+ shift
+ libswanted="$*"
+ # Cify lddlflags.
+ set `echo X "$lddlflags "| sed -e 's/ -lc / -lC -lc /'`
+ shift
+ lddlflags="$*"
fi
# EOF
diff --git a/hints/linux.sh b/hints/linux.sh
index c1172caba2..e9af509ab5 100644
--- a/hints/linux.sh
+++ b/hints/linux.sh
@@ -258,6 +258,15 @@ fi
#'osfmach3ppc') ccdlflags='-Wl,-E' ;;
#esac
+case "`uname -r`" in
+sparc-linux)
+ case "$cccdlflags" in
+ *-fpic*) cccdlflags="`echo $cccdlflags|sed 's/-fpic/-fPIC/'`" ;;
+ *) cccdlflags="$cccdlflags -fPIC" ;;
+ esac
+ ;;
+esac
+
# This script UU/usethreads.cbu will get 'called-back' by Configure
# after it has prompted the user for whether to use threads.
cat > UU/usethreads.cbu <<'EOCBU'
diff --git a/hints/svr5.sh b/hints/svr5.sh
index 44c03c9fc9..6a76ea5406 100644
--- a/hints/svr5.sh
+++ b/hints/svr5.sh
@@ -1,57 +1,72 @@
-# svr5 hints, System V Release 5.x
-# Last modified 1999/09/21 by Boyd Gerber, gerberb@zenez.com
+# svr5 hints, System V Release 5.x (UnixWare 7)
+# Reworked by hops@sco.com Sept 1999 for better platform support
+# Boyd Gerber, gerberb@zenez.com 1999/09/21 for threads support.
+# Originally taken from svr4 hints.sh 21-Sep-98 hops@sco.com
+# which was version of 1996/10/25 by Tye McQueen, tye@metronet.com
+# Use Configure -Dusethreads to enable threads.
# Use Configure -Dcc=gcc to use gcc.
case "$cc" in
'') cc='/bin/cc'
test -f $cc || cc='/usr/ccs/bin/cc'
;;
- *)
- case "$gccversion" in
- *2.95*)
- ccflags='-fno-strict-aliasing'
- ;;
- *);;
+*gcc*)
+ # "$gccversion" not set yet
+ vers=`gcc -v 2>&1 | sed -n -e 's@.*version \([^ ][^ ]*\) .*@\1@p'`
+ case $vers in
+ *2.95*)
+ ccflags='-fno-strict-aliasing'
+ # More optimisation provided in gcc-2.95 causes miniperl to segv.
+ # -fno-strict-aliasing is supposed to correct this but
+ # if it doesn't and you get segv when the build runs miniperl then
+ # disable optimisation as below
+ # optimize=' '
+ ;;
esac
- ;;
+ ;;
esac
-# want_ucb=''
-# want_dbm='yes'
-want_gdbm='yes'
-
-# We include support for using libraries in /usr/ucblib, but the setting
-# of libswanted excludes some libraries found there. If you run into
-# problems, you may have to remove "ucb" from libswanted. Just delete
-# the comment '#' from the sed command below.
-# ldflags='-L/usr/ccs/lib -L/usr/ucblib'
-# ccflags='-I/usr/include -I/usr/ucbinclude'
-# Don't use problematic libraries:
-libswanted=`echo " $libswanted " | sed -e 's/ malloc / /'` # -e 's/ ucb / /'`
-# libmalloc.a - Probably using Perl's malloc() anyway.
-# libucb.a - Remove it if you have problems ld'ing. We include it because
-# it is needed for ODBM_File and NDBM_File extensions.
+# Hardwire the processor to 586 for consistancy with autoconf
+# archname='i586-svr5'
+# -- seems this is generally disliked by perl porters so leave it to float
+
+# Our default setup excludes anything from /usr/ucblib (and consequently dbm)
+# as later modules assume symbols found are available in shared libs
+# On svr5 these are static archives which causes problems for
+# dynamic modules loaded later (and ucblib is a bad dream anyway)
+#
+# However there is a dbm library built from the ucb sources outside ucblib
+# at http://www.sco.com/skunkware (installing into /usr/local) so if we
+# detect this we'll use it. You can change the default
+# (to allow ucblib and its dbm or disallowing non ucb dbm) by
+# changing 'want_*' config values below to '' to disable or otherwise to enable
+
+# Leave leading tabs so Configure doesn't propagate variables to config.sh
+
+ want_ucb='' # don't use anything from /usr/ucblib - icky
+ want_dbm='yes' # use dbm if can find library in /usr/local/lib
+ want_gdbm='yes' # use gdbm if can find library in /usr/local/lib
+ want_udk70='' # link with old static libc pieces
+ # link with udk70 if want resulting binary to run on uw7.0*
+ # - it will link in referenced static symbols of libc that are (now)
+ # in the shared libc.so on 7.1 but were not in 7.0.
+ # There are still scenarios where this is still insufficient so
+ # overall it is preferable to get ptf7051e
+ # ftp://ftp.sco.com/SLS/ptf7051e.Z
+ # installed on any/all 7.0 systems.
if [ "$want_ucb" ] ; then
- ldflags= '-L/usr/ccs/lib -L/usr/ucblib'
- ccflags='-I/usr/include -I/usr/ucbinclude'
- if [ -r /usr/ucblib/libucb.a ]; then # If using BSD-compat. library:
- d_Gconvert='gcvt((x),(n),(b))' # Try gcvt() before gconvert().
- # Use the "native" counterparts, not the BSD emulation stuff:
- d_bcmp='undef' d_bcopy='undef' d_bzero='undef' d_safebcpy='undef'
- d_index='undef' d_killpg='undef' d_getprior='undef' d_setprior='undef'
- d_setlinebuf='undef'
- # d_setregid='undef' d_setreuid='undef' # ???
- fi
+ ldflags= '-L/usr/ucblib'
+ ccflags='-I/usr/ucbinclude'
+ # /usr/ccs/include and /usr/ccs/lib are used implicitly by cc as reqd
else
-# libswanted=`echo " $libswanted " | sed -e 's/ ucb / /' -e 's/ dbm / /'`
libswanted=`echo " $libswanted " | sed -e 's/ ucb / /'`
glibpth=`echo " $glibpth " | sed -e 's/ \/usr\/ucblib / /'`
- # a non ucb native version of libdbm for /usr/local is available from
- # http://www.sco.com/skunkware
- # if its installed (and not overidden) we'll use it.
+ # If see libdbm in /usr/local and not overidden assume its the
+ # non ucblib rebuild from skunkware and use it
if [ ! -f /usr/local/lib/libdbm.so -o ! "$want_dbm" ] ; then
+ i_dbm='undef'
libswanted=`echo " $libswanted " | sed -e 's/ dbm / /'`
fi
fi
@@ -63,38 +78,32 @@ else
libswanted=`echo " $libswanted " | sed -e 's/ gdbm / /'`
fi
+
# Don't use problematic libraries:
# libmalloc.a - Probably using Perl's malloc() anyway.
-# libc: on UW7 don't want -lc explicitly - cc gives warnings/errors
+# libc: on UW7 don't want -lc explicitly as native cc gives warnings/errors
libswanted=`echo " $libswanted " | sed -e 's/ malloc / /' -e 's/ c / /'`
+# Don't use irrelevant (but existing) lib dirs
+# don't want /usr/gnu/lib - original(older) system supplied distrib of perl5
+loclibpth=`echo " $loclibpth " | sed -e 's@ /usr/gnu/lib @ @'`
+
# remove /shlib and /lib from library search path as both symlink to /usr/lib
# where runtime shared libc is
glibpth=`echo " $glibpth " | sed -e 's/ \/shlib / /' -e 's/ \/lib / /`
-# UnixWare has /usr/lib/libc.so.1, /usr/lib/libc.so.1.1, and
-# /usr/ccs/lib/libc.so. Configure chooses libc.so.1.1 while it
-# appears that /usr/ccs/lib/libc.so contains more symbols:
-#
-# Try the following if you want to use nm-extraction. We'll just
-# skip the nm-extraction phase, since searching for all the different
-# library versions will be hard to keep up-to-date.
-#
-# if [ "" = "$libc" -a -f /usr/ccs/lib/libc.so -a \
-# -f /usr/lib/libc.so.1 -a -f /usr/lib/libc.so.1.1 ]; then
-# if nm -h /usr/ccs/lib/libc.so | egrep '\<_?select$' >/dev/null; then
-# if nm -h /usr/lib/libc.so.1 | egrep '\<_?select$'` >/dev/null ||
-# nm -h /usr/lib/libc.so.1.1 | egrep '\<_?select$'` >/dev/null; then
-# :
-# else
-# libc=/usr/ccs/lib/libc.so
-# fi
-# fi
-# fi
-#
-# Don't bother with nm. Just compile & link a small C program.
+# Don't use BSD emulation pieces (/usr/ucblib) regardless
+# these would probably be autonondetected anyway but ...
+d_Gconvert='gcvt((x),(n),(b))' # Try gcvt() before gconvert().
+d_bcopy='undef' d_bcmp='undef' d_bzero='undef' d_safebcpy='undef'
+d_index='undef' d_killpg='undef' d_getprior='undef' d_setprior='undef'
+d_setlinebuf='undef'
+d_setregid='undef' d_setreuid='undef' # -- in /usr/lib/libc.so.1
+
+
+# use nm to probe libs - its fast enough on uw7
case "$usenm" in
-'') usenm=false;;
+'') usenm=true;;
esac
# Broken C-Shell tests (Thanks to Tye McQueen):
@@ -107,18 +116,34 @@ if [ "$sh_cnt" -ne "$csh_cnt" ]; then
d_csh='undef'
fi
-# Unixware-specific problems. The undocumented -X argument to uname
-# is probably a reasonable way of detecting UnixWare.
+# Unixware-specific problems. UW7 give correctname with uname -s
# UnixWare has a broken csh. (This might already be detected above).
# Configure can't detect memcpy or memset on Unixware 2 or 7
#
# Leave leading tabs on the next two lines so Configure doesn't
# propagate these variables to config.sh
uw_ver=`uname -v`
- uw_isuw=`uname -X 2>&1 | grep Release`
+ uw_isuw=`uname -s 2>&1`
-if [ "$uw_isuw" = "Release = 5" ]; then
+if [ "$uw_isuw" = "UnixWare" ]; then
case $uw_ver in
+ 7.1*)
+ d_csh='undef'
+ d_memcpy='define'
+ d_memset='define'
+ stdio_cnt='((fp)->__cnt)'
+ d_stdio_cnt_lval='define'
+ stdio_ptr='((fp)->__ptr)'
+ d_stdio_ptr_lval='define'
+
+ d_bcopy='define' # In /usr/lib/libc.so.1
+ d_setregid='define' # "
+ d_setreuid='define' # "
+
+ if [ -f /usr/ccs/lib/libcudk70.a -a "$want_udk70" ] ; then
+ libswanted=" $libswanted cudk70"
+ fi
+ ;;
7*)
d_csh='undef'
d_memcpy='define'
@@ -130,6 +155,7 @@ if [ "$uw_isuw" = "Release = 5" ]; then
;;
esac
fi
+# End of Unixware-specific tests.
###############################################################
# Dynamic loading section:
@@ -150,9 +176,10 @@ case "$cc" in
cccdlflags='-fpic'
lddlflags='-G -L/usr/local/lib'
;;
+
*)
ccdlflags='-Wl,-Bexport -L/usr/local/lib'
- cccdlflags='-KPIC'
+ cccdlflags='-Kpic'
lddlflags='-G -Wl,-Bexport -L/usr/local/lib'
;;
esac
@@ -163,29 +190,10 @@ usedl='define'
dlext='so'
dlsrc='dl_dlopen.xs'
-# Configure may fail to find lstat() since it's a static/inline function
-# in <sys/stat.h> on Unisys U6000 SVR4, UnixWare 2.x, and possibly other
-# SVR4 derivatives. (Though UnixWare has it in /usr/ccs/lib/libc.so.)
-d_lstat=define
-
-
-# DDE SMES Supermax Enterprise Server
-case "`uname -sm`" in
-"UNIX_SV SMES")
- # the *grent functions are in libgen.
- libswanted="$libswanted gen"
- # csh is broken (also) in SMES
- # This may already be detected by the generic test above.
- d_csh='undef'
- case "$cc" in
- *gcc*) ;;
- *) # for cc we need -K PIC (not -K pic)
- cccdlflags="$cccdlflags -K PIC"
- ;;
- esac
- ;;
-esac
+############################################################################
+# Thread support
+# use Configure -Dusethreads to enable
# This script UU/usethreads.cbu will get 'called-back' by Configure
# after it has prompted the user for whether to use threads.
cat > UU/usethreads.cbu <<'EOCBU'
@@ -206,17 +214,14 @@ $define|true|[yY]*)
ccdlflags='-Kthread -Wl,-Bexport -L/usr/local/lib'
cccdlflags='-KPIC -Kthread'
lddlflags='-G -Kthread -Wl,-Bexport -L/usr/local/lib'
- ldflags='-Kthread -L/usr/local/lib -L/usr/gnu/lib'
+ ldflags='-Kthread -L/usr/local/lib'
;;
esac
esac
EOCBU
-# End of Unixware-specific tests.
-# Configure may fail to find lstat() since it's a static/inline function
-# in <sys/stat.h> on Unisys U6000 SVR4, UnixWare 2.x, and possibly other
-# SVR4 derivatives. (Though UnixWare has it in /usr/ccs/lib/libc.so.)
-d_lstat=define
-d_suidsafe='define' # "./Configure -d" can't figure this out easilly
+# Just in case Configure fails to find lstat() Its in /usr/lib/libc.so.1.
+d_lstat=define
+d_suidsafe='define' # "./Configure -d" can't figure this out easily
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 9af933bfb0..b4d4d217de 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -15,8 +15,8 @@ This document describes differences between the 5.005 release and this one.
=head2 Perl Source Incompatibilities
-Beware that any new warnings that have been added are B<not> considered
-incompatible changes.
+Beware that any new warnings that have been added or enhanced old
+warnings are B<not> considered incompatible changes.
Since all new warnings must be explicitly requested via the C<-w>
switch or the C<warnings> pragma, it is ultimately the programmer's
@@ -1342,7 +1342,7 @@ A tutorial on managing class data for object modules.
=back
-=head1 New Diagnostics
+=head1 New or Changed Diagnostics
=over 4
@@ -1561,6 +1561,13 @@ See Server error.
(F) While under the C<use filetest> pragma, switching the real and
effective uids or gids failed.
+=item false [] range "%s" in regexp
+
+(W) A character class range must start and end at a literal character, not
+another character class like C<\d> or C<[:alpha:]>. The "-" in your false
+range is interpreted as a literal "-". Consider quoting the "-", "\-".
+See L<perlre>.
+
=item Filehandle %s opened only for output
(W) You tried to read from a filehandle opened only for writing. If you
@@ -1624,6 +1631,10 @@ by Perl or by a user-supplied handler. See L<attributes>.
The indicated attributes for a subroutine or variable were not recognized
by Perl or by a user-supplied handler. See L<attributes>.
+=item invalid [] range "%s" in regexp
+
+The offending range is now explicitly displayed.
+
=item Invalid separator character %s in attribute list
(F) Something other than a comma or whitespace was seen between the
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 527f73bbe6..11758e0e88 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1380,6 +1380,13 @@ the effect of blessing the reference into the package main. This is
usually not what you want. Consider providing a default target
package, e.g. bless($ref, $p || 'MyPackage');
+=item false [] range "%s" in regexp
+
+(W) A character class range must start and end at a literal character, not
+another character class like C<\d> or C<[:alpha:]>. The "-" in your false
+range is interpreted as a literal "-". Consider quoting the "-", "\-".
+See L<perlre>.
+
=item Fatal VMS error at %s, line %d
(P) An error peculiar to VMS. Something untoward happened in a VMS system
@@ -1680,11 +1687,10 @@ by Perl or by a user-supplied handler. See L<attributes>.
The indicated attributes for a subroutine or variable were not recognized
by Perl or by a user-supplied handler. See L<attributes>.
-=item invalid [] range in regexp
+=item invalid [] range "%s" in regexp
(F) The range specified in a character class had a minimum character
-greater than the maximum character, or the range didn't start/end with
-a literal character. See L<perlre>.
+greater than the maximum character. See L<perlre>.
=item Invalid conversion in %s: "%s"
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 01074b3096..e563888c9c 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -173,7 +173,7 @@ $_. The return value indicates the success of the operation. (If the
right argument is an expression rather than a search pattern,
substitution, or transliteration, it is interpreted as a search pattern at run
time. This can be is less efficient than an explicit search, because the
-pattern must be compiled every time the expression is evaluated.
+pattern must be compiled every time the expression is evaluated).
Binary "!~" is just like "=~" except the return value is negated in
the logical sense.
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 9a06305629..1610254da5 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -185,8 +185,9 @@ Use C<\w+> to match a string of Perl-identifier characters (which isn't
the same as matching an English word). If C<use locale> is in effect, the
list of alphabetic characters generated by C<\w> is taken from the
current locale. See L<perllocale>. You may use C<\w>, C<\W>, C<\s>, C<\S>,
-C<\d>, and C<\D> within character classes (though not as either end of
-a range). See L<utf8> for details about C<\pP>, C<\PP>, and C<\X>.
+C<\d>, and C<\D> within character classes, but if you try to use them
+as endpoints of a range, that's not a range, the "-" is understood literally.
+See L<utf8> for details about C<\pP>, C<\PP>, and C<\X>.
The POSIX character class syntax
@@ -940,6 +941,9 @@ at the start or end of the list, or escape it with a backslash. (The
following all specify the same class of three characters: C<[-az]>,
C<[az-]>, and C<[a\-z]>. All are different from C<[a-z]>, which
specifies a class containing twenty-six characters.)
+Also, if you try to use the character classes C<\w>, C<\W>, C<\s>,
+C<\S>, C<\d>, or C<\D> as endpoints of a range, that's not a range,
+the "-" is understood literally.
Note also that the whole range idea is rather unportable between
character sets--and even within character sets they may cause results
diff --git a/regcomp.c b/regcomp.c
index 99423e199d..0dafdd0b9b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2300,6 +2300,7 @@ S_regclass(pTHX)
register I32 def;
I32 numlen;
I32 namedclass;
+ char *rangebegin;
s = opnd = MASK(PL_regcode);
ret = reg_node(ANYOF);
@@ -2329,6 +2330,8 @@ S_regclass(pTHX)
while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
skipcond:
namedclass = OOB_NAMEDCLASS;
+ if (!range)
+ rangebegin = PL_regcomp_parse;
value = UCHARAT(PL_regcomp_parse++);
if (value == '[')
namedclass = regpposixcc(value);
@@ -2363,264 +2366,293 @@ S_regclass(pTHX)
break;
}
}
- if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
- if (range)
- FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */
- switch (namedclass) {
- case ANYOF_ALNUM:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALNUM);
- else {
- for (value = 0; value < 256; value++)
- if (isALNUM(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NALNUM:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALNUM);
- else {
- for (value = 0; value < 256; value++)
- if (!isALNUM(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_SPACE:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_SPACE);
- else {
- for (value = 0; value < 256; value++)
- if (isSPACE(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NSPACE:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NSPACE);
- else {
- for (value = 0; value < 256; value++)
- if (!isSPACE(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_DIGIT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_DIGIT);
- else {
- for (value = '0'; value <= '9'; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NDIGIT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT);
- else {
- for (value = 0; value < '0'; value++)
- ANYOF_BITMAP_SET(opnd, value);
- for (value = '9' + 1; value < 256; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NALNUMC:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC);
- else {
- for (value = 0; value < 256; value++)
- if (!isALNUMC(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_ALNUMC:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC);
- else {
- for (value = 0; value < 256; value++)
- if (isALNUMC(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_ALPHA:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALPHA);
- else {
- for (value = 0; value < 256; value++)
- if (isALPHA(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NALPHA:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALPHA);
- else {
- for (value = 0; value < 256; value++)
- if (!isALPHA(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_ASCII:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ASCII);
- else {
- for (value = 0; value < 128; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NASCII:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NASCII);
- else {
- for (value = 128; value < 256; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_CNTRL:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_CNTRL);
- else {
- for (value = 0; value < 256; value++)
- if (isCNTRL(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- lastvalue = OOB_CHAR8;
- break;
- case ANYOF_NCNTRL:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL);
- else {
- for (value = 0; value < 256; value++)
- if (!isCNTRL(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_GRAPH:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_GRAPH);
- else {
- for (value = 0; value < 256; value++)
- if (isGRAPH(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NGRAPH:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH);
- else {
- for (value = 0; value < 256; value++)
- if (!isGRAPH(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_LOWER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_LOWER);
- else {
- for (value = 0; value < 256; value++)
- if (isLOWER(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NLOWER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NLOWER);
- else {
- for (value = 0; value < 256; value++)
- if (!isLOWER(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_PRINT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_PRINT);
- else {
- for (value = 0; value < 256; value++)
- if (isPRINT(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NPRINT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NPRINT);
- else {
- for (value = 0; value < 256; value++)
- if (!isPRINT(value))
- ANYOF_BITMAP_SET(opnd, value);
+ if (namedclass > OOB_NAMEDCLASS) {
+ if (range) { /* a-\d, a-[:digit:] */
+ if (!SIZE_ONLY) {
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ ANYOF_BITMAP_SET(opnd, lastvalue);
+ ANYOF_BITMAP_SET(opnd, '-');
}
- break;
- case ANYOF_PUNCT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_PUNCT);
- else {
- for (value = 0; value < 256; value++)
- if (isPUNCT(value))
+ range = 0; /* this is not a true range */
+ }
+ if (!SIZE_ONLY) {
+ switch (namedclass) {
+ case ANYOF_ALNUM:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_ALNUM);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isALNUM(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NALNUM:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NALNUM);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isALNUM(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_SPACE:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_SPACE);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isSPACE(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NSPACE:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NSPACE);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isSPACE(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_DIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_DIGIT);
+ else {
+ for (value = '0'; value <= '9'; value++)
ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NPUNCT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT);
- else {
- for (value = 0; value < 256; value++)
- if (!isPUNCT(value))
+ }
+ break;
+ case ANYOF_NDIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT);
+ else {
+ for (value = 0; value < '0'; value++)
ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_UPPER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_UPPER);
- else {
- for (value = 0; value < 256; value++)
- if (isUPPER(value))
+ for (value = '9' + 1; value < 256; value++)
ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NUPPER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NUPPER);
- else {
- for (value = 0; value < 256; value++)
- if (!isUPPER(value))
+ }
+ break;
+ case ANYOF_NALNUMC:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isALNUMC(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_ALNUMC:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isALNUMC(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_ALPHA:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_ALPHA);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isALPHA(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NALPHA:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NALPHA);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isALPHA(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_ASCII:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_ASCII);
+ else {
+ for (value = 0; value < 128; value++)
ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_XDIGIT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT);
- else {
- for (value = 0; value < 256; value++)
- if (isXDIGIT(value))
+ }
+ break;
+ case ANYOF_NASCII:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NASCII);
+ else {
+ for (value = 128; value < 256; value++)
ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_CNTRL:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_CNTRL);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isCNTRL(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ lastvalue = OOB_CHAR8;
+ break;
+ case ANYOF_NCNTRL:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isCNTRL(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_GRAPH:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_GRAPH);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isGRAPH(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NGRAPH:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isGRAPH(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_LOWER:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_LOWER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isLOWER(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NLOWER:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NLOWER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isLOWER(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_PRINT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_PRINT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isPRINT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NPRINT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NPRINT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isPRINT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_PUNCT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_PUNCT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isPUNCT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NPUNCT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isPUNCT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_UPPER:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_UPPER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isUPPER(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NUPPER:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NUPPER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isUPPER(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_XDIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isXDIGIT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NXDIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isXDIGIT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ default:
+ FAIL("invalid [::] class in regexp");
+ break;
}
- break;
- case ANYOF_NXDIGIT:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT);
- else {
- for (value = 0; value < 256; value++)
- if (!isXDIGIT(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- default:
- FAIL("invalid [::] class in regexp");
- break;
+ ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
+ continue;
}
- if (LOC)
- ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
- continue;
}
if (range) {
- if (lastvalue > value)
- FAIL("invalid [] range in regexp"); /* [b-a] */
+ if (lastvalue > value) /* b-a */ {
+ Perl_croak(aTHX_
+ "/%.127s/: invalid [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ }
range = 0;
}
else {
lastvalue = value;
if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
PL_regcomp_parse[1] != ']') {
- if (namedclass > OOB_NAMEDCLASS)
- FAIL("invalid [] range in regexp"); /* [\w-a] */
PL_regcomp_parse++;
- range = 1;
+ if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ if (!SIZE_ONLY)
+ ANYOF_BITMAP_SET(opnd, '-');
+ } else
+ range = 1;
continue; /* do it next time */
}
}
@@ -2682,6 +2714,7 @@ S_regclassutf8(pTHX)
SV *listsv;
U8 flags = 0;
I32 namedclass;
+ char *rangebegin;
if (*PL_regcomp_parse == '^') { /* Complement of range. */
PL_regnaughty++;
@@ -2705,9 +2738,10 @@ S_regclassutf8(pTHX)
while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
skipcond:
namedclass = OOB_NAMEDCLASS;
+ if (!range)
+ rangebegin = PL_regcomp_parse;
value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
PL_regcomp_parse += numlen;
-
if (value == '[')
namedclass = regpposixcc(value);
else if (value == '\\') {
@@ -2776,86 +2810,117 @@ S_regclassutf8(pTHX)
break;
}
}
- if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
- if (range)
- FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */
- switch (namedclass) {
- case ANYOF_ALNUM:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break;
- case ANYOF_NALNUM:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break;
- case ANYOF_ALNUMC:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break;
- case ANYOF_NALNUMC:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break;
- case ANYOF_ALPHA:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break;
- case ANYOF_NALPHA:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break;
- case ANYOF_ASCII:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break;
- case ANYOF_NASCII:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break;
- case ANYOF_CNTRL:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break;
- case ANYOF_NCNTRL:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break;
- case ANYOF_GRAPH:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break;
- case ANYOF_NGRAPH:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break;
- case ANYOF_DIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break;
- case ANYOF_NDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break;
- case ANYOF_LOWER:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break;
- case ANYOF_NLOWER:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break;
- case ANYOF_PRINT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break;
- case ANYOF_NPRINT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break;
- case ANYOF_PUNCT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break;
- case ANYOF_NPUNCT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break;
- case ANYOF_SPACE:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break;
- case ANYOF_NSPACE:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break;
- case ANYOF_UPPER:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break;
- case ANYOF_NUPPER:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break;
- case ANYOF_XDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break;
- case ANYOF_NXDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break;
+ if (namedclass > OOB_NAMEDCLASS) {
+ if (range) { /* a-\d, a-[:digit:] */
+ if (!SIZE_ONLY) {
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ Perl_sv_catpvf(aTHX_ listsv,
+ /* 0x002D is Unicode for '-' */
+ "%04"UVxf"\n002D\n", (UV)lastvalue);
+ }
+ range = 0;
+ }
+ if (!SIZE_ONLY) {
+ switch (namedclass) {
+ case ANYOF_ALNUM:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break;
+ case ANYOF_NALNUM:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break;
+ case ANYOF_ALNUMC:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break;
+ case ANYOF_NALNUMC:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break;
+ case ANYOF_ALPHA:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break;
+ case ANYOF_NALPHA:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break;
+ case ANYOF_ASCII:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break;
+ case ANYOF_NASCII:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break;
+ case ANYOF_CNTRL:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break;
+ case ANYOF_NCNTRL:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break;
+ case ANYOF_GRAPH:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break;
+ case ANYOF_NGRAPH:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break;
+ case ANYOF_DIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break;
+ case ANYOF_NDIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break;
+ case ANYOF_LOWER:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break;
+ case ANYOF_NLOWER:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break;
+ case ANYOF_PRINT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break;
+ case ANYOF_NPRINT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break;
+ case ANYOF_PUNCT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break;
+ case ANYOF_NPUNCT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break;
+ case ANYOF_SPACE:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break;
+ case ANYOF_NSPACE:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break;
+ case ANYOF_UPPER:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break;
+ case ANYOF_NUPPER:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break;
+ case ANYOF_XDIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break;
+ case ANYOF_NXDIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break;
+ }
+ continue;
}
- continue;
}
if (range) {
- if (lastvalue > value)
- FAIL("invalid [] range in regexp"); /* [b-a] */
- if (!SIZE_ONLY)
- Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", (UV)lastvalue, (UV)value);
+ if (lastvalue > value) { /* b-a */
+ Perl_croak(aTHX_
+ "/%.127s/: invalid [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ }
range = 0;
}
else {
lastvalue = value;
if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
PL_regcomp_parse[1] != ']') {
- if (namedclass > OOB_NAMEDCLASS)
- FAIL("invalid [] range in regexp"); /* [\w-a] */
PL_regcomp_parse++;
- range = 1;
+ if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ if (!SIZE_ONLY)
+ Perl_sv_catpvf(aTHX_ listsv,
+ /* 0x002D is Unicode for '-' */
+ "002D\n");
+ } else
+ range = 1;
continue; /* do it next time */
}
}
/* now is the next time */
if (!SIZE_ONLY)
- Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", (UV)value);
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
+ (UV)lastvalue, (UV)value);
range = 0;
}
diff --git a/t/op/re_tests b/t/op/re_tests
index 695672da71..d72a0f73b2 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -45,7 +45,7 @@ a[b-d]e ace y $& ace
a[b-d] aac y $& ac
a[-b] a- y $& a-
a[b-] a- y $& a-
-a[b-a] - c - /a[b-a]/: invalid [] range in regexp
+a[b-a] - c - /a[b-a]/: invalid [] range "b-a" in regexp
a[]b - c - /a[]b/: unmatched [] in regexp
a[ - c - /a[/: unmatched [] in regexp
a] a] y $& a]
@@ -218,7 +218,7 @@ a[-]?c ac y $& ac
'a[b-d]'i AAC y $& AC
'a[-b]'i A- y $& A-
'a[b-]'i A- y $& A-
-'a[b-a]'i - c - /a[b-a]/: invalid [] range in regexp
+'a[b-a]'i - c - /a[b-a]/: invalid [] range "b-a" in regexp
'a[]b'i - c - /a[]b/: unmatched [] in regexp
'a['i - c - /a[/: unmatched [] in regexp
'a]'i A] y $& A]
@@ -735,8 +735,10 @@ foo.bart foo.bart y - -
.[X](.+)+[X][X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
.[X][X](.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
tt+$ xxxtt y - -
-[a-\w] - c - /[a-\w]/: invalid [] range in regexp
-[\w-z] - c - /[\w-z]/: invalid [] range in regexp
-[0-[:digit:]] - c - /[0-[:digit:]]/: invalid [] range in regexp
-[[:digit:]-9] - c - /[[:digit:]-9]/: invalid [] range in regexp
+([a-\d]+) za-9z y $1 a-9
+([\d-z]+) a0-za y $1 0-z
+([\d-\s]+) a0- z y $1 0-
+([a-[:digit:]]+) za-9z y $1 a-9
+([[:digit:]-z]+) =0-z= y $1 0-z
+([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z
\GX.*X aaaXbX n - -
diff --git a/t/pragma/warn/regcomp b/t/pragma/warn/regcomp
index 9c3677ee10..88909626db 100644
--- a/t/pragma/warn/regcomp
+++ b/t/pragma/warn/regcomp
@@ -15,8 +15,9 @@
Character class syntax [%c %c] belongs inside character classes [S_checkposixcc]
+ /%.127s/: false [] range \"%*.*s\" in regexp [S_regclass]
-
+ /%.127s/: false [] range \"%*.*s\" in regexp [S_regclassutf8]
__END__
# regcomp.c [S_regpiece]
@@ -73,3 +74,68 @@ Character class syntax [. .] is reserved for future extensions at - line 8.
Character class syntax [= =] is reserved for future extensions at - line 9.
Character class syntax [: :] belongs inside character classes at - line 10.
Character class [:zog:] unknown at - line 19.
+########
+# regcomp.c [S_regclass]
+$_ = "";
+use warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+no warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+EXPECT
+/[a-\d]/: false [] range "a-\d" in regexp at - line 5.
+/[\d-b]/: false [] range "\d-" in regexp at - line 6.
+/[\s-\d]/: false [] range "\s-" in regexp at - line 7.
+/[\d-\s]/: false [] range "\d-" in regexp at - line 8.
+/[a-[:digit:]]/: false [] range "a-[:digit:]" in regexp at - line 9.
+/[[:digit:]-b]/: false [] range "[:digit:]-" in regexp at - line 10.
+/[[:alpha:]-[:digit:]]/: false [] range "[:alpha:]-" in regexp at - line 11.
+/[[:digit:]-[:alpha:]]/: false [] range "[:digit:]-" in regexp at - line 12.
+########
+# regcomp.c [S_regclassutf8]
+use utf8;
+$_ = "";
+use warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+no warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+EXPECT
+/[a-\d]/: false [] range "a-\d" in regexp at - line 6.
+/[\d-b]/: false [] range "\d-" in regexp at - line 7.
+/[\s-\d]/: false [] range "\s-" in regexp at - line 8.
+/[\d-\s]/: false [] range "\d-" in regexp at - line 9.
+/[a-[:digit:]]/: false [] range "a-[:digit:]" in regexp at - line 10.
+/[[:digit:]-b]/: false [] range "[:digit:]-" in regexp at - line 11.
+/[[:alpha:]-[:digit:]]/: false [] range "[:alpha:]-" in regexp at - line 12.
+/[[:digit:]-[:alpha:]]/: false [] range "[:digit:]-" in regexp at - line 13.