summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-02-22 16:52:31 -0700
committerKarl Williamson <public@khwilliamson.com>2012-02-22 16:52:31 -0700
commit586dfbd6e5ee94dd476a41d816c18f4ba1ebaa73 (patch)
tree5a8938a3ef8e15f2e1e394d33bf15450b905549c
parent39ec54a59ce332fc44e553f4e5eeceef88e8369e (diff)
downloadperl-smoke-me/khw-quotemeta.tar.gz
-rw-r--r--charclass_invlists.h82
-rw-r--r--embed.fnc2
-rw-r--r--embed.h2
-rw-r--r--lib/Unicode/UCD.pm7
-rw-r--r--proto.h14
-rw-r--r--regcomp.c24
-rw-r--r--regen/mk_invlists.pl21
-rw-r--r--utf8.c12
8 files changed, 135 insertions, 29 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index 152793a2e6..e327525497 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -532,4 +532,86 @@ UV XPosixXDigit_invlist[] = {
65351
};
+STATIC UV _Perl_QuoteMeta_invlist[] = {
+ 74, /* Number of elements */
+ 0, /* Current iteration position */
+ 1064334010, /* Version and data structure type */
+ 0, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
+ 48,
+ 58,
+ 65,
+ 91,
+ 95,
+ 96,
+ 97,
+ 123,
+ 168,
+ 169,
+ 170,
+ 171,
+ 175,
+ 176,
+ 178,
+ 182,
+ 183,
+ 187,
+ 188,
+ 191,
+ 192,
+ 215,
+ 216,
+ 247,
+ 248,
+ 847,
+ 848,
+ 4447,
+ 4449,
+ 5760,
+ 5761,
+ 6068,
+ 6070,
+ 6155,
+ 6159,
+ 8192,
+ 8255,
+ 8257,
+ 8276,
+ 8277,
+ 8304,
+ 8592,
+ 9312,
+ 9472,
+ 10102,
+ 10132,
+ 11264,
+ 11776,
+ 11904,
+ 12288,
+ 12292,
+ 12296,
+ 12321,
+ 12336,
+ 12337,
+ 12644,
+ 12645,
+ 64830,
+ 64832,
+ 65024,
+ 65040,
+ 65093,
+ 65095,
+ 65279,
+ 65280,
+ 65440,
+ 65441,
+ 65520,
+ 65529,
+ 119155,
+ 119163,
+ 917504,
+ 921600,
+ 0
+};
+
/* ex: set ro: */
diff --git a/embed.fnc b/embed.fnc
index a7e004fe26..43eda858af 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -608,6 +608,8 @@ p |UV |_to_fold_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const bool flags
#endif
#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C)
p |UV |_to_upper_title_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const char S_or_s
+#endif
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PP_C)
ApR |bool |_is_utf8_quotemeta|NN const U8 *p
#endif
Ap |UV |to_uni_lower |UV c|NN U8 *p|NN STRLEN *lenp
diff --git a/embed.h b/embed.h
index 541309e42e..2163f714fb 100644
--- a/embed.h
+++ b/embed.h
@@ -781,7 +781,7 @@
#define warn_nocontext Perl_warn_nocontext
#define warner_nocontext Perl_warner_nocontext
#endif
-#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C)
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PP_C)
#define _is_utf8_quotemeta(a) Perl__is_utf8_quotemeta(aTHX_ a)
#endif
#if defined(PERL_MAD)
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index 6cefe977a0..9e8e8f3b82 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -6,7 +6,7 @@ no warnings 'surrogate'; # surrogates can be inputs to this
use charnames ();
use Unicode::Normalize qw(getCombinClass NFD);
-our $VERSION = '0.41';
+our $VERSION = '0.42';
use Storable qw(dclone);
@@ -1949,8 +1949,9 @@ properties, and will return C<undef> if called with one of those.
our %loose_defaults;
our $MAX_UNICODE_CODEPOINT;
-sub prop_invlist ($) {
+sub prop_invlist ($;$) {
my $prop = $_[0];
+ my $internal_ok = defined $_[1] && $_[1] eq 'perl_core_internal_ok';
return if ! defined $prop;
require "utf8_heavy.pl";
@@ -1967,7 +1968,7 @@ sub prop_invlist ($) {
|| ref $swash eq ""
|| $swash->{'BITS'} != 1
|| $swash->{'USER_DEFINED'}
- || $prop =~ /^\s*_/;
+ || (! $internal_ok && $prop =~ /^\s*_/);
if ($swash->{'EXTRAS'}) {
carp __PACKAGE__, "::prop_invlist: swash returned for $prop unexpectedly has EXTRAS magic";
diff --git a/proto.h b/proto.h
index f01e7c3cdf..5ae99e509f 100644
--- a/proto.h
+++ b/proto.h
@@ -6554,6 +6554,14 @@ STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp
assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last)
#endif
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PP_C)
+PERL_CALLCONV bool Perl__is_utf8_quotemeta(pTHX_ const U8 *p)
+ __attribute__warn_unused_result__
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT__IS_UTF8_QUOTEMETA \
+ assert(p)
+
+#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
PERL_CALLCONV SV* Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV* listsv, I32 minbits, I32 none, bool return_if_undef, SV* invlist, bool passed_in_invlist_has_user_defined_property)
__attribute__nonnull__(pTHX_1)
@@ -7154,12 +7162,6 @@ STATIC U8 S_to_lower_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp)
#endif
#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C)
-PERL_CALLCONV bool Perl__is_utf8_quotemeta(pTHX_ const U8 *p)
- __attribute__warn_unused_result__
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT__IS_UTF8_QUOTEMETA \
- assert(p)
-
PERL_CALLCONV UV Perl__to_upper_title_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp, const char S_or_s)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3);
diff --git a/regcomp.c b/regcomp.c
index dd5a37c118..dd47209bac 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6406,6 +6406,30 @@ S__new_invlist_C_array(pTHX_ UV* list)
return invlist;
}
+#ifndef PERL_IN_XSUB_RE
+bool
+Perl__is_utf8_quotemeta(pTHX_ const U8 *p)
+{
+ /* For exclusive use of pp_quotemeta() */
+
+ dVAR;
+
+ PERL_ARGS_ASSERT__IS_UTF8_QUOTEMETA;
+ if (! PL_utf8_quotemeta) {
+ PL_utf8_quotemeta = _core_swash_init("utf8", "",
+ &PL_sv_undef,
+ 1, /* binary */
+ 0, /* not tr/// */
+ FALSE, /* XXX fail if undefined */
+ _new_invlist_C_array(_Perl_QuoteMeta_invlist),
+ FALSE /* Not user-defined */
+ );
+ }
+ return swash_fetch(PL_utf8_quotemeta, p, TRUE /* is utf8 */)
+ != 0;
+}
+#endif
+
STATIC void
S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
{
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 8102c29cd3..88f0b0a6f0 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -23,9 +23,10 @@ my $out_fh = open_new('charclass_invlists.h', '>',
print $out_fh "/* See the generating file for comments */\n\n";
-sub output_invlist ($$) {
+sub output_invlist ($$$) {
my $name = shift;
my $invlist = shift; # Reference to inversion list array
+ my $read_only = shift;
# Output the inversion list $invlist using the name $name for it.
# It is output in the exact internal form for inversion lists.
@@ -47,7 +48,9 @@ sub output_invlist ($$) {
$zero_or_one = 1;
}
- print $out_fh "\nUV ${name}_invlist[] = {\n";
+ print $out_fh "\n";
+ print $out_fh "STATIC " if $read_only;
+ print $out_fh "UV ${name}_invlist[] = {\n";
print $out_fh "\t", scalar @$invlist, ",\t/* Number of elements */\n";
print $out_fh "\t0,\t/* Current iteration position */\n";
@@ -68,8 +71,8 @@ sub output_invlist ($$) {
print $out_fh "};\n";
}
-output_invlist("Latin1", [ 0, 256 ]);
-output_invlist("AboveLatin1", [ 256 ]);
+output_invlist("Latin1", [ 0, 256 ], 0);
+output_invlist("AboveLatin1", [ 256 ], 0);
# We construct lists for all the POSIX and backslash sequence character
# classes in two forms:
@@ -122,9 +125,13 @@ for my $prop (qw(
L1PosixWord
PosixXDigit
XPosixXDigit
+ RO__Perl_QuoteMeta
)
) {
+ my $property = $prop;
+ my $read_only = $property =~ s/^RO_//;
+
# For the Latin1 properties, we change to use the eXtended version of the
# base property, then go through the result and get rid of everything not
# in Latin1 (above 255). Actually, we retain the element that crosses the
@@ -133,11 +140,11 @@ for my $prop (qw(
# artifically cutting that off at 256 because 256 is the first code point
# above Latin1, we let the range go to its natural ending. That gives us
# extra information with no added space taken.
- my $lookup_prop = $prop;
+ my $lookup_prop = $property;
$lookup_prop =~ s/^L1Posix/XPosix/ or $lookup_prop =~ s/^L1//;
my @invlist = prop_invlist($lookup_prop);
- if ($lookup_prop ne $prop) {
+ if ($lookup_prop ne $property) {
for my $i (0 .. @invlist - 1 - 1) {
if ($invlist[$i] > 255) {
splice @invlist, $i+1;
@@ -146,7 +153,7 @@ for my $prop (qw(
}
}
- output_invlist($prop, \@invlist);
+ output_invlist($property, \@invlist, $read_only);
}
read_only_bottom_close_and_rename($out_fh)
diff --git a/utf8.c b/utf8.c
index 2b1e99b23a..2a5d93e193 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2029,18 +2029,6 @@ Perl_is_utf8_X_LV_LVT_V(pTHX_ const U8 *p)
return is_utf8_common(p, &PL_utf8_X_LV_LVT_V, "_X_LV_LVT_V");
}
-bool
-Perl__is_utf8_quotemeta(pTHX_ const U8 *p)
-{
- /* For exclusive use of pp_quotemeta() */
-
- dVAR;
-
- PERL_ARGS_ASSERT__IS_UTF8_QUOTEMETA;
-
- return is_utf8_common(p, &PL_utf8_quotemeta, "_Perl_Quotemeta");
-}
-
/*
=for apidoc to_utf8_case