diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-02-22 16:52:31 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-02-22 16:52:31 -0700 |
commit | 586dfbd6e5ee94dd476a41d816c18f4ba1ebaa73 (patch) | |
tree | 5a8938a3ef8e15f2e1e394d33bf15450b905549c | |
parent | 39ec54a59ce332fc44e553f4e5eeceef88e8369e (diff) | |
download | perl-smoke-me/khw-quotemeta.tar.gz |
for smokingsmoke-me/khw-quotemeta
-rw-r--r-- | charclass_invlists.h | 82 | ||||
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | lib/Unicode/UCD.pm | 7 | ||||
-rw-r--r-- | proto.h | 14 | ||||
-rw-r--r-- | regcomp.c | 24 | ||||
-rw-r--r-- | regen/mk_invlists.pl | 21 | ||||
-rw-r--r-- | utf8.c | 12 |
8 files changed, 135 insertions, 29 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index 152793a2e6..e327525497 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -532,4 +532,86 @@ UV XPosixXDigit_invlist[] = { 65351 }; +STATIC UV _Perl_QuoteMeta_invlist[] = { + 74, /* Number of elements */ + 0, /* Current iteration position */ + 1064334010, /* Version and data structure type */ + 0, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ + 48, + 58, + 65, + 91, + 95, + 96, + 97, + 123, + 168, + 169, + 170, + 171, + 175, + 176, + 178, + 182, + 183, + 187, + 188, + 191, + 192, + 215, + 216, + 247, + 248, + 847, + 848, + 4447, + 4449, + 5760, + 5761, + 6068, + 6070, + 6155, + 6159, + 8192, + 8255, + 8257, + 8276, + 8277, + 8304, + 8592, + 9312, + 9472, + 10102, + 10132, + 11264, + 11776, + 11904, + 12288, + 12292, + 12296, + 12321, + 12336, + 12337, + 12644, + 12645, + 64830, + 64832, + 65024, + 65040, + 65093, + 65095, + 65279, + 65280, + 65440, + 65441, + 65520, + 65529, + 119155, + 119163, + 917504, + 921600, + 0 +}; + /* ex: set ro: */ @@ -608,6 +608,8 @@ p |UV |_to_fold_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const bool flags #endif #if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C) p |UV |_to_upper_title_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const char S_or_s +#endif +#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PP_C) ApR |bool |_is_utf8_quotemeta|NN const U8 *p #endif Ap |UV |to_uni_lower |UV c|NN U8 *p|NN STRLEN *lenp @@ -781,7 +781,7 @@ #define warn_nocontext Perl_warn_nocontext #define warner_nocontext Perl_warner_nocontext #endif -#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C) +#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PP_C) #define _is_utf8_quotemeta(a) Perl__is_utf8_quotemeta(aTHX_ a) #endif #if defined(PERL_MAD) diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 6cefe977a0..9e8e8f3b82 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -6,7 +6,7 @@ no warnings 'surrogate'; # surrogates can be inputs to this use charnames (); use Unicode::Normalize qw(getCombinClass NFD); -our $VERSION = '0.41'; +our $VERSION = '0.42'; use Storable qw(dclone); @@ -1949,8 +1949,9 @@ properties, and will return C<undef> if called with one of those. our %loose_defaults; our $MAX_UNICODE_CODEPOINT; -sub prop_invlist ($) { +sub prop_invlist ($;$) { my $prop = $_[0]; + my $internal_ok = defined $_[1] && $_[1] eq 'perl_core_internal_ok'; return if ! defined $prop; require "utf8_heavy.pl"; @@ -1967,7 +1968,7 @@ sub prop_invlist ($) { || ref $swash eq "" || $swash->{'BITS'} != 1 || $swash->{'USER_DEFINED'} - || $prop =~ /^\s*_/; + || (! $internal_ok && $prop =~ /^\s*_/); if ($swash->{'EXTRAS'}) { carp __PACKAGE__, "::prop_invlist: swash returned for $prop unexpectedly has EXTRAS magic"; @@ -6554,6 +6554,14 @@ STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last) #endif +#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PP_C) +PERL_CALLCONV bool Perl__is_utf8_quotemeta(pTHX_ const U8 *p) + __attribute__warn_unused_result__ + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT__IS_UTF8_QUOTEMETA \ + assert(p) + +#endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) PERL_CALLCONV SV* Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV* listsv, I32 minbits, I32 none, bool return_if_undef, SV* invlist, bool passed_in_invlist_has_user_defined_property) __attribute__nonnull__(pTHX_1) @@ -7154,12 +7162,6 @@ STATIC U8 S_to_lower_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp) #endif #if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C) -PERL_CALLCONV bool Perl__is_utf8_quotemeta(pTHX_ const U8 *p) - __attribute__warn_unused_result__ - __attribute__nonnull__(pTHX_1); -#define PERL_ARGS_ASSERT__IS_UTF8_QUOTEMETA \ - assert(p) - PERL_CALLCONV UV Perl__to_upper_title_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp, const char S_or_s) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); @@ -6406,6 +6406,30 @@ S__new_invlist_C_array(pTHX_ UV* list) return invlist; } +#ifndef PERL_IN_XSUB_RE +bool +Perl__is_utf8_quotemeta(pTHX_ const U8 *p) +{ + /* For exclusive use of pp_quotemeta() */ + + dVAR; + + PERL_ARGS_ASSERT__IS_UTF8_QUOTEMETA; + if (! PL_utf8_quotemeta) { + PL_utf8_quotemeta = _core_swash_init("utf8", "", + &PL_sv_undef, + 1, /* binary */ + 0, /* not tr/// */ + FALSE, /* XXX fail if undefined */ + _new_invlist_C_array(_Perl_QuoteMeta_invlist), + FALSE /* Not user-defined */ + ); + } + return swash_fetch(PL_utf8_quotemeta, p, TRUE /* is utf8 */) + != 0; +} +#endif + STATIC void S_invlist_extend(pTHX_ SV* const invlist, const UV new_max) { diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index 8102c29cd3..88f0b0a6f0 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -23,9 +23,10 @@ my $out_fh = open_new('charclass_invlists.h', '>', print $out_fh "/* See the generating file for comments */\n\n"; -sub output_invlist ($$) { +sub output_invlist ($$$) { my $name = shift; my $invlist = shift; # Reference to inversion list array + my $read_only = shift; # Output the inversion list $invlist using the name $name for it. # It is output in the exact internal form for inversion lists. @@ -47,7 +48,9 @@ sub output_invlist ($$) { $zero_or_one = 1; } - print $out_fh "\nUV ${name}_invlist[] = {\n"; + print $out_fh "\n"; + print $out_fh "STATIC " if $read_only; + print $out_fh "UV ${name}_invlist[] = {\n"; print $out_fh "\t", scalar @$invlist, ",\t/* Number of elements */\n"; print $out_fh "\t0,\t/* Current iteration position */\n"; @@ -68,8 +71,8 @@ sub output_invlist ($$) { print $out_fh "};\n"; } -output_invlist("Latin1", [ 0, 256 ]); -output_invlist("AboveLatin1", [ 256 ]); +output_invlist("Latin1", [ 0, 256 ], 0); +output_invlist("AboveLatin1", [ 256 ], 0); # We construct lists for all the POSIX and backslash sequence character # classes in two forms: @@ -122,9 +125,13 @@ for my $prop (qw( L1PosixWord PosixXDigit XPosixXDigit + RO__Perl_QuoteMeta ) ) { + my $property = $prop; + my $read_only = $property =~ s/^RO_//; + # For the Latin1 properties, we change to use the eXtended version of the # base property, then go through the result and get rid of everything not # in Latin1 (above 255). Actually, we retain the element that crosses the @@ -133,11 +140,11 @@ for my $prop (qw( # artifically cutting that off at 256 because 256 is the first code point # above Latin1, we let the range go to its natural ending. That gives us # extra information with no added space taken. - my $lookup_prop = $prop; + my $lookup_prop = $property; $lookup_prop =~ s/^L1Posix/XPosix/ or $lookup_prop =~ s/^L1//; my @invlist = prop_invlist($lookup_prop); - if ($lookup_prop ne $prop) { + if ($lookup_prop ne $property) { for my $i (0 .. @invlist - 1 - 1) { if ($invlist[$i] > 255) { splice @invlist, $i+1; @@ -146,7 +153,7 @@ for my $prop (qw( } } - output_invlist($prop, \@invlist); + output_invlist($property, \@invlist, $read_only); } read_only_bottom_close_and_rename($out_fh) @@ -2029,18 +2029,6 @@ Perl_is_utf8_X_LV_LVT_V(pTHX_ const U8 *p) return is_utf8_common(p, &PL_utf8_X_LV_LVT_V, "_X_LV_LVT_V"); } -bool -Perl__is_utf8_quotemeta(pTHX_ const U8 *p) -{ - /* For exclusive use of pp_quotemeta() */ - - dVAR; - - PERL_ARGS_ASSERT__IS_UTF8_QUOTEMETA; - - return is_utf8_common(p, &PL_utf8_quotemeta, "_Perl_Quotemeta"); -} - /* =for apidoc to_utf8_case |