summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc7
-rw-r--r--embed.h7
-rw-r--r--embedvar.h7
-rw-r--r--intrpvar.h7
-rw-r--r--proto.h42
-rw-r--r--regen/unicode_constants.pl3
-rw-r--r--regexec.c78
-rw-r--r--sv.c8
-rw-r--r--unicode_constants.h3
-rw-r--r--utf8.c100
10 files changed, 27 insertions, 235 deletions
diff --git a/embed.fnc b/embed.fnc
index ab2cdec4b3..756f7c14c7 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -661,16 +661,9 @@ ApR |bool |is_utf8_punct |NN const U8 *p
ApR |bool |is_utf8_xdigit |NN const U8 *p
ApR |bool |is_utf8_mark |NN const U8 *p
EXpR |bool |is_utf8_X_extend |NN const U8 *p
-EXpR |bool |is_utf8_X_prepend |NN const U8 *p
EXpR |bool |is_utf8_X_regular_begin|NN const U8 *p
-EXpR |bool |is_utf8_X_special_begin|NN const U8 *p
-EXpR |bool |is_utf8_X_L |NN const U8 *p
-EXpR |bool |is_utf8_X_RI |NN const U8 *p
:not currently used EXpR |bool |is_utf8_X_LV |NN const U8 *p
EXpR |bool |is_utf8_X_LVT |NN const U8 *p
-EXpR |bool |is_utf8_X_LV_LVT_V |NN const U8 *p
-EXpR |bool |is_utf8_X_T |NN const U8 *p
-EXpR |bool |is_utf8_X_V |NN const U8 *p
: Used in perly.y
p |OP* |jmaybe |NN OP *o
: Used in pp.c
diff --git a/embed.h b/embed.h
index 45291f0983..33d732d54d 100644
--- a/embed.h
+++ b/embed.h
@@ -855,16 +855,9 @@
#define _is_utf8__perl_idstart(a) Perl__is_utf8__perl_idstart(aTHX_ a)
#define av_reify(a) Perl_av_reify(aTHX_ a)
#define current_re_engine() Perl_current_re_engine(aTHX)
-#define is_utf8_X_L(a) Perl_is_utf8_X_L(aTHX_ a)
#define is_utf8_X_LVT(a) Perl_is_utf8_X_LVT(aTHX_ a)
-#define is_utf8_X_LV_LVT_V(a) Perl_is_utf8_X_LV_LVT_V(aTHX_ a)
-#define is_utf8_X_RI(a) Perl_is_utf8_X_RI(aTHX_ a)
-#define is_utf8_X_T(a) Perl_is_utf8_X_T(aTHX_ a)
-#define is_utf8_X_V(a) Perl_is_utf8_X_V(aTHX_ a)
#define is_utf8_X_extend(a) Perl_is_utf8_X_extend(aTHX_ a)
-#define is_utf8_X_prepend(a) Perl_is_utf8_X_prepend(aTHX_ a)
#define is_utf8_X_regular_begin(a) Perl_is_utf8_X_regular_begin(aTHX_ a)
-#define is_utf8_X_special_begin(a) Perl_is_utf8_X_special_begin(aTHX_ a)
#define op_clear(a) Perl_op_clear(aTHX_ a)
#define qerror(a) Perl_qerror(aTHX_ a)
#define reg_named_buff(a,b,c,d) Perl_reg_named_buff(aTHX_ a,b,c,d)
diff --git a/embedvar.h b/embedvar.h
index 877e81161f..d3eeaf03a0 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -353,16 +353,9 @@
#define PL_unitcheckav_save (vTHX->Iunitcheckav_save)
#define PL_unlockhook (vTHX->Iunlockhook)
#define PL_unsafe (vTHX->Iunsafe)
-#define PL_utf8_X_L (vTHX->Iutf8_X_L)
#define PL_utf8_X_LVT (vTHX->Iutf8_X_LVT)
-#define PL_utf8_X_LV_LVT_V (vTHX->Iutf8_X_LV_LVT_V)
-#define PL_utf8_X_RI (vTHX->Iutf8_X_RI)
-#define PL_utf8_X_T (vTHX->Iutf8_X_T)
-#define PL_utf8_X_V (vTHX->Iutf8_X_V)
#define PL_utf8_X_extend (vTHX->Iutf8_X_extend)
-#define PL_utf8_X_prepend (vTHX->Iutf8_X_prepend)
#define PL_utf8_X_regular_begin (vTHX->Iutf8_X_regular_begin)
-#define PL_utf8_X_special_begin (vTHX->Iutf8_X_special_begin)
#define PL_utf8_alnum (vTHX->Iutf8_alnum)
#define PL_utf8_alpha (vTHX->Iutf8_alpha)
#define PL_utf8_blank (vTHX->Iutf8_blank)
diff --git a/intrpvar.h b/intrpvar.h
index 94b7425c10..641cac6268 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -628,14 +628,7 @@ PERLVAR(I, utf8_xdigit, SV *)
PERLVAR(I, utf8_mark, SV *)
PERLVAR(I, utf8_X_regular_begin, SV *)
PERLVAR(I, utf8_X_extend, SV *)
-PERLVAR(I, utf8_X_prepend, SV *)
-PERLVAR(I, utf8_X_special_begin, SV *)
-PERLVAR(I, utf8_X_L, SV *)
PERLVAR(I, utf8_X_LVT, SV *)
-PERLVAR(I, utf8_X_RI, SV *)
-PERLVAR(I, utf8_X_T, SV *)
-PERLVAR(I, utf8_X_V, SV *)
-PERLVAR(I, utf8_X_LV_LVT_V, SV *)
PERLVAR(I, utf8_toupper, SV *)
PERLVAR(I, utf8_totitle, SV *)
PERLVAR(I, utf8_tolower, SV *)
diff --git a/proto.h b/proto.h
index f97fe1fbae..187b0ae5aa 100644
--- a/proto.h
+++ b/proto.h
@@ -1764,66 +1764,24 @@ PERL_CALLCONV bool Perl_is_uni_xdigit_lc(pTHX_ UV c)
__attribute__warn_unused_result__
__attribute__pure__;
-PERL_CALLCONV bool Perl_is_utf8_X_L(pTHX_ const U8 *p)
- __attribute__warn_unused_result__
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_L \
- assert(p)
-
PERL_CALLCONV bool Perl_is_utf8_X_LVT(pTHX_ const U8 *p)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_IS_UTF8_X_LVT \
assert(p)
-PERL_CALLCONV bool Perl_is_utf8_X_LV_LVT_V(pTHX_ const U8 *p)
- __attribute__warn_unused_result__
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_LV_LVT_V \
- assert(p)
-
-PERL_CALLCONV bool Perl_is_utf8_X_RI(pTHX_ const U8 *p)
- __attribute__warn_unused_result__
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_RI \
- assert(p)
-
-PERL_CALLCONV bool Perl_is_utf8_X_T(pTHX_ const U8 *p)
- __attribute__warn_unused_result__
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_T \
- assert(p)
-
-PERL_CALLCONV bool Perl_is_utf8_X_V(pTHX_ const U8 *p)
- __attribute__warn_unused_result__
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_V \
- assert(p)
-
PERL_CALLCONV bool Perl_is_utf8_X_extend(pTHX_ const U8 *p)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_IS_UTF8_X_EXTEND \
assert(p)
-PERL_CALLCONV bool Perl_is_utf8_X_prepend(pTHX_ const U8 *p)
- __attribute__warn_unused_result__
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_PREPEND \
- assert(p)
-
PERL_CALLCONV bool Perl_is_utf8_X_regular_begin(pTHX_ const U8 *p)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_IS_UTF8_X_REGULAR_BEGIN \
assert(p)
-PERL_CALLCONV bool Perl_is_utf8_X_special_begin(pTHX_ const U8 *p)
- __attribute__warn_unused_result__
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_SPECIAL_BEGIN \
- assert(p)
-
PERL_CALLCONV bool Perl_is_utf8_alnum(pTHX_ const U8 *p)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index 73ec4ae0bf..56e53491b1 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -127,9 +127,6 @@ __DATA__
03C5 first
03C5 tail
-1100
-1160
-11A8
2010 string
007F native
diff --git a/regexec.c b/regexec.c
index f51d50d612..4e9b80cc54 100644
--- a/regexec.c
+++ b/regexec.c
@@ -145,14 +145,7 @@
/* No asserts are done for some of these, in case called on a */ \
/* Unicode version in which they map to nothing */ \
LOAD_UTF8_CHARCLASS(X_regular_begin, HYPHEN_UTF8); \
- LOAD_UTF8_CHARCLASS_NO_CHECK(X_special_begin); \
LOAD_UTF8_CHARCLASS(X_extend, COMBINING_GRAVE_ACCENT_UTF8); \
- LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* empty in most releases*/ \
- LOAD_UTF8_CHARCLASS(X_L, HANGUL_CHOSEONG_KIYEOK_UTF8); \
- LOAD_UTF8_CHARCLASS(X_LV_LVT_V, HANGUL_JUNGSEONG_FILLER_UTF8); \
- LOAD_UTF8_CHARCLASS_NO_CHECK(X_RI); /* empty in many releases */ \
- LOAD_UTF8_CHARCLASS(X_T, HANGUL_JONGSEONG_KIYEOK_UTF8); \
- LOAD_UTF8_CHARCLASS(X_V, HANGUL_JUNGSEONG_FILLER_UTF8)
#define PLACEHOLDER /* Something for the preprocessor to grab onto */
@@ -4058,6 +4051,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
locinput += 2;
}
else {
+ STRLEN len;
+
/* In case have to backtrack to beginning, then match '.' */
char *starting = locinput;
@@ -4066,16 +4061,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
LOAD_UTF8_CHARCLASS_GCB();
- /* Match (prepend)*, but don't bother trying if empty (as
- * being set to _undef indicates) */
- if (PL_utf8_X_prepend != &PL_sv_undef) {
- while (locinput < PL_regeol
- && swash_fetch(PL_utf8_X_prepend,
- (U8*)locinput, utf8_target))
- {
- previous_prepend = locinput;
- locinput += UTF8SKIP(locinput);
- }
+ /* Match (prepend)* */
+ while (locinput < PL_regeol
+ && (len = is_GCB_Prepend_utf8(locinput)))
+ {
+ previous_prepend = locinput;
+ locinput += len;
}
/* As noted above, if we matched a prepend character, but
@@ -4085,8 +4076,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
&& (locinput >= PL_regeol
|| (! swash_fetch(PL_utf8_X_regular_begin,
(U8*)locinput, utf8_target)
- && ! swash_fetch(PL_utf8_X_special_begin,
- (U8*)locinput, utf8_target)))
+ && ! is_GCB_SPECIAL_BEGIN_utf8(locinput)))
)
{
locinput = previous_prepend;
@@ -4101,9 +4091,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
(U8*)locinput, utf8_target)) {
locinput += UTF8SKIP(locinput);
}
- else if (! swash_fetch(PL_utf8_X_special_begin,
- (U8*)locinput, utf8_target))
- {
+ else if (! is_GCB_SPECIAL_BEGIN_utf8(locinput)) {
/* Here did not match the required 'Begin' in the
* second term. So just match the very first
@@ -4115,26 +4103,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
/* Here is a special begin. It can be composed of
* several individual characters. One possibility is
* RI+ */
- if (swash_fetch(PL_utf8_X_RI,
- (U8*)locinput, utf8_target))
- {
- locinput += UTF8SKIP(locinput);
+ if ((len = is_GCB_RI_utf8(locinput))) {
+ locinput += len;
while (locinput < PL_regeol
- && swash_fetch(PL_utf8_X_RI,
- (U8*)locinput, utf8_target))
+ && (len = is_GCB_RI_utf8(locinput)))
{
- locinput += UTF8SKIP(locinput);
+ locinput += len;
}
- } else /* Another possibility is T+ */
- if (swash_fetch(PL_utf8_X_T,
- (U8*)locinput, utf8_target))
- {
- locinput += UTF8SKIP(locinput);
+ } else if ((len = is_GCB_T_utf8(locinput))) {
+ /* Another possibility is T+ */
+ locinput += len;
while (locinput < PL_regeol
- && swash_fetch(PL_utf8_X_T,
- (U8*)locinput, utf8_target))
+ && (len = is_GCB_T_utf8(locinput)))
{
- locinput += UTF8SKIP(locinput);
+ locinput += len;
}
} else {
@@ -4145,10 +4127,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
/* Match L* */
while (locinput < PL_regeol
- && swash_fetch(PL_utf8_X_L,
- (U8*)locinput, utf8_target))
+ && (len = is_GCB_L_utf8(locinput)))
{
- locinput += UTF8SKIP(locinput);
+ locinput += len;
}
/* Here, have exhausted L*. If the next character
@@ -4158,8 +4139,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
* Are done. */
if (locinput < PL_regeol
- && swash_fetch(PL_utf8_X_LV_LVT_V,
- (U8*)locinput, utf8_target))
+ && is_GCB_LV_LVT_V_utf8(locinput))
{
/* Otherwise keep going. Must be LV, LVT or V.
@@ -4172,22 +4152,18 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
* V* */
locinput += UTF8SKIP(locinput);
while (locinput < PL_regeol
- && swash_fetch(PL_utf8_X_V,
- (U8*)locinput,
- utf8_target))
+ && (len = is_GCB_V_utf8(locinput)))
{
- locinput += UTF8SKIP(locinput);
+ locinput += len;
}
}
/* And any of LV, LVT, or V can be followed
- * by T* */
+ * by T* */
while (locinput < PL_regeol
- && swash_fetch(PL_utf8_X_T,
- (U8*)locinput,
- utf8_target))
+ && (len = is_GCB_T_utf8(locinput)))
{
- locinput += UTF8SKIP(locinput);
+ locinput += len;
}
}
}
diff --git a/sv.c b/sv.c
index 497417c53a..a757ad2b0e 100644
--- a/sv.c
+++ b/sv.c
@@ -13376,15 +13376,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param);
PL_utf8_X_regular_begin = sv_dup_inc(proto_perl->Iutf8_X_regular_begin, param);
PL_utf8_X_extend = sv_dup_inc(proto_perl->Iutf8_X_extend, param);
- PL_utf8_X_prepend = sv_dup_inc(proto_perl->Iutf8_X_prepend, param);
- PL_utf8_X_special_begin = sv_dup_inc(proto_perl->Iutf8_X_special_begin, param);
- PL_utf8_X_L = sv_dup_inc(proto_perl->Iutf8_X_L, param);
- /*not currently used: PL_utf8_X_LV = sv_dup_inc(proto_perl->Iutf8_X_LV, param);*/
PL_utf8_X_LVT = sv_dup_inc(proto_perl->Iutf8_X_LVT, param);
- PL_utf8_X_RI = sv_dup_inc(proto_perl->Iutf8_X_RI, param);
- PL_utf8_X_T = sv_dup_inc(proto_perl->Iutf8_X_T, param);
- PL_utf8_X_V = sv_dup_inc(proto_perl->Iutf8_X_V, param);
- PL_utf8_X_LV_LVT_V = sv_dup_inc(proto_perl->Iutf8_X_LV_LVT_V, param);
PL_utf8_toupper = sv_dup_inc(proto_perl->Iutf8_toupper, param);
PL_utf8_totitle = sv_dup_inc(proto_perl->Iutf8_totitle, param);
PL_utf8_tolower = sv_dup_inc(proto_perl->Iutf8_tolower, param);
diff --git a/unicode_constants.h b/unicode_constants.h
index f915d59b8e..82081d910a 100644
--- a/unicode_constants.h
+++ b/unicode_constants.h
@@ -30,9 +30,6 @@
#define GREEK_SMALL_LETTER_UPSILON_UTF8_FIRST_BYTE 0xCF /* U+03C5 */
#define GREEK_SMALL_LETTER_UPSILON_UTF8_TAIL "\x85" /* U+03C5 */
-#define HANGUL_CHOSEONG_KIYEOK_UTF8 "\xE1\x84\x80" /* U+1100 */
-#define HANGUL_JUNGSEONG_FILLER_UTF8 "\xE1\x85\xA0" /* U+1160 */
-#define HANGUL_JONGSEONG_KIYEOK_UTF8 "\xE1\x86\xA8" /* U+11A8 */
#define HYPHEN_UTF8 "\xE2\x80\x90" /* U+2010 */
#define DELETE_NATIVE 0x007F /* U+007F */
diff --git a/utf8.c b/utf8.c
index 2172d311b4..88ca041eee 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2229,76 +2229,6 @@ Perl_is_utf8_X_extend(pTHX_ const U8 *p)
return is_utf8_common(p, &PL_utf8_X_extend, "_X_Extend");
}
-bool
-Perl_is_utf8_X_prepend(pTHX_ const U8 *p)
-{
- /* If no code points in the Unicode version being worked on match
- * GCB=Prepend, this will set PL_utf8_X_prepend to &PL_sv_undef during its
- * first call. Otherwise, it will set it to a swash created for it.
- * swash_fetch() hence can't be used without checking first if it is valid
- * to do so. */
-
- dVAR;
- bool initialized = cBOOL(PL_utf8_X_prepend);
- bool ret;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_PREPEND;
-
- if (PL_utf8_X_prepend == &PL_sv_undef) {
- return FALSE;
- }
-
- if ((ret = is_utf8_common(p, &PL_utf8_X_prepend, "_X_GCB_Prepend"))
- || initialized)
- {
- return ret;
- }
-
- /* Here the code point being checked was not a prepend, and we hadn't
- * initialized PL_utf8_X_prepend, so we don't know if it is just this
- * particular input code point that didn't match, or if the table is
- * completely empty. The is_utf8_common() call did the initialization, so
- * we can inspect the swash's inversion list to find out. If there are no
- * elements in its inversion list, it's empty, and nothing will ever match,
- * so set things up so we can skip the check in future calls. */
- if (_invlist_len(_get_swash_invlist(PL_utf8_X_prepend)) == 0) {
- SvREFCNT_dec(PL_utf8_X_prepend);
- PL_utf8_X_prepend = &PL_sv_undef;
- }
-
- return FALSE;
-}
-
-bool
-Perl_is_utf8_X_special_begin(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_SPECIAL_BEGIN;
-
- return is_utf8_common(p, &PL_utf8_X_special_begin, "_X_Special_Begin");
-}
-
-bool
-Perl_is_utf8_X_L(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_L;
-
- return is_utf8_common(p, &PL_utf8_X_L, "_X_GCB_L");
-}
-
-bool
-Perl_is_utf8_X_RI(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_RI;
-
- return is_utf8_common(p, &PL_utf8_X_RI, "_X_RI");
-}
-
/* These constants are for finding GCB=LV and GCB=LVT. These are for the
* pre-composed Hangul syllables, which are all in a contiguous block and
* arranged there in such a way so as to facilitate alorithmic determination of
@@ -2367,35 +2297,6 @@ Perl_is_utf8_X_LVT(pTHX_ const U8 *p)
&& (cp - SBASE) % TCount != 0); /* All but every TCount one is LV */
}
-bool
-Perl_is_utf8_X_T(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_T;
-
- return is_utf8_common(p, &PL_utf8_X_T, "_X_GCB_T");
-}
-
-bool
-Perl_is_utf8_X_V(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_V;
-
- return is_utf8_common(p, &PL_utf8_X_V, "_X_GCB_V");
-}
-
-bool
-Perl_is_utf8_X_LV_LVT_V(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_LV_LVT_V;
-
- return is_utf8_common(p, &PL_utf8_X_LV_LVT_V, "_X_LV_LVT_V");
-}
bool
Perl__is_utf8_quotemeta(pTHX_ const U8 *p)
@@ -2408,7 +2309,6 @@ Perl__is_utf8_quotemeta(pTHX_ const U8 *p)
return is_utf8_common(p, &PL_utf8_quotemeta, "_Perl_Quotemeta");
}
-
/*
=for apidoc to_utf8_case