summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>2017-09-12 16:28:42 +0000
committerph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>2017-09-12 16:28:42 +0000
commitf924d0df40a16b940cc181b6fd7598fa21309d4a (patch)
tree80102385042074089b05fdda89c1101abad9b851
parent3368f46ed7aa7d5667fb4d3ec6d93e6a46949e11 (diff)
downloadpcre2-f924d0df40a16b940cc181b6fd7598fa21309d4a.tar.gz
Replace multiple copies of extended grapheme sequence code with a single
subroutine. git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@858 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r--CMakeLists.txt1
-rw-r--r--ChangeLog6
-rw-r--r--Makefile.am1
-rw-r--r--NON-AUTOTOOLS-BUILD4
-rwxr-xr-xPrepareRelease2
-rw-r--r--README3
-rw-r--r--src/pcre2_dfa_match.c213
-rw-r--r--src/pcre2_extuni.c129
-rw-r--r--src/pcre2_internal.h3
-rw-r--r--src/pcre2_match.c204
10 files changed, 163 insertions, 403 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fbc37fe..7303dcc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -432,6 +432,7 @@ SET(PCRE2_SOURCES
src/pcre2_convert.c
src/pcre2_dfa_match.c
src/pcre2_error.c
+ src/pcre2_extuni.c
src/pcre2_find_bracket.c
src/pcre2_jit_compile.c
src/pcre2_maketables.c
diff --git a/ChangeLog b/ChangeLog
index 2a49eef..1384320 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -5,7 +5,11 @@ Change Log for PCRE2
Version 10.31 xx-xxx-201x
-------------------------
-1. Fix typo (missing ]) in VMS code in pcre2test.c.
+1. Fix typo (missing ]) in VMS code in pcre2test.c.
+
+2. Replace the replicated code for matching extended Unicode grapheme sequences
+(which got a lot more complicated by change 10.30/49) by a single subroutine
+that is called by both pcre2_match() and pcre2_dfa_match().
Version 10.30 14-August-2017
diff --git a/Makefile.am b/Makefile.am
index 7fa98c5..7dbe569 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -351,6 +351,7 @@ COMMON_SOURCES = \
src/pcre2_convert.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
+ src/pcre2_extuni.c \
src/pcre2_find_bracket.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
diff --git a/NON-AUTOTOOLS-BUILD b/NON-AUTOTOOLS-BUILD
index f8c6359..2898948 100644
--- a/NON-AUTOTOOLS-BUILD
+++ b/NON-AUTOTOOLS-BUILD
@@ -91,8 +91,10 @@ can skip ahead to the CMake section.
pcre2_compile.c
pcre2_config.c
pcre2_context.c
+ pcre2_convert.c
pcre2_dfa_match.c
pcre2_error.c
+ pcre2_extuni.c
pcre2_find_bracket.c
pcre2_jit_compile.c
pcre2_maketables.c
@@ -377,4 +379,4 @@ and executable, is in EBCDIC and native z/OS file formats and this is the
recommended download site.
=============================
-Last Updated: 17 March 2017
+Last Updated: 12 September 2017
diff --git a/PrepareRelease b/PrepareRelease
index 0cd4c96..9aa6b7d 100755
--- a/PrepareRelease
+++ b/PrepareRelease
@@ -196,8 +196,10 @@ files="\
src/pcre2_compile.c \
src/pcre2_config.c \
src/pcre2_context.c \
+ src/pcre2_convert.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
+ src/pcre2_extuni.c \
src/pcre2_find_bracket.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
diff --git a/README b/README
index bed0513..2e376b0 100644
--- a/README
+++ b/README
@@ -773,6 +773,7 @@ The distribution should contain the files listed below.
src/pcre2_convert.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
+ src/pcre2_extuni.c )
src/pcre2_find_bracket.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
@@ -882,4 +883,4 @@ The distribution should contain the files listed below.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 18 July 2017
+Last updated: 12 September 2017
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 5ae1394..b78ad07 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -1364,63 +1364,14 @@ for (;;)
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)
{
- uint32_t lgb, rgb;
- PCRE2_SPTR nptr = ptr + clen;
int ncount = 0;
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
- dlen = 1;
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
- rgb = UCD_GRAPHBREAK(d);
- if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(d, bptr);
- }
- else
-#endif
- d = *bptr;
- if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- ncount++;
- nptr += dlen;
- }
+ (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
+ &ncount);
count++;
ADD_NEW_DATA(-state_offset, count, ncount);
}
@@ -1663,8 +1614,6 @@ for (;;)
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
- uint32_t lgb, rgb;
- PCRE2_SPTR nptr = ptr + clen;
int ncount = 0;
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
@@ -1672,55 +1621,8 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
- dlen = 1;
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
- rgb = UCD_GRAPHBREAK(d);
- if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(d, bptr);
- }
- else
-#endif
- d = *bptr;
- if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- ncount++;
- nptr += dlen;
- }
+ (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
+ &ncount);
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
}
break;
@@ -1973,63 +1875,15 @@ for (;;)
count = current_state->count; /* Number already matched */
if (clen > 0)
{
- uint32_t lgb, rgb;
- PCRE2_SPTR nptr = ptr + clen;
+ PCRE2_SPTR nptr;
int ncount = 0;
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
- dlen = 1;
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
- rgb = UCD_GRAPHBREAK(d);
- if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(d, bptr);
- }
- else
-#endif
- d = *bptr;
- if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- ncount++;
- nptr += dlen;
- }
+ nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
+ &ncount);
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
if (++count >= (int)GET2(code, 1))
@@ -2206,58 +2060,9 @@ for (;;)
case OP_EXTUNI:
if (clen > 0)
{
- uint32_t lgb, rgb;
- PCRE2_SPTR nptr = ptr + clen;
int ncount = 0;
- lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
- dlen = 1;
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
- rgb = UCD_GRAPHBREAK(d);
- if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(d, bptr);
- }
- else
-#endif
- d = *bptr;
- if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- ncount++;
- nptr += dlen;
- }
+ PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject,
+ end_subject, utf, &ncount);
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
diff --git a/src/pcre2_extuni.c b/src/pcre2_extuni.c
new file mode 100644
index 0000000..ed56812
--- /dev/null
+++ b/src/pcre2_extuni.c
@@ -0,0 +1,129 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Original API code Copyright (c) 1997-2012 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains an internal function that is used to match a Unicode
+extended grapheme sequence. It is used by both pcre2_match() and
+pcre2_dfa_match(). */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "pcre2_internal.h"
+
+/*************************************************
+* Match an extended grapheme sequence *
+*************************************************/
+
+/* Advance past the rest of an extended grapheme sequence whose first
+character, c, has already been read by the caller. Arguments:
+ c the first character of the sequence
+ eptr pointer to the character that follows c
+ start_subject pointer to start of subject (limit of the backward RI scan)
+ end_subject pointer to end of subject (limit of the forward scan)
+ utf TRUE if in UTF mode
+ xcount pointer to a count that is incremented once per additional
+ character (it is not reset here), or NULL if count not needed
+
+Returns: pointer after the end of the sequence (eptr itself if none added)
+*/
+
+PCRE2_SPTR
+PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
+ PCRE2_SPTR end_subject, BOOL utf, int *xcount)
+{
+int lgb = UCD_GRAPHBREAK(c);
+
+while (eptr < end_subject)
+ {
+ int rgb;
+ int len = 1;
+ if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+ rgb = UCD_GRAPHBREAK(c);
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
+
+ /* Two adjacent Regional Indicators stay together only when an even number
+ of RIs precedes the left-hand one; count them by scanning backwards. */
+
+ if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
+ {
+ int ricount = 0;
+ PCRE2_SPTR bptr = eptr - 1;
+#ifdef SUPPORT_UNICODE
+ if (utf) BACKCHAR(bptr);
+#endif
+
+ /* bptr now points to the left-hand character, the one just before eptr */
+
+ while (bptr > start_subject)
+ {
+ bptr--;
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ BACKCHAR(bptr);
+ GETCHAR(c, bptr);
+ }
+ else
+#endif
+ c = *bptr;
+ if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
+ ricount++;
+ }
+ if ((ricount & 1) != 0) break; /* Odd count: grapheme break required */
+ }
+
+ /* Keep lgb unchanged when Extend follows E_Base or E_Base_GAZ, so that any
+ number of Extend characters may come before a following E_Modifier. */
+
+ if (rgb != ucp_gbExtend ||
+ (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ lgb = rgb;
+
+ eptr += len;
+ if (xcount != NULL) *xcount += 1;
+ }
+
+return eptr;
+}
+
+/* End of pcre2_extuni.c */
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index 9ccce25..4886bf1 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -1926,6 +1926,7 @@ is available. */
#define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_)
#define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_)
+#define _pcre2_extuni PCRE2_SUFFIX(_pcre2_extuni_)
#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_)
#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_)
#define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
@@ -1949,6 +1950,8 @@ extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
const compile_block *);
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
int *, uint32_t, BOOL, compile_block *);
+extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
+ BOOL, int *);
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
uint32_t *, BOOL);
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 050b7e9..70bf936 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -2440,55 +2440,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
else
{
- int lgb, rgb;
GETCHARINCTEST(fc, Feptr);
- lgb = UCD_GRAPHBREAK(fc);
- while (Feptr < mb->end_subject)
- {
- int len = 1;
- if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
- rgb = UCD_GRAPHBREAK(fc);
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if there
- are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(fc, bptr);
- }
- else
-#endif
- fc = *bptr;
- if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- Feptr += len;
- }
+ Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
+ NULL);
}
CHECK_PARTIAL();
Fecode++;
@@ -2785,61 +2739,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
else
{
- int lgb, rgb;
GETCHARINCTEST(fc, Feptr);
- lgb = UCD_GRAPHBREAK(fc);
- while (Feptr < mb->end_subject)
- {
- int len = 1;
- if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
- rgb = UCD_GRAPHBREAK(fc);
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(fc, bptr);
- }
- else
-#endif
- fc = *bptr;
- if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- Feptr += len;
- }
+ Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
+ mb->end_subject, utf, NULL);
}
CHECK_PARTIAL();
}
}
-
else
#endif /* SUPPORT_UNICODE */
@@ -3593,56 +3499,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
else
{
- int lgb, rgb;
GETCHARINCTEST(fc, Feptr);
- lgb = UCD_GRAPHBREAK(fc);
- while (Feptr < mb->end_subject)
- {
- int len = 1;
- if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
- rgb = UCD_GRAPHBREAK(fc);
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(fc, bptr);
- }
- else
-#endif
- fc = *bptr;
- if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- Feptr += len;
- }
+ Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
+ utf, NULL);
}
CHECK_PARTIAL();
}
@@ -4167,56 +4026,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
else
{
- int lgb, rgb;
GETCHARINCTEST(fc, Feptr);
- lgb = UCD_GRAPHBREAK(fc);
- while (Feptr < mb->end_subject)
- {
- int len = 1;
- if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
- rgb = UCD_GRAPHBREAK(fc);
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(fc, bptr);
- }
- else
-#endif
- fc = *bptr;
- if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- Feptr += len;
- }
+ Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
+ utf, NULL);
}
CHECK_PARTIAL();
}