summary | refs | log | tree | commit | diff
path: root/src/regex-emacs.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex-emacs.c')
-rw-r--r-- src/regex-emacs.c 24
1 files changed, 20 insertions, 4 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index c353a78fb4f..5887eaa30c7 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -2794,6 +2794,7 @@ static int
analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
{
int j, k;
+ int nbits;
bool not;
/* If all elements for base leading-codes in fastmap is set, this
@@ -2854,7 +2855,14 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
each byte is a character. Thus, this works in both
cases. */
fastmap[p[1]] = 1;
- if (! multibyte)
+ if (multibyte)
+ {
+ /* Cover the case of matching a raw char in a
+ multibyte regexp against unibyte. */
+ if (CHAR_BYTE8_HEAD_P (p[1]))
+ fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1;
+ }
+ else
{
/* For the case of matching this unibyte regex
against multibyte, we must set a leading code of
@@ -2886,11 +2894,18 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
case charset:
if (!fastmap) break;
not = (re_opcode_t) *(p - 1) == charset_not;
- for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
- j >= 0; j--)
+ nbits = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
+ p++;
+ for (j = 0; j < nbits; j++)
if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
fastmap[j] = 1;
+ /* To match raw bytes (in the 80..ff range) against multibyte
+ strings, add their leading bytes to the fastmap. */
+ for (j = 0x80; j < nbits; j++)
+ if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
+ fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1;
+
if (/* Any leading code can possibly start a character
which doesn't match the specified set of characters. */
not
@@ -4251,8 +4266,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
}
p += pat_charlen;
d++;
+ mcnt -= pat_charlen;
}
- while (--mcnt);
+ while (mcnt > 0);
break;