summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jim Meyering <jim@meyering.net> 1993-10-05 18:21:52 +0000
committer: Jim Meyering <jim@meyering.net> 1993-10-05 18:21:52 +0000
commit: 4b2db36e1453ce2aac552eb4fbf49e284565ca2f (patch)
tree: f0bc33d934dd916f897c309dbeef5c2b0b3b1f2e
parent: 0b7fe2dc03f9a8e5330f4c96f5d6279f61f3fb1d (diff)
download: gnulib-4b2db36e1453ce2aac552eb4fbf49e284565ca2f.tar.gz
GNU text utilities (tag: TEXTUTILS-1_8a)
-rw-r--r--  lib/memchr.c   33
-rw-r--r--  lib/regex.c   107
-rw-r--r--  lib/regex.h     2
3 files changed, 106 insertions, 36 deletions
diff --git a/lib/memchr.c b/lib/memchr.c
index 12c4419c05..f69c301c4e 100644
--- a/lib/memchr.c
+++ b/lib/memchr.c
@@ -57,15 +57,17 @@ memchr (s, c, n)
The 1-bits make sure that carries propagate to the next 0-bit.
The 0-bits provide holes for carries to fall into. */
magic_bits = 0x7efefeff;
- if (sizeof (longword) > 4)
- /* 64-bit version of the magic. */
- magic_bits = (0x7efefefe << 32) | 0xfefefeff;
+#ifdef LONG_64_BITS
+ /* 64-bit version of the magic. */
+ magic_bits = (0x7efefefe << 32) | 0xfefefeff;
+#endif /* LONG_64_BITS */
/* Set up a longword, each of whose bytes is C. */
charmask = c | (c << 8);
charmask |= charmask << 16;
- if (sizeof (longword) > 4)
- charmask |= charmask << 32;
+#ifdef LONG_64_BITS
+ charmask |= charmask << 32;
+#endif /* LONG_64_BITS */
/* Instead of the traditional loop which tests each character,
we will test a longword at a time. The tricky part is testing
@@ -132,17 +134,16 @@ memchr (s, c, n)
return (char *) &cp[2];
if (cp[3] == c)
return (char *) &cp[3];
- if (sizeof (longword) > 4)
- {
- if (cp[4] == c)
- return (char *) &cp[4];
- if (cp[5] == c)
- return (char *) &cp[5];
- if (cp[6] == c)
- return (char *) &cp[6];
- if (cp[7] == c)
- return (char *) &cp[7];
- }
+#ifdef LONG_64_BITS
+ if (cp[4] == c)
+ return (char *) &cp[4];
+ if (cp[5] == c)
+ return (char *) &cp[5];
+ if (cp[6] == c)
+ return (char *) &cp[6];
+ if (cp[7] == c)
+ return (char *) &cp[7];
+#endif /* LONG_64_BITS */
}
n -= sizeof (longword);
diff --git a/lib/regex.c b/lib/regex.c
index bf3e96855d..15ca91bc54 100644
--- a/lib/regex.c
+++ b/lib/regex.c
@@ -46,6 +46,14 @@
#else /* not emacs */
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+
/* We used to test for `BSTRING' here, but only GCC and Emacs define
`BSTRING', as far as I know, and neither of them use this code. */
#if HAVE_STRING_H || STDC_HEADERS
@@ -63,14 +71,6 @@
#include <strings.h>
#endif
-#ifdef STDC_HEADERS
-#include <stdlib.h>
-#else
-char *malloc ();
-char *realloc ();
-#endif
-
-
/* Define the syntax stuff for \<, \>, etc. */
/* This must be nonzero for the wordchar and notwordchar pattern
@@ -881,13 +881,22 @@ static const char *re_error_msg[] =
using the relocating allocator routines, then malloc could cause a
relocation, which might (if the strings being searched are in the
ralloc heap) shift the data out from underneath the regexp
- routines. */
+ routines.
+
+ Here's another reason to avoid allocation: Emacs insists on
+ processing input from X in a signal handler; processing X input may
+ call malloc; if input arrives while a matching routine is calling
+ malloc, then we're scrod. But Emacs can't just block input while
+ calling matching routines; then we don't notice interrupts when
+ they come in. So, Emacs blocks input around all regexp calls
+ except the matching calls, which it leaves unprotected, in the
+ faith that they will not malloc. */
/* Normally, this is fine. */
#define MATCH_MAY_ALLOCATE
/* But under some circumstances, it's not. */
-#if defined (REL_ALLOC) && defined (C_ALLOCA)
+#if defined (emacs) || (defined (REL_ALLOC) && defined (C_ALLOCA))
#undef MATCH_MAY_ALLOCATE
#endif
@@ -4130,11 +4139,27 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
detect that here, the alternative has put on a dummy
failure point which is what we will end up popping. */
- /* Skip over open/close-group commands. */
- while (p2 + 2 < pend
- && ((re_opcode_t) *p2 == stop_memory
- || (re_opcode_t) *p2 == start_memory))
- p2 += 3; /* Skip over args, too. */
+ /* Skip over open/close-group commands.
+ If what follows this loop is a ...+ construct,
+ look at what begins its body, since we will have to
+ match at least one of that. */
+ while (1)
+ {
+ if (p2 + 2 < pend
+ && ((re_opcode_t) *p2 == stop_memory
+ || (re_opcode_t) *p2 == start_memory))
+ p2 += 3;
+ else if (p2 + 6 < pend
+ && (re_opcode_t) *p2 == dummy_failure_jump)
+ p2 += 6;
+ else
+ break;
+ }
+
+ p1 = p + mcnt;
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_finalize_jump' of this case. Examine what
+ follows. */
/* If we're at the end of the pattern, we can change. */
if (p2 == pend)
@@ -4152,11 +4177,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
{
register unsigned char c
= *p2 == (unsigned char) endline ? '\n' : p2[2];
- p1 = p + mcnt;
- /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
- to the `maybe_finalize_jump' of this case. Examine what
- follows. */
if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
{
p[-3] = (unsigned char) pop_failure_jump;
@@ -4182,6 +4203,54 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
}
}
}
+ else if ((re_opcode_t) *p2 == charset)
+ {
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+
+ if ((re_opcode_t) p1[3] == exactn
+ && ! (p2[1] * BYTEWIDTH > p1[4]
+ && (p2[1 + p1[4] / BYTEWIDTH]
+ & (1 << (p1[4] % BYTEWIDTH)))))
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset_not)
+ {
+ int idx;
+ /* We win if the charset_not inside the loop
+ lists every character listed in the charset after. */
+ for (idx = 0; idx < p2[1]; idx++)
+ if (! (p2[2 + idx] == 0
+ || (idx < p1[4]
+ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
+ break;
+
+ if (idx == p2[1])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ else if ((re_opcode_t) p1[3] == charset)
+ {
+ int idx;
+ /* We win if the charset inside the loop
+ has no overlap with the one after the loop. */
+ for (idx = 0; idx < p2[1] && idx < p1[4]; idx++)
+ if ((p2[2 + idx] & p1[5 + idx]) != 0)
+ break;
+
+ if (idx == p2[1] || idx == p1[4])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
}
p -= 2; /* Point at relative address again. */
if ((re_opcode_t) p[-1] != pop_failure_jump)
diff --git a/lib/regex.h b/lib/regex.h
index 408dd21034..a495005ce9 100644
--- a/lib/regex.h
+++ b/lib/regex.h
@@ -1,7 +1,7 @@
/* Definitions for data structures and routines for the regular
expression library, version 0.12.
- Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+ Copyright (C) 1985, 89, 90, 91, 92, 1993 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by