summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jim Meyering <jim@meyering.net> 1993-01-20 02:49:28 +0000
committer: Jim Meyering <jim@meyering.net> 1993-01-20 02:49:28 +0000
commit: 24b6d68e4b4ccd207fae2492c5018ee208b2e206 (patch)
tree: 8b012abeac1c4127c1ada52136b321ace47c9af5
parent: cd8263cefca26ed1f54316a259a9368baef74542 (diff)
download: gnulib-24b6d68e4b4ccd207fae2492c5018ee208b2e206.tar.gz
GNU text utilities TEXTUTILS-1_4
-rw-r--r-- lib/regex.c 114
-rw-r--r-- lib/regex.h 14
-rw-r--r-- lib/strtol.c 8
3 files changed, 76 insertions, 60 deletions
diff --git a/lib/regex.c b/lib/regex.c
index a5594be55e..eda11b5350 100644
--- a/lib/regex.c
+++ b/lib/regex.c
@@ -3,7 +3,7 @@
(Implements POSIX draft P10003.2/D11.2, except for
internationalization features.)
- Copyright (C) 1985, 89, 90, 91, 92 Free Software Foundation, Inc.
+ Copyright (C) 1993 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -29,7 +29,7 @@
/* We need this for `regex.h', and perhaps for the Emacs include files. */
#include <sys/types.h>
-#if defined (HAVE_CONFIG_H) || defined (emacs)
+#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
@@ -124,15 +124,34 @@ init_syntax_once ()
/* Get the interface, including the syntax bits. */
#include "regex.h"
-
/* isalpha etc. are used for the character classes. */
#include <ctype.h>
-#ifndef isgraph
-#define isgraph(c) (isprint (c) && !isspace (c))
+
+#ifndef isascii
+#define isascii(c) 1
#endif
-#ifndef isblank
-#define isblank(c) ((c) == ' ' || (c) == '\t')
+
+#ifdef isblank
+#define ISBLANK(c) (isascii (c) && isblank (c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
+#ifdef isgraph
+#define ISGRAPH(c) (isascii (c) && isgraph (c))
+#else
+#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+#endif
+
+#define ISPRINT(c) (isascii (c) && isprint (c))
+#define ISDIGIT(c) (isascii (c) && isdigit (c))
+#define ISALNUM(c) (isascii (c) && isalnum (c))
+#define ISALPHA(c) (isascii (c) && isalpha (c))
+#define ISCNTRL(c) (isascii (c) && iscntrl (c))
+#define ISLOWER(c) (isascii (c) && islower (c))
+#define ISPUNCT(c) (isascii (c) && ispunct (c))
+#define ISSPACE(c) (isascii (c) && isspace (c))
+#define ISUPPER(c) (isascii (c) && isupper (c))
+#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
#ifndef NULL
#define NULL 0
@@ -999,7 +1018,7 @@ typedef struct
{ if (p != pend) \
{ \
PATFETCH (c); \
- while (isdigit (c)) \
+ while (ISDIGIT (c)) \
{ \
if (num < 0) \
num = 0; \
@@ -1464,18 +1483,18 @@ regex_compile (pattern, size, syntax, bufp)
for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
{
- if ( (is_alnum && isalnum (ch))
- || (is_alpha && isalpha (ch))
- || (is_blank && isblank (ch))
- || (is_cntrl && iscntrl (ch))
- || (is_digit && isdigit (ch))
- || (is_graph && isgraph (ch))
- || (is_lower && islower (ch))
- || (is_print && isprint (ch))
- || (is_punct && ispunct (ch))
- || (is_space && isspace (ch))
- || (is_upper && isupper (ch))
- || (is_xdigit && isxdigit (ch)))
+ if ( (is_alnum && ISALNUM (ch))
+ || (is_alpha && ISALPHA (ch))
+ || (is_blank && ISBLANK (ch))
+ || (is_cntrl && ISCNTRL (ch))
+ || (is_digit && ISDIGIT (ch))
+ || (is_graph && ISGRAPH (ch))
+ || (is_lower && ISLOWER (ch))
+ || (is_print && ISPRINT (ch))
+ || (is_punct && ISPUNCT (ch))
+ || (is_space && ISSPACE (ch))
+ || (is_upper && ISUPPER (ch))
+ || (is_xdigit && ISXDIGIT (ch)))
SET_LIST_BIT (ch);
}
had_char_class = true;
@@ -2178,18 +2197,20 @@ compile_range (p_ptr, pend, translate, syntax, b)
unsigned this_char;
const char *p = *p_ptr;
+ int range_start, range_end;
- /* Even though the pattern is a signed `char *', we need to fetch into
- `unsigned char's. Reason: if the high bit of the pattern character
- is set, the range endpoints will be negative if we fetch into a
- signed `char *'. */
- unsigned char range_end;
- unsigned char range_start = p[-2];
-
if (p == pend)
return REG_ERANGE;
- PATFETCH (range_end);
+ /* Even though the pattern is a signed `char *', we need to fetch
+ with unsigned char *'s; if the high bit of the pattern character
+ is set, the range endpoints will be negative if we fetch using a
+ signed char *.
+
+ We also want to fetch the endpoints without translating them; the
+ appropriate translation is done in the bit-setting loop below. */
+ range_start = ((unsigned char *) p)[-2];
+ range_end = ((unsigned char *) p)[0];
/* Have to increment the pointer into the pattern string, so the
caller isn't still at the ending character. */
@@ -3970,21 +3991,13 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* If we're at the end of the pattern, we can change. */
if (p2 == pend)
- { /* But if we're also at the end of the string, we might
- as well skip changing anything. For example, in `a+'
- against `a', we'll have already matched the `a', and
- I don't see the the point of changing the opcode,
- popping the failure point, finding out it fails, and
- then going into our endgame. */
- if (d == dend)
- {
- p = pend;
- DEBUG_PRINT1 (" End of pattern & string => done.\n");
- continue;
- }
-
+ {
+ /* Consider what happens when matching ":\(.*\)"
+ against ":/". I don't really understand this code
+ yet. */
p[-3] = (unsigned char) pop_failure_jump;
- DEBUG_PRINT1 (" End of pattern => pop_failure_jump.\n");
+ DEBUG_PRINT1
+ (" End of pattern: change to `pop_failure_jump'.\n");
}
else if ((re_opcode_t) *p2 == exactn
@@ -4740,7 +4753,7 @@ regcomp (preg, pattern, cflags)
/* Map uppercase characters to corresponding lowercase ones. */
for (i = 0; i < CHAR_SET_SIZE; i++)
- preg->translate[i] = isupper (i) ? tolower (i) : i;
+ preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
}
else
preg->translate = NULL;
@@ -4856,9 +4869,24 @@ regerror (errcode, preg, errbuf, errbuf_size)
char *errbuf;
size_t errbuf_size;
{
- const char *msg
- = re_error_msg[errcode] == NULL ? "Success" : re_error_msg[errcode];
- size_t msg_size = strlen (msg) + 1; /* Includes the null. */
+ const char *msg;
+ size_t msg_size;
+
+ if (errcode < 0
+ || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
+ /* Only error codes returned by the rest of the code should be passed
+ to this routine. If we are given anything else, or if other regex
+ code generates an invalid error code, then the program has a bug.
+ Dump core so we can fix it. */
+ abort ();
+
+ /* Look up the message only after ERRCODE has been range-checked. A
+ NULL table entry is reported as "Success", as the old code did. */
+ msg = re_error_msg[errcode];
+ if (msg == NULL)
+ msg = "Success";
+
+ msg_size = strlen (msg) + 1; /* Includes the null. */
if (errbuf_size != 0)
{
diff --git a/lib/regex.h b/lib/regex.h
index e38853eaf6..0840861da3 100644
--- a/lib/regex.h
+++ b/lib/regex.h
@@ -145,7 +145,7 @@ extern reg_syntax_t re_syntax_options;
#define RE_SYNTAX_AWK \
(RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VAR | RE_NO_EMPTY_RANGES \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
| RE_UNMATCHED_RIGHT_PAREN_ORD)
#define RE_SYNTAX_POSIX_AWK \
@@ -387,18 +387,16 @@ typedef struct
prototype (if we are ANSI), and once without (if we aren't) -- we
use the following macro to declare argument types. This
unfortunately clutters up the declarations a bit, but I think it's
- worth it.
-
- We may also have to undo `const' if we are not ANSI -- but if it has
- already been defined, as by Autoconf's AC_CONST, don't do anything. */
+ worth it. */
#if __STDC__
+
#define _RE_ARGS(args) args
+
#else /* not __STDC__ */
+
#define _RE_ARGS(args) ()
-#if !const && !HAVE_CONST
-#define const
-#endif
+
#endif /* not __STDC__ */
/* Sets the current default syntax to SYNTAX, and return the old syntax.
diff --git a/lib/strtol.c b/lib/strtol.c
index d91db4bfe6..a88ec5eeaa 100644
--- a/lib/strtol.c
+++ b/lib/strtol.c
@@ -36,10 +36,6 @@ Cambridge, MA 02139, USA. */
extern int errno;
#endif
-#if !__STDC__ && !defined(const)
-#define const
-#endif
-
#ifndef UNSIGNED
#define UNSIGNED 0
#endif
@@ -156,7 +152,7 @@ strtol (nptr, endptr, base)
/* Check for a value that is within the range of
`unsigned long int', but outside the range of `long int'. */
if (i > (negative ?
- - (unsigned long int) LONG_MIN : (unsigned long int) LONG_MAX))
+ -(unsigned long int) LONG_MIN : (unsigned long int) LONG_MAX))
overflow = 1;
#endif
@@ -171,7 +167,7 @@ strtol (nptr, endptr, base)
}
/* Return the result of the appropriate sign. */
- return (negative ? - i : i);
+ return (negative ? -i : i);
noconv:;
/* There was no number to convert. */