summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2014-11-15 18:35:45 +0200
committer Arnold D. Robbins <arnold@skeeve.com> 2014-11-15 18:35:45 +0200
commit 8b863f8852067b0638e09dc7c82355b96381dc12 (patch)
tree eeedacfa918ae53bd52a69fb2c7a47ce1bfbc3e5
parent 05de499531bc8fece2625b27a728bd24412ab41a (diff)
download gawk-8b863f8852067b0638e09dc7c82355b96381dc12.tar.gz
Remove MBS_SUPPORT ifdefs.
-rw-r--r-- ChangeLog 8
-rw-r--r-- array.c 9
-rw-r--r-- awk.h 18
-rw-r--r-- awkgram.c 30
-rw-r--r-- awkgram.y 30
-rw-r--r-- builtin.c 84
-rw-r--r-- dfa.c 71
-rw-r--r-- eval.c 13
-rw-r--r-- field.c 57
-rw-r--r-- interpret.h 2
-rw-r--r-- io.c 5
-rw-r--r-- main.c 7
-rw-r--r-- mbsupport.h 74
-rw-r--r-- mpfr.c 2
-rw-r--r-- node.c 18
-rw-r--r-- re.c 7
-rw-r--r-- regex_internal.h 8
-rw-r--r-- replace.c 2
18 files changed, 80 insertions, 365 deletions
diff --git a/ChangeLog b/ChangeLog
index 543ddd49..864ea364 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2014-11-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * array.c, awk.h, awkgram.y, builtin.c, dfa.c, eval.c, field.c,
+ interpret.h, io.c, main.c, mpfr.c, node.c, re.c, regex_internal.h,
+ replace.c: Remove all uses of MBS_SUPPORT.
+ * regex_internal.h: Disable wide characters on DJGPP.
+ * mbsupport.h: Rework to be needed only for DJGPP.
+
2014-11-11 Arnold D. Robbins <arnold@skeeve.com>
Don't let memory used increase linearly in the size of
diff --git a/array.c b/array.c
index 682b8ddb..f7993624 100644
--- a/array.c
+++ b/array.c
@@ -978,14 +978,13 @@ cmp_strings(const NODE *n1, const NODE *n2)
const unsigned char *cp1 = (const unsigned char *) s1;
const unsigned char *cp2 = (const unsigned char *) s2;
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
ret = strncasecmpmbs((const unsigned char *) cp1,
(const unsigned char *) cp2, lmin);
- } else
-#endif
- for (ret = 0; lmin-- > 0 && ret == 0; cp1++, cp2++)
- ret = casetable[*cp1] - casetable[*cp2];
+ } else {
+ for (ret = 0; lmin-- > 0 && ret == 0; cp1++, cp2++)
+ ret = casetable[*cp1] - casetable[*cp2];
+ }
if (ret != 0)
return ret;
/*
diff --git a/awk.h b/awk.h
index 8bc393e7..9b72a53c 100644
--- a/awk.h
+++ b/awk.h
@@ -95,13 +95,11 @@ extern int errno;
#include "missing_d/gawkbool.h"
#endif
-#include "mbsupport.h" /* defines MBS_SUPPORT */
-
-#if MBS_SUPPORT
/* We can handle multibyte strings. */
#include <wchar.h>
#include <wctype.h>
-#endif
+
+#include "mbsupport.h" /* defines stuff for DJGPP to fake MBS */
#ifdef STDC_HEADERS
#include <float.h>
@@ -395,10 +393,8 @@ typedef struct exp_node {
size_t slen;
long sref;
int idx;
-#if MBS_SUPPORT
wchar_t *wsp;
size_t wslen;
-#endif
} val;
} sub;
NODETYPE type;
@@ -1104,11 +1100,7 @@ extern int exit_val;
#define do_lint (do_flags & (DO_LINT_INVALID|DO_LINT_ALL))
#define do_lint_old (do_flags & DO_LINT_OLD)
#endif
-#if MBS_SUPPORT
extern int gawk_mb_cur_max;
-#else
-#define gawk_mb_cur_max (1)
-#endif
#if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
extern GETGROUPS_T *groupset;
@@ -1416,10 +1408,8 @@ extern AWKNUM nondec2awknum(char *str, size_t len);
extern NODE *do_dcgettext(int nargs);
extern NODE *do_dcngettext(int nargs);
extern NODE *do_bindtextdomain(int nargs);
-#if MBS_SUPPORT
extern int strncasecmpmbs(const unsigned char *,
const unsigned char *, size_t);
-#endif
/* eval.c */
extern void PUSH_CODE(INSTRUCTION *cp);
extern INSTRUCTION *POP_CODE(void);
@@ -1602,7 +1592,6 @@ extern NODE *r_dupnode(NODE *n);
extern NODE *make_str_node(const char *s, size_t len, int flags);
extern void *more_blocks(int id);
extern int parse_escape(const char **string_ptr);
-#if MBS_SUPPORT
extern NODE *str2wstr(NODE *n, size_t **ptr);
extern NODE *wstr2str(NODE *n);
#define force_wstring(n) str2wstr(n, NULL)
@@ -1616,9 +1605,6 @@ extern wint_t btowc_cache[];
#define btowc_cache(x) btowc_cache[(x)&0xFF]
extern void init_btowc_cache();
#define is_valid_character(b) (btowc_cache[(b)&0xFF] != WEOF)
-#else
-#define free_wstr(NODE) /* empty */
-#endif
/* re.c */
extern Regexp *make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal);
extern int research(Regexp *rp, char *str, int start, size_t len, int flags);
diff --git a/awkgram.c b/awkgram.c
index 63439b36..431954d9 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -4255,7 +4255,6 @@ static const struct token tokentab[] = {
{"xor", Op_builtin, LEX_BUILTIN, GAWKX, do_xor, MPF(xor)},
};
-#if MBS_SUPPORT
/* Variable containing the current shift state. */
static mbstate_t cur_mbstate;
/* Ring buffer containing current characters. */
@@ -4267,10 +4266,6 @@ static int cur_ring_idx;
/* This macro means that last nextc() return a singlebyte character
or 1st byte of a multibyte character. */
#define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
-#else /* MBS_SUPPORT */
-/* a dummy */
-#define nextc_is_1stbyte 1
-#endif /* MBS_SUPPORT */
/* getfname --- return name of a builtin function (for pretty printing) */
@@ -5159,8 +5154,6 @@ check_bad_char(int c)
/* nextc --- get the next input character */
-#if MBS_SUPPORT
-
static int
nextc(bool check_for_bad)
{
@@ -5231,35 +5224,14 @@ again:
}
}
-#else /* MBS_SUPPORT */
-
-int
-nextc(bool check_for_bad)
-{
- do {
- if (lexeof)
- return END_FILE;
- if (lexptr && lexptr < lexend) {
- if (check_for_bad)
- check_bad_char(*lexptr);
- return ((int) (unsigned char) *lexptr++);
- }
- } while (get_src_buf());
- return END_SRC;
-}
-
-#endif /* MBS_SUPPORT */
-
/* pushback --- push a character back on the input */
static inline void
pushback(void)
{
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1)
cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
cur_ring_idx - 1;
-#endif
(! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
}
@@ -5468,9 +5440,7 @@ retry:
thisline = NULL;
tok = tokstart;
-#if MBS_SUPPORT
if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
-#endif
switch (c) {
case END_SRC:
return 0;
diff --git a/awkgram.y b/awkgram.y
index c59547eb..9cf88da3 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -1916,7 +1916,6 @@ static const struct token tokentab[] = {
{"xor", Op_builtin, LEX_BUILTIN, GAWKX, do_xor, MPF(xor)},
};
-#if MBS_SUPPORT
/* Variable containing the current shift state. */
static mbstate_t cur_mbstate;
/* Ring buffer containing current characters. */
@@ -1928,10 +1927,6 @@ static int cur_ring_idx;
/* This macro means that last nextc() return a singlebyte character
or 1st byte of a multibyte character. */
#define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
-#else /* MBS_SUPPORT */
-/* a dummy */
-#define nextc_is_1stbyte 1
-#endif /* MBS_SUPPORT */
/* getfname --- return name of a builtin function (for pretty printing) */
@@ -2820,8 +2815,6 @@ check_bad_char(int c)
/* nextc --- get the next input character */
-#if MBS_SUPPORT
-
static int
nextc(bool check_for_bad)
{
@@ -2892,35 +2885,14 @@ again:
}
}
-#else /* MBS_SUPPORT */
-
-int
-nextc(bool check_for_bad)
-{
- do {
- if (lexeof)
- return END_FILE;
- if (lexptr && lexptr < lexend) {
- if (check_for_bad)
- check_bad_char(*lexptr);
- return ((int) (unsigned char) *lexptr++);
- }
- } while (get_src_buf());
- return END_SRC;
-}
-
-#endif /* MBS_SUPPORT */
-
/* pushback --- push a character back on the input */
static inline void
pushback(void)
{
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1)
cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
cur_ring_idx - 1;
-#endif
(! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
}
@@ -3129,9 +3101,7 @@ retry:
thisline = NULL;
tok = tokstart;
-#if MBS_SUPPORT
if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
-#endif
switch (c) {
case END_SRC:
return 0;
diff --git a/builtin.c b/builtin.c
index 3eb09b48..75e4f580 100644
--- a/builtin.c
+++ b/builtin.c
@@ -247,7 +247,6 @@ do_fflush(int nargs)
return make_number((AWKNUM) status);
}
-#if MBS_SUPPORT
/* strncasecmpmbs --- like strncasecmp (multibyte string version) */
int
@@ -327,14 +326,6 @@ index_multibyte_buffer(char* src, char* dest, int len)
dest[idx] = mbclen;
}
}
-#else
-/* a dummy function */
-static void
-index_multibyte_buffer(char* src ATTRIBUTE_UNUSED, char* dest ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED)
-{
- cant_happen();
-}
-#endif
/* do_index --- find index of a string */
@@ -345,7 +336,6 @@ do_index(int nargs)
const char *p1, *p2;
size_t l1, l2;
long ret;
-#if MBS_SUPPORT
bool do_single_byte = false;
mbstate_t mbs1, mbs2;
@@ -353,7 +343,6 @@ do_index(int nargs)
memset(& mbs1, 0, sizeof(mbstate_t));
memset(& mbs2, 0, sizeof(mbstate_t));
}
-#endif
POP_TWO_SCALARS(s1, s2);
@@ -383,7 +372,6 @@ do_index(int nargs)
goto out;
}
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
s1 = force_wstring(s1);
s2 = force_wstring(s2);
@@ -394,14 +382,12 @@ do_index(int nargs)
do_single_byte = ((s1->wstlen == 0 && s1->stlen > 0)
|| (s2->wstlen == 0 && s2->stlen > 0));
}
-#endif
/* IGNORECASE will already be false if posix */
if (IGNORECASE) {
while (l1 > 0) {
if (l2 > l1)
break;
-#if MBS_SUPPORT
if (! do_single_byte && gawk_mb_cur_max > 1) {
const wchar_t *pos;
@@ -412,21 +398,18 @@ do_index(int nargs)
ret = pos - s1->wstptr + 1; /* 1-based */
goto out;
} else {
-#endif
- /*
- * Could use tolower(*p1) == tolower(*p2) here.
- * See discussion in eval.c as to why not.
- */
- if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2]
- && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
- ret = 1 + s1->stlen - l1;
- break;
- }
- l1--;
- p1++;
-#if MBS_SUPPORT
+ /*
+ * Could use tolower(*p1) == tolower(*p2) here.
+ * See discussion in eval.c as to why not.
+ */
+ if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2]
+ && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
+ ret = 1 + s1->stlen - l1;
+ break;
+ }
+ l1--;
+ p1++;
}
-#endif
}
} else {
while (l1 > 0) {
@@ -437,7 +420,6 @@ do_index(int nargs)
ret = 1 + s1->stlen - l1;
break;
}
-#if MBS_SUPPORT
if (! do_single_byte && gawk_mb_cur_max > 1) {
const wchar_t *pos;
@@ -451,10 +433,6 @@ do_index(int nargs)
l1--;
p1++;
}
-#else
- l1--;
- p1++;
-#endif
}
}
out:
@@ -544,7 +522,6 @@ do_length(int nargs)
lintwarn(_("length: received non-string argument"));
tmp = force_string(tmp);
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
tmp = force_wstring(tmp);
len = tmp->wstlen;
@@ -555,7 +532,6 @@ do_length(int nargs)
if (len == 0 && tmp->stlen > 0)
len = tmp->stlen;
} else
-#endif
len = tmp->stlen;
DEREF(tmp);
@@ -1058,7 +1034,6 @@ check_pos:
(void) force_number(arg);
if ((arg->flags & NUMBER) != 0) {
uval = get_number_uj(arg);
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
char buf[100];
wchar_t wc;
@@ -1099,7 +1074,7 @@ out0:
;
/* else,
fall through */
-#endif
+
cpbuf[0] = uval;
prec = 1;
cp = cpbuf;
@@ -1113,7 +1088,6 @@ out0:
*/
cp = arg->stptr;
prec = 1;
-#if MBS_SUPPORT
/*
* First character can be multiple bytes if
* it's a multibyte character. Grr.
@@ -1131,7 +1105,6 @@ out0:
fw += count - 1;
}
}
-#endif
goto pr_tail;
case 's':
need_format = false;
@@ -1805,13 +1778,11 @@ do_substr(int nargs)
if (nargs == 2) { /* third arg. missing */
/* use remainder of string */
length = t1->stlen - indx; /* default to bytes */
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
t1 = force_wstring(t1);
if (t1->wstlen > 0) /* use length of wide char string if we have one */
length = t1->wstlen - indx;
}
-#endif
d_length = length; /* set here in case used in diagnostics, below */
}
@@ -1824,12 +1795,10 @@ do_substr(int nargs)
}
/* get total len of input string, for following checks */
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
t1 = force_wstring(t1);
src_len = t1->wstlen;
} else
-#endif
src_len = t1->stlen;
if (indx >= src_len) {
@@ -1847,7 +1816,6 @@ do_substr(int nargs)
length = src_len - indx;
}
-#if MBS_SUPPORT
/* force_wstring() already called */
if (gawk_mb_cur_max == 1 || t1->wstlen == t1->stlen)
/* single byte case */
@@ -1877,9 +1845,6 @@ do_substr(int nargs)
*cp = '\0';
r = make_str_node(substr, cp - substr, ALREADY_MALLOCED);
}
-#else
- r = make_string(t1->stptr + indx, length);
-#endif
DEREF(t1);
return r;
@@ -2211,7 +2176,6 @@ do_print_rec(int nargs, int redirtype)
rp->output.gawk_fflush(rp->output.fp, rp->output.opaque);
}
-#if MBS_SUPPORT
/* is_wupper --- function version of iswupper for passing function pointers */
@@ -2276,7 +2240,6 @@ wide_tolower(wchar_t *wstr, size_t wlen)
{
wide_change_case(wstr, wlen, is_wupper, to_wlower);
}
-#endif
/* do_tolower --- lower case a string */
@@ -2299,14 +2262,11 @@ do_tolower(int nargs)
cp < cp2; cp++)
if (isupper(*cp))
*cp = tolower(*cp);
- }
-#if MBS_SUPPORT
- else {
+ } else {
force_wstring(t2);
wide_tolower(t2->wstptr, t2->wstlen);
wstr2str(t2);
}
-#endif
DEREF(t1);
return t2;
@@ -2333,14 +2293,11 @@ do_toupper(int nargs)
cp < cp2; cp++)
if (islower(*cp))
*cp = toupper(*cp);
- }
-#if MBS_SUPPORT
- else {
+ } else {
force_wstring(t2);
wide_toupper(t2->wstptr, t2->wstlen);
wstr2str(t2);
}
-#endif
DEREF(t1);
return t2;
@@ -2490,13 +2447,12 @@ do_match(int nargs)
size_t *wc_indices = NULL;
rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr); /* byte length */
-#if MBS_SUPPORT
if (rlength > 0 && gawk_mb_cur_max > 1) {
t1 = str2wstr(t1, & wc_indices);
rlength = wc_indices[rstart + rlength - 1] - wc_indices[rstart] + 1;
rstart = wc_indices[rstart];
}
-#endif
+
rstart++; /* now it's 1-based indexing */
/* Build the array only if the caller wants the optional subpatterns */
@@ -2518,12 +2474,10 @@ do_match(int nargs)
start = t1->stptr + s;
subpat_start = s;
subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s;
-#if MBS_SUPPORT
if (len > 0 && gawk_mb_cur_max > 1) {
subpat_start = wc_indices[s];
subpat_len = wc_indices[s + len - 1] - subpat_start + 1;
}
-#endif
it = make_string(start, len);
it->flags |= MAYBE_NUM; /* user input */
@@ -3578,7 +3532,6 @@ do_bindtextdomain(int nargs)
static size_t
mbc_byte_count(const char *ptr, size_t numchars)
{
-#if MBS_SUPPORT
mbstate_t cur_state;
size_t sum = 0;
int mb_len;
@@ -3599,9 +3552,6 @@ mbc_byte_count(const char *ptr, size_t numchars)
}
return sum;
-#else
- return numchars;
-#endif
}
/* mbc_char_count --- return number of m.b. chars in string, up to numbytes bytes */
@@ -3609,7 +3559,6 @@ mbc_byte_count(const char *ptr, size_t numchars)
static size_t
mbc_char_count(const char *ptr, size_t numbytes)
{
-#if MBS_SUPPORT
mbstate_t cur_state;
size_t sum = 0;
int mb_len;
@@ -3632,7 +3581,4 @@ mbc_char_count(const char *ptr, size_t numbytes)
}
return sum;
-#else
- return numbytes;
-#endif
}
diff --git a/dfa.c b/dfa.c
index e658ad8a..53a8c2cc 100644
--- a/dfa.c
+++ b/dfa.c
@@ -58,15 +58,15 @@
#include "gettext.h"
#define _(str) gettext (str)
-#include "mbsupport.h" /* Define MBS_SUPPORT to 1 or 0, as appropriate. */
-#if MBS_SUPPORT
-/* We can handle multibyte strings. */
-# include <wchar.h>
-# include <wctype.h>
-#endif
+#include <wchar.h>
+#include <wctype.h>
#include "xalloc.h"
+#if defined(__DJGPP__)
+#include "mbsupport.h"
+#endif
+
#include "dfa.h"
#ifdef GAWK
@@ -399,12 +399,10 @@ struct dfa
*/
int *multibyte_prop;
-#if MBS_SUPPORT
/* A table indexed by byte values that contains the corresponding wide
character (if any) for that byte. WEOF means the byte is not a
valid single-byte character. */
wint_t mbrtowc_cache[NOTCHAR];
-#endif
/* Array of the bracket expression in the DFA. */
struct mb_char_classes *mbcsets;
@@ -489,7 +487,6 @@ static void regexp (void);
static void
dfambcache (struct dfa *d)
{
-#if MBS_SUPPORT
int i;
for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
{
@@ -499,10 +496,8 @@ dfambcache (struct dfa *d)
wchar_t wc;
d->mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
}
-#endif
}
-#if MBS_SUPPORT
/* Store into *PWC the result of converting the leading bytes of the
multibyte buffer S of length N bytes, using the mbrtowc_cache in *D
and updating the conversion state in *D. On conversion error,
@@ -541,9 +536,6 @@ mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d)
*pwc = wc;
return 1;
}
-#else
-#define mbs_to_wchar(pwc, s, n, d) (WEOF)
-#endif
#ifdef DEBUG
@@ -738,7 +730,7 @@ static charclass newline;
#ifdef __GLIBC__
# define is_valid_unibyte_character(c) 1
#else
-# define is_valid_unibyte_character(c) (! (MBS_SUPPORT && btowc (c) == WEOF))
+# define is_valid_unibyte_character(c) (btowc (c) != WEOF)
#endif
/* C is a "word-constituent" byte. */
@@ -799,17 +791,12 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
static bool
setbit_wc (wint_t wc, charclass c)
{
-#if MBS_SUPPORT
int b = wctob (wc);
if (b == EOF)
return false;
setbit (b, c);
return true;
-#else
- abort ();
- /*NOTREACHED*/ return false;
-#endif
}
/* Set a bit for B and its case variants in the charclass C.
@@ -907,7 +894,6 @@ static wint_t wctok; /* Wide character representation of the current
MB_CUR_MAX > 1. */
-#if MBS_SUPPORT
/* Fetch the next lexical input character. Set C (of type int) to the
next input byte, except set C to EOF if the input is a multibyte
character of length greater than 1. Set WC (of type wint_t) to the
@@ -936,23 +922,6 @@ static wint_t wctok; /* Wide character representation of the current
} \
} while (0)
-#else
-/* Note that characters become unsigned here. */
-# define FETCH_WC(c, unused, eoferr) \
- do { \
- if (! lexleft) \
- { \
- if ((eoferr) != 0) \
- dfaerror (eoferr); \
- else \
- return lasttok = END; \
- } \
- (c) = to_uchar (*lexptr++); \
- --lexleft; \
- } while (0)
-
-#endif /* MBS_SUPPORT */
-
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
@@ -1764,7 +1733,6 @@ addtok (token t)
}
}
-#if MBS_SUPPORT
/* We treat a multibyte character as a single atom, so that DFA
can treat a multibyte character as a single expression.
@@ -1796,17 +1764,10 @@ addtok_wc (wint_t wc)
addtok (CAT);
}
}
-#else
-static void
-addtok_wc (wint_t wc)
-{
-}
-#endif
static void
add_utf8_anychar (void)
{
-#if MBS_SUPPORT
static const charclass utf8_classes[5] = {
/* 80-bf: non-leading bytes. */
{0, 0, 0, 0, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, 0, 0},
@@ -1861,7 +1822,6 @@ add_utf8_anychar (void)
addtok (CAT);
addtok (OR);
}
-#endif
}
/* The grammar understood by the parser is as follows.
@@ -1902,7 +1862,7 @@ add_utf8_anychar (void)
static void
atom (void)
{
- if (MBS_SUPPORT && tok == WCHAR)
+ if (tok == WCHAR)
{
if (wctok == WEOF)
addtok (BACKREF);
@@ -1924,7 +1884,7 @@ atom (void)
tok = lex ();
}
- else if (MBS_SUPPORT && tok == ANYCHAR && using_utf8 ())
+ else if (tok == ANYCHAR && using_utf8 ())
{
/* For UTF-8 expand the period to a series of CSETs that define a valid
UTF-8 character. This avoids using the slow multibyte path. I'm
@@ -1938,9 +1898,7 @@ atom (void)
}
else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|| tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
-#if MBS_SUPPORT
|| tok == ANYCHAR || tok == MBCSET
-#endif /* MBS_SUPPORT */
|| tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
{
addtok (tok);
@@ -2273,10 +2231,8 @@ epsclosure (position_set *s, struct dfa const *d, char *visited)
for (i = 0; i < s->nelem; ++i)
if (d->tokens[s->elems[i].index] >= NOTCHAR
&& d->tokens[s->elems[i].index] != BACKREF
-#if MBS_SUPPORT
&& d->tokens[s->elems[i].index] != ANYCHAR
&& d->tokens[s->elems[i].index] != MBCSET
-#endif
&& d->tokens[s->elems[i].index] < CSET)
{
if (!initialized)
@@ -2595,9 +2551,7 @@ dfaanalyze (struct dfa *d, int searchflag)
it with its epsilon closure. */
for (i = 0; i < d->tindex; ++i)
if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
-#if MBS_SUPPORT
|| d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET
-#endif
|| d->tokens[i] >= CSET)
{
#ifdef DEBUG
@@ -2707,9 +2661,8 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
copyset (d->charclasses[d->tokens[pos.index] - CSET], matches);
else
{
- if (MBS_SUPPORT
- && (d->tokens[pos.index] == MBCSET
- || d->tokens[pos.index] == ANYCHAR))
+ if (d->tokens[pos.index] == MBCSET
+ || d->tokens[pos.index] == ANYCHAR)
{
/* MB_CUR_MAX > 1 */
if (d->tokens[pos.index] == MBCSET)
@@ -3684,7 +3637,7 @@ dfaoptimize (struct dfa *d)
size_t i;
bool have_backref = false;
- if (!MBS_SUPPORT || !using_utf8 ())
+ if (!using_utf8 ())
return;
for (i = 0; i < d->tindex; ++i)
diff --git a/eval.c b/eval.c
index 0d6a07b6..82b11719 100644
--- a/eval.c
+++ b/eval.c
@@ -530,7 +530,7 @@ posix_compare(NODE *s1, NODE *s2)
* In either case, ret will be the right thing to return.
*/
}
-#if MBS_SUPPORT
+#if ! defined(__DJGPP__)
else {
/* Similar logic, using wide characters */
(void) force_wstring(s1);
@@ -610,15 +610,14 @@ cmp_nodes(NODE *t1, NODE *t2)
const unsigned char *cp1 = (const unsigned char *) t1->stptr;
const unsigned char *cp2 = (const unsigned char *) t2->stptr;
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
ret = strncasecmpmbs((const unsigned char *) cp1,
(const unsigned char *) cp2, l);
- } else
-#endif
- /* Could use tolower() here; see discussion above. */
- for (ret = 0; l-- > 0 && ret == 0; cp1++, cp2++)
- ret = casetable[*cp1] - casetable[*cp2];
+ } else {
+ /* Could use tolower() here; see discussion above. */
+ for (ret = 0; l-- > 0 && ret == 0; cp1++, cp2++)
+ ret = casetable[*cp1] - casetable[*cp2];
+ }
} else
ret = memcmp(t1->stptr, t2->stptr, l);
diff --git a/field.c b/field.c
index 7b4f2190..6a7c6b1d 100644
--- a/field.c
+++ b/field.c
@@ -392,12 +392,10 @@ re_parse_field(long up_to, /* parse only up to this field number */
char *end = scan + len;
int regex_flags = RE_NEED_START;
char *sep;
-#if MBS_SUPPORT
size_t mbclen = 0;
mbstate_t mbs;
- if (gawk_mb_cur_max > 1)
- memset(&mbs, 0, sizeof(mbstate_t));
-#endif
+
+ memset(&mbs, 0, sizeof(mbstate_t));
if (in_middle)
regex_flags |= RE_NO_BOL;
@@ -424,7 +422,6 @@ re_parse_field(long up_to, /* parse only up to this field number */
&& nf < up_to) {
regex_flags |= RE_NO_BOL;
if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
mbclen = mbrlen(scan, end-scan, &mbs);
if ((mbclen == 1) || (mbclen == (size_t) -1)
@@ -434,8 +431,7 @@ re_parse_field(long up_to, /* parse only up to this field number */
}
scan += mbclen;
} else
-#endif
- scan++;
+ scan++;
if (scan == end) {
(*set)(++nf, field, (long)(scan - field), n);
up_to = nf;
@@ -636,7 +632,6 @@ null_parse_field(long up_to, /* parse only up to this field number */
if (len == 0)
return nf;
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
mbstate_t mbs;
memset(&mbs, 0, sizeof(mbstate_t));
@@ -652,12 +647,12 @@ null_parse_field(long up_to, /* parse only up to this field number */
(*set)(++nf, scan, mbclen, n);
scan += mbclen;
}
- } else
-#endif
- for (; nf < up_to && scan < end; scan++) {
- if (sep_arr != NULL && nf > 0)
- set_element(nf, scan, 0L, sep_arr);
- (*set)(++nf, scan, 1L, n);
+ } else {
+ for (; nf < up_to && scan < end; scan++) {
+ if (sep_arr != NULL && nf > 0)
+ set_element(nf, scan, 0L, sep_arr);
+ (*set)(++nf, scan, 1L, n);
+ }
}
*buf = scan;
@@ -688,12 +683,10 @@ sc_parse_field(long up_to, /* parse only up to this field number */
char *field;
char *end = scan + len;
char sav;
-#if MBS_SUPPORT
size_t mbclen = 0;
mbstate_t mbs;
- if (gawk_mb_cur_max > 1)
- memset(&mbs, 0, sizeof(mbstate_t));
-#endif
+
+ memset(&mbs, 0, sizeof(mbstate_t));
if (up_to == UNLIMITED)
nf = 0;
@@ -712,7 +705,6 @@ sc_parse_field(long up_to, /* parse only up to this field number */
for (; nf < up_to;) {
field = scan;
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
while (*scan != fschar) {
mbclen = mbrlen(scan, end-scan, &mbs);
@@ -723,10 +715,10 @@ sc_parse_field(long up_to, /* parse only up to this field number */
}
scan += mbclen;
}
- } else
-#endif
- while (*scan != fschar)
- scan++;
+ } else {
+ while (*scan != fschar)
+ scan++;
+ }
(*set)(++nf, field, (long)(scan - field), n);
if (scan == end)
break;
@@ -766,7 +758,6 @@ fw_parse_field(long up_to, /* parse only up to this field number */
char *scan = *buf;
long nf = parse_high_water;
char *end = scan + len;
-#if MBS_SUPPORT
int nmbc;
size_t mbclen;
size_t mbslen;
@@ -775,14 +766,12 @@ fw_parse_field(long up_to, /* parse only up to this field number */
mbstate_t mbs;
memset(&mbs, 0, sizeof(mbstate_t));
-#endif
if (up_to == UNLIMITED)
nf = 0;
if (len == 0)
return nf;
for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) {
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
nmbc = 0;
mbslen = 0;
@@ -805,10 +794,7 @@ fw_parse_field(long up_to, /* parse only up to this field number */
}
(*set)(++nf, scan, (long) mbslen, n);
scan += mbslen;
- }
- else
-#endif
- {
+ } else {
if (len > end - scan)
len = end - scan;
(*set)(++nf, scan, (long) len, n);
@@ -1451,13 +1437,8 @@ set_fpat_function:
* Implementation varies if doing MBS or not.
*/
-#if MBS_SUPPORT
#define increment_scan(scanp, len) incr_scan(scanp, len, & mbs)
-#else
-#define increment_scan(scanp, len) ((*scanp)++)
-#endif
-#if MBS_SUPPORT
/* incr_scan --- MBS version of increment_scan() */
static void
@@ -1478,7 +1459,6 @@ incr_scan(char **scanp, size_t len, mbstate_t *mbs)
} else
(*scanp)++;
}
-#endif
/*
* fpat_parse_field --- parse fields using a regexp.
@@ -1603,12 +1583,9 @@ fpat_parse_field(long up_to, /* parse only up to this field number */
bool need_to_set_sep;
bool non_empty;
bool eosflag;
-#if MBS_SUPPORT
mbstate_t mbs;
- if (gawk_mb_cur_max > 1)
- memset(&mbs, 0, sizeof(mbstate_t));
-#endif
+ memset(&mbs, 0, sizeof(mbstate_t));
if (up_to == UNLIMITED)
nf = 0;
diff --git a/interpret.h b/interpret.h
index 593f11a6..83ccbfc5 100644
--- a/interpret.h
+++ b/interpret.h
@@ -711,7 +711,6 @@ mod:
t1->stptr[nlen] = '\0';
t1->flags &= ~(NUMCUR|NUMBER|NUMINT);
-#if MBS_SUPPORT
if ((t1->flags & WSTRCUR) != 0 && (t2->flags & WSTRCUR) != 0) {
size_t wlen = t1->wstlen + t2->wstlen;
@@ -723,7 +722,6 @@ mod:
t1->flags |= WSTRCUR;
} else
free_wstr(*lhs);
-#endif
} else {
size_t nlen = t1->stlen + t2->stlen;
char *p;
diff --git a/io.c b/io.c
index c584a0c2..1d15d887 100644
--- a/io.c
+++ b/io.c
@@ -3073,10 +3073,8 @@ rs1scan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state)
{
char *bp;
char rs;
-#if MBS_SUPPORT
size_t mbclen = 0;
mbstate_t mbs;
-#endif
memset(recm, '\0', sizeof(struct recmatch));
rs = RS->stptr[0];
@@ -3087,7 +3085,6 @@ rs1scan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state)
if (*state == INDATA) /* skip over data we've already seen */
bp += iop->scanoff;
-#if MBS_SUPPORT
/*
* From: Bruno Haible <bruno@clisp.org>
* To: Aharon Robbins <arnold@skeeve.com>, gnits@gnits.org
@@ -3184,7 +3181,7 @@ rs1scan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state)
return NOTERM;
}
}
-#endif
+
while (*bp != rs)
bp++;
diff --git a/main.c b/main.c
index b9b76618..ddda1d66 100644
--- a/main.c
+++ b/main.c
@@ -155,9 +155,7 @@ static const char *locale = ""; /* default value to setlocale */
int use_lc_numeric = false; /* obey locale for decimal point */
-#if MBS_SUPPORT
int gawk_mb_cur_max; /* MB_CUR_MAX value, see comment in main() */
-#endif
FILE *output_fp; /* default gawk output, can be redirected in the debugger */
bool output_is_tty = false; /* control flushing of output */
@@ -290,14 +288,12 @@ main(int argc, char **argv)
set_locale_stuff();
-#if MBS_SUPPORT
/*
* In glibc, MB_CUR_MAX is actually a function. This value is
* tested *a lot* in many speed-critical places in gawk. Caching
* this value once makes a speed difference.
*/
gawk_mb_cur_max = MB_CUR_MAX;
- /* Without MBS_SUPPORT, gawk_mb_cur_max is 1. */
#ifdef LIBC_IS_BORKED
{
const char *env_lc;
@@ -312,7 +308,6 @@ main(int argc, char **argv)
/* init the cache for checking bytes if they're characters */
init_btowc_cache();
-#endif
if (do_nostalgia)
@@ -346,7 +341,6 @@ main(int argc, char **argv)
if (do_lint && os_is_setuid())
warning(_("running %s setuid root may be a security problem"), myname);
-#if MBS_SUPPORT
if (do_binary) {
if (do_posix)
warning(_("`--posix' overrides `--characters-as-bytes'"));
@@ -356,7 +350,6 @@ main(int argc, char **argv)
setlocale(LC_ALL, "C");
#endif
}
-#endif
if (do_debug) /* Need to register the debugger pre-exec hook before any other */
init_debug();
diff --git a/mbsupport.h b/mbsupport.h
index 9a62486f..f4e1a821 100644
--- a/mbsupport.h
+++ b/mbsupport.h
@@ -23,81 +23,25 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
-/*
- * This file is needed because we test for i18n support in 3 different
- * places, and we want a consistent definition in all of them. Following
- * the ``Don't Repeat Yourself'' principle from "The Pragmatic Programmer",
- * we centralize the tests here.
- *
- * This test is the union of all the current tests.
- */
-
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-
-#ifndef NO_MBSUPPORT
-
-#if defined(HAVE_ISWCTYPE) \
- && defined(HAVE_LOCALE_H) \
- && (defined(HAVE_BTOWC) || defined(ZOS_USS)) \
- && defined(HAVE_MBRLEN) \
- && defined(HAVE_MBRTOWC) \
- && defined(HAVE_WCHAR_H) \
- && defined(HAVE_WCRTOMB) \
- && defined(HAVE_WCSCOLL) \
- && defined(HAVE_WCTYPE) \
- && defined(HAVE_WCTYPE_H) \
- && defined(HAVE_WCTYPE_T) \
- && defined(HAVE_WINT_T) \
- && defined(HAVE_ISWLOWER) \
- && defined(HAVE_ISWUPPER) \
- && defined(HAVE_TOWLOWER) \
- && defined(HAVE_TOWUPPER) \
- && (defined(HAVE_STDLIB_H) && defined(MB_CUR_MAX)) \
-/* We can handle multibyte strings. */
-# define MBS_SUPPORT 1
-#else
-# define MBS_SUPPORT 0
-#endif
-#else /* NO_MBSUPPORT is defined */
-# define MBS_SUPPORT 0
-#endif
-
-#if ! MBS_SUPPORT
+#ifdef __DJGPP__
# undef MB_CUR_MAX
# define MB_CUR_MAX 1
-/* All this glop is for dfa.c. Bleah. */
-
-#ifndef __DJGPP__
-#define wchar_t char
-#endif
+/* All this glop is for DJGPP */
-#define wctype_t int
-#define wint_t int
-#define mbstate_t int
-#define WEOF EOF
#define towupper toupper
#define towlower tolower
-#ifndef __DJGPP__
-#define btowc(x) ((int)x)
-#endif
#define iswalnum isalnum
#define iswalpha isalpha
#define iswupper isupper
-#if defined(ZOS_USS)
-#undef towupper
-#undef towlower
-#undef btowc
-#undef iswalnum
-#undef iswalpha
-#undef iswupper
-#undef wctype
-#undef iswctype
-#undef wcscoll
-#endif
+#define iswlower islower
+
+#define mbrtowc(wcp, s, e, mbs) (-1)
+#define mbrlen(s, e, mbs) strlen(s)
+#define wcrtomb(wc, b, mbs) (-1)
+#define wcslen strlen
+#define wctob(wc) (EOF)
extern wctype_t wctype(const char *name);
extern int iswctype(wint_t wc, wctype_t desc);
diff --git a/mpfr.c b/mpfr.c
index e53af616..a89b2bc6 100644
--- a/mpfr.c
+++ b/mpfr.c
@@ -121,10 +121,8 @@ mpg_node(unsigned int tp)
r->flags |= MALLOC|NUMBER|NUMCUR;
r->stptr = NULL;
r->stlen = 0;
-#if MBS_SUPPORT
r->wstptr = NULL;
r->wstlen = 0;
-#endif /* defined MBS_SUPPORT */
return r;
}
diff --git a/node.c b/node.c
index a3264f2d..9fd4c7b9 100644
--- a/node.c
+++ b/node.c
@@ -281,7 +281,6 @@ r_dupnode(NODE *n)
r->flags &= ~FIELD;
r->flags |= MALLOC;
r->valref = 1;
-#if MBS_SUPPORT
/*
* DON'T call free_wstr(r) here!
* r->wstptr still points at n->wstptr's value, and we
@@ -289,13 +288,11 @@ r_dupnode(NODE *n)
*/
r->wstptr = NULL;
r->wstlen = 0;
-#endif /* MBS_SUPPORT */
if ((n->flags & STRCUR) != 0) {
emalloc(r->stptr, char *, n->stlen + 2, "r_dupnode");
memcpy(r->stptr, n->stptr, n->stlen);
r->stptr[n->stlen] = '\0';
-#if MBS_SUPPORT
if ((n->flags & WSTRCUR) != 0) {
r->wstlen = n->wstlen;
emalloc(r->wstptr, wchar_t *, sizeof(wchar_t) * (n->wstlen + 2), "r_dupnode");
@@ -303,7 +300,6 @@ r_dupnode(NODE *n)
r->wstptr[n->wstlen] = L'\0';
r->flags |= WSTRCUR;
}
-#endif /* MBS_SUPPORT */
}
return r;
@@ -322,10 +318,8 @@ r_make_number(double x)
r->valref = 1;
r->stptr = NULL;
r->stlen = 0;
-#if MBS_SUPPORT
r->wstptr = NULL;
r->wstlen = 0;
-#endif /* defined MBS_SUPPORT */
return r;
}
@@ -368,11 +362,8 @@ make_str_node(const char *s, size_t len, int flags)
r->flags = (MALLOC|STRING|STRCUR);
r->valref = 1;
r->stfmt = -1;
-
-#if MBS_SUPPORT
r->wstptr = NULL;
r->wstlen = 0;
-#endif /* MBS_SUPPORT */
if ((flags & ALREADY_MALLOCED) != 0)
r->stptr = (char *) s;
@@ -387,15 +378,12 @@ make_str_node(const char *s, size_t len, int flags)
char *ptm;
int c;
const char *end;
-#if MBS_SUPPORT
mbstate_t cur_state;
memset(& cur_state, 0, sizeof(cur_state));
-#endif
end = &(r->stptr[len]);
for (pf = ptm = r->stptr; pf < end;) {
-#if MBS_SUPPORT
/*
* Keep multibyte characters together. This avoids
* problems if a subsequent byte of a multibyte
@@ -412,7 +400,7 @@ make_str_node(const char *s, size_t len, int flags)
continue;
}
}
-#endif
+
c = *pf++;
if (c == '\\') {
c = parse_escape(&pf);
@@ -642,7 +630,6 @@ get_numbase(const char *s, bool use_locale)
return 8;
}
-#if MBS_SUPPORT
/* str2wstr --- convert a multibyte string to a wide string */
NODE *
@@ -891,7 +878,6 @@ out: ;
return NULL;
}
-#endif /* MBS_SUPPORT */
/* is_ieee_magic_val --- return true for +inf, -inf, +nan, -nan */
@@ -938,7 +924,6 @@ get_ieee_magic_val(const char *val)
return v;
}
-#if MBS_SUPPORT
wint_t btowc_cache[256];
/* init_btowc_cache --- initialize the cache */
@@ -951,7 +936,6 @@ void init_btowc_cache()
btowc_cache[i] = btowc(i);
}
}
-#endif
#define BLOCKCHUNK 100
diff --git a/re.c b/re.c
index 12c212a6..edb5bc48 100644
--- a/re.c
+++ b/re.c
@@ -54,12 +54,9 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
* It is 0, when the current character is a singlebyte character.
*/
size_t is_multibyte = 0;
-#if MBS_SUPPORT
mbstate_t mbs;
- if (gawk_mb_cur_max > 1)
- memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize. */
-#endif
+ memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize. */
if (first) {
first = false;
@@ -87,7 +84,6 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
dest = buf;
while (src < end) {
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1 && ! is_multibyte) {
/* The previous byte is a singlebyte character, or last byte
of a multibyte character. We check the next character. */
@@ -100,7 +96,6 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
is_multibyte = 0;
}
}
-#endif
/* We skip multibyte character, since it must not be a special
character. */
diff --git a/regex_internal.h b/regex_internal.h
index c8981a08..3fc2fc58 100644
--- a/regex_internal.h
+++ b/regex_internal.h
@@ -26,18 +26,16 @@
#include <stdlib.h>
#include <string.h>
-#include "mbsupport.h" /* gawk */
-
#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
# include <langinfo.h>
#endif
#if defined HAVE_LOCALE_H || defined _LIBC
# include <locale.h>
#endif
-#if MBS_SUPPORT && (defined HAVE_WCHAR_H || defined _LIBC)
+#if defined HAVE_WCHAR_H || defined _LIBC
# include <wchar.h>
#endif /* HAVE_WCHAR_H || _LIBC */
-#if MBS_SUPPORT && (defined HAVE_WCTYPE_H || defined _LIBC)
+#if defined HAVE_WCTYPE_H || defined _LIBC
# include <wctype.h>
#endif /* HAVE_WCTYPE_H || _LIBC */
#if defined HAVE_STDBOOL_H || defined _LIBC
@@ -109,7 +107,7 @@ is_blank (int c)
# define SIZE_MAX ((size_t) -1)
#endif
-#if MBS_SUPPORT || _LIBC
+#if ! defined(__DJGPP__) && (defined(GAWK) || _LIBC)
# define RE_ENABLE_I18N
#endif
diff --git a/replace.c b/replace.c
index 71a8dc51..6d345f52 100644
--- a/replace.c
+++ b/replace.c
@@ -111,6 +111,6 @@
#include "missing_d/strcoll.c"
#endif
-#if ! MBS_SUPPORT
+#if defined(__DJGPP__)
#include "missing_d/wcmisc.c"
#endif