summaryrefslogtreecommitdiff
path: root/lib/sh
diff options
context:
space:
mode:
author Chet Ramey <chet.ramey@case.edu> 2019-01-07 09:27:52 -0500
committer Chet Ramey <chet.ramey@case.edu> 2019-01-07 09:27:52 -0500
commit d233b485e83c3a784b803fb894280773f16f2deb (patch)
tree 16d51f3ccca2d4ad2d8f2da564d68ca848de595b /lib/sh
parent 64447609994bfddeef1061948022c074093e9a9f (diff)
download bash-d233b485e83c3a784b803fb894280773f16f2deb.tar.gz
bash-5.0 distribution sources and documentation bash-5.0
Diffstat (limited to 'lib/sh')
-rw-r--r--lib/sh/Makefile.in15
-rw-r--r--lib/sh/casemod.c5
-rw-r--r--lib/sh/clock.c10
-rw-r--r--lib/sh/eaccess.c4
-rw-r--r--lib/sh/getenv.c2
-rw-r--r--lib/sh/mbschr.c6
-rw-r--r--lib/sh/mbscmp.c6
-rw-r--r--lib/sh/pathcanon.c2
-rw-r--r--lib/sh/pathphys.c2
-rw-r--r--lib/sh/shmbchar.c25
-rw-r--r--lib/sh/shquote.c5
-rw-r--r--lib/sh/snprintf.c2
-rw-r--r--lib/sh/strcasecmp.c2
-rw-r--r--lib/sh/strtrans.c4
-rw-r--r--lib/sh/timeval.c9
-rw-r--r--lib/sh/ufuncs.c45
-rw-r--r--lib/sh/unicode.c46
-rw-r--r--lib/sh/utf8.c147
-rw-r--r--lib/sh/zread.c4
19 files changed, 292 insertions, 49 deletions
diff --git a/lib/sh/Makefile.in b/lib/sh/Makefile.in
index 2ca921b3..06917ba4 100644
--- a/lib/sh/Makefile.in
+++ b/lib/sh/Makefile.in
@@ -67,8 +67,8 @@ LOCAL_DEFS = @LOCAL_DEFS@
INCLUDES = -I. -I../.. -I$(topdir) -I$(topdir)/lib -I$(BASHINCDIR) -I$(srcdir) $(INTL_INC)
-CCFLAGS = ${PROFILE_FLAGS} ${INCLUDES} $(DEFS) $(LOCAL_DEFS) $(LOCAL_CFLAGS) \
- $(CFLAGS) $(CPPFLAGS)
+CCFLAGS = ${ADDON_CFLAGS} ${PROFILE_FLAGS} ${INCLUDES} $(DEFS) $(LOCAL_DEFS) \
+ $(LOCAL_CFLAGS) $(CFLAGS) $(CPPFLAGS)
GCC_LINT_FLAGS = -Wall -Wshadow -Wpointer-arith -Wcast-qual \
-Wcast-align -Wstrict-prototypes -Wconversion \
@@ -92,7 +92,8 @@ CSOURCES = clktck.c clock.c getcwd.c getenv.c oslib.c setlinebuf.c \
mktime.c strftime.c mbschr.c zcatfd.c zmapfd.c winsize.c eaccess.c \
wcsdup.c fpurge.c zgetline.c mbscmp.c uconvert.c ufuncs.c \
casemod.c dprintf.c input_avail.c mbscasecmp.c fnxform.c \
- strchrnul.c unicode.c wcswidth.c wcsnwidth.c shmbchar.c strdup.c
+ strchrnul.c unicode.c wcswidth.c wcsnwidth.c shmbchar.c strdup.c \
+ utf8.c
# The header files for this library.
HSOURCES =
@@ -107,7 +108,7 @@ OBJECTS = clktck.o clock.o getenv.o oslib.o setlinebuf.o strnlen.o \
fmtullong.o fmtumax.o zcatfd.o zmapfd.o winsize.o wcsdup.o \
fpurge.o zgetline.o mbscmp.o uconvert.o ufuncs.o casemod.o \
input_avail.o mbscasecmp.o fnxform.o unicode.o shmbchar.o \
- wcsnwidth.o ${LIBOBJS}
+ utf8.o wcsnwidth.o ${LIBOBJS}
SUPPORT = Makefile
@@ -200,6 +201,7 @@ tmpfile.o: tmpfile.c
uconvert.o: uconvert.c
ufuncs.o: ufuncs.c
unicode.o: unicode.c
+utf8.o: utf8.c
vprint.o: vprint.c
wcsdup.o: wcsdup.c
wcsnwidth.o: wcsnwidth.c
@@ -277,6 +279,7 @@ tmpfile.o: ${BUILD_DIR}/config.h
uconvert.o: ${BUILD_DIR}/config.h
ufuncs.o: ${BUILD_DIR}/config.h
unicode.o: ${BUILD_DIR}/config.h
+utf8.o: ${BUILD_DIR}/config.h
vprint.o: ${BUILD_DIR}/config.h
wcsdup.o: ${BUILD_DIR}/config.h
wcsnwidth.o: ${BUILD_DIR}/config.h
@@ -612,6 +615,10 @@ unicode.o: ${topdir}/bashansi.h ${BASHINCDIR}/ansi_stdlib.h
unicode.o: ${BASHINCDIR}/stdc.h
unicode.o: ${topdir}/xmalloc.h
+utf8.o: ${topdir}/bashansi.h
+utf8.o: ${BASHINCDIR}/ansi_stdlib.h
+utf8.o: ${BASHINCDIR}/shmbutil.h ${BASHINCDIR}/shmbchar.h
+
winsize.o: ${BASHINCDIR}/stdc.h
winsize.o: ${topdir}/xmalloc.h
winsize.o: ${topdir}/bashtypes.h
diff --git a/lib/sh/casemod.c b/lib/sh/casemod.c
index f68303bc..7cdd4178 100644
--- a/lib/sh/casemod.c
+++ b/lib/sh/casemod.c
@@ -229,7 +229,10 @@ singlebyte:
else
{
m = mbrtowc (&wc, string + start, end - start, &state);
- if (MB_INVALIDCH (m) || m == 1)
+ /* Have to go through wide case conversion even for single-byte
+ chars, to accommodate single-byte characters where the
+ corresponding upper or lower case equivalent is multibyte. */
+ if (MB_INVALIDCH (m))
{
wc = (unsigned char)string[start];
goto singlebyte;
diff --git a/lib/sh/clock.c b/lib/sh/clock.c
index 84cdbc54..c6c52bf8 100644
--- a/lib/sh/clock.c
+++ b/lib/sh/clock.c
@@ -32,7 +32,13 @@
#include <stdio.h>
#include <stdc.h>
-extern long get_clk_tck __P((void));
+#include <bashintl.h>
+
+#ifndef locale_decpoint
+extern int locale_decpoint PARAMS((void));
+#endif
+
+extern long get_clk_tck PARAMS((void));
void
clock_t_to_secs (t, sp, sfp)
@@ -76,6 +82,6 @@ print_clock_t (fp, t)
minutes = timestamp / 60;
seconds = timestamp % 60;
- fprintf (fp, "%ldm%d.%03ds", minutes, seconds, seconds_fraction);
+ fprintf (fp, "%ldm%d%c%03ds", minutes, seconds, locale_decpoint(), seconds_fraction);
}
#endif /* HAVE_TIMES */
diff --git a/lib/sh/eaccess.c b/lib/sh/eaccess.c
index 8fd8a43e..3d8ae4e8 100644
--- a/lib/sh/eaccess.c
+++ b/lib/sh/eaccess.c
@@ -91,7 +91,9 @@ sh_stat (path, finfo)
}
if (path[0] == '/' && path[1] == 'd' && strncmp (path, "/dev/fd/", 8) == 0)
{
-#if !defined (HAVE_DEV_FD)
+ /* If stating /dev/fd/n doesn't produce the same results as fstat of
+ FD N, then define DEV_FD_STAT_BROKEN */
+#if !defined (HAVE_DEV_FD) || defined (DEV_FD_STAT_BROKEN)
intmax_t fd;
int r;
diff --git a/lib/sh/getenv.c b/lib/sh/getenv.c
index 8b5e3406..1e682aef 100644
--- a/lib/sh/getenv.c
+++ b/lib/sh/getenv.c
@@ -69,7 +69,7 @@ getenv (name)
if (var && exported_p (var))
return (value_cell (var));
}
- else
+ else if (environ)
{
register int i, len;
diff --git a/lib/sh/mbschr.c b/lib/sh/mbschr.c
index 7730e46e..639962d4 100644
--- a/lib/sh/mbschr.c
+++ b/lib/sh/mbschr.c
@@ -28,9 +28,12 @@
#include "shmbutil.h"
extern int locale_mb_cur_max;
+extern int locale_utf8locale;
#undef mbschr
+extern char *utf8_mbschr (const char *, int); /* XXX */
+
/* In some locales, the non-first byte of some multibyte characters have
the same value as some ascii character. Faced with these strings, a
legacy strchr() might return the wrong value. */
@@ -49,6 +52,9 @@ mbschr (s, c)
mbstate_t state;
size_t strlength, mblength;
+ if (locale_utf8locale && c < 0x80)
+ return (utf8_mbschr (s, c)); /* XXX */
+
/* The locale encodings with said weird property are BIG5, BIG5-HKSCS,
GBK, GB18030, SHIFT_JIS, and JOHAB. They exhibit the problem only
when c >= 0x30. We can therefore use the faster bytewise search if
diff --git a/lib/sh/mbscmp.c b/lib/sh/mbscmp.c
index aaf81f5e..c7c84435 100644
--- a/lib/sh/mbscmp.c
+++ b/lib/sh/mbscmp.c
@@ -1,6 +1,6 @@
/* mbscmp - multibyte string comparison. */
-/* Copyright (C) 1995-2015 Free Software Foundation, Inc.
+/* Copyright (C) 1995-2018 Free Software Foundation, Inc.
This file is part of GNU Bash, the Bourne Again SHell.
@@ -26,6 +26,10 @@
#include <stddef.h>
#include <string.h>
+extern int locale_utf8locale;
+
+extern int utf8_mbscmp (const char *, const char *);
+
/* Compare MBS1 and MBS2. */
int
mbscmp (mbs1, mbs2)
diff --git a/lib/sh/pathcanon.c b/lib/sh/pathcanon.c
index f19bd55f..f9506dff 100644
--- a/lib/sh/pathcanon.c
+++ b/lib/sh/pathcanon.c
@@ -227,7 +227,7 @@ sh_canonpath (path, flags)
if (result[2] == '\0') /* short-circuit for bare `//' */
result[1] = '\0';
else
- strcpy (result, result + 1);
+ memmove (result, result + 1, strlen (result + 1) + 1);
}
return (result);
diff --git a/lib/sh/pathphys.c b/lib/sh/pathphys.c
index 26016b76..99390cef 100644
--- a/lib/sh/pathphys.c
+++ b/lib/sh/pathphys.c
@@ -245,7 +245,7 @@ error:
if (result[2] == '\0') /* short-circuit for bare `//' */
result[1] = '\0';
else
- strcpy (result, result + 1);
+ memmove (result, result + 1, strlen (result + 1) + 1);
}
return (result);
diff --git a/lib/sh/shmbchar.c b/lib/sh/shmbchar.c
index 7f14208e..f2f2582b 100644
--- a/lib/sh/shmbchar.c
+++ b/lib/sh/shmbchar.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001, 2006, 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+/* Copyright (C) 2001, 2006, 2009, 2010, 2012, 2015-2018 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,9 +20,15 @@
#include <stdlib.h>
#include <limits.h>
+#include <errno.h>
+
#include <shmbutil.h>
#include <shmbchar.h>
+#ifndef errno
+extern int errno;
+#endif
+
#if IS_BASIC_ASCII
/* Bit table of characters in the ISO C "basic character set". */
@@ -37,6 +43,13 @@ const unsigned int is_basic_table [UCHAR_MAX / 32 + 1] =
#endif /* IS_BASIC_ASCII */
+extern int locale_utf8locale;
+
+extern char *utf8_mbsmbchar (const char *);
+extern int utf8_mblen (const char *, size_t);
+
+/* Count the number of characters in S, counting multi-byte characters as a
+ single character. */
size_t
mbstrlen (s)
const char *s;
@@ -65,6 +78,8 @@ mbstrlen (s)
}
/* Return pointer to first multibyte char in S, or NULL if none. */
+/* XXX - if we know that the locale is UTF-8, we can just check whether or
+ not any byte has the eighth bit turned on */
char *
mbsmbchar (s)
const char *s;
@@ -74,13 +89,19 @@ mbsmbchar (s)
mbstate_t mbs = { 0 };
int mb_cur_max;
+ if (locale_utf8locale)
+ return (utf8_mbsmbchar (s)); /* XXX */
+
mb_cur_max = MB_CUR_MAX;
for (t = (char *)s; *t; t++)
{
if (is_basic (*t))
continue;
- clen = mbrlen (t, mb_cur_max, &mbs);
+ if (locale_utf8locale) /* not used if above code active */
+ clen = utf8_mblen (t, mb_cur_max);
+ else
+ clen = mbrlen (t, mb_cur_max, &mbs);
if (clen == 0)
return 0;
diff --git a/lib/sh/shquote.c b/lib/sh/shquote.c
index ecec5971..97e2bc53 100644
--- a/lib/sh/shquote.c
+++ b/lib/sh/shquote.c
@@ -228,7 +228,8 @@ sh_un_double_quote (string)
going through the shell parser, which will protect the internal
quoting characters. TABLE, if set, points to a map of the ascii code
set with char needing to be backslash-quoted if table[char]==1. FLAGS,
- if 1, causes tildes to be quoted as well. */
+ if 1, causes tildes to be quoted as well. If FLAGS&2, backslash-quote
+ other shell blank characters. */
char *
sh_backslash_quote (string, table, flags)
@@ -273,6 +274,8 @@ sh_backslash_quote (string, table, flags)
/* Tildes are special at the start of a word or after a `:' or `='
(technically unquoted, but it doesn't make a difference in practice) */
*r++ = '\\';
+ else if ((flags&2) && shellblank((unsigned char)c))
+ *r++ = '\\';
*r++ = c;
}
diff --git a/lib/sh/snprintf.c b/lib/sh/snprintf.c
index 87ca2173..6e5892ee 100644
--- a/lib/sh/snprintf.c
+++ b/lib/sh/snprintf.c
@@ -142,9 +142,11 @@ extern char *fmtullong __P((unsigned long long int, int, char *, size_t, int));
302 / 1000 is log10 (2) rounded up;
add one for integer division truncation;
add one more for a minus sign if t is signed. */
+#ifndef INT_STRLEN_BOUND
#define INT_STRLEN_BOUND(t) \
((sizeof (t) * CHAR_BIT - TYPE_SIGNED (t)) * 302 / 1000 \
+ 1 + TYPE_SIGNED (t))
+#endif
/* conversion flags */
#define PF_ALTFORM 0x00001 /* # */
diff --git a/lib/sh/strcasecmp.c b/lib/sh/strcasecmp.c
index 5542f715..70d0551a 100644
--- a/lib/sh/strcasecmp.c
+++ b/lib/sh/strcasecmp.c
@@ -32,7 +32,7 @@ int
strncasecmp (string1, string2, count)
const char *string1;
const char *string2;
- int count;
+ size_t count;
{
register const char *s1;
register const char *s2;
diff --git a/lib/sh/strtrans.c b/lib/sh/strtrans.c
index 79831476..48f255f5 100644
--- a/lib/sh/strtrans.c
+++ b/lib/sh/strtrans.c
@@ -230,8 +230,6 @@ ansic_quote (str, flags, rlen)
*r++ = '$';
*r++ = '\'';
- s = str;
-
for (s = str; c = *s; s++)
{
b = l = 1; /* 1 == add backslash; 0 == no backslash */
@@ -305,11 +303,9 @@ ansic_wshouldquote (string)
{
const wchar_t *wcs;
wchar_t wcc;
-
wchar_t *wcstr = NULL;
size_t slen;
-
slen = mbstowcs (wcstr, string, 0);
if (slen == (size_t)-1)
diff --git a/lib/sh/timeval.c b/lib/sh/timeval.c
index 7bd9df8f..c4b61dc8 100644
--- a/lib/sh/timeval.c
+++ b/lib/sh/timeval.c
@@ -25,6 +25,13 @@
#include <sys/types.h>
#include <posixtime.h>
+#include <bashintl.h>
+#include <stdc.h>
+
+#ifndef locale_decpoint
+extern int locale_decpoint PARAMS((void));
+#endif
+
#include <stdio.h>
struct timeval *
@@ -140,6 +147,6 @@ print_timeval (fp, tvp)
minutes = timestamp / 60;
seconds = timestamp % 60;
- fprintf (fp, "%ldm%d.%03ds", minutes, seconds, seconds_fraction);
+ fprintf (fp, "%ldm%d%c%03ds", minutes, seconds, locale_decpoint (), seconds_fraction);
}
#endif /* HAVE_TIMEVAL */
diff --git a/lib/sh/ufuncs.c b/lib/sh/ufuncs.c
index fcd4c044..ad9284cc 100644
--- a/lib/sh/ufuncs.c
+++ b/lib/sh/ufuncs.c
@@ -37,8 +37,16 @@
#include <unistd.h>
#endif
+#include <errno.h>
+#if !defined (errno)
+extern int errno;
+#endif /* !errno */
+
#if defined (HAVE_SELECT)
# include "posixselect.h"
+# include "quit.h"
+# include "trap.h"
+# include "stat-time.h"
#endif
/* A version of `alarm' using setitimer if it's available. */
@@ -84,17 +92,50 @@ falarm (secs, usecs)
/* A version of sleep using fractional seconds and select. I'd like to use
`usleep', but it's already taken */
-#if defined (HAVE_TIMEVAL) && defined (HAVE_SELECT)
+#if defined (HAVE_TIMEVAL) && (defined (HAVE_SELECT) || defined (HAVE_PSELECT))
int
fsleep(sec, usec)
unsigned int sec, usec;
{
+ int e, r;
+ sigset_t blocked_sigs, prevmask;
+#if defined (HAVE_PSELECT)
+ struct timespec ts;
+#else
struct timeval tv;
+#endif
+ sigemptyset (&blocked_sigs);
+# if defined (SIGCHLD)
+ sigaddset (&blocked_sigs, SIGCHLD);
+# endif
+
+#if defined (HAVE_PSELECT)
+ ts.tv_sec = sec;
+ ts.tv_nsec = usec * 1000;
+#else
+ sigemptyset (&prevmask);
tv.tv_sec = sec;
tv.tv_usec = usec;
+#endif /* !HAVE_PSELECT */
+
+ do
+ {
+#if defined (HAVE_PSELECT)
+ r = pselect(0, (fd_set *)0, (fd_set *)0, (fd_set *)0, &ts, &blocked_sigs);
+#else
+ sigprocmask (SIG_SETMASK, &blocked_sigs, &prevmask);
+ r = select(0, (fd_set *)0, (fd_set *)0, (fd_set *)0, &tv);
+ sigprocmask (SIG_SETMASK, &prevmask, NULL);
+#endif
+ e = errno;
+ if (r < 0 && errno == EINTR)
+ QUIT; /* just signals, no traps */
+ errno = e;
+ }
+ while (r < 0 && errno == EINTR);
- return select(0, (fd_set *)0, (fd_set *)0, (fd_set *)0, &tv);
+ return r;
}
#else /* !HAVE_TIMEVAL || !HAVE_SELECT */
int
diff --git a/lib/sh/unicode.c b/lib/sh/unicode.c
index b58eaefd..fe13c4a0 100644
--- a/lib/sh/unicode.c
+++ b/lib/sh/unicode.c
@@ -1,6 +1,6 @@
/* unicode.c - functions to convert unicode characters */
-/* Copyright (C) 2010-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2010-2016 Free Software Foundation, Inc.
This file is part of GNU Bash, the Bourne Again SHell.
@@ -55,6 +55,8 @@ extern const char *locale_charset __P((void));
extern char *get_locale_var __P((char *));
#endif
+extern int locale_utf8locale;
+
static int u32init = 0;
static int utf8locale = 0;
#if defined (HAVE_ICONV)
@@ -219,12 +221,12 @@ u32toutf16 (c, s)
int l;
l = 0;
- if (c < 0x0d800)
+ if (c < 0x0d800 || (c >= 0x0e000 && c <= 0x0ffff))
{
s[0] = (unsigned short) (c & 0xFFFF);
l = 1;
}
- else if (c >= 0x0e000 && c <= 0x010ffff)
+ else if (c >= 0x10000 && c <= 0x010ffff)
{
c -= 0x010000;
s[0] = (unsigned short)((c >> 10) + 0xd800);
@@ -265,28 +267,21 @@ u32cconv (c, s)
return n;
#endif
-#if HAVE_NL_LANGINFO
- codeset = nl_langinfo (CODESET);
- if (STREQ (codeset, "UTF-8"))
- {
- n = u32toutf8 (c, s);
- return n;
- }
-#endif
-
#if HAVE_ICONV
/* this is mostly from coreutils-8.5/lib/unicodeio.c */
if (u32init == 0)
{
-# if HAVE_LOCALE_CHARSET
- charset = locale_charset (); /* XXX - fix later */
-# else
- charset = stub_charset ();
-# endif
- if (STREQ (charset, "UTF-8"))
- utf8locale = 1;
- else
+ utf8locale = locale_utf8locale;
+ localconv = (iconv_t)-1;
+ if (utf8locale == 0)
{
+#if HAVE_LOCALE_CHARSET
+ charset = locale_charset ();
+#elif HAVE_NL_LANGINFO
+ charset = nl_langinfo (CODESET);
+#else
+ charset = stub_charset ();
+#endif
localconv = iconv_open (charset, "UTF-8");
if (localconv == (iconv_t)-1)
/* We assume ASCII when presented with an unknown encoding. */
@@ -295,6 +290,8 @@ u32cconv (c, s)
u32init = 1;
}
+ /* NL_LANGINFO and locale_charset used when setting locale_utf8locale */
+
/* If we have a UTF-8 locale, convert to UTF-8 and return converted value. */
n = u32toutf8 (c, s);
if (utf8locale)
@@ -315,12 +312,8 @@ u32cconv (c, s)
if (iconv (localconv, (ICONV_CONST char **)&iptr, &sn, &optr, &obytesleft) == (size_t)-1)
{
-#if 1
/* You get ISO C99 escape sequences if iconv fails */
n = u32tocesc (c, s);
-#else
- /* You get UTF-8 if iconv fails */
-#endif
return n;
}
@@ -332,7 +325,10 @@ u32cconv (c, s)
return (optr - obuf);
#endif /* HAVE_ICONV */
- n = u32tocesc (c, s); /* fallback is ISO C99 escape sequences */
+ if (locale_utf8locale)
+ n = u32toutf8 (c, s);
+ else
+ n = u32tocesc (c, s); /* fallback is ISO C99 escape sequences */
return n;
}
#else
diff --git a/lib/sh/utf8.c b/lib/sh/utf8.c
new file mode 100644
index 00000000..d27fcf54
--- /dev/null
+++ b/lib/sh/utf8.c
@@ -0,0 +1,147 @@
+/* utf8.c - UTF-8 character handling functions */
+
+/* Copyright (C) 2018 Free Software Foundation, Inc.
+
+ This file is part of GNU Bash, the Bourne Again SHell.
+
+ Bash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Bash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Bash. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <config.h>
+
+#ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+
+#include "bashansi.h"
+#include "shmbutil.h"
+
+extern int locale_mb_cur_max;
+extern int locale_utf8locale;
+
+#if defined (HANDLE_MULTIBYTE)
+
+/* Return a pointer to the first occurrence of the byte C in the string S,
+   or NULL if C does not occur.  For the time being this is nothing more
+   than a byte-wise search performed by strchr. */
+char *
+utf8_mbschr (s, c)
+     const char *s;
+     int c;
+{
+  char *hit;
+
+  hit = strchr (s, c);    /* for now */
+  return (hit);
+}
+
+/* Compare the strings S1 and S2.  The result is whatever strcmp returns
+   for the two byte strings: negative, zero, or positive. */
+int
+utf8_mbscmp (s1, s2)
+     const char *s1, *s2;
+{
+  int order;
+
+  /* The UTF-8 encoding preserves lexicographic order, so comparing the
+     encoded bytes is enough. */
+  order = strcmp (s1, s2);
+  return (order);
+}
+
+/* Return a pointer to the first multibyte character in STR, or NULL if STR
+   contains no continuation byte (bit pattern 10xxxxxx).
+
+   The scan stops at the first continuation byte.  When the byte just
+   before it is a lead byte (bit pattern 11xxxxxx) the result points at
+   that lead byte, so the caller receives the start of the character and
+   not a pointer into the middle of it.  A continuation byte that has no
+   lead byte in front of it is returned as is.  Whether the result is NULL
+   or not depends only on the presence of a continuation byte. */
+char *
+utf8_mbsmbchar (str)
+     const char *str;
+{
+  register char *s;
+
+  for (s = (char *)str; *s; s++)
+    if ((*s & 0xc0) == 0x80)
+      {
+        /* This is the first continuation byte in STR, so the byte before
+           it (if any) is either a lead byte or a plain ASCII byte. */
+        if (s > str && (s[-1] & 0xc0) == 0xc0)
+          return (s - 1);
+        return s;
+      }
+  return (0);
+}
+
+/* Count the bytes of SRC that are not UTF-8 continuation bytes (10xxxxxx),
+   i.e. the bytes that begin a character.  Scanning stops at the first NUL
+   byte or once the byte index exceeds MAXLEN, so indices 0 through MAXLEN
+   inclusive may be examined.  SRCLEN is accepted but not consulted. */
+int
+utf8_mbsnlen(src, srclen, maxlen)
+     const char *src;
+     size_t srclen;
+     int maxlen;
+{
+  int pos, nchars;
+
+  nchars = 0;
+  pos = 0;
+  while (src[pos] != '\0' && pos <= maxlen)
+    {
+      /* Anything other than a continuation byte starts a new character. */
+      if ((src[pos] & 0xc0) != 0x80)
+        nchars++;
+      pos++;
+    }
+  return (nchars);
+}
+
+/* Adapted from GNU gnulib */
+/* Return the length in bytes of the UTF-8 character that starts at S,
+   looking at no more than N bytes.
+
+   Returns 0 if S is a null pointer (UTF-8 has no shift states) or if S
+   points at a NUL byte, 1 for any other ASCII byte, 2, 3 or 4 for a
+   well-formed multibyte sequence, and -1 if N is 0 or the bytes at S are
+   an invalid or incomplete sequence.  Overlong encodings, the surrogate
+   range (ED A0..BF xx) and values above U+10FFFF are rejected.
+
+   Every byte is inspected as an unsigned char.  With a plain char that is
+   signed, a byte >= 0x80 promotes to a negative int, and a test such as
+   (s[1] ^ 0x80) < 0x40 would then accept any such byte as a continuation
+   byte.  The second byte is read only when N says it is there. */
+int
+utf8_mblen (s, n)
+     const char *s;
+     size_t n;
+{
+  const unsigned char *u;
+  unsigned char c, c1;
+
+  if (s == 0)
+    return (0);    /* no shift states */
+  if (n == 0)
+    return (-1);
+
+  u = (const unsigned char *)s;
+  c = u[0];
+  if (c < 0x80)
+    return (c != 0);
+  /* 0x80..0xc1 cannot start a character; every multibyte form needs at
+     least two bytes, so do not touch u[1] unless N allows it. */
+  if (c >= 0xc2 && n >= 2)
+    {
+      c1 = u[1];
+      if (c < 0xe0)
+        {
+          /* U+0080..U+07FF: C2..DF 80..BF */
+          if ((c1 ^ 0x80) < 0x40)
+            return 2;
+        }
+      else if (c < 0xf0)
+        {
+          /* U+0800..U+FFFF: no overlong forms (E0 80..9F) and no
+             surrogates (ED A0..BF) */
+          if (n >= 3
+              && (c1 ^ 0x80) < 0x40 && (u[2] ^ 0x80) < 0x40
+              && (c >= 0xe1 || c1 >= 0xa0)
+              && (c != 0xed || c1 < 0xa0))
+            return 3;
+        }
+      else if (c < 0xf8)
+        {
+          /* U+10000..U+10FFFF: no overlong forms (F0 80..8F) and nothing
+             past F4 8F */
+          if (n >= 4
+              && (c1 ^ 0x80) < 0x40 && (u[2] ^ 0x80) < 0x40
+              && (u[3] ^ 0x80) < 0x40
+              && (c >= 0xf1 || c1 >= 0x90)
+              && (c < 0xf4 || (c == 0xf4 && c1 < 0x90)))
+            return 4;
+        }
+    }
+  /* invalid or incomplete multibyte character */
+  return -1;
+}
+
+/* Count the characters in the UTF-8 string S, treating each multibyte
+   sequence as one character.  Malformed input has to be tolerated: a
+   length that MB_INVALIDCH flags as invalid is counted as one character
+   and skipped as a single byte.  Counting stops at the terminating NUL or
+   as soon as utf8_mblen reports a length of 0. */
+size_t
+utf8_mbstrlen(s)
+     const char *s;
+{
+  size_t step, count;
+  int maxbytes;
+
+  maxbytes = MB_CUR_MAX;
+  for (count = 0; *s != '\0'; count++)
+    {
+      step = (size_t)utf8_mblen(s, maxbytes);
+      if (step == 0)
+        break;
+      if (MB_INVALIDCH(step))
+        step = 1;    /* assume single byte */
+      s += step;
+    }
+  return count;
+}
+
+#endif
diff --git a/lib/sh/zread.c b/lib/sh/zread.c
index 496f20b8..8b7ecedf 100644
--- a/lib/sh/zread.c
+++ b/lib/sh/zread.c
@@ -1,6 +1,6 @@
/* zread - read data from file descriptor into buffer with retries */
-/* Copyright (C) 1999-2002 Free Software Foundation, Inc.
+/* Copyright (C) 1999-2017 Free Software Foundation, Inc.
This file is part of GNU Bash, the Bourne Again SHell.
@@ -53,6 +53,7 @@ zread (fd, buf, len)
{
ssize_t r;
+ check_signals (); /* check for signals before a blocking read */
while ((r = read (fd, buf, len)) < 0 && errno == EINTR)
/* XXX - bash-5.0 */
/* We check executing_builtin and run traps here for backwards compatibility */
@@ -103,6 +104,7 @@ zreadintr (fd, buf, len)
char *buf;
size_t len;
{
+ check_signals ();
return (read (fd, buf, len));
}