diff options
author | Chet Ramey <chet.ramey@case.edu> | 2019-01-07 09:27:52 -0500 |
---|---|---|
committer | Chet Ramey <chet.ramey@case.edu> | 2019-01-07 09:27:52 -0500 |
commit | d233b485e83c3a784b803fb894280773f16f2deb (patch) | |
tree | 16d51f3ccca2d4ad2d8f2da564d68ca848de595b /lib/sh | |
parent | 64447609994bfddeef1061948022c074093e9a9f (diff) | |
download | bash-d233b485e83c3a784b803fb894280773f16f2deb.tar.gz |
bash-5.0 distribution sources and documentation (bash-5.0)
Diffstat (limited to 'lib/sh')
-rw-r--r-- | lib/sh/Makefile.in | 15 | ||||
-rw-r--r-- | lib/sh/casemod.c | 5 | ||||
-rw-r--r-- | lib/sh/clock.c | 10 | ||||
-rw-r--r-- | lib/sh/eaccess.c | 4 | ||||
-rw-r--r-- | lib/sh/getenv.c | 2 | ||||
-rw-r--r-- | lib/sh/mbschr.c | 6 | ||||
-rw-r--r-- | lib/sh/mbscmp.c | 6 | ||||
-rw-r--r-- | lib/sh/pathcanon.c | 2 | ||||
-rw-r--r-- | lib/sh/pathphys.c | 2 | ||||
-rw-r--r-- | lib/sh/shmbchar.c | 25 | ||||
-rw-r--r-- | lib/sh/shquote.c | 5 | ||||
-rw-r--r-- | lib/sh/snprintf.c | 2 | ||||
-rw-r--r-- | lib/sh/strcasecmp.c | 2 | ||||
-rw-r--r-- | lib/sh/strtrans.c | 4 | ||||
-rw-r--r-- | lib/sh/timeval.c | 9 | ||||
-rw-r--r-- | lib/sh/ufuncs.c | 45 | ||||
-rw-r--r-- | lib/sh/unicode.c | 46 | ||||
-rw-r--r-- | lib/sh/utf8.c | 147 | ||||
-rw-r--r-- | lib/sh/zread.c | 4 |
19 files changed, 292 insertions, 49 deletions
diff --git a/lib/sh/Makefile.in b/lib/sh/Makefile.in index 2ca921b3..06917ba4 100644 --- a/lib/sh/Makefile.in +++ b/lib/sh/Makefile.in @@ -67,8 +67,8 @@ LOCAL_DEFS = @LOCAL_DEFS@ INCLUDES = -I. -I../.. -I$(topdir) -I$(topdir)/lib -I$(BASHINCDIR) -I$(srcdir) $(INTL_INC) -CCFLAGS = ${PROFILE_FLAGS} ${INCLUDES} $(DEFS) $(LOCAL_DEFS) $(LOCAL_CFLAGS) \ - $(CFLAGS) $(CPPFLAGS) +CCFLAGS = ${ADDON_CFLAGS} ${PROFILE_FLAGS} ${INCLUDES} $(DEFS) $(LOCAL_DEFS) \ + $(LOCAL_CFLAGS) $(CFLAGS) $(CPPFLAGS) GCC_LINT_FLAGS = -Wall -Wshadow -Wpointer-arith -Wcast-qual \ -Wcast-align -Wstrict-prototypes -Wconversion \ @@ -92,7 +92,8 @@ CSOURCES = clktck.c clock.c getcwd.c getenv.c oslib.c setlinebuf.c \ mktime.c strftime.c mbschr.c zcatfd.c zmapfd.c winsize.c eaccess.c \ wcsdup.c fpurge.c zgetline.c mbscmp.c uconvert.c ufuncs.c \ casemod.c dprintf.c input_avail.c mbscasecmp.c fnxform.c \ - strchrnul.c unicode.c wcswidth.c wcsnwidth.c shmbchar.c strdup.c + strchrnul.c unicode.c wcswidth.c wcsnwidth.c shmbchar.c strdup.c \ + utf8.c # The header files for this library. 
HSOURCES = @@ -107,7 +108,7 @@ OBJECTS = clktck.o clock.o getenv.o oslib.o setlinebuf.o strnlen.o \ fmtullong.o fmtumax.o zcatfd.o zmapfd.o winsize.o wcsdup.o \ fpurge.o zgetline.o mbscmp.o uconvert.o ufuncs.o casemod.o \ input_avail.o mbscasecmp.o fnxform.o unicode.o shmbchar.o \ - wcsnwidth.o ${LIBOBJS} + utf8.o wcsnwidth.o ${LIBOBJS} SUPPORT = Makefile @@ -200,6 +201,7 @@ tmpfile.o: tmpfile.c uconvert.o: uconvert.c ufuncs.o: ufuncs.c unicode.o: unicode.c +utf8.o: utf8.c vprint.o: vprint.c wcsdup.o: wcsdup.c wcsnwidth.o: wcsnwidth.c @@ -277,6 +279,7 @@ tmpfile.o: ${BUILD_DIR}/config.h uconvert.o: ${BUILD_DIR}/config.h ufuncs.o: ${BUILD_DIR}/config.h unicode.o: ${BUILD_DIR}/config.h +utf8.o: ${BUILD_DIR}/config.h vprint.o: ${BUILD_DIR}/config.h wcsdup.o: ${BUILD_DIR}/config.h wcsnwidth.o: ${BUILD_DIR}/config.h @@ -612,6 +615,10 @@ unicode.o: ${topdir}/bashansi.h ${BASHINCDIR}/ansi_stdlib.h unicode.o: ${BASHINCDIR}/stdc.h unicode.o: ${topdir}/xmalloc.h +utf8.o: ${topdir}/bashansi.h +utf8.o: ${BASHINCDIR}/ansi_stdlib.h +utf8.o: ${BASHINCDIR}/shmbutil.h ${BASHINCDIR}/shmbchar.h + winsize.o: ${BASHINCDIR}/stdc.h winsize.o: ${topdir}/xmalloc.h winsize.o: ${topdir}/bashtypes.h diff --git a/lib/sh/casemod.c b/lib/sh/casemod.c index f68303bc..7cdd4178 100644 --- a/lib/sh/casemod.c +++ b/lib/sh/casemod.c @@ -229,7 +229,10 @@ singlebyte: else { m = mbrtowc (&wc, string + start, end - start, &state); - if (MB_INVALIDCH (m) || m == 1) + /* Have to go through wide case conversion even for single-byte + chars, to accommodate single-byte characters where the + corresponding upper or lower case equivalent is multibyte. 
*/ + if (MB_INVALIDCH (m)) { wc = (unsigned char)string[start]; goto singlebyte; diff --git a/lib/sh/clock.c b/lib/sh/clock.c index 84cdbc54..c6c52bf8 100644 --- a/lib/sh/clock.c +++ b/lib/sh/clock.c @@ -32,7 +32,13 @@ #include <stdio.h> #include <stdc.h> -extern long get_clk_tck __P((void)); +#include <bashintl.h> + +#ifndef locale_decpoint +extern int locale_decpoint PARAMS((void)); +#endif + +extern long get_clk_tck PARAMS((void)); void clock_t_to_secs (t, sp, sfp) @@ -76,6 +82,6 @@ print_clock_t (fp, t) minutes = timestamp / 60; seconds = timestamp % 60; - fprintf (fp, "%ldm%d.%03ds", minutes, seconds, seconds_fraction); + fprintf (fp, "%ldm%d%c%03ds", minutes, seconds, locale_decpoint(), seconds_fraction); } #endif /* HAVE_TIMES */ diff --git a/lib/sh/eaccess.c b/lib/sh/eaccess.c index 8fd8a43e..3d8ae4e8 100644 --- a/lib/sh/eaccess.c +++ b/lib/sh/eaccess.c @@ -91,7 +91,9 @@ sh_stat (path, finfo) } if (path[0] == '/' && path[1] == 'd' && strncmp (path, "/dev/fd/", 8) == 0) { -#if !defined (HAVE_DEV_FD) + /* If stating /dev/fd/n doesn't produce the same results as fstat of + FD N, then define DEV_FD_STAT_BROKEN */ +#if !defined (HAVE_DEV_FD) || defined (DEV_FD_STAT_BROKEN) intmax_t fd; int r; diff --git a/lib/sh/getenv.c b/lib/sh/getenv.c index 8b5e3406..1e682aef 100644 --- a/lib/sh/getenv.c +++ b/lib/sh/getenv.c @@ -69,7 +69,7 @@ getenv (name) if (var && exported_p (var)) return (value_cell (var)); } - else + else if (environ) { register int i, len; diff --git a/lib/sh/mbschr.c b/lib/sh/mbschr.c index 7730e46e..639962d4 100644 --- a/lib/sh/mbschr.c +++ b/lib/sh/mbschr.c @@ -28,9 +28,12 @@ #include "shmbutil.h" extern int locale_mb_cur_max; +extern int locale_utf8locale; #undef mbschr +extern char *utf8_mbschr (const char *, int); /* XXX */ + /* In some locales, the non-first byte of some multibyte characters have the same value as some ascii character. Faced with these strings, a legacy strchr() might return the wrong value. 
*/ @@ -49,6 +52,9 @@ mbschr (s, c) mbstate_t state; size_t strlength, mblength; + if (locale_utf8locale && c < 0x80) + return (utf8_mbschr (s, c)); /* XXX */ + /* The locale encodings with said weird property are BIG5, BIG5-HKSCS, GBK, GB18030, SHIFT_JIS, and JOHAB. They exhibit the problem only when c >= 0x30. We can therefore use the faster bytewise search if diff --git a/lib/sh/mbscmp.c b/lib/sh/mbscmp.c index aaf81f5e..c7c84435 100644 --- a/lib/sh/mbscmp.c +++ b/lib/sh/mbscmp.c @@ -1,6 +1,6 @@ /* mbscmp - multibyte string comparison. */ -/* Copyright (C) 1995-2015 Free Software Foundation, Inc. +/* Copyright (C) 1995-2018 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. @@ -26,6 +26,10 @@ #include <stddef.h> #include <string.h> +extern int locale_utf8locale; + +extern int utf8_mbscmp (const char *, const char *); + /* Compare MBS1 and MBS2. */ int mbscmp (mbs1, mbs2) diff --git a/lib/sh/pathcanon.c b/lib/sh/pathcanon.c index f19bd55f..f9506dff 100644 --- a/lib/sh/pathcanon.c +++ b/lib/sh/pathcanon.c @@ -227,7 +227,7 @@ sh_canonpath (path, flags) if (result[2] == '\0') /* short-circuit for bare `//' */ result[1] = '\0'; else - strcpy (result, result + 1); + memmove (result, result + 1, strlen (result + 1) + 1); } return (result); diff --git a/lib/sh/pathphys.c b/lib/sh/pathphys.c index 26016b76..99390cef 100644 --- a/lib/sh/pathphys.c +++ b/lib/sh/pathphys.c @@ -245,7 +245,7 @@ error: if (result[2] == '\0') /* short-circuit for bare `//' */ result[1] = '\0'; else - strcpy (result, result + 1); + memmove (result, result + 1, strlen (result + 1) + 1); } return (result); diff --git a/lib/sh/shmbchar.c b/lib/sh/shmbchar.c index 7f14208e..f2f2582b 100644 --- a/lib/sh/shmbchar.c +++ b/lib/sh/shmbchar.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2001, 2006, 2009, 2010, 2012, 2015 Free Software Foundation, Inc. +/* Copyright (C) 2001, 2006, 2009, 2010, 2012, 2015-2018 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,9 +20,15 @@ #include <stdlib.h> #include <limits.h> +#include <errno.h> + #include <shmbutil.h> #include <shmbchar.h> +#ifndef errno +extern int errno; +#endif + #if IS_BASIC_ASCII /* Bit table of characters in the ISO C "basic character set". */ @@ -37,6 +43,13 @@ const unsigned int is_basic_table [UCHAR_MAX / 32 + 1] = #endif /* IS_BASIC_ASCII */ +extern int locale_utf8locale; + +extern char *utf8_mbsmbchar (const char *); +extern int utf8_mblen (const char *, size_t); + +/* Count the number of characters in S, counting multi-byte characters as a + single character. */ size_t mbstrlen (s) const char *s; @@ -65,6 +78,8 @@ mbstrlen (s) } /* Return pointer to first multibyte char in S, or NULL if none. */ +/* XXX - if we know that the locale is UTF-8, we can just check whether or + not any byte has the eighth bit turned on */ char * mbsmbchar (s) const char *s; @@ -74,13 +89,19 @@ mbsmbchar (s) mbstate_t mbs = { 0 }; int mb_cur_max; + if (locale_utf8locale) + return (utf8_mbsmbchar (s)); /* XXX */ + mb_cur_max = MB_CUR_MAX; for (t = (char *)s; *t; t++) { if (is_basic (*t)) continue; - clen = mbrlen (t, mb_cur_max, &mbs); + if (locale_utf8locale) /* not used if above code active */ + clen = utf8_mblen (t, mb_cur_max); + else + clen = mbrlen (t, mb_cur_max, &mbs); if (clen == 0) return 0; diff --git a/lib/sh/shquote.c b/lib/sh/shquote.c index ecec5971..97e2bc53 100644 --- a/lib/sh/shquote.c +++ b/lib/sh/shquote.c @@ -228,7 +228,8 @@ sh_un_double_quote (string) going through the shell parser, which will protect the internal quoting characters. TABLE, if set, points to a map of the ascii code set with char needing to be backslash-quoted if table[char]==1. FLAGS, - if 1, causes tildes to be quoted as well. */ + if 1, causes tildes to be quoted as well. If FLAGS&2, backslash-quote + other shell blank characters. 
*/ char * sh_backslash_quote (string, table, flags) @@ -273,6 +274,8 @@ sh_backslash_quote (string, table, flags) /* Tildes are special at the start of a word or after a `:' or `=' (technically unquoted, but it doesn't make a difference in practice) */ *r++ = '\\'; + else if ((flags&2) && shellblank((unsigned char)c)) + *r++ = '\\'; *r++ = c; } diff --git a/lib/sh/snprintf.c b/lib/sh/snprintf.c index 87ca2173..6e5892ee 100644 --- a/lib/sh/snprintf.c +++ b/lib/sh/snprintf.c @@ -142,9 +142,11 @@ extern char *fmtullong __P((unsigned long long int, int, char *, size_t, int)); 302 / 1000 is log10 (2) rounded up; add one for integer division truncation; add one more for a minus sign if t is signed. */ +#ifndef INT_STRLEN_BOUND #define INT_STRLEN_BOUND(t) \ ((sizeof (t) * CHAR_BIT - TYPE_SIGNED (t)) * 302 / 1000 \ + 1 + TYPE_SIGNED (t)) +#endif /* conversion flags */ #define PF_ALTFORM 0x00001 /* # */ diff --git a/lib/sh/strcasecmp.c b/lib/sh/strcasecmp.c index 5542f715..70d0551a 100644 --- a/lib/sh/strcasecmp.c +++ b/lib/sh/strcasecmp.c @@ -32,7 +32,7 @@ int strncasecmp (string1, string2, count) const char *string1; const char *string2; - int count; + size_t count; { register const char *s1; register const char *s2; diff --git a/lib/sh/strtrans.c b/lib/sh/strtrans.c index 79831476..48f255f5 100644 --- a/lib/sh/strtrans.c +++ b/lib/sh/strtrans.c @@ -230,8 +230,6 @@ ansic_quote (str, flags, rlen) *r++ = '$'; *r++ = '\''; - s = str; - for (s = str; c = *s; s++) { b = l = 1; /* 1 == add backslash; 0 == no backslash */ @@ -305,11 +303,9 @@ ansic_wshouldquote (string) { const wchar_t *wcs; wchar_t wcc; - wchar_t *wcstr = NULL; size_t slen; - slen = mbstowcs (wcstr, string, 0); if (slen == (size_t)-1) diff --git a/lib/sh/timeval.c b/lib/sh/timeval.c index 7bd9df8f..c4b61dc8 100644 --- a/lib/sh/timeval.c +++ b/lib/sh/timeval.c @@ -25,6 +25,13 @@ #include <sys/types.h> #include <posixtime.h> +#include <bashintl.h> +#include <stdc.h> + +#ifndef locale_decpoint +extern int 
locale_decpoint PARAMS((void)); +#endif + #include <stdio.h> struct timeval * @@ -140,6 +147,6 @@ print_timeval (fp, tvp) minutes = timestamp / 60; seconds = timestamp % 60; - fprintf (fp, "%ldm%d.%03ds", minutes, seconds, seconds_fraction); + fprintf (fp, "%ldm%d%c%03ds", minutes, seconds, locale_decpoint (), seconds_fraction); } #endif /* HAVE_TIMEVAL */ diff --git a/lib/sh/ufuncs.c b/lib/sh/ufuncs.c index fcd4c044..ad9284cc 100644 --- a/lib/sh/ufuncs.c +++ b/lib/sh/ufuncs.c @@ -37,8 +37,16 @@ #include <unistd.h> #endif +#include <errno.h> +#if !defined (errno) +extern int errno; +#endif /* !errno */ + #if defined (HAVE_SELECT) # include "posixselect.h" +# include "quit.h" +# include "trap.h" +# include "stat-time.h" #endif /* A version of `alarm' using setitimer if it's available. */ @@ -84,17 +92,50 @@ falarm (secs, usecs) /* A version of sleep using fractional seconds and select. I'd like to use `usleep', but it's already taken */ -#if defined (HAVE_TIMEVAL) && defined (HAVE_SELECT) +#if defined (HAVE_TIMEVAL) && (defined (HAVE_SELECT) || defined (HAVE_PSELECT)) int fsleep(sec, usec) unsigned int sec, usec; { + int e, r; + sigset_t blocked_sigs, prevmask; +#if defined (HAVE_PSELECT) + struct timespec ts; +#else struct timeval tv; +#endif + sigemptyset (&blocked_sigs); +# if defined (SIGCHLD) + sigaddset (&blocked_sigs, SIGCHLD); +# endif + +#if defined (HAVE_PSELECT) + ts.tv_sec = sec; + ts.tv_nsec = usec * 1000; +#else + sigemptyset (&prevmask); tv.tv_sec = sec; tv.tv_usec = usec; +#endif /* !HAVE_PSELECT */ + + do + { +#if defined (HAVE_PSELECT) + r = pselect(0, (fd_set *)0, (fd_set *)0, (fd_set *)0, &ts, &blocked_sigs); +#else + sigprocmask (SIG_SETMASK, &blocked_sigs, &prevmask); + r = select(0, (fd_set *)0, (fd_set *)0, (fd_set *)0, &tv); + sigprocmask (SIG_SETMASK, &prevmask, NULL); +#endif + e = errno; + if (r < 0 && errno == EINTR) + QUIT; /* just signals, no traps */ + errno = e; + } + while (r < 0 && errno == EINTR); - return select(0, (fd_set *)0, 
(fd_set *)0, (fd_set *)0, &tv); + return r; } #else /* !HAVE_TIMEVAL || !HAVE_SELECT */ int diff --git a/lib/sh/unicode.c b/lib/sh/unicode.c index b58eaefd..fe13c4a0 100644 --- a/lib/sh/unicode.c +++ b/lib/sh/unicode.c @@ -1,6 +1,6 @@ /* unicode.c - functions to convert unicode characters */ -/* Copyright (C) 2010-2015 Free Software Foundation, Inc. +/* Copyright (C) 2010-2016 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. @@ -55,6 +55,8 @@ extern const char *locale_charset __P((void)); extern char *get_locale_var __P((char *)); #endif +extern int locale_utf8locale; + static int u32init = 0; static int utf8locale = 0; #if defined (HAVE_ICONV) @@ -219,12 +221,12 @@ u32toutf16 (c, s) int l; l = 0; - if (c < 0x0d800) + if (c < 0x0d800 || (c >= 0x0e000 && c <= 0x0ffff)) { s[0] = (unsigned short) (c & 0xFFFF); l = 1; } - else if (c >= 0x0e000 && c <= 0x010ffff) + else if (c >= 0x10000 && c <= 0x010ffff) { c -= 0x010000; s[0] = (unsigned short)((c >> 10) + 0xd800); @@ -265,28 +267,21 @@ u32cconv (c, s) return n; #endif -#if HAVE_NL_LANGINFO - codeset = nl_langinfo (CODESET); - if (STREQ (codeset, "UTF-8")) - { - n = u32toutf8 (c, s); - return n; - } -#endif - #if HAVE_ICONV /* this is mostly from coreutils-8.5/lib/unicodeio.c */ if (u32init == 0) { -# if HAVE_LOCALE_CHARSET - charset = locale_charset (); /* XXX - fix later */ -# else - charset = stub_charset (); -# endif - if (STREQ (charset, "UTF-8")) - utf8locale = 1; - else + utf8locale = locale_utf8locale; + localconv = (iconv_t)-1; + if (utf8locale == 0) { +#if HAVE_LOCALE_CHARSET + charset = locale_charset (); +#elif HAVE_NL_LANGINFO + charset = nl_langinfo (CODESET); +#else + charset = stub_charset (); +#endif localconv = iconv_open (charset, "UTF-8"); if (localconv == (iconv_t)-1) /* We assume ASCII when presented with an unknown encoding. 
*/ @@ -295,6 +290,8 @@ u32cconv (c, s) u32init = 1; } + /* NL_LANGINFO and locale_charset used when setting locale_utf8locale */ + /* If we have a UTF-8 locale, convert to UTF-8 and return converted value. */ n = u32toutf8 (c, s); if (utf8locale) @@ -315,12 +312,8 @@ u32cconv (c, s) if (iconv (localconv, (ICONV_CONST char **)&iptr, &sn, &optr, &obytesleft) == (size_t)-1) { -#if 1 /* You get ISO C99 escape sequences if iconv fails */ n = u32tocesc (c, s); -#else - /* You get UTF-8 if iconv fails */ -#endif return n; } @@ -332,7 +325,10 @@ u32cconv (c, s) return (optr - obuf); #endif /* HAVE_ICONV */ - n = u32tocesc (c, s); /* fallback is ISO C99 escape sequences */ + if (locale_utf8locale) + n = u32toutf8 (c, s); + else + n = u32tocesc (c, s); /* fallback is ISO C99 escape sequences */ return n; } #else diff --git a/lib/sh/utf8.c b/lib/sh/utf8.c new file mode 100644 index 00000000..d27fcf54 --- /dev/null +++ b/lib/sh/utf8.c @@ -0,0 +1,147 @@ +/* utf8.c - UTF-8 character handling functions */ + +/* Copyright (C) 2018 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <config.h> + +#ifdef HAVE_STDLIB_H +# include <stdlib.h> +#endif + +#include "bashansi.h" +#include "shmbutil.h" + +extern int locale_mb_cur_max; +extern int locale_utf8locale; + +#if defined (HANDLE_MULTIBYTE) + +char * +utf8_mbschr (s, c) + const char *s; + int c; +{ + return strchr (s, c); /* for now */ +} + +int +utf8_mbscmp (s1, s2) + const char *s1, *s2; +{ + /* Use the fact that the UTF-8 encoding preserves lexicographic order. */ + return strcmp (s1, s2); +} + +char * +utf8_mbsmbchar (str) + const char *str; +{ + register char *s; + + for (s = (char *)str; *s; s++) + if ((*s & 0xc0) == 0x80) + return s; + return (0); +} + +int +utf8_mbsnlen(src, srclen, maxlen) + const char *src; + size_t srclen; + int maxlen; +{ + register int sind, count; + + for (sind = count = 0; src[sind] && sind <= maxlen; sind++) + { + if ((src[sind] & 0xc0) != 0x80) + count++; + } + return (count); +} + +/* Adapted from GNU gnulib */ +int +utf8_mblen (s, n) + const char *s; + size_t n; +{ + unsigned char c, c1; + + if (s == 0) + return (0); /* no shift states */ + if (n <= 0) + return (-1); + + c = (unsigned char)*s; + if (c < 0x80) + return (c != 0); + if (c >= 0xc2) + { + c1 = (unsigned char)s[1]; + if (c < 0xe0) + { + if (n >= 2 && (s[1] ^ 0x80) < 0x40) + return 2; + } + else if (c < 0xf0) + { + if (n >= 3 + && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (c >= 0xe1 || c1 >= 0xa0) + && (c != 0xed || c1 < 0xa0)) + return 3; + } + else if (c < 0xf8) + { + if (n >= 4 + && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 + && (c >= 0xf1 || c1 >= 0x90) + && (c < 0xf4 || (c == 0xf4 && c1 < 0x90))) + return 4; + } + } + /* invalid or incomplete multibyte character */ + return -1; +} + +/* We can optimize this if we know the locale is UTF-8, but needs to handle + malformed byte sequences. 
*/ +size_t +utf8_mbstrlen(s) + const char *s; +{ + size_t clen, nc; + int mb_cur_max; + + nc = 0; + mb_cur_max = MB_CUR_MAX; + while (*s && (clen = (size_t)utf8_mblen(s, mb_cur_max)) != 0) + { + if (MB_INVALIDCH(clen)) + clen = 1; /* assume single byte */ + + s += clen; + nc++; + } + return nc; +} + +#endif diff --git a/lib/sh/zread.c b/lib/sh/zread.c index 496f20b8..8b7ecedf 100644 --- a/lib/sh/zread.c +++ b/lib/sh/zread.c @@ -1,6 +1,6 @@ /* zread - read data from file descriptor into buffer with retries */ -/* Copyright (C) 1999-2002 Free Software Foundation, Inc. +/* Copyright (C) 1999-2017 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. @@ -53,6 +53,7 @@ zread (fd, buf, len) { ssize_t r; + check_signals (); /* check for signals before a blocking read */ while ((r = read (fd, buf, len)) < 0 && errno == EINTR) /* XXX - bash-5.0 */ /* We check executing_builtin and run traps here for backwards compatibility */ @@ -103,6 +104,7 @@ zreadintr (fd, buf, len) char *buf; size_t len; { + check_signals (); return (read (fd, buf, len)); } |