diff options
-rw-r--r-- | ChangeLog | 26 | ||||
-rw-r--r-- | lib/c-ctype.c | 253 | ||||
-rw-r--r-- | tests/test-c-ctype.c | 106 |
3 files changed, 216 insertions, 169 deletions
@@ -1,3 +1,29 @@ +2015-09-22 Paul Eggert <eggert@cs.ucla.edu> + + c-ctype: port better to EBCDIC + Problems reported by Daniel Richard G. in + http://lists.gnu.org/archive/html/bug-gnulib/2015-09/msg00020.html + * lib/c-ctype.c: Include <limits.h>, for CHAR_MIN and CHAR_MAX. + Include "verify.h". + (C_CTYPE_ASCII, C_CTYPE_CONSECUTIVE_DIGITS) + (C_CTYPE_CONSECUTIVE_LOWERCASE, C_CTYPE_CONSECUTIVE_UPPERCASE): + Define as enum constants with value false, if not defined, so that + code can use 'if' instead of 'ifdef'. Using 'if' helps make the + code more portable, as both branches of the 'if' are compiled on + all platforms. + (C_CTYPE_EBCDIC): New constant. + (to_char): New static function. + (c_isalnum, c_isalpha, c_isdigit, c_islower, c_isgraph, c_isprint) + (c_ispunct, c_isupper, c_isxdigit, c_tolower, c_toupper): + Rewrite to use 'if' instead of 'ifdef'. + Use to_char if non-ASCII. Prefer <= to >=. + Prefer true and false to 1 and 0, for booleans. + (c_iscntrl): Use 'if', not 'ifdef'. Special case for EBCDIC. + Verify that the character set is either ASCII or EBCDIC. + * tests/test-c-ctype.c: Include <limits.h>, for CHAR_MIN + (to_char): New function. + (test_all): Port to EBCDIC. Add some more tests, e.g., for c_ispunct. + 2015-09-21 Pádraig Brady <P@draigBrady.com> nanosleep: fix return code for interrupted replacement diff --git a/lib/c-ctype.c b/lib/c-ctype.c index 6635d34ecf..916d46eb30 100644 --- a/lib/c-ctype.c +++ b/lib/c-ctype.c @@ -21,6 +21,34 @@ along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #define NO_C_CTYPE_MACROS #include "c-ctype.h" +#include <limits.h> +#include "verify.h" + +#ifndef C_CTYPE_ASCII +enum { C_CTYPE_ASCII = false }; +#endif +#ifndef C_CTYPE_CONSECUTIVE_DIGITS +enum { C_CTYPE_CONSECUTIVE_DIGITS = false }; +#endif +#ifndef C_CTYPE_CONSECUTIVE_LOWERCASE +enum { C_CTYPE_CONSECUTIVE_LOWERCASE = false }; +#endif +#ifndef C_CTYPE_CONSECUTIVE_UPPERCASE +enum { C_CTYPE_CONSECUTIVE_UPPERCASE = false }; +#endif + +/* Convert an int, which may be promoted from either an unsigned or a + signed char, to the corresponding char. */ + +static char +to_char (int c) +{ + enum { nchars = CHAR_MAX - CHAR_MIN + 1 }; + if (CHAR_MIN < 0 && CHAR_MAX < c && c < nchars) + return c - nchars; + return c; +} + /* The function isascii is not locale dependent. Its use in EBCDIC is questionable. */ bool @@ -32,18 +60,20 @@ c_isascii (int c) bool c_isalnum (int c) { -#if C_CTYPE_CONSECUTIVE_DIGITS \ - && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE -#if C_CTYPE_ASCII - return ((c >= '0' && c <= '9') - || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z')); -#else - return ((c >= '0' && c <= '9') - || (c >= 'A' && c <= 'Z') - || (c >= 'a' && c <= 'z')); -#endif -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_DIGITS + && C_CTYPE_CONSECUTIVE_UPPERCASE + && C_CTYPE_CONSECUTIVE_LOWERCASE) + { + if (C_CTYPE_ASCII) + return (('0' <= c && c <= '9') + || ('A' <= (c & ~0x20) && (c & ~0x20) <= 'Z')); + else + return (('0' <= c && c <= '9') + || ('A' <= c && c <= 'Z') + || ('a' <= c && c <= 'z')); + } + + switch (to_char (c)) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -57,24 +87,24 @@ c_isalnum (int c) case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_isalpha (int c) { -#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE 
-#if C_CTYPE_ASCII - return ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'); -#else - return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')); -#endif -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE) + { + if (C_CTYPE_ASCII) + return 'A' <= (c & ~0x20) && (c & ~0x20) <= 'Z'; + else + return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); + } + + switch (to_char (c)) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': @@ -86,11 +116,10 @@ c_isalpha (int c) case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - return 1; + return true; default: - return 0; + return false; } -#endif } bool @@ -102,81 +131,65 @@ c_isblank (int c) bool c_iscntrl (int c) { -#if C_CTYPE_ASCII - return ((c & ~0x1f) == 0 || c == 0x7f); -#else - switch (c) - { - case ' ': case '!': case '"': case '#': case '$': case '%': - case '&': case '\'': case '(': case ')': case '*': case '+': - case ',': case '-': case '.': case '/': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - case ':': case ';': case '<': case '=': case '>': case '?': - case '@': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '[': case '\\': case ']': case '^': case '_': case '`': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '{': case '|': case '}': case '~': - return 0; - default: - return 1; - } -#endif + enum { 
C_CTYPE_EBCDIC = (' ' == 64 && '0' == 240 + && 'A' == 193 && 'J' == 209 && 'S' == 226 + && 'a' == 129 && 'j' == 145 && 's' == 162) }; + verify (C_CTYPE_ASCII || C_CTYPE_EBCDIC); + + if (0 <= c && c < ' ') + return true; + if (C_CTYPE_ASCII) + return c == 0x7f; + else + return c == 0xff || c == -1; } bool c_isdigit (int c) { -#if C_CTYPE_CONSECUTIVE_DIGITS - return (c >= '0' && c <= '9'); -#else + if (C_CTYPE_ASCII) + return '0' <= c && c <= '9'; + + c = to_char (c); + if (C_CTYPE_CONSECUTIVE_DIGITS) + return '0' <= c && c <= '9'; + switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_islower (int c) { -#if C_CTYPE_CONSECUTIVE_LOWERCASE - return (c >= 'a' && c <= 'z'); -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_LOWERCASE) + return 'a' <= c && c <= 'z'; + + switch (to_char (c)) { case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_isgraph (int c) { -#if C_CTYPE_ASCII - return (c >= '!' && c <= '~'); -#else - switch (c) + if (C_CTYPE_ASCII) + return '!' 
<= c && c <= '~'; + + switch (to_char (c)) { case '!': case '"': case '#': case '$': case '%': case '&': case '\'': case '(': case ')': case '*': case '+': case ',': @@ -197,20 +210,19 @@ c_isgraph (int c) case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '{': case '|': case '}': case '~': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_isprint (int c) { -#if C_CTYPE_ASCII - return (c >= ' ' && c <= '~'); -#else - switch (c) + if (C_CTYPE_ASCII) + return ' ' <= c && c <= '~'; + + switch (to_char (c)) { case ' ': case '!': case '"': case '#': case '$': case '%': case '&': case '\'': case '(': case ')': case '*': case '+': @@ -231,22 +243,21 @@ c_isprint (int c) case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '{': case '|': case '}': case '~': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_ispunct (int c) { -#if C_CTYPE_ASCII - return ((c >= '!' && c <= '~') - && !((c >= '0' && c <= '9') - || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'))); -#else - switch (c) + if (C_CTYPE_ASCII) + return (('!' 
<= c && c <= '~') + && !(('0' <= c && c <= '9') + || ('A' <= (c & ~0x20) && (c & ~0x20) <= 'Z'))); + + switch (to_char (c)) { case '!': case '"': case '#': case '$': case '%': case '&': case '\'': case '(': case ')': case '*': case '+': case ',': @@ -255,11 +266,10 @@ c_ispunct (int c) case '@': case '[': case '\\': case ']': case '^': case '_': case '`': case '{': case '|': case '}': case '~': - return 1; + return true; default: - return 0; + return false; } -#endif } bool @@ -272,57 +282,56 @@ c_isspace (int c) bool c_isupper (int c) { -#if C_CTYPE_CONSECUTIVE_UPPERCASE - return (c >= 'A' && c <= 'Z'); -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_UPPERCASE) + return 'A' <= c && c <= 'Z'; + + switch (to_char (c)) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_isxdigit (int c) { -#if C_CTYPE_CONSECUTIVE_DIGITS \ - && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE -#if C_CTYPE_ASCII - return ((c >= '0' && c <= '9') - || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'F')); -#else - return ((c >= '0' && c <= '9') - || (c >= 'A' && c <= 'F') - || (c >= 'a' && c <= 'f')); -#endif -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_DIGITS + && C_CTYPE_CONSECUTIVE_UPPERCASE + && C_CTYPE_CONSECUTIVE_LOWERCASE) + { + if ('0' <= c && c <= '9') + return true; + if (C_CTYPE_ASCII) + return 'A' <= (c & ~0x20) && (c & ~0x20) <= 'F'; + return (('A' <= c && c <= 'F') + || ('a' <= c && c <= 'f')); + } + + switch (to_char (c)) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - return 1; + return true; default: - 
return 0; + return false; } -#endif } int c_tolower (int c) { -#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE - return (c >= 'A' && c <= 'Z' ? c - 'A' + 'a' : c); -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE) + return c_isupper (c) ? c - 'A' + 'a' : c; + + switch (to_char (c)) { case 'A': return 'a'; case 'B': return 'b'; @@ -352,16 +361,15 @@ c_tolower (int c) case 'Z': return 'z'; default: return c; } -#endif } int c_toupper (int c) { -#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE - return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c); -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE) + return c_islower (c) ? c - 'a' + 'A' : c; + + switch (to_char (c)) { case 'a': return 'A'; case 'b': return 'B'; @@ -391,5 +399,4 @@ c_toupper (int c) case 'z': return 'Z'; default: return c; } -#endif } diff --git a/tests/test-c-ctype.c b/tests/test-c-ctype.c index 81fe936ec1..63d0af9581 100644 --- a/tests/test-c-ctype.c +++ b/tests/test-c-ctype.c @@ -20,10 +20,19 @@ #include "c-ctype.h" +#include <limits.h> #include <locale.h> #include "macros.h" +static char +to_char (int c) +{ + if (CHAR_MIN < 0 && CHAR_MAX < c) + return c - CHAR_MAX - 1 + CHAR_MIN; + return c; +} + static void test_all (void) { @@ -31,49 +40,32 @@ test_all (void) for (c = -0x80; c < 0x100; c++) { - ASSERT (c_isascii (c) == (c >= 0 && c < 0x80)); - - switch (c) + if (c < 0) { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 
'y': case 'z': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - ASSERT (c_isalnum (c) == 1); - break; - default: - ASSERT (c_isalnum (c) == 0); - break; + ASSERT (c_isascii (c) == c_isascii (c + 0x100)); + ASSERT (c_isalnum (c) == c_isalnum (c + 0x100)); + ASSERT (c_isalpha (c) == c_isalpha (c + 0x100)); + ASSERT (c_isblank (c) == c_isblank (c + 0x100)); + ASSERT (c_iscntrl (c) == c_iscntrl (c + 0x100)); + ASSERT (c_isdigit (c) == c_isdigit (c + 0x100)); + ASSERT (c_islower (c) == c_islower (c + 0x100)); + ASSERT (c_isgraph (c) == c_isgraph (c + 0x100)); + ASSERT (c_isprint (c) == c_isprint (c + 0x100)); + ASSERT (c_ispunct (c) == c_ispunct (c + 0x100)); + ASSERT (c_isspace (c) == c_isspace (c + 0x100)); + ASSERT (c_isupper (c) == c_isupper (c + 0x100)); + ASSERT (c_isxdigit (c) == c_isxdigit (c + 0x100)); + ASSERT (to_char (c_tolower (c)) == to_char (c_tolower (c + 0x100))); + ASSERT (to_char (c_toupper (c)) == to_char (c_toupper (c + 0x100))); } - switch (c) - { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - ASSERT (c_isalpha (c) == 1); - break; - default: - ASSERT (c_isalpha (c) == 0); - break; - } + ASSERT (c_isascii (c) == (c >= 0 && c < 0x80)); + + ASSERT (c_isalnum (c) == (c_isalpha (c) || c_isdigit (c))); + + ASSERT (c_isalpha (c) == (c_islower (c) || c_isupper (c))); - switch (c) + switch (to_char (c)) { case '\t': case ' ': ASSERT (c_isblank (c) == 1); @@ -83,9 +75,13 @@ test_all (void) break; } +#ifdef 
C_CTYPE_ASCII ASSERT (c_iscntrl (c) == ((c >= 0 && c < 0x20) || c == 0x7f)); +#endif - switch (c) + ASSERT (! (c_iscntrl (c) && c_isprint (c))); + + switch (to_char (c)) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -96,7 +92,7 @@ test_all (void) break; } - switch (c) + switch (to_char (c)) { case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': @@ -110,13 +106,31 @@ test_all (void) break; } +#ifdef C_CTYPE_ASCII ASSERT (c_isgraph (c) == ((c >= 0x20 && c < 0x7f) && c != ' ')); ASSERT (c_isprint (c) == (c >= 0x20 && c < 0x7f)); +#endif + + ASSERT (c_isgraph (c) == (c_isalnum (c) || c_ispunct (c))); + + ASSERT (c_isprint (c) == (c_isgraph (c) || c == ' ')); - ASSERT (c_ispunct (c) == (c_isgraph (c) && !c_isalnum (c))); + switch (to_char (c)) + { + case '!': case '"': case '#': case '$': case '%': case '&': case '\'': + case '(': case ')': case '*': case '+': case ',': case '-': case '.': + case '/': case ':': case ';': case '<': case '=': case '>': case '?': + case '@': case '[': case'\\': case ']': case '^': case '_': case '`': + case '{': case '|': case '}': case '~': + ASSERT (c_ispunct (c) == 1); + break; + default: + ASSERT (c_ispunct (c) == 0); + break; + } - switch (c) + switch (to_char (c)) { case ' ': case '\t': case '\n': case '\v': case '\f': case '\r': ASSERT (c_isspace (c) == 1); @@ -126,7 +140,7 @@ test_all (void) break; } - switch (c) + switch (to_char (c)) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': @@ -140,7 +154,7 @@ test_all (void) break; } - switch (c) + switch (to_char (c)) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -153,7 +167,7 @@ test_all (void) break; } - switch (c) + switch (to_char (c)) { case 'A': ASSERT (c_tolower (c) == 'a'); |