summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 26
-rw-r--r-- lib/c-ctype.c 253
-rw-r--r-- tests/test-c-ctype.c 106
3 files changed, 216 insertions, 169 deletions
diff --git a/ChangeLog b/ChangeLog
index c552225b64..8723b38f0c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,29 @@
+2015-09-22 Paul Eggert <eggert@cs.ucla.edu>
+
+ c-ctype: port better to EBCDIC
+ Problems reported by Daniel Richard G. in
+ http://lists.gnu.org/archive/html/bug-gnulib/2015-09/msg00020.html
+ * lib/c-ctype.c: Include <limits.h>, for CHAR_MIN and CHAR_MAX.
+ Include "verify.h".
+ (C_CTYPE_ASCII, C_CTYPE_CONSECUTIVE_DIGITS)
+ (C_CTYPE_CONSECUTIVE_LOWERCASE, C_CTYPE_CONSECUTIVE_UPPERCASE):
+ Define as enum constants with value false, if not defined, so that
+ code can use 'if' instead of 'ifdef'. Using 'if' helps make the
+ code more portable, as both branches of the 'if' are compiled on
+ all platforms.
+ (C_CTYPE_EBCDIC): New constant.
+ (to_char): New static function.
+ (c_isalnum, c_isalpha, c_isdigit, c_islower, c_isgraph, c_isprint)
+ (c_ispunct, c_isupper, c_isxdigit, c_tolower, c_toupper):
+ Rewrite to use 'if' instead of 'ifdef'.
+ Use to_char if non-ASCII. Prefer <= to >=.
+ Prefer true and false to 1 and 0, for booleans.
+ (c_iscntrl): Use 'if', not 'ifdef'. Special case for EBCDIC.
+ Verify that the character set is either ASCII or EBCDIC.
+ * tests/test-c-ctype.c: Include <limits.h>, for CHAR_MIN and CHAR_MAX.
+ (to_char): New function.
+ (test_all): Port to EBCDIC. Add some more tests, e.g., for c_ispunct.
+
2015-09-21 Pádraig Brady <P@draigBrady.com>
nanosleep: fix return code for interrupted replacement
diff --git a/lib/c-ctype.c b/lib/c-ctype.c
index 6635d34ecf..916d46eb30 100644
--- a/lib/c-ctype.c
+++ b/lib/c-ctype.c
@@ -21,6 +21,34 @@ along with this program; if not, see <http://www.gnu.org/licenses/>. */
#define NO_C_CTYPE_MACROS
#include "c-ctype.h"
+#include <limits.h>
+#include "verify.h"
+
+#ifndef C_CTYPE_ASCII
+enum { C_CTYPE_ASCII = false };
+#endif
+#ifndef C_CTYPE_CONSECUTIVE_DIGITS
+enum { C_CTYPE_CONSECUTIVE_DIGITS = false };
+#endif
+#ifndef C_CTYPE_CONSECUTIVE_LOWERCASE
+enum { C_CTYPE_CONSECUTIVE_LOWERCASE = false };
+#endif
+#ifndef C_CTYPE_CONSECUTIVE_UPPERCASE
+enum { C_CTYPE_CONSECUTIVE_UPPERCASE = false };
+#endif
+
+/* Convert an int, which may be promoted from either an unsigned or a
+ signed char, to the corresponding char. */
+
+static char
+to_char (int c)
+{
+ enum { nchars = CHAR_MAX - CHAR_MIN + 1 };
+ if (CHAR_MIN < 0 && CHAR_MAX < c && c < nchars)
+ return c - nchars;
+ return c;
+}
+
/* The function isascii is not locale dependent. Its use in EBCDIC is
questionable. */
bool
@@ -32,18 +60,20 @@ c_isascii (int c)
bool
c_isalnum (int c)
{
-#if C_CTYPE_CONSECUTIVE_DIGITS \
- && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
-#if C_CTYPE_ASCII
- return ((c >= '0' && c <= '9')
- || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'));
-#else
- return ((c >= '0' && c <= '9')
- || (c >= 'A' && c <= 'Z')
- || (c >= 'a' && c <= 'z'));
-#endif
-#else
- switch (c)
+ if (C_CTYPE_CONSECUTIVE_DIGITS
+ && C_CTYPE_CONSECUTIVE_UPPERCASE
+ && C_CTYPE_CONSECUTIVE_LOWERCASE)
+ {
+ if (C_CTYPE_ASCII)
+ return (('0' <= c && c <= '9')
+ || ('A' <= (c & ~0x20) && (c & ~0x20) <= 'Z'));
+ else
+ return (('0' <= c && c <= '9')
+ || ('A' <= c && c <= 'Z')
+ || ('a' <= c && c <= 'z'));
+ }
+
+ switch (to_char (c))
{
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
@@ -57,24 +87,24 @@ c_isalnum (int c)
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
-#endif
}
bool
c_isalpha (int c)
{
-#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
-#if C_CTYPE_ASCII
- return ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z');
-#else
- return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'));
-#endif
-#else
- switch (c)
+ if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE)
+ {
+ if (C_CTYPE_ASCII)
+ return 'A' <= (c & ~0x20) && (c & ~0x20) <= 'Z';
+ else
+ return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
+ }
+
+ switch (to_char (c))
{
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
@@ -86,11 +116,10 @@ c_isalpha (int c)
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
-#endif
}
bool
@@ -102,81 +131,65 @@ c_isblank (int c)
bool
c_iscntrl (int c)
{
-#if C_CTYPE_ASCII
- return ((c & ~0x1f) == 0 || c == 0x7f);
-#else
- switch (c)
- {
- case ' ': case '!': case '"': case '#': case '$': case '%':
- case '&': case '\'': case '(': case ')': case '*': case '+':
- case ',': case '-': case '.': case '/':
- case '0': case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- case ':': case ';': case '<': case '=': case '>': case '?':
- case '@':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- case '[': case '\\': case ']': case '^': case '_': case '`':
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- case '{': case '|': case '}': case '~':
- return 0;
- default:
- return 1;
- }
-#endif
+ enum { C_CTYPE_EBCDIC = (' ' == 64 && '0' == 240
+ && 'A' == 193 && 'J' == 209 && 'S' == 226
+ && 'a' == 129 && 'j' == 145 && 's' == 162) };
+ verify (C_CTYPE_ASCII || C_CTYPE_EBCDIC);
+
+ if (0 <= c && c < ' ')
+ return true;
+ if (C_CTYPE_ASCII)
+ return c == 0x7f;
+ else
+ return c == 0xff || c == -1;
}
bool
c_isdigit (int c)
{
-#if C_CTYPE_CONSECUTIVE_DIGITS
- return (c >= '0' && c <= '9');
-#else
+ if (C_CTYPE_ASCII)
+ return '0' <= c && c <= '9';
+
+ c = to_char (c);
+ if (C_CTYPE_CONSECUTIVE_DIGITS)
+ return '0' <= c && c <= '9';
+
switch (c)
{
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
-#endif
}
bool
c_islower (int c)
{
-#if C_CTYPE_CONSECUTIVE_LOWERCASE
- return (c >= 'a' && c <= 'z');
-#else
- switch (c)
+ if (C_CTYPE_CONSECUTIVE_LOWERCASE)
+ return 'a' <= c && c <= 'z';
+
+ switch (to_char (c))
{
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
-#endif
}
bool
c_isgraph (int c)
{
-#if C_CTYPE_ASCII
- return (c >= '!' && c <= '~');
-#else
- switch (c)
+ if (C_CTYPE_ASCII)
+ return '!' <= c && c <= '~';
+
+ switch (to_char (c))
{
case '!': case '"': case '#': case '$': case '%': case '&':
case '\'': case '(': case ')': case '*': case '+': case ',':
@@ -197,20 +210,19 @@ c_isgraph (int c)
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
case '{': case '|': case '}': case '~':
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
-#endif
}
bool
c_isprint (int c)
{
-#if C_CTYPE_ASCII
- return (c >= ' ' && c <= '~');
-#else
- switch (c)
+ if (C_CTYPE_ASCII)
+ return ' ' <= c && c <= '~';
+
+ switch (to_char (c))
{
case ' ': case '!': case '"': case '#': case '$': case '%':
case '&': case '\'': case '(': case ')': case '*': case '+':
@@ -231,22 +243,21 @@ c_isprint (int c)
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
case '{': case '|': case '}': case '~':
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
-#endif
}
bool
c_ispunct (int c)
{
-#if C_CTYPE_ASCII
- return ((c >= '!' && c <= '~')
- && !((c >= '0' && c <= '9')
- || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z')));
-#else
- switch (c)
+ if (C_CTYPE_ASCII)
+ return (('!' <= c && c <= '~')
+ && !(('0' <= c && c <= '9')
+ || ('A' <= (c & ~0x20) && (c & ~0x20) <= 'Z')));
+
+ switch (to_char (c))
{
case '!': case '"': case '#': case '$': case '%': case '&':
case '\'': case '(': case ')': case '*': case '+': case ',':
@@ -255,11 +266,10 @@ c_ispunct (int c)
case '@':
case '[': case '\\': case ']': case '^': case '_': case '`':
case '{': case '|': case '}': case '~':
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
-#endif
}
bool
@@ -272,57 +282,56 @@ c_isspace (int c)
bool
c_isupper (int c)
{
-#if C_CTYPE_CONSECUTIVE_UPPERCASE
- return (c >= 'A' && c <= 'Z');
-#else
- switch (c)
+ if (C_CTYPE_CONSECUTIVE_UPPERCASE)
+ return 'A' <= c && c <= 'Z';
+
+ switch (to_char (c))
{
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
-#endif
}
bool
c_isxdigit (int c)
{
-#if C_CTYPE_CONSECUTIVE_DIGITS \
- && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
-#if C_CTYPE_ASCII
- return ((c >= '0' && c <= '9')
- || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'F'));
-#else
- return ((c >= '0' && c <= '9')
- || (c >= 'A' && c <= 'F')
- || (c >= 'a' && c <= 'f'));
-#endif
-#else
- switch (c)
+ if (C_CTYPE_CONSECUTIVE_DIGITS
+ && C_CTYPE_CONSECUTIVE_UPPERCASE
+ && C_CTYPE_CONSECUTIVE_LOWERCASE)
+ {
+ if ('0' <= c && c <= '9')
+ return true;
+ if (C_CTYPE_ASCII)
+ return 'A' <= (c & ~0x20) && (c & ~0x20) <= 'F';
+ return (('A' <= c && c <= 'F')
+ || ('a' <= c && c <= 'f'));
+ }
+
+ switch (to_char (c))
{
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
-#endif
}
int
c_tolower (int c)
{
-#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
- return (c >= 'A' && c <= 'Z' ? c - 'A' + 'a' : c);
-#else
- switch (c)
+ if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE)
+ return c_isupper (c) ? c - 'A' + 'a' : c;
+
+ switch (to_char (c))
{
case 'A': return 'a';
case 'B': return 'b';
@@ -352,16 +361,15 @@ c_tolower (int c)
case 'Z': return 'z';
default: return c;
}
-#endif
}
int
c_toupper (int c)
{
-#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
- return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
-#else
- switch (c)
+ if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE)
+ return c_islower (c) ? c - 'a' + 'A' : c;
+
+ switch (to_char (c))
{
case 'a': return 'A';
case 'b': return 'B';
@@ -391,5 +399,4 @@ c_toupper (int c)
case 'z': return 'Z';
default: return c;
}
-#endif
}
diff --git a/tests/test-c-ctype.c b/tests/test-c-ctype.c
index 81fe936ec1..63d0af9581 100644
--- a/tests/test-c-ctype.c
+++ b/tests/test-c-ctype.c
@@ -20,10 +20,19 @@
#include "c-ctype.h"
+#include <limits.h>
#include <locale.h>
#include "macros.h"
+static char
+to_char (int c)
+{
+ if (CHAR_MIN < 0 && CHAR_MAX < c)
+ return c - CHAR_MAX - 1 + CHAR_MIN;
+ return c;
+}
+
static void
test_all (void)
{
@@ -31,49 +40,32 @@ test_all (void)
for (c = -0x80; c < 0x100; c++)
{
- ASSERT (c_isascii (c) == (c >= 0 && c < 0x80));
-
- switch (c)
+ if (c < 0)
{
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- case '0': case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- ASSERT (c_isalnum (c) == 1);
- break;
- default:
- ASSERT (c_isalnum (c) == 0);
- break;
+ ASSERT (c_isascii (c) == c_isascii (c + 0x100));
+ ASSERT (c_isalnum (c) == c_isalnum (c + 0x100));
+ ASSERT (c_isalpha (c) == c_isalpha (c + 0x100));
+ ASSERT (c_isblank (c) == c_isblank (c + 0x100));
+ ASSERT (c_iscntrl (c) == c_iscntrl (c + 0x100));
+ ASSERT (c_isdigit (c) == c_isdigit (c + 0x100));
+ ASSERT (c_islower (c) == c_islower (c + 0x100));
+ ASSERT (c_isgraph (c) == c_isgraph (c + 0x100));
+ ASSERT (c_isprint (c) == c_isprint (c + 0x100));
+ ASSERT (c_ispunct (c) == c_ispunct (c + 0x100));
+ ASSERT (c_isspace (c) == c_isspace (c + 0x100));
+ ASSERT (c_isupper (c) == c_isupper (c + 0x100));
+ ASSERT (c_isxdigit (c) == c_isxdigit (c + 0x100));
+ ASSERT (to_char (c_tolower (c)) == to_char (c_tolower (c + 0x100)));
+ ASSERT (to_char (c_toupper (c)) == to_char (c_toupper (c + 0x100)));
}
- switch (c)
- {
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- ASSERT (c_isalpha (c) == 1);
- break;
- default:
- ASSERT (c_isalpha (c) == 0);
- break;
- }
+ ASSERT (c_isascii (c) == (c >= 0 && c < 0x80));
+
+ ASSERT (c_isalnum (c) == (c_isalpha (c) || c_isdigit (c)));
+
+ ASSERT (c_isalpha (c) == (c_islower (c) || c_isupper (c)));
- switch (c)
+ switch (to_char (c))
{
case '\t': case ' ':
ASSERT (c_isblank (c) == 1);
@@ -83,9 +75,13 @@ test_all (void)
break;
}
+#ifdef C_CTYPE_ASCII
ASSERT (c_iscntrl (c) == ((c >= 0 && c < 0x20) || c == 0x7f));
+#endif
- switch (c)
+ ASSERT (! (c_iscntrl (c) && c_isprint (c)));
+
+ switch (to_char (c))
{
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
@@ -96,7 +92,7 @@ test_all (void)
break;
}
- switch (c)
+ switch (to_char (c))
{
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
@@ -110,13 +106,31 @@ test_all (void)
break;
}
+#ifdef C_CTYPE_ASCII
ASSERT (c_isgraph (c) == ((c >= 0x20 && c < 0x7f) && c != ' '));
ASSERT (c_isprint (c) == (c >= 0x20 && c < 0x7f));
+#endif
+
+ ASSERT (c_isgraph (c) == (c_isalnum (c) || c_ispunct (c)));
+
+ ASSERT (c_isprint (c) == (c_isgraph (c) || c == ' '));
- ASSERT (c_ispunct (c) == (c_isgraph (c) && !c_isalnum (c)));
+ switch (to_char (c))
+ {
+ case '!': case '"': case '#': case '$': case '%': case '&': case '\'':
+ case '(': case ')': case '*': case '+': case ',': case '-': case '.':
+ case '/': case ':': case ';': case '<': case '=': case '>': case '?':
+ case '@': case '[': case '\\': case ']': case '^': case '_': case '`':
+ case '{': case '|': case '}': case '~':
+ ASSERT (c_ispunct (c) == 1);
+ break;
+ default:
+ ASSERT (c_ispunct (c) == 0);
+ break;
+ }
- switch (c)
+ switch (to_char (c))
{
case ' ': case '\t': case '\n': case '\v': case '\f': case '\r':
ASSERT (c_isspace (c) == 1);
@@ -126,7 +140,7 @@ test_all (void)
break;
}
- switch (c)
+ switch (to_char (c))
{
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
@@ -140,7 +154,7 @@ test_all (void)
break;
}
- switch (c)
+ switch (to_char (c))
{
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
@@ -153,7 +167,7 @@ test_all (void)
break;
}
- switch (c)
+ switch (to_char (c))
{
case 'A':
ASSERT (c_tolower (c) == 'a');