c-ctype: port better to EBCDIC

Problems reported by Daniel Richard G. in http://lists.gnu.org/archive/html/bug-gnulib/2015-09/msg00020.html
* lib/c-ctype.c: Include <limits.h>, for CHAR_MIN and CHAR_MAX. Include "verify.h". (C_CTYPE_ASCII, C_CTYPE_CONSECUTIVE_DIGITS) (C_CTYPE_CONSECUTIVE_LOWERCASE, C_CTYPE_CONSECUTIVE_UPPERCASE): Define as enum constants with value false, if not defined, so that code can use 'if' instead of 'ifdef'. Using 'if' helps make the code more portable, as both branches of the 'if' are compiled on all platforms. (C_CTYPE_EBCDIC): New constant. (to_char): New static function. (c_isalnum, c_isalpha, c_isdigit, c_islower, c_isgraph, c_isprint) (c_ispunct, c_isupper, c_isxdigit, c_tolower, c_toupper): Rewrite to use 'if' instead of 'ifdef'. Use to_char if non-ASCII. Prefer <= to >=. Prefer true and false to 1 and 0, for booleans. (c_iscntrl): Use 'if', not 'ifdef'. Special case for EBCDIC. Verify that the character set is either ASCII or EBCDIC.
* tests/test-c-ctype.c: Include <limits.h>, for CHAR_MIN. (to_char): New function. (test_all): Port to EBCDIC. Add some more tests, e.g., for c_ispunct.
author: Paul Eggert <eggert@cs.ucla.edu> 2015-09-22 12:17:06 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2015-09-22 12:18:18 -0700
commit: 7e2a8ea26c4bf72c6d143170cf2a0ebff5b50d65 (patch)
tree: 35acec3e089ad968796f83afa566a9c995b55cf9 /lib/c-ctype.c
parent: 6439a0af0ca6b0256a4273a8ae0175e896bcc8da (diff)
download: gnulib-7e2a8ea26c4bf72c6d143170cf2a0ebff5b50d65.tar.gz
1 files changed, 130 insertions, 123 deletions
diff --git a/lib/c-ctype.c b/lib/c-ctype.c
index 6635d34ecf..916d46eb30 100644
--- a/lib/c-ctype.c
+++ b/lib/c-ctype.c
@@ -21,6 +21,34 @@ along with this program; if not, see <http://www.gnu.org/licenses/>.  */
 #define NO_C_CTYPE_MACROS
 #include "c-ctype.h"
 
+#include <limits.h>
+#include "verify.h"
+
+#ifndef C_CTYPE_ASCII
+enum { C_CTYPE_ASCII = false };
+#endif
+#ifndef C_CTYPE_CONSECUTIVE_DIGITS
+enum { C_CTYPE_CONSECUTIVE_DIGITS = false };
+#endif
+#ifndef C_CTYPE_CONSECUTIVE_LOWERCASE
+enum { C_CTYPE_CONSECUTIVE_LOWERCASE = false };
+#endif
+#ifndef C_CTYPE_CONSECUTIVE_UPPERCASE
+enum { C_CTYPE_CONSECUTIVE_UPPERCASE = false };
+#endif
+
+/* Convert an int, which may be promoted from either an unsigned or a
+   signed char, to the corresponding char.  */
+
+static char
+to_char (int c)
+{
+  enum { nchars = CHAR_MAX - CHAR_MIN + 1 };
+  if (CHAR_MIN < 0 && CHAR_MAX < c && c < nchars)
+    return c - nchars;
+  return c;
+}
+
 /* The function isascii is not locale dependent. Its use in EBCDIC is
    questionable. */
 bool
@@ -32,18 +60,20 @@ c_isascii (int c)
 bool
 c_isalnum (int c)
 {
-#if C_CTYPE_CONSECUTIVE_DIGITS \
-    && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
-#if C_CTYPE_ASCII
-  return ((c >= '0' && c <= '9')
-          || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'));
-#else
-  return ((c >= '0' && c <= '9')
-          || (c >= 'A' && c <= 'Z')
-          || (c >= 'a' && c <= 'z'));
-#endif
-#else
-  switch (c)
+  if (C_CTYPE_CONSECUTIVE_DIGITS
+      && C_CTYPE_CONSECUTIVE_UPPERCASE
+      && C_CTYPE_CONSECUTIVE_LOWERCASE)
+    {
+      if (C_CTYPE_ASCII)
+        return (('0' <= c && c <= '9')
+                || ('A' <= (c & ~0x20) && (c & ~0x20) <= 'Z'));
+      else
+        return (('0' <= c && c <= '9')
+                || ('A' <= c && c <= 'Z')
+                || ('a' <= c && c <= 'z'));
+    }
+
+  switch (to_char (c))
     {
     case '0': case '1': case '2': case '3': case '4': case '5':
     case '6': case '7': case '8': case '9':
@@ -57,24 +87,24 @@ c_isalnum (int c)
     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
     case 'y': case 'z':
-      return 1;
+      return true;
     default:
-      return 0;
+      return false;
     }
-#endif
 }
 
 bool
 c_isalpha (int c)
 {
-#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
-#if C_CTYPE_ASCII
-  return ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z');
-#else
-  return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'));
-#endif
-#else
-  switch (c)
+  if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE)
+    {
+      if (C_CTYPE_ASCII)
+        return 'A' <= (c & ~0x20) && (c & ~0x20) <= 'Z';
+      else
+        return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
+    }
+
+  switch (to_char (c))
     {
     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
@@ -86,11 +116,10 @@ c_isalpha (int c)
     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
     case 'y': case 'z':
-      return 1;
+      return true;
     default:
-      return 0;
+      return false;
     }
-#endif
 }
 
 bool
@@ -102,81 +131,65 @@ c_isblank (int c)
 bool
 c_iscntrl (int c)
 {
-#if C_CTYPE_ASCII
-  return ((c & ~0x1f) == 0 || c == 0x7f);
-#else
-  switch (c)
-    {
-    case ' ': case '!': case '"': case '#': case '$': case '%':
-    case '&': case '\'': case '(': case ')': case '*': case '+':
-    case ',': case '-': case '.': case '/':
-    case '0': case '1': case '2': case '3': case '4': case '5':
-    case '6': case '7': case '8': case '9':
-    case ':': case ';': case '<': case '=': case '>': case '?':
-    case '@':
-    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-    case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
-    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
-    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
-    case 'Y': case 'Z':
-    case '[': case '\\': case ']': case '^': case '_': case '`':
-    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
-    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
-    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
-    case 'y': case 'z':
-    case '{': case '|': case '}': case '~':
-      return 0;
-    default:
-      return 1;
-    }
-#endif
+  enum { C_CTYPE_EBCDIC = (' ' == 64 && '0' == 240
+                           && 'A' == 193 && 'J' == 209 && 'S' == 226
+                           && 'a' == 129 && 'j' == 145 && 's' == 162) };
+  verify (C_CTYPE_ASCII || C_CTYPE_EBCDIC);
+
+  if (0 <= c && c < ' ')
+    return true;
+  if (C_CTYPE_ASCII)
+    return c == 0x7f;
+  else
+    return c == 0xff || c == -1;
 }
 
 bool
 c_isdigit (int c)
 {
-#if C_CTYPE_CONSECUTIVE_DIGITS
-  return (c >= '0' && c <= '9');
-#else
+  if (C_CTYPE_ASCII)
+    return '0' <= c && c <= '9';
+
+  c = to_char (c);
+  if (C_CTYPE_CONSECUTIVE_DIGITS)
+    return '0' <= c && c <= '9';
+
   switch (c)
     {
     case '0': case '1': case '2': case '3': case '4': case '5':
     case '6': case '7': case '8': case '9':
-      return 1;
+      return true;
     default:
-      return 0;
+      return false;
     }
-#endif
 }
 
 bool
 c_islower (int c)
 {
-#if C_CTYPE_CONSECUTIVE_LOWERCASE
-  return (c >= 'a' && c <= 'z');
-#else
-  switch (c)
+  if (C_CTYPE_CONSECUTIVE_LOWERCASE)
+    return 'a' <= c && c <= 'z';
+
+  switch (to_char (c))
     {
     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
     case 'y': case 'z':
-      return 1;
+      return true;
     default:
-      return 0;
+      return false;
     }
-#endif
 }
 
 bool
 c_isgraph (int c)
 {
-#if C_CTYPE_ASCII
-  return (c >= '!' && c <= '~');
-#else
-  switch (c)
+  if (C_CTYPE_ASCII)
+    return '!' <= c && c <= '~';
+
+  switch (to_char (c))
     {
     case '!': case '"': case '#': case '$': case '%': case '&':
     case '\'': case '(': case ')': case '*': case '+': case ',':
@@ -197,20 +210,19 @@ c_isgraph (int c)
     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
     case 'y': case 'z':
     case '{': case '|': case '}': case '~':
-      return 1;
+      return true;
     default:
-      return 0;
+      return false;
     }
-#endif
 }
 
 bool
 c_isprint (int c)
 {
-#if C_CTYPE_ASCII
-  return (c >= ' ' && c <= '~');
-#else
-  switch (c)
+  if (C_CTYPE_ASCII)
+    return ' ' <= c && c <= '~';
+
+  switch (to_char (c))
     {
     case ' ': case '!': case '"': case '#': case '$': case '%':
     case '&': case '\'': case '(': case ')': case '*': case '+':
@@ -231,22 +243,21 @@ c_isprint (int c)
     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
     case 'y': case 'z':
     case '{': case '|': case '}': case '~':
-      return 1;
+      return true;
     default:
-      return 0;
+      return false;
     }
-#endif
 }
 
 bool
 c_ispunct (int c)
 {
-#if C_CTYPE_ASCII
-  return ((c >= '!' && c <= '~')
-          && !((c >= '0' && c <= '9')
-               || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z')));
-#else
-  switch (c)
+  if (C_CTYPE_ASCII)
+    return (('!' <= c && c <= '~')
+            && !(('0' <= c && c <= '9')
+                 || ('A' <= (c & ~0x20) && (c & ~0x20) <= 'Z')));
+
+  switch (to_char (c))
     {
     case '!': case '"': case '#': case '$': case '%': case '&':
     case '\'': case '(': case ')': case '*': case '+': case ',':
@@ -255,11 +266,10 @@ c_ispunct (int c)
     case '@':
     case '[': case '\\': case ']': case '^': case '_': case '`':
     case '{': case '|': case '}': case '~':
-      return 1;
+      return true;
     default:
-      return 0;
+      return false;
     }
-#endif
 }
 
 bool
@@ -272,57 +282,56 @@ c_isspace (int c)
 bool
 c_isupper (int c)
 {
-#if C_CTYPE_CONSECUTIVE_UPPERCASE
-  return (c >= 'A' && c <= 'Z');
-#else
-  switch (c)
+  if (C_CTYPE_CONSECUTIVE_UPPERCASE)
+    return 'A' <= c && c <= 'Z';
+
+  switch (to_char (c))
     {
     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
     case 'Y': case 'Z':
-      return 1;
+      return true;
     default:
-      return 0;
+      return false;
     }
-#endif
 }
 
 bool
 c_isxdigit (int c)
 {
-#if C_CTYPE_CONSECUTIVE_DIGITS \
-    && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
-#if C_CTYPE_ASCII
-  return ((c >= '0' && c <= '9')
-          || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'F'));
-#else
-  return ((c >= '0' && c <= '9')
-          || (c >= 'A' && c <= 'F')
-          || (c >= 'a' && c <= 'f'));
-#endif
-#else
-  switch (c)
+  if (C_CTYPE_CONSECUTIVE_DIGITS
+      && C_CTYPE_CONSECUTIVE_UPPERCASE
+      && C_CTYPE_CONSECUTIVE_LOWERCASE)
+    {
+      if ('0' <= c && c <= '9')
+        return true;
+      if (C_CTYPE_ASCII)
+        return 'A' <= (c & ~0x20) && (c & ~0x20) <= 'F';
+      return (('A' <= c && c <= 'F')
+              || ('a' <= c && c <= 'f'));
+    }
+
+  switch (to_char (c))
     {
     case '0': case '1': case '2': case '3': case '4': case '5':
     case '6': case '7': case '8': case '9':
     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-      return 1;
+      return true;
     default:
-      return 0;
+      return false;
     }
-#endif
 }
 
 int
 c_tolower (int c)
 {
-#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
-  return (c >= 'A' && c <= 'Z' ? c - 'A' + 'a' : c);
-#else
-  switch (c)
+  if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE)
+    return c_isupper (c) ? c - 'A' + 'a' : c;
+
+  switch (to_char (c))
     {
     case 'A': return 'a';
     case 'B': return 'b';
@@ -352,16 +361,15 @@ c_tolower (int c)
     case 'Z': return 'z';
     default: return c;
     }
-#endif
 }
 
 int
 c_toupper (int c)
 {
-#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
-  return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
-#else
-  switch (c)
+  if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE)
+    return c_islower (c) ? c - 'a' + 'A' : c;
+
+  switch (to_char (c))
     {
     case 'a': return 'A';
     case 'b': return 'B';
@@ -391,5 +399,4 @@ c_toupper (int c)
     case 'z': return 'Z';
     default: return c;
     }
-#endif
 }
author	Paul Eggert <eggert@cs.ucla.edu>	2015-09-22 12:17:06 -0700
committer	Paul Eggert <eggert@cs.ucla.edu>	2015-09-22 12:18:18 -0700
commit	7e2a8ea26c4bf72c6d143170cf2a0ebff5b50d65 (patch)
tree	35acec3e089ad968796f83afa566a9c995b55cf9 /lib/c-ctype.c
parent	6439a0af0ca6b0256a4273a8ae0175e896bcc8da (diff)
download	gnulib-7e2a8ea26c4bf72c6d143170cf2a0ebff5b50d65.tar.gz