summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2014-06-03 19:37:45 -0600
committerKarl Williamson <khw@cpan.org>2014-06-05 12:23:02 -0600
commit375f5f0648cdf36c13cb11499b332c99c710d138 (patch)
tree7b2ea9ff5a2a14dfb1f3ebff202f92ab49fac010
parent5320b60d881861d12d3f678c90a6eafe50077814 (diff)
downloadperl-375f5f0648cdf36c13cb11499b332c99c710d138.tar.gz
Fix Windows ctype functions
Windows doesn't follow the POSIX standard for its ctype functions such as isalnum(), isdigit(), etc. This forces compliance by changing the macros that are the interfaces to those functions to be smarter than just calling the raw functions.
-rw-r--r--handy.h32
-rw-r--r--pod/perldelta.pod21
2 files changed, 52 insertions, 1 deletions
diff --git a/handy.h b/handy.h
index 6810ef8499..351ffcd009 100644
--- a/handy.h
+++ b/handy.h
@@ -1359,7 +1359,37 @@ EXTCONST U32 PL_charclass[];
# define _LC_CAST U8
-# if defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))
+# ifdef WIN32
+ /* The Windows functions don't bother to follow the POSIX standard, which
+ * for example says that something can't both be a printable and a control.
+ * But Windows treats the \t control as a printable, and does such things
+ * as making superscripts into both digits and punctuation. This tames
+ * these flaws by assuming that the definitions of both controls and space
+ * are correct, and then making sure that other definitions don't have
+ * weirdnesses, by making sure that isalnum() isn't also ispunct(), etc.
+ * Not all possible weirdnesses are checked for, just the ones that were
+ * detected on actual Microsoft code pages */
+
+# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl)
+# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace)
+
+# define isALPHA_LC(c) (_generic_LC(c, _CC_ALPHA, isalpha) && isALPHANUMERIC_LC(c))
+# define isALPHANUMERIC_LC(c) (_generic_LC(c, _CC_ALPHANUMERIC, isalnum) && ! isPUNCT_LC(c))
+# define isDIGIT_LC(c) (_generic_LC(c, _CC_DIGIT, isdigit) && isALPHANUMERIC_LC(c))
+# define isGRAPH_LC(c) (_generic_LC(c, _CC_GRAPH, isgraph) && isPRINT_LC(c))
+# define isIDFIRST_LC(c) (((c) == '_') || (_generic_LC(c, _CC_IDFIRST, isalpha) && ! isPUNCT_LC(c)))
+# define isLOWER_LC(c) (_generic_LC(c, _CC_LOWER, islower) && isALPHA_LC(c))
+# define isPRINT_LC(c) (_generic_LC(c, _CC_PRINT, isprint) && ! isCNTRL_LC(c))
+# define isPUNCT_LC(c) (_generic_LC(c, _CC_PUNCT, ispunct) && ! isCNTRL_LC(c))
+# define isUPPER_LC(c) (_generic_LC(c, _CC_UPPER, isupper) && isALPHA_LC(c))
+# define isWORDCHAR_LC(c) (((c) == '_') || isALPHANUMERIC_LC(c))
+# define isXDIGIT_LC(c) (_generic_LC(c, _CC_XDIGIT, isxdigit) && isALPHANUMERIC_LC(c))
+
+# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8)
+# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8)
+# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8)
+
+# elif defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))
/* For most other platforms */
# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, isalpha)
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index d4278b3c54..21e59ebc13 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -553,6 +553,27 @@ C<POSIX::localeconv()> now marks appropriately the values it returns as
UTF-8 or not. Previously they were always returned as bytes, even if
they were supposed to be encoded as UTF-8.
+=item *
+
+On Microsoft Windows, within the scope of C<S<use locale>>, the following
+POSIX character classes gave results for many locales that did not
+conform to the POSIX standard:
+C<[[:alnum:]]>,
+C<[[:alpha:]]>,
+C<[[:blank:]]>,
+C<[[:digit:]]>,
+C<[[:graph:]]>,
+C<[[:lower:]]>,
+C<[[:print:]]>,
+C<[[:punct:]]>,
+C<[[:upper:]]>,
+C<[[:word:]]>,
+and
+C<[[:xdigit:]]>.
+This is because the underlying Microsoft implementation does not
+follow the standard. Perl now takes special precautions to correct for
+this.
+
=back
=head1 Known Problems