diff options
author | Karl Williamson <khw@cpan.org> | 2014-06-03 19:37:45 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2014-06-05 12:23:02 -0600 |
commit | 375f5f0648cdf36c13cb11499b332c99c710d138 (patch) | |
tree | 7b2ea9ff5a2a14dfb1f3ebff202f92ab49fac010 | |
parent | 5320b60d881861d12d3f678c90a6eafe50077814 (diff) | |
download | perl-375f5f0648cdf36c13cb11499b332c99c710d138.tar.gz |
Fix Windows ctype functions
Windows doesn't follow the POSIX standard for its functions like
isalnum(), isdigit(), etc. This forces compliance by changing the
macros that are the interfaces to those functions to be smarter than
just calling the raw functions.
-rw-r--r-- | handy.h | 32 | ||||
-rw-r--r-- | pod/perldelta.pod | 21 |
2 files changed, 52 insertions, 1 deletions
@@ -1359,7 +1359,37 @@ EXTCONST U32 PL_charclass[];
 # define _LC_CAST U8
-# if defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))
+# ifdef WIN32
+ /* The Windows functions don't bother to follow the POSIX standard, which
+ * for example says that something can't both be a printable and a control.
+ * But Windows treats the \t control as a printable, and does such things
+ * as making superscripts into both digits and punctuation. This tames
+ * these flaws by assuming that the definitions of both controls and space
+ * are correct, and then making sure that other definitions don't have
+ * weirdnesses, by making sure that isalnum() isn't also ispunct(), etc.
+ * Not all possible weirdnesses are checked for, just the ones that were
+ * detected on actual Microsoft code pages */
+
+# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl)
+# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace)
+
+# define isALPHA_LC(c) (_generic_LC(c, _CC_ALPHA, isalpha) && isALPHANUMERIC_LC(c))
+# define isALPHANUMERIC_LC(c) (_generic_LC(c, _CC_ALPHANUMERIC, isalnum) && ! isPUNCT_LC(c))
+# define isDIGIT_LC(c) (_generic_LC(c, _CC_DIGIT, isdigit) && isALPHANUMERIC_LC(c))
+# define isGRAPH_LC(c) (_generic_LC(c, _CC_GRAPH, isgraph) && isPRINT_LC(c))
+# define isIDFIRST_LC(c) (((c) == '_') || (_generic_LC(c, _CC_IDFIRST, isalpha) && ! isPUNCT_LC(c)))
+# define isLOWER_LC(c) (_generic_LC(c, _CC_LOWER, islower) && isALPHA_LC(c))
+# define isPRINT_LC(c) (_generic_LC(c, _CC_PRINT, isprint) && ! isCNTRL_LC(c))
+# define isPUNCT_LC(c) (_generic_LC(c, _CC_PUNCT, ispunct) && ! isCNTRL_LC(c))
+# define isUPPER_LC(c) (_generic_LC(c, _CC_UPPER, isupper) && isALPHA_LC(c))
+# define isWORDCHAR_LC(c) (((c) == '_') || isALPHANUMERIC_LC(c))
+# define isXDIGIT_LC(c) (_generic_LC(c, _CC_XDIGIT, isxdigit) && isALPHANUMERIC_LC(c))
+
+# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8)
+# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8)
+# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8)
+
+# elif defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))
 /* For most other platforms */
 # define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, isalpha)
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index d4278b3c54..21e59ebc13 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -553,6 +553,27 @@ C<POSIX::localeconv()> now marks appropriately the values it returns as
 UTF-8 or not. Previously they were always returned as a bytes, even if they were supposed to be encoded as UTF-8.
+=item *
+
+On Microsoft Windows, within the scope of C<S<use locale>>, the following
+POSIX character classes gave results for many locales that did not
+conform to the POSIX standard:
+C<[[:alnum:]]>,
+C<[[:alpha:]]>,
+C<[[:blank:]]>,
+C<[[:digit:]]>,
+C<[[:graph:]]>,
+C<[[:lower:]]>,
+C<[[:print:]]>,
+C<[[:punct:]]>,
+C<[[:upper:]]>,
+C<[[:word:]]>,
+and
+C<[[:xdigit:]]>.
+These are because the underlying Microsoft implementation does not
+follow the standard. Perl now takes special precautions to correct for
+this.
+
 =back
 =head1 Known Problems |