handy.h: Create isALPHANUMERIC() and kin

Perl has had an undocumented macro isALNUMC() for a long time. I want to document it, but the name is very obscure. Neither Yves nor I are sure what it is. My best guess is "C's alnum". It corresponds to /[[:alnum:]]/, and so its best name would be isALNUM(). But that is the name long given to what matches \w. A new synonym, isWORDCHAR(), has been in place for several releases for that, but the old isALNUM() should remain for backwards compatibility. I don't think that the name isALNUMC() should be published, as it is too close to isALNUM(). I finally came to the conclusion that isALPHANUMERIC() is the best name; it describes its purpose clearly; the disadvantage is its long length. I doubt that it will get much use, but we need something, I think, that we can publish to accomplish this functionality. This commit also converts core uses of isALNUMC to isALPHANUMERIC. (I intended to do that separately, but made a mistake in rebasing, and combined the two patches; and it seemed like not a big enough problem to separate them out again.)
author: Karl Williamson <public@khwilliamson.com> 2012-12-11 20:47:25 -0700
committer: Karl Williamson <public@khwilliamson.com> 2012-12-22 11:11:27 -0700
commit: 15861f948fe52aa0c72233cd9dfc0190bfa3fbb1 (patch)
tree: 68e1d074c5f89237a0bcbc739836c5aac749cac9 /handy.h
parent: fbc19f27a35cc90f77285a47de2ff0dd21ff50c6 (diff)
download: perl-15861f948fe52aa0c72233cd9dfc0190bfa3fbb1.tar.gz
1 files changed, 38 insertions, 17 deletions
diff --git a/handy.h b/handy.h
index fd23c1dcb1..077952c42f 100644
--- a/handy.h
+++ b/handy.h
@@ -536,6 +536,15 @@ See the L<top of this section|/Character classes> for an explanation of variants
 C<isALPHA_A>, C<isALPHA_L1>, C<isALPHA_uni>, C<isALPHA_utf8>, C<isALPHA_LC>
 C<isALPHA_LC_uvchr>, and C<isALPHA_LC_utf8>.
 
+=for apidoc Am|bool|isALPHANUMERIC|char ch
+Returns a boolean indicating whether the specified character is either an
+alphabetic character or decimal digit in the platform's native character set,
+analogous to C<m/[[:alnum:]]/>.
+See the L<top of this section|/Character classes> for an explanation of variants
+C<isALPHANUMERIC_A>, C<isALPHANUMERIC_L1>, C<isALPHANUMERIC_uni>,
+C<isALPHANUMERIC_utf8>, C<isALPHANUMERIC_LC>, C<isALPHANUMERIC_LC_uvchr>, and
+C<isALPHANUMERIC_LC_utf8>.
+
 =for apidoc Am|bool|isASCII|char ch
 Returns a boolean indicating whether the specified character is one of the 128
 characters in the ASCII character set, analogous to C<m/[[:ascii:]]/>.
@@ -670,7 +679,7 @@ character set, if possible; otherwise returns the input character itself.
 
 =cut
 
-Still undocumented are ALNUMC, PSXSPC, VERTSPACE, and IDFIRST, and the other
+Still undocumented are PSXSPC, VERTSPACE, and IDFIRST, and the other
 toUPPER etc functions
 
 Note that these macros are repeated in Devel::PPPort, so should also be
@@ -729,7 +738,7 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
 #  define _CC_UPPER              4      /* [:upper:] */
 #  define _CC_PUNCT              5      /* [:punct:] */
 #  define _CC_PRINT              6      /* [:print:] */
-#  define _CC_ALNUMC             7      /* [:alnum:] */
+#  define _CC_ALPHANUMERIC       7      /* [:alnum:] */
 #  define _CC_GRAPH              8      /* [:graph:] */
 
 #define _FIRST_NON_SWASH_CC      9
@@ -772,7 +781,7 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
 /* An enum version of the character class numbers, to help compilers
  * optimize */
 typedef enum {
-    _CC_ENUM_ALNUMC         = _CC_ALNUMC,
+    _CC_ENUM_ALPHANUMERIC   = _CC_ALPHANUMERIC,
     _CC_ENUM_ALPHA          = _CC_ALPHA,
     _CC_ENUM_DIGIT          = _CC_DIGIT,
     _CC_ENUM_GRAPH          = _CC_GRAPH,
@@ -794,7 +803,7 @@ typedef enum {
 #define POSIX_SWASH_COUNT _FIRST_NON_SWASH_CC
 
 #define PL_utf8_alnum   PL_utf8_swash_ptrs[_CC_WORDCHAR]
-#define PL_utf8_alnumc	PL_utf8_swash_ptrs[_CC_ALNUMC]
+#define PL_utf8_alnumc	PL_utf8_swash_ptrs[_CC_ALPHANUMERIC]
 #define PL_utf8_alpha	PL_utf8_swash_ptrs[_CC_ALPHA]
 #define PL_utf8_graph	PL_utf8_swash_ptrs[_CC_GRAPH]
 #define PL_utf8_digit	PL_utf8_swash_ptrs[_CC_DIGIT]
@@ -827,8 +836,8 @@ EXTCONST U32 PL_charclass[];
         && ((PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask_A(classnum)) \
                                 == _CC_mask_A(classnum)))
 
-#   define isALNUMC_A(c) _generic_isCC_A(c, _CC_ALNUMC)
 #   define isALPHA_A(c)  _generic_isCC_A(c, _CC_ALPHA)
+#   define isALPHANUMERIC_A(c) _generic_isCC_A(c, _CC_ALPHANUMERIC)
 #   define isBLANK_A(c)  _generic_isCC_A(c, _CC_BLANK)
 #   define isCNTRL_A(c)  _generic_isCC_A(c, _CC_CNTRL)
 #   define isDIGIT_A(c)  _generic_isCC(c, _CC_DIGIT)
@@ -854,8 +863,8 @@ EXTCONST U32 PL_charclass[];
                                             _generic_isCC(c, _CC_IS_IN_SOME_FOLD)
 #else   /* No perl.h. */
 #   ifdef EBCDIC
-#       define isALNUMC_A(c)   (isASCII(c) && isALNUMC(c))
 #       define isALPHA_A(c)    (isASCII(c) && isALPHA(c))
+#       define isALPHANUMERIC_A(c) (isASCII(c) && isALPHANUMERIC(c))
 #       define isBLANK_A(c)    (isASCII(c) && isBLANK(c))
 #       define isCNTRL_A(c)    (isASCII(c) && isCNTRL(c))
 #       define isDIGIT_A(c)    (isASCII(c) && isDIGIT(c))
@@ -870,8 +879,8 @@ EXTCONST U32 PL_charclass[];
 #       define isWORDCHAR_A(c) (isASCII(c) && isWORDCHAR(c))
 #       define isXDIGIT_A(c)   (isASCII(c) && isXDIGIT(c))
 #   else   /* ASCII platform, no perl.h */
-#       define isALNUMC_A(c) (isALPHA_A(c) || isDIGIT_A(c))
 #       define isALPHA_A(c)  (isUPPER_A(c) || isLOWER_A(c))
+#       define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c))
 #       define isBLANK_A(c)  ((c) == ' ' || (c) == '\t')
 #       define isCNTRL_A(c) (FITS_IN_8_BITS(c) && ((U8) (c) < ' ' || (c) == 127))
 #       define isDIGIT_A(c)  ((c) <= '9' && (c) >= '0')
@@ -899,8 +908,8 @@ EXTCONST U32 PL_charclass[];
 
 /* Latin1 definitions */
 #ifdef H_PERL
-#   define isALNUMC_L1(c) _generic_isCC(c, _CC_ALNUMC)
 #   define isALPHA_L1(c)  _generic_isCC(c, _CC_ALPHA)
+#   define isALPHANUMERIC_L1(c) _generic_isCC(c, _CC_ALPHANUMERIC)
 #   define isBLANK_L1(c)  _generic_isCC(c, _CC_BLANK)
 
 /*  continuation character for legal NAME in \N{NAME} */
@@ -942,8 +951,8 @@ EXTCONST U32 PL_charclass[];
  * with Latin1, which the three currently recognized by Perl are.  Some libc's
  * have an isblank(), but it's not guaranteed. */
 #ifdef EBCDIC
-#   define isALNUMC(c)	isalnum(c)
 #   define isALPHA(c)	isalpha(c)
+#   define isALPHANUMERIC(c)	isalnum(c)
 #   define isBLANK(c)	((c) == ' ' || (c) == '\t' || NATIVE_TO_UNI(c) == 0xA0)
 #   define isCNTRL(c)	iscntrl(c)
 #   define isDIGIT(c)	isdigit(c)
@@ -961,7 +970,7 @@ EXTCONST U32 PL_charclass[];
 #   define toLOWER(c)	tolower(c)
 #   define toUPPER(c)	toupper(c)
 #else /* Not EBCDIC: ASCII-only matching */
-#   define isALNUMC(c)  isALNUMC_A(c) /* Mnemonic: "C's alnum" = alpha + digit */
+#   define isALPHANUMERIC(c)  isALPHANUMERIC_A(c)
 #   define isALPHA(c)   isALPHA_A(c)
 #   define isBLANK(c)   isBLANK_A(c)
 #   define isCNTRL(c)   isCNTRL_A(c)
@@ -1003,7 +1012,7 @@ EXTCONST U32 PL_charclass[];
 
 #ifdef USE_NEXT_CTYPE
 
-#  define isALNUMC_LC(c)	NXIsAlNum((unsigned int)(c))
+#  define isALPHANUMERIC_LC(c)	NXIsAlNum((unsigned int)(c))
 #  define isALNUM_LC(c)         isWORDCHAR_LC(c)
 #  define isALPHA_LC(c)		NXIsAlpha((unsigned int)(c))
 #  define isASCII_LC(c)		isASCII((unsigned int)(c))
@@ -1028,9 +1037,10 @@ EXTCONST U32 PL_charclass[];
 
 /* Use foo_LC_uvchr() instead  of these for beyond the Latin1 range */
 
-#    define isALNUMC_LC(c)   (FITS_IN_8_BITS(c) && isalnum((unsigned char)(c)))
 #    define isALNUM_LC(c) isWORDCHAR_LC(c)
 #    define isALPHA_LC(c)   (FITS_IN_8_BITS(c) && isalpha((unsigned char)(c)))
+#    define isALPHANUMERIC_LC(c)   (FITS_IN_8_BITS(c)                          \
+                                               && isalnum((unsigned char)(c)))
 #    ifdef HAS_ISASCII
 #	define isASCII_LC(c) (FITS_IN_8_BITS(c) && isascii((unsigned char)(c)))
 #    else
@@ -1059,9 +1069,9 @@ EXTCONST U32 PL_charclass[];
 
 #  else
 
-#    define isALNUMC_LC(c)	(isascii(c) && isalnum(c))
 #    define isALNUM_LC(c)	isWORDCHAR_LC(c)
 #    define isALPHA_LC(c)	(isascii(c) && isalpha(c))
+#    define isALPHANUMERIC_LC(c) (isascii(c) && isalnum(c))
 #    define isASCII_LC(c)	isascii(c)
 #    ifdef HAS_ISBLANK
 #	define isBLANK_LC(c)	(isascii(c) && isblank(c))
@@ -1097,9 +1107,9 @@ EXTCONST U32 PL_charclass[];
 #define _generic_uni(classnum, function, c) ((c) < 256                    \
                                              ? _generic_isCC(c, classnum) \
                                              : function(c))
-#define isALNUMC_uni(c)     _generic_uni(_CC_ALNUMC, is_uni_alnumc, c)
 #define isALNUM_uni(c)      isWORDCHAR_uni(c)
 #define isALPHA_uni(c)      _generic_uni(_CC_ALPHA, is_uni_alpha, c)
+#define isALPHANUMERIC_uni(c) _generic_uni(_CC_ALPHANUMERIC, is_uni_alnumc, c)
 #define isASCII_uni(c)      isASCII(c)
 #define isBLANK_uni(c)      _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c)
 #define isCNTRL_uni(c)      isCNTRL_L1(c) /* All controls are in Latin1 */
@@ -1126,9 +1136,10 @@ EXTCONST U32 PL_charclass[];
 
 #define _gnrc_is_LC_uvchr(latin1, above_latin1, c)                            \
                         (c < 256 ? latin1(c) : above_latin1(NATIVE_TO_UNI(c)))
-#define isALNUMC_LC_uvchr(c)  _gnrc_is_LC_uvchr(isALNUMC_LC, is_uni_alnumc_lc, c)
 #define isALNUM_LC_uvchr(c)  isWORDCHAR_LC_uvchr(c)
 #define isALPHA_LC_uvchr(c)  _gnrc_is_LC_uvchr(isALPHA_LC, is_uni_alpha_lc, c)
+#define isALPHANUMERIC_LC_uvchr(c)  _gnrc_is_LC_uvchr(isALPHANUMERIC_LC,      \
+                                                         is_uni_alnumc_lc, c)
 #define isASCII_LC_uvchr(c)  isASCII_LC(c)
 #define isBLANK_LC_uvchr(c)  _gnrc_is_LC_uvchr(isBLANK_LC, is_HORIZWS_cp_high, c)
 #define isCNTRL_LC_uvchr(c)  (c < 256 ? isCNTRL_LC(c) : 0)
@@ -1187,9 +1198,10 @@ EXTCONST U32 PL_charclass[];
  * points; the regcharclass.h ones are implemented as a series of
  * "if-else-if-else ..." */
 
-#define isALNUMC_utf8(p)        _generic_utf8(_CC_ALNUMC, is_utf8_alnumc, p)
 #define isALNUM_utf8(p)         isWORDCHAR_utf8(p)  /* back compat */
 #define isALPHA_utf8(p)         _generic_utf8(_CC_ALPHA, is_utf8_alpha, p)
+#define isALPHANUMERIC_utf8(p)        _generic_utf8(_CC_ALPHANUMERIC,      \
+                                                        is_utf8_alnumc, p)
 #define isASCII_utf8(p)         isASCII(*p) /* Because ASCII is invariant under
                                                utf8, the non-utf8 macro works
                                              */
@@ -1242,8 +1254,9 @@ EXTCONST U32 PL_charclass[];
 #define _generic_LC_utf8(macro, utf8_func, p)                              \
                             _generic_LC_utf8_utf8(macro, p, utf8_func(p))
 
-#define isALNUMC_LC_utf8(p)  _generic_LC_utf8(isALNUMC_LC, is_utf8_alnumc, p)
 #define isALNUM_LC_utf8(p)   isWORDCHAR_LC_utf8(p)
+#define isALPHANUMERIC_LC_utf8(p)  _generic_LC_utf8(isALPHANUMERIC_LC,       \
+                                                          is_utf8_alnumc, p)
 #define isALPHA_LC_utf8(p)   _generic_LC_utf8(isALPHA_LC, is_utf8_alpha, p)
 #define isASCII_LC_utf8(p)   isASCII_LC(*p)
 #define isBLANK_LC_utf8(p)   _generic_LC_utf8(isBLANK_LC, is_HORIZWS_high, p)
@@ -1271,6 +1284,14 @@ EXTCONST U32 PL_charclass[];
 #define isOCTAL(c)      isOCTAL_A(c)
 #define isOCTAL_L1(c)   isOCTAL_A(c)
 #define isXDIGIT_L1(c)  isXDIGIT_A(c)
+#define isALNUMC_A(c)   isALPHANUMERIC_A(c)      /* Mnemonic: "C's alnum" */
+#define isALNUMC_L1(c)  isALPHANUMERIC_L1(c)
+#define isALNUMC(c)	isALPHANUMERIC(c)
+#define isALNUMC_LC(c)	isALPHANUMERIC_LC(c)
+#define isALNUMC_uni(c) isALPHANUMERIC_uni(c)
+#define isALNUMC_LC_uvchr(c) isALPHANUMERIC_LC_uvchr(c)
+#define isALNUMC_utf8(p) isALPHANUMERIC_utf8(p)
+#define isALNUMC_LC_utf8(p) isALPHANUMERIC_LC_utf8(p)
 
 /* This conversion works both ways, strangely enough. On EBCDIC platforms,
  * CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII, except that they don't
author	Karl Williamson <public@khwilliamson.com>	2012-12-11 20:47:25 -0700
committer	Karl Williamson <public@khwilliamson.com>	2012-12-22 11:11:27 -0700
commit	15861f948fe52aa0c72233cd9dfc0190bfa3fbb1 (patch)
tree	68e1d074c5f89237a0bcbc739836c5aac749cac9 /handy.h
parent	fbc19f27a35cc90f77285a47de2ff0dd21ff50c6 (diff)
download	perl-15861f948fe52aa0c72233cd9dfc0190bfa3fbb1.tar.gz