summaryrefslogtreecommitdiff
path: root/handy.h
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-12-11 20:47:25 -0700
committerKarl Williamson <public@khwilliamson.com>2012-12-22 11:11:27 -0700
commit15861f948fe52aa0c72233cd9dfc0190bfa3fbb1 (patch)
tree68e1d074c5f89237a0bcbc739836c5aac749cac9 /handy.h
parentfbc19f27a35cc90f77285a47de2ff0dd21ff50c6 (diff)
downloadperl-15861f948fe52aa0c72233cd9dfc0190bfa3fbb1.tar.gz
handy.h: Create isALPHANUMERIC() and kin
Perl has had an undocumented macro isALNUMC() for a long time. I want to document it, but the name is very obscure. Neither Yves nor I are sure what it is. My best guess is "C's alnum". It corresponds to /[[:alnum:]]/, and so its best name would be isALNUM(). But that is the name long given to what matches \w. A new synonym, isWORDCHAR(), has been in place for several releases for that, but the old isALNUM() should remain for backwards compatibility. I don't think that the name isALNUMC() should be published, as it is too close to isALNUM(). I finally came to the conclusion that isALPHANUMERIC() is the best name; it describes its purpose clearly; the disadvantage is its long length. I doubt that it will get much use, but we need something, I think, that we can publish to accomplish this functionality. This commit also converts core uses of isALNUMC to isALPHANUMERIC. (I intended to do that separately, but made a mistake in rebasing, and combined the two patches; and it seemed like not a big enough problem to separate them out again.)
Diffstat (limited to 'handy.h')
-rw-r--r--handy.h55
1 files changed, 38 insertions, 17 deletions
diff --git a/handy.h b/handy.h
index fd23c1dcb1..077952c42f 100644
--- a/handy.h
+++ b/handy.h
@@ -536,6 +536,15 @@ See the L<top of this section|/Character classes> for an explanation of variants
C<isALPHA_A>, C<isALPHA_L1>, C<isALPHA_uni>, C<isALPHA_utf8>, C<isALPHA_LC>
C<isALPHA_LC_uvchr>, and C<isALPHA_LC_utf8>.
+=for apidoc Am|bool|isALPHANUMERIC|char ch
+Returns a boolean indicating whether the specified character is either an
+alphabetic character or decimal digit in the platform's native character set,
+analogous to C<m/[[:alnum:]]/>.
+See the L<top of this section|/Character classes> for an explanation of variants
+C<isALPHANUMERIC_A>, C<isALPHANUMERIC_L1>, C<isALPHANUMERIC_uni>,
+C<isALPHANUMERIC_utf8>, C<isALPHANUMERIC_LC>, C<isALPHANUMERIC_LC_uvchr>, and
+C<isALPHANUMERIC_LC_utf8>.
+
=for apidoc Am|bool|isASCII|char ch
Returns a boolean indicating whether the specified character is one of the 128
characters in the ASCII character set, analogous to C<m/[[:ascii:]]/>.
@@ -670,7 +679,7 @@ character set, if possible; otherwise returns the input character itself.
=cut
-Still undocumented are ALNUMC, PSXSPC, VERTSPACE, and IDFIRST, and the other
+Still undocumented are PSXSPC, VERTSPACE, and IDFIRST, and the other
toUPPER etc functions
Note that these macros are repeated in Devel::PPPort, so should also be
@@ -729,7 +738,7 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
# define _CC_UPPER 4 /* [:upper:] */
# define _CC_PUNCT 5 /* [:punct:] */
# define _CC_PRINT 6 /* [:print:] */
-# define _CC_ALNUMC 7 /* [:alnum:] */
+# define _CC_ALPHANUMERIC 7 /* [:alnum:] */
# define _CC_GRAPH 8 /* [:graph:] */
#define _FIRST_NON_SWASH_CC 9
@@ -772,7 +781,7 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
/* An enum version of the character class numbers, to help compilers
* optimize */
typedef enum {
- _CC_ENUM_ALNUMC = _CC_ALNUMC,
+ _CC_ENUM_ALPHANUMERIC = _CC_ALPHANUMERIC,
_CC_ENUM_ALPHA = _CC_ALPHA,
_CC_ENUM_DIGIT = _CC_DIGIT,
_CC_ENUM_GRAPH = _CC_GRAPH,
@@ -794,7 +803,7 @@ typedef enum {
#define POSIX_SWASH_COUNT _FIRST_NON_SWASH_CC
#define PL_utf8_alnum PL_utf8_swash_ptrs[_CC_WORDCHAR]
-#define PL_utf8_alnumc PL_utf8_swash_ptrs[_CC_ALNUMC]
+#define PL_utf8_alnumc PL_utf8_swash_ptrs[_CC_ALPHANUMERIC]
#define PL_utf8_alpha PL_utf8_swash_ptrs[_CC_ALPHA]
#define PL_utf8_graph PL_utf8_swash_ptrs[_CC_GRAPH]
#define PL_utf8_digit PL_utf8_swash_ptrs[_CC_DIGIT]
@@ -827,8 +836,8 @@ EXTCONST U32 PL_charclass[];
&& ((PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask_A(classnum)) \
== _CC_mask_A(classnum)))
-# define isALNUMC_A(c) _generic_isCC_A(c, _CC_ALNUMC)
# define isALPHA_A(c) _generic_isCC_A(c, _CC_ALPHA)
+# define isALPHANUMERIC_A(c) _generic_isCC_A(c, _CC_ALPHANUMERIC)
# define isBLANK_A(c) _generic_isCC_A(c, _CC_BLANK)
# define isCNTRL_A(c) _generic_isCC_A(c, _CC_CNTRL)
# define isDIGIT_A(c) _generic_isCC(c, _CC_DIGIT)
@@ -854,8 +863,8 @@ EXTCONST U32 PL_charclass[];
_generic_isCC(c, _CC_IS_IN_SOME_FOLD)
#else /* No perl.h. */
# ifdef EBCDIC
-# define isALNUMC_A(c) (isASCII(c) && isALNUMC(c))
# define isALPHA_A(c) (isASCII(c) && isALPHA(c))
+# define isALPHANUMERIC_A(c) (isASCII(c) && isALPHANUMERIC(c))
# define isBLANK_A(c) (isASCII(c) && isBLANK(c))
# define isCNTRL_A(c) (isASCII(c) && isCNTRL(c))
# define isDIGIT_A(c) (isASCII(c) && isDIGIT(c))
@@ -870,8 +879,8 @@ EXTCONST U32 PL_charclass[];
# define isWORDCHAR_A(c) (isASCII(c) && isWORDCHAR(c))
# define isXDIGIT_A(c) (isASCII(c) && isXDIGIT(c))
# else /* ASCII platform, no perl.h */
-# define isALNUMC_A(c) (isALPHA_A(c) || isDIGIT_A(c))
# define isALPHA_A(c) (isUPPER_A(c) || isLOWER_A(c))
+# define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c))
# define isBLANK_A(c) ((c) == ' ' || (c) == '\t')
# define isCNTRL_A(c) (FITS_IN_8_BITS(c) && ((U8) (c) < ' ' || (c) == 127))
# define isDIGIT_A(c) ((c) <= '9' && (c) >= '0')
@@ -899,8 +908,8 @@ EXTCONST U32 PL_charclass[];
/* Latin1 definitions */
#ifdef H_PERL
-# define isALNUMC_L1(c) _generic_isCC(c, _CC_ALNUMC)
# define isALPHA_L1(c) _generic_isCC(c, _CC_ALPHA)
+# define isALPHANUMERIC_L1(c) _generic_isCC(c, _CC_ALPHANUMERIC)
# define isBLANK_L1(c) _generic_isCC(c, _CC_BLANK)
/* continuation character for legal NAME in \N{NAME} */
@@ -942,8 +951,8 @@ EXTCONST U32 PL_charclass[];
* with Latin1, which the three currently recognized by Perl are. Some libc's
* have an isblank(), but it's not guaranteed. */
#ifdef EBCDIC
-# define isALNUMC(c) isalnum(c)
# define isALPHA(c) isalpha(c)
+# define isALPHANUMERIC(c) isalnum(c)
# define isBLANK(c) ((c) == ' ' || (c) == '\t' || NATIVE_TO_UNI(c) == 0xA0)
# define isCNTRL(c) iscntrl(c)
# define isDIGIT(c) isdigit(c)
@@ -961,7 +970,7 @@ EXTCONST U32 PL_charclass[];
# define toLOWER(c) tolower(c)
# define toUPPER(c) toupper(c)
#else /* Not EBCDIC: ASCII-only matching */
-# define isALNUMC(c) isALNUMC_A(c) /* Mnemonic: "C's alnum" = alpha + digit */
+# define isALPHANUMERIC(c) isALPHANUMERIC_A(c)
# define isALPHA(c) isALPHA_A(c)
# define isBLANK(c) isBLANK_A(c)
# define isCNTRL(c) isCNTRL_A(c)
@@ -1003,7 +1012,7 @@ EXTCONST U32 PL_charclass[];
#ifdef USE_NEXT_CTYPE
-# define isALNUMC_LC(c) NXIsAlNum((unsigned int)(c))
+# define isALPHANUMERIC_LC(c) NXIsAlNum((unsigned int)(c))
# define isALNUM_LC(c) isWORDCHAR_LC(c)
# define isALPHA_LC(c) NXIsAlpha((unsigned int)(c))
# define isASCII_LC(c) isASCII((unsigned int)(c))
@@ -1028,9 +1037,10 @@ EXTCONST U32 PL_charclass[];
/* Use foo_LC_uvchr() instead of these for beyond the Latin1 range */
-# define isALNUMC_LC(c) (FITS_IN_8_BITS(c) && isalnum((unsigned char)(c)))
# define isALNUM_LC(c) isWORDCHAR_LC(c)
# define isALPHA_LC(c) (FITS_IN_8_BITS(c) && isalpha((unsigned char)(c)))
+# define isALPHANUMERIC_LC(c) (FITS_IN_8_BITS(c) \
+ && isalnum((unsigned char)(c)))
# ifdef HAS_ISASCII
# define isASCII_LC(c) (FITS_IN_8_BITS(c) && isascii((unsigned char)(c)))
# else
@@ -1059,9 +1069,9 @@ EXTCONST U32 PL_charclass[];
# else
-# define isALNUMC_LC(c) (isascii(c) && isalnum(c))
# define isALNUM_LC(c) isWORDCHAR_LC(c)
# define isALPHA_LC(c) (isascii(c) && isalpha(c))
+# define isALPHANUMERIC_LC(c) (isascii(c) && isalnum(c))
# define isASCII_LC(c) isascii(c)
# ifdef HAS_ISBLANK
# define isBLANK_LC(c) (isascii(c) && isblank(c))
@@ -1097,9 +1107,9 @@ EXTCONST U32 PL_charclass[];
#define _generic_uni(classnum, function, c) ((c) < 256 \
? _generic_isCC(c, classnum) \
: function(c))
-#define isALNUMC_uni(c) _generic_uni(_CC_ALNUMC, is_uni_alnumc, c)
#define isALNUM_uni(c) isWORDCHAR_uni(c)
#define isALPHA_uni(c) _generic_uni(_CC_ALPHA, is_uni_alpha, c)
+#define isALPHANUMERIC_uni(c) _generic_uni(_CC_ALPHANUMERIC, is_uni_alnumc, c)
#define isASCII_uni(c) isASCII(c)
#define isBLANK_uni(c) _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c)
#define isCNTRL_uni(c) isCNTRL_L1(c) /* All controls are in Latin1 */
@@ -1126,9 +1136,10 @@ EXTCONST U32 PL_charclass[];
#define _gnrc_is_LC_uvchr(latin1, above_latin1, c) \
(c < 256 ? latin1(c) : above_latin1(NATIVE_TO_UNI(c)))
-#define isALNUMC_LC_uvchr(c) _gnrc_is_LC_uvchr(isALNUMC_LC, is_uni_alnumc_lc, c)
#define isALNUM_LC_uvchr(c) isWORDCHAR_LC_uvchr(c)
#define isALPHA_LC_uvchr(c) _gnrc_is_LC_uvchr(isALPHA_LC, is_uni_alpha_lc, c)
+#define isALPHANUMERIC_LC_uvchr(c) _gnrc_is_LC_uvchr(isALPHANUMERIC_LC, \
+ is_uni_alnumc_lc, c)
#define isASCII_LC_uvchr(c) isASCII_LC(c)
#define isBLANK_LC_uvchr(c) _gnrc_is_LC_uvchr(isBLANK_LC, is_HORIZWS_cp_high, c)
#define isCNTRL_LC_uvchr(c) (c < 256 ? isCNTRL_LC(c) : 0)
@@ -1187,9 +1198,10 @@ EXTCONST U32 PL_charclass[];
* points; the regcharclass.h ones are implemented as a series of
* "if-else-if-else ..." */
-#define isALNUMC_utf8(p) _generic_utf8(_CC_ALNUMC, is_utf8_alnumc, p)
#define isALNUM_utf8(p) isWORDCHAR_utf8(p) /* back compat */
#define isALPHA_utf8(p) _generic_utf8(_CC_ALPHA, is_utf8_alpha, p)
+#define isALPHANUMERIC_utf8(p) _generic_utf8(_CC_ALPHANUMERIC, \
+ is_utf8_alnumc, p)
#define isASCII_utf8(p) isASCII(*p) /* Because ASCII is invariant under
utf8, the non-utf8 macro works
*/
@@ -1242,8 +1254,9 @@ EXTCONST U32 PL_charclass[];
#define _generic_LC_utf8(macro, utf8_func, p) \
_generic_LC_utf8_utf8(macro, p, utf8_func(p))
-#define isALNUMC_LC_utf8(p) _generic_LC_utf8(isALNUMC_LC, is_utf8_alnumc, p)
#define isALNUM_LC_utf8(p) isWORDCHAR_LC_utf8(p)
+#define isALPHANUMERIC_LC_utf8(p) _generic_LC_utf8(isALPHANUMERIC_LC, \
+ is_utf8_alnumc, p)
#define isALPHA_LC_utf8(p) _generic_LC_utf8(isALPHA_LC, is_utf8_alpha, p)
#define isASCII_LC_utf8(p) isASCII_LC(*p)
#define isBLANK_LC_utf8(p) _generic_LC_utf8(isBLANK_LC, is_HORIZWS_high, p)
@@ -1271,6 +1284,14 @@ EXTCONST U32 PL_charclass[];
#define isOCTAL(c) isOCTAL_A(c)
#define isOCTAL_L1(c) isOCTAL_A(c)
#define isXDIGIT_L1(c) isXDIGIT_A(c)
+#define isALNUMC_A(c) isALPHANUMERIC_A(c) /* Mnemonic: "C's alnum" */
+#define isALNUMC_L1(c) isALPHANUMERIC_L1(c)
+#define isALNUMC(c) isALPHANUMERIC(c)
+#define isALNUMC_LC(c) isALPHANUMERIC_LC(c)
+#define isALNUMC_uni(c) isALPHANUMERIC_uni(c)
+#define isALNUMC_LC_uvchr(c) isALPHANUMERIC_LC_uvchr(c)
+#define isALNUMC_utf8(p) isALPHANUMERIC_utf8(p)
+#define isALNUMC_LC_utf8(p) isALPHANUMERIC_LC_utf8(p)
/* This conversion works both ways, strangely enough. On EBCDIC platforms,
* CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII, except that they don't