diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-12-11 20:47:25 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-12-22 11:11:27 -0700 |
commit | 15861f948fe52aa0c72233cd9dfc0190bfa3fbb1 (patch) | |
tree | 68e1d074c5f89237a0bcbc739836c5aac749cac9 /handy.h | |
parent | fbc19f27a35cc90f77285a47de2ff0dd21ff50c6 (diff) | |
download | perl-15861f948fe52aa0c72233cd9dfc0190bfa3fbb1.tar.gz |
handy.h: Create isALPHANUMERIC() and kin
Perl has had an undocumented macro isALNUMC() for a long time. I want
to document it, but the name is very obscure. Neither Yves nor I are
sure what it stands for. My best guess is "C's alnum". It corresponds to
/[[:alnum:]]/, and so its best name would be isALNUM(). But that is the
name long given to what matches \w. A new synonym, isWORDCHAR(), has
been in place for several releases for that, but the old isALNUM()
should remain for backwards compatibility.
I don't think that the name isALNUMC() should be published, as it is too
close to isALNUM(). I finally came to the conclusion that
isALPHANUMERIC() is the best name; it describes its purpose clearly; the
disadvantage is its long length. I doubt that it will get much use, but
we need something, I think, that we can publish to accomplish this
functionality.
This commit also converts core uses of isALNUMC to isALPHANUMERIC. (I
intended to do that separately, but made a mistake in rebasing, and
combined the two patches; and it seemed like not a big enough problem to
separate them out again.)
Diffstat (limited to 'handy.h')
-rw-r--r-- | handy.h | 55 |
1 files changed, 38 insertions, 17 deletions
@@ -536,6 +536,15 @@ See the L<top of this section|/Character classes> for an explanation of variants C<isALPHA_A>, C<isALPHA_L1>, C<isALPHA_uni>, C<isALPHA_utf8>, C<isALPHA_LC> C<isALPHA_LC_uvchr>, and C<isALPHA_LC_utf8>. +=for apidoc Am|bool|isALPHANUMERIC|char ch +Returns a boolean indicating whether the specified character is a either an +alphabetic character or decimal digit in the platform's native character set, +analogous to C<m/[[:alnum:]]/>. +See the L<top of this section|/Character classes> for an explanation of variants +C<isALPHANUMERIC_A>, C<isALPHANUMERIC_L1>, C<isALPHANUMERIC_uni>, +C<isALPHANUMERIC_utf8>, C<isALPHANUMERIC_LC> C<isALPHANUMERIC_LC_uvchr>, and +C<isALPHANUMERIC_LC_utf8>. + =for apidoc Am|bool|isASCII|char ch Returns a boolean indicating whether the specified character is one of the 128 characters in the ASCII character set, analogous to C<m/[[:ascii:]]/>. @@ -670,7 +679,7 @@ character set, if possible; otherwise returns the input character itself. =cut -Still undocumented are ALNUMC, PSXSPC, VERTSPACE, and IDFIRST, and the other +Still undocumented are , PSXSPC, VERTSPACE, and IDFIRST, and the other toUPPER etc functions Note that these macros are repeated in Devel::PPPort, so should also be @@ -729,7 +738,7 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc # define _CC_UPPER 4 /* [:upper:] */ # define _CC_PUNCT 5 /* [:punct:] */ # define _CC_PRINT 6 /* [:print:] */ -# define _CC_ALNUMC 7 /* [:alnum:] */ +# define _CC_ALPHANUMERIC 7 /* [:alnum:] */ # define _CC_GRAPH 8 /* [:graph:] */ #define _FIRST_NON_SWASH_CC 9 @@ -772,7 +781,7 @@ patched there. 
The file as of this writing is cpan/Devel-PPPort/parts/inc/misc /* An enum version of the character class numbers, to help compilers * optimize */ typedef enum { - _CC_ENUM_ALNUMC = _CC_ALNUMC, + _CC_ENUM_ALPHANUMERIC = _CC_ALPHANUMERIC, _CC_ENUM_ALPHA = _CC_ALPHA, _CC_ENUM_DIGIT = _CC_DIGIT, _CC_ENUM_GRAPH = _CC_GRAPH, @@ -794,7 +803,7 @@ typedef enum { #define POSIX_SWASH_COUNT _FIRST_NON_SWASH_CC #define PL_utf8_alnum PL_utf8_swash_ptrs[_CC_WORDCHAR] -#define PL_utf8_alnumc PL_utf8_swash_ptrs[_CC_ALNUMC] +#define PL_utf8_alnumc PL_utf8_swash_ptrs[_CC_ALPHANUMERIC] #define PL_utf8_alpha PL_utf8_swash_ptrs[_CC_ALPHA] #define PL_utf8_graph PL_utf8_swash_ptrs[_CC_GRAPH] #define PL_utf8_digit PL_utf8_swash_ptrs[_CC_DIGIT] @@ -827,8 +836,8 @@ EXTCONST U32 PL_charclass[]; && ((PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask_A(classnum)) \ == _CC_mask_A(classnum))) -# define isALNUMC_A(c) _generic_isCC_A(c, _CC_ALNUMC) # define isALPHA_A(c) _generic_isCC_A(c, _CC_ALPHA) +# define isALPHANUMERIC_A(c) _generic_isCC_A(c, _CC_ALPHANUMERIC) # define isBLANK_A(c) _generic_isCC_A(c, _CC_BLANK) # define isCNTRL_A(c) _generic_isCC_A(c, _CC_CNTRL) # define isDIGIT_A(c) _generic_isCC(c, _CC_DIGIT) @@ -854,8 +863,8 @@ EXTCONST U32 PL_charclass[]; _generic_isCC(c, _CC_IS_IN_SOME_FOLD) #else /* No perl.h. 
*/ # ifdef EBCDIC -# define isALNUMC_A(c) (isASCII(c) && isALNUMC(c)) # define isALPHA_A(c) (isASCII(c) && isALPHA(c)) +# define isALPHANUMERIC_A(c) (isASCII(c) && isALPHANUMERIC(c)) # define isBLANK_A(c) (isASCII(c) && isBLANK(c)) # define isCNTRL_A(c) (isASCII(c) && isCNTRL(c)) # define isDIGIT_A(c) (isASCII(c) && isDIGIT(c)) @@ -870,8 +879,8 @@ EXTCONST U32 PL_charclass[]; # define isWORDCHAR_A(c) (isASCII(c) && isWORDCHAR(c)) # define isXDIGIT_A(c) (isASCII(c) && isXDIGIT(c)) # else /* ASCII platform, no perl.h */ -# define isALNUMC_A(c) (isALPHA_A(c) || isDIGIT_A(c)) # define isALPHA_A(c) (isUPPER_A(c) || isLOWER_A(c)) +# define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c)) # define isBLANK_A(c) ((c) == ' ' || (c) == '\t') # define isCNTRL_A(c) (FITS_IN_8_BITS(c) && ((U8) (c) < ' ' || (c) == 127)) # define isDIGIT_A(c) ((c) <= '9' && (c) >= '0') @@ -899,8 +908,8 @@ EXTCONST U32 PL_charclass[]; /* Latin1 definitions */ #ifdef H_PERL -# define isALNUMC_L1(c) _generic_isCC(c, _CC_ALNUMC) # define isALPHA_L1(c) _generic_isCC(c, _CC_ALPHA) +# define isALPHANUMERIC_L1(c) _generic_isCC(c, _CC_ALPHANUMERIC) # define isBLANK_L1(c) _generic_isCC(c, _CC_BLANK) /* continuation character for legal NAME in \N{NAME} */ @@ -942,8 +951,8 @@ EXTCONST U32 PL_charclass[]; * with Latin1, which the three currently recognized by Perl are. Some libc's * have an isblank(), but it's not guaranteed. 
*/ #ifdef EBCDIC -# define isALNUMC(c) isalnum(c) # define isALPHA(c) isalpha(c) +# define isALPHANUMERIC(c) isalnum(c) # define isBLANK(c) ((c) == ' ' || (c) == '\t' || NATIVE_TO_UNI(c) == 0xA0) # define isCNTRL(c) iscntrl(c) # define isDIGIT(c) isdigit(c) @@ -961,7 +970,7 @@ EXTCONST U32 PL_charclass[]; # define toLOWER(c) tolower(c) # define toUPPER(c) toupper(c) #else /* Not EBCDIC: ASCII-only matching */ -# define isALNUMC(c) isALNUMC_A(c) /* Mnemonic: "C's alnum" = alpha + digit */ +# define isALPHANUMERIC(c) isALPHANUMERIC_A(c) # define isALPHA(c) isALPHA_A(c) # define isBLANK(c) isBLANK_A(c) # define isCNTRL(c) isCNTRL_A(c) @@ -1003,7 +1012,7 @@ EXTCONST U32 PL_charclass[]; #ifdef USE_NEXT_CTYPE -# define isALNUMC_LC(c) NXIsAlNum((unsigned int)(c)) +# define isALPHANUMERIC_LC(c) NXIsAlNum((unsigned int)(c)) # define isALNUM_LC(c) isWORDCHAR_LC(c) # define isALPHA_LC(c) NXIsAlpha((unsigned int)(c)) # define isASCII_LC(c) isASCII((unsigned int)(c)) @@ -1028,9 +1037,10 @@ EXTCONST U32 PL_charclass[]; /* Use foo_LC_uvchr() instead of these for beyond the Latin1 range */ -# define isALNUMC_LC(c) (FITS_IN_8_BITS(c) && isalnum((unsigned char)(c))) # define isALNUM_LC(c) isWORDCHAR_LC(c) # define isALPHA_LC(c) (FITS_IN_8_BITS(c) && isalpha((unsigned char)(c))) +# define isALPHANUMERIC_LC(c) (FITS_IN_8_BITS(c) \ + && isalnum((unsigned char)(c))) # ifdef HAS_ISASCII # define isASCII_LC(c) (FITS_IN_8_BITS(c) && isascii((unsigned char)(c))) # else @@ -1059,9 +1069,9 @@ EXTCONST U32 PL_charclass[]; # else -# define isALNUMC_LC(c) (isascii(c) && isalnum(c)) # define isALNUM_LC(c) isWORDCHAR_LC(c) # define isALPHA_LC(c) (isascii(c) && isalpha(c)) +# define isALPHANUMERIC_LC(c) (isascii(c) && isalnum(c)) # define isASCII_LC(c) isascii(c) # ifdef HAS_ISBLANK # define isBLANK_LC(c) (isascii(c) && isblank(c)) @@ -1097,9 +1107,9 @@ EXTCONST U32 PL_charclass[]; #define _generic_uni(classnum, function, c) ((c) < 256 \ ? 
_generic_isCC(c, classnum) \ : function(c)) -#define isALNUMC_uni(c) _generic_uni(_CC_ALNUMC, is_uni_alnumc, c) #define isALNUM_uni(c) isWORDCHAR_uni(c) #define isALPHA_uni(c) _generic_uni(_CC_ALPHA, is_uni_alpha, c) +#define isALPHANUMERIC_uni(c) _generic_uni(_CC_ALPHANUMERIC, is_uni_alnumc, c) #define isASCII_uni(c) isASCII(c) #define isBLANK_uni(c) _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c) #define isCNTRL_uni(c) isCNTRL_L1(c) /* All controls are in Latin1 */ @@ -1126,9 +1136,10 @@ EXTCONST U32 PL_charclass[]; #define _gnrc_is_LC_uvchr(latin1, above_latin1, c) \ (c < 256 ? latin1(c) : above_latin1(NATIVE_TO_UNI(c))) -#define isALNUMC_LC_uvchr(c) _gnrc_is_LC_uvchr(isALNUMC_LC, is_uni_alnumc_lc, c) #define isALNUM_LC_uvchr(c) isWORDCHAR_LC_uvchr(c) #define isALPHA_LC_uvchr(c) _gnrc_is_LC_uvchr(isALPHA_LC, is_uni_alpha_lc, c) +#define isALPHANUMERIC_LC_uvchr(c) _gnrc_is_LC_uvchr(isALPHANUMERIC_LC, \ + is_uni_alnumc_lc, c) #define isASCII_LC_uvchr(c) isASCII_LC(c) #define isBLANK_LC_uvchr(c) _gnrc_is_LC_uvchr(isBLANK_LC, is_HORIZWS_cp_high, c) #define isCNTRL_LC_uvchr(c) (c < 256 ? isCNTRL_LC(c) : 0) @@ -1187,9 +1198,10 @@ EXTCONST U32 PL_charclass[]; * points; the regcharclass.h ones are implemented as a series of * "if-else-if-else ..." 
*/ -#define isALNUMC_utf8(p) _generic_utf8(_CC_ALNUMC, is_utf8_alnumc, p) #define isALNUM_utf8(p) isWORDCHAR_utf8(p) /* back compat */ #define isALPHA_utf8(p) _generic_utf8(_CC_ALPHA, is_utf8_alpha, p) +#define isALPHANUMERIC_utf8(p) _generic_utf8(_CC_ALPHANUMERIC, \ + is_utf8_alnumc, p) #define isASCII_utf8(p) isASCII(*p) /* Because ASCII is invariant under utf8, the non-utf8 macro works */ @@ -1242,8 +1254,9 @@ EXTCONST U32 PL_charclass[]; #define _generic_LC_utf8(macro, utf8_func, p) \ _generic_LC_utf8_utf8(macro, p, utf8_func(p)) -#define isALNUMC_LC_utf8(p) _generic_LC_utf8(isALNUMC_LC, is_utf8_alnumc, p) #define isALNUM_LC_utf8(p) isWORDCHAR_LC_utf8(p) +#define isALPHANUMERIC_LC_utf8(p) _generic_LC_utf8(isALPHANUMERIC_LC, \ + is_utf8_alnumc, p) #define isALPHA_LC_utf8(p) _generic_LC_utf8(isALPHA_LC, is_utf8_alpha, p) #define isASCII_LC_utf8(p) isASCII_LC(*p) #define isBLANK_LC_utf8(p) _generic_LC_utf8(isBLANK_LC, is_HORIZWS_high, p) @@ -1271,6 +1284,14 @@ EXTCONST U32 PL_charclass[]; #define isOCTAL(c) isOCTAL_A(c) #define isOCTAL_L1(c) isOCTAL_A(c) #define isXDIGIT_L1(c) isXDIGIT_A(c) +#define isALNUMC_A(c) isALPHANUMERIC_A(c) /* Mnemonic: "C's alnum" */ +#define isALNUMC_L1(c) isALPHANUMERIC_L1(c) +#define isALNUMC(c) isALPHANUMERIC(c) +#define isALNUMC_LC(c) isALPHANUMERIC_LC(c) +#define isALNUMC_uni(c) isALPHANUMERIC_uni(c) +#define isALNUMC_LC_uvchr(c) isALPHANUMERIC_LC_uvchr(c) +#define isALNUMC_utf8(p) isALPHANUMERIC_utf8(p) +#define isALNUMC_LC_utf8(p) isALPHANUMERIC_LC_utf8(p) /* This conversion works both ways, strangely enough. On EBCDIC platforms, * CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII, except that they don't |