summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2012-12-12 09:17:50 -0700
committer: Karl Williamson <public@khwilliamson.com> 2012-12-22 11:11:27 -0700
commit: 922e8cb4d0c8566afd151f6ffc58369c567e6407 (patch)
tree: bc5574211a6bc33cbf5940b74f865fdb3e200fab /utf8.c
parent: 15861f948fe52aa0c72233cd9dfc0190bfa3fbb1 (diff)
download: perl-922e8cb4d0c8566afd151f6ffc58369c567e6407.tar.gz
Add generic _is_(uni|utf8)_FOO() function
This function uses table lookup to replace 9 more specific functions, which can now be deprecated. Those functions should not have been exposed in the public API in the first place.
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c56
1 files changed, 38 insertions, 18 deletions
diff --git a/utf8.c b/utf8.c
index 2a5aff1aea..2fb39c4b6d 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1479,6 +1479,14 @@ Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
return utf16_to_utf8(p, d, bytelen, newlen);
}
+bool
+Perl__is_uni_FOO(pTHX_ const U8 classnum, const UV c)
+{
+ U8 tmpbuf[UTF8_MAXBYTES+1];
+ uvchr_to_utf8(tmpbuf, c);
+ return _is_utf8_FOO(classnum, tmpbuf);
+}
+
/* for now these are all defined (inefficiently) in terms of the utf8 versions.
* Note that the macros in handy.h that call these short-circuit calling them
* for Latin-1 range inputs */
@@ -1488,7 +1496,7 @@ Perl_is_uni_alnum(pTHX_ UV c)
{
U8 tmpbuf[UTF8_MAXBYTES+1];
uvchr_to_utf8(tmpbuf, c);
- return is_utf8_alnum(tmpbuf);
+ return _is_utf8_FOO(_CC_WORDCHAR, tmpbuf);
}
bool
@@ -1496,7 +1504,7 @@ Perl_is_uni_alnumc(pTHX_ UV c)
{
U8 tmpbuf[UTF8_MAXBYTES+1];
uvchr_to_utf8(tmpbuf, c);
- return is_utf8_alnumc(tmpbuf);
+ return _is_utf8_FOO(_CC_ALPHANUMERIC, tmpbuf);
}
bool /* Internal function so we can deprecate the external one, and call
@@ -1532,7 +1540,7 @@ Perl_is_uni_alpha(pTHX_ UV c)
{
U8 tmpbuf[UTF8_MAXBYTES+1];
uvchr_to_utf8(tmpbuf, c);
- return is_utf8_alpha(tmpbuf);
+ return _is_utf8_FOO(_CC_ALPHA, tmpbuf);
}
bool
@@ -1558,7 +1566,7 @@ Perl_is_uni_digit(pTHX_ UV c)
{
U8 tmpbuf[UTF8_MAXBYTES+1];
uvchr_to_utf8(tmpbuf, c);
- return is_utf8_digit(tmpbuf);
+ return _is_utf8_FOO(_CC_DIGIT, tmpbuf);
}
bool
@@ -1566,7 +1574,7 @@ Perl_is_uni_upper(pTHX_ UV c)
{
U8 tmpbuf[UTF8_MAXBYTES+1];
uvchr_to_utf8(tmpbuf, c);
- return is_utf8_upper(tmpbuf);
+ return _is_utf8_FOO(_CC_UPPER, tmpbuf);
}
bool
@@ -1574,7 +1582,7 @@ Perl_is_uni_lower(pTHX_ UV c)
{
U8 tmpbuf[UTF8_MAXBYTES+1];
uvchr_to_utf8(tmpbuf, c);
- return is_utf8_lower(tmpbuf);
+ return _is_utf8_FOO(_CC_LOWER, tmpbuf);
}
bool
@@ -1588,7 +1596,7 @@ Perl_is_uni_graph(pTHX_ UV c)
{
U8 tmpbuf[UTF8_MAXBYTES+1];
uvchr_to_utf8(tmpbuf, c);
- return is_utf8_graph(tmpbuf);
+ return _is_utf8_FOO(_CC_GRAPH, tmpbuf);
}
bool
@@ -1596,7 +1604,7 @@ Perl_is_uni_print(pTHX_ UV c)
{
U8 tmpbuf[UTF8_MAXBYTES+1];
uvchr_to_utf8(tmpbuf, c);
- return is_utf8_print(tmpbuf);
+ return _is_utf8_FOO(_CC_PRINT, tmpbuf);
}
bool
@@ -1604,7 +1612,7 @@ Perl_is_uni_punct(pTHX_ UV c)
{
U8 tmpbuf[UTF8_MAXBYTES+1];
uvchr_to_utf8(tmpbuf, c);
- return is_utf8_punct(tmpbuf);
+ return _is_utf8_FOO(_CC_PUNCT, tmpbuf);
}
bool
@@ -1841,7 +1849,7 @@ Perl_is_uni_alnum_lc(pTHX_ UV c)
if (c < 256) {
return isALNUM_LC(UNI_TO_NATIVE(c));
}
- return is_uni_alnum(c);
+ return _is_uni_FOO(_CC_WORDCHAR, c);
}
bool
@@ -1850,7 +1858,7 @@ Perl_is_uni_alnumc_lc(pTHX_ UV c)
if (c < 256) {
return isALPHANUMERIC_LC(UNI_TO_NATIVE(c));
}
- return is_uni_alnumc(c);
+ return _is_uni_FOO(_CC_ALPHANUMERIC, c);
}
bool
@@ -1868,7 +1876,7 @@ Perl_is_uni_alpha_lc(pTHX_ UV c)
if (c < 256) {
return isALPHA_LC(UNI_TO_NATIVE(c));
}
- return is_uni_alpha(c);
+ return _is_uni_FOO(_CC_ALPHA, c);
}
bool
@@ -1904,7 +1912,7 @@ Perl_is_uni_digit_lc(pTHX_ UV c)
if (c < 256) {
return isDIGIT_LC(UNI_TO_NATIVE(c));
}
- return is_uni_digit(c);
+ return _is_uni_FOO(_CC_DIGIT, c);
}
bool
@@ -1913,7 +1921,7 @@ Perl_is_uni_upper_lc(pTHX_ UV c)
if (c < 256) {
return isUPPER_LC(UNI_TO_NATIVE(c));
}
- return is_uni_upper(c);
+ return _is_uni_FOO(_CC_UPPER, c);
}
bool
@@ -1922,7 +1930,7 @@ Perl_is_uni_lower_lc(pTHX_ UV c)
if (c < 256) {
return isLOWER_LC(UNI_TO_NATIVE(c));
}
- return is_uni_lower(c);
+ return _is_uni_FOO(_CC_LOWER, c);
}
bool
@@ -1940,7 +1948,7 @@ Perl_is_uni_graph_lc(pTHX_ UV c)
if (c < 256) {
return isGRAPH_LC(UNI_TO_NATIVE(c));
}
- return is_uni_graph(c);
+ return _is_uni_FOO(_CC_GRAPH, c);
}
bool
@@ -1949,7 +1957,7 @@ Perl_is_uni_print_lc(pTHX_ UV c)
if (c < 256) {
return isPRINT_LC(UNI_TO_NATIVE(c));
}
- return is_uni_print(c);
+ return _is_uni_FOO(_CC_PRINT, c);
}
bool
@@ -1958,7 +1966,7 @@ Perl_is_uni_punct_lc(pTHX_ UV c)
if (c < 256) {
return isPUNCT_LC(UNI_TO_NATIVE(c));
}
- return is_uni_punct(c);
+ return _is_uni_FOO(_CC_PUNCT, c);
}
bool
@@ -2034,6 +2042,18 @@ S_is_utf8_common(pTHX_ const U8 *const p, SV **swash,
}
bool
+Perl__is_utf8_FOO(pTHX_ const U8 classnum, const U8 *p)
+{
+ dVAR;
+
+ PERL_ARGS_ASSERT__IS_UTF8_FOO;
+
+ assert(classnum < _FIRST_NON_SWASH_CC);
+
+ return is_utf8_common(p, &PL_utf8_swash_ptrs[classnum], swash_property_names[classnum]);
+}
+
+bool
Perl_is_utf8_alnum(pTHX_ const U8 *p)
{
dVAR;