summary | refs | log | tree | commit | diff
path: root/utf8.h
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2021-04-15 10:55:32 -0600
committer Karl Williamson <khw@cpan.org> 2021-08-25 11:16:34 -0600
commit 296969d3c2b710adc063a323e364dae12729e066 (patch)
tree dc0e351bf0a9dea3408abc3028238f36b964d2f0 /utf8.h
parent f79e2ff95fbb22eaf18e130c7cba8a9d40be3d75 (diff)
download perl-296969d3c2b710adc063a323e364dae12729e066.tar.gz
Make paradigm into a macro
These macros use (x) | 0 to get a compiler error if x is a pointer rather than a value. This was instituted because there was confusion about what kind of argument they were being called with. But the purpose of the paradigm wasn't obvious even to some experts; it was documented in every file in which it was used, but not at every occurrence. And, it turns out, not every compiler can cope with it. Making the paradigm into a macro, which this commit does, makes the uses self-documenting, albeit at the expense of cluttering up the macro definitions somewhat, and allows the mechanism to be turned off if necessary for some compilers. Since it will be enabled for the majority of compilers, the potential bugs will be caught anyway.
Diffstat (limited to 'utf8.h')
-rw-r--r-- utf8.h 22
1 files changed, 12 insertions, 10 deletions
diff --git a/utf8.h b/utf8.h
index 662e4cf529..2f2be9ebac 100644
--- a/utf8.h
+++ b/utf8.h
@@ -199,8 +199,10 @@ adding no time nor space requirements to the implementation.
=cut
*/
-# define NATIVE_TO_LATIN1(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
-# define LATIN1_TO_NATIVE(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
+#define NATIVE_TO_LATIN1(ch) \
+ (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
+#define LATIN1_TO_NATIVE(ch) \
+ (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
/* I8 is an intermediate version of UTF-8 used only in UTF-EBCDIC. We thus
* consider it to be identical to UTF-8 on ASCII platforms. Strictly speaking
@@ -208,11 +210,13 @@ adding no time nor space requirements to the implementation.
* because they are 8-bit encodings that serve the same purpose in Perl, and
* rarely do we need to distinguish them. The term "NATIVE_UTF8" applies to
* whichever one is applicable on the current platform */
-# define NATIVE_UTF8_TO_I8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
-# define I8_TO_NATIVE_UTF8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
+#define NATIVE_UTF8_TO_I8(ch) \
+ (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
+#define I8_TO_NATIVE_UTF8(ch) \
+ (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
-# define UNI_TO_NATIVE(ch) ((UV) ((ch) | 0))
-# define NATIVE_TO_UNI(ch) ((UV) ((ch) | 0))
+#define UNI_TO_NATIVE(ch) ((UV) ASSERT_NOT_PTR(ch))
+#define NATIVE_TO_UNI(ch) ((UV) ASSERT_NOT_PTR(ch))
/*
@@ -761,10 +765,8 @@ The reason it works on both UTF-8 encoded strings and non-UTF-8 encoded, is
that it returns TRUE in each for the exact same set of bit patterns. It is
valid on a subset of what UVCHR_IS_INVARIANT is valid on, so can just use that;
and the compiler should optimize out anything extraneous given the
-implementation of the latter. The |0 makes sure this isn't mistakenly called
-with a ptr argument.
-*/
-#define UTF8_IS_INVARIANT(c) UVCHR_IS_INVARIANT((c) | 0)
+implementation of the latter. */
+#define UTF8_IS_INVARIANT(c) UVCHR_IS_INVARIANT(ASSERT_NOT_PTR(c))
/* Like the above, but its name implies a non-UTF8 input, which as the comments
* above show, doesn't matter as to its implementation */