Make paradigm into a macro

These macros use (x) | 0 to get a compiler error if x is a pointer rather than a value. This was instituted because there had been confusion about what kind of argument the macros were being called with. But the purpose of the paradigm wasn't obvious, even to some experts; it was documented in every file in which it was used, but not at every occurrence. And it turns out that not every compiler can cope with it. Making the paradigm into a macro, which this commit does, makes the uses self-documenting, albeit at the expense of cluttering up the macro definitions somewhat, and allows the mechanism to be turned off if necessary for some compilers. Since it will be enabled for the majority of compilers, the potential bugs will be caught anyway.
author: Karl Williamson <khw@cpan.org> 2021-04-15 10:55:32 -0600
committer: Karl Williamson <khw@cpan.org> 2021-08-25 11:16:34 -0600
commit: 296969d3c2b710adc063a323e364dae12729e066 (patch)
tree: dc0e351bf0a9dea3408abc3028238f36b964d2f0 /utf8.h
parent: f79e2ff95fbb22eaf18e130c7cba8a9d40be3d75 (diff)
download: perl-296969d3c2b710adc063a323e364dae12729e066.tar.gz
1 files changed, 12 insertions, 10 deletions
diff --git a/utf8.h b/utf8.h
index 662e4cf529..2f2be9ebac 100644
--- a/utf8.h
+++ b/utf8.h
@@ -199,8 +199,10 @@ adding no time nor space requirements to the implementation.
 =cut
 */
 
-#  define NATIVE_TO_LATIN1(ch)     (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
-#  define LATIN1_TO_NATIVE(ch)     (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
+#define NATIVE_TO_LATIN1(ch)                                                \
+                (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
+#define LATIN1_TO_NATIVE(ch)                                                \
+                (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
 
 /* I8 is an intermediate version of UTF-8 used only in UTF-EBCDIC.  We thus
  * consider it to be identical to UTF-8 on ASCII platforms.  Strictly speaking
@@ -208,11 +210,13 @@ adding no time nor space requirements to the implementation.
  * because they are 8-bit encodings that serve the same purpose in Perl, and
  * rarely do we need to distinguish them.  The term "NATIVE_UTF8" applies to
  * whichever one is applicable on the current platform */
-#  define NATIVE_UTF8_TO_I8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
-#  define I8_TO_NATIVE_UTF8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
+#define NATIVE_UTF8_TO_I8(ch)                                               \
+                (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
+#define I8_TO_NATIVE_UTF8(ch)                                               \
+                (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
 
-#  define UNI_TO_NATIVE(ch)        ((UV) ((ch) | 0))
-#  define NATIVE_TO_UNI(ch)        ((UV) ((ch) | 0))
+#define UNI_TO_NATIVE(ch)        ((UV) ASSERT_NOT_PTR(ch))
+#define NATIVE_TO_UNI(ch)        ((UV) ASSERT_NOT_PTR(ch))
 
 /*
 
@@ -761,10 +765,8 @@ The reason it works on both UTF-8 encoded strings and non-UTF-8 encoded, is
 that it returns TRUE in each for the exact same set of bit patterns.  It is
 valid on a subset of what UVCHR_IS_INVARIANT is valid on, so can just use that;
 and the compiler should optimize out anything extraneous given the
-implementation of the latter.  The |0 makes sure this isn't mistakenly called
-with a ptr argument.
-*/
-#define UTF8_IS_INVARIANT(c)	UVCHR_IS_INVARIANT((c) | 0)
+implementation of the latter. */
+#define UTF8_IS_INVARIANT(c)	UVCHR_IS_INVARIANT(ASSERT_NOT_PTR(c))
 
 /* Like the above, but its name implies a non-UTF8 input, which as the comments
  * above show, doesn't matter as to its implementation */
author	Karl Williamson <khw@cpan.org>	2021-04-15 10:55:32 -0600
committer	Karl Williamson <khw@cpan.org>	2021-08-25 11:16:34 -0600
commit	296969d3c2b710adc063a323e364dae12729e066 (patch)
tree	dc0e351bf0a9dea3408abc3028238f36b964d2f0 /utf8.h
parent	f79e2ff95fbb22eaf18e130c7cba8a9d40be3d75 (diff)
download	perl-296969d3c2b710adc063a323e364dae12729e066.tar.gz