summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- op.c 10
-rw-r--r-- toke.c 4
-rw-r--r-- utf8.c 16
-rw-r--r-- utf8.h 10
-rw-r--r-- utfebcdic.h 27
5 files changed, 37 insertions, 30 deletions
diff --git a/op.c b/op.c
index de98f62e67..7e1d74b46a 100644
--- a/op.c
+++ b/op.c
@@ -4133,7 +4133,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
while (t < tend) {
cp[2*i] = utf8n_to_uvuni(t, tend-t, &ulen, flags);
t += ulen;
- if (t < tend && NATIVE_TO_UTF(*t) == 0xff) {
+ if (t < tend && NATIVE_UTF8_TO_I8(*t) == 0xff) {
t++;
cp[2*i+1] = utf8n_to_uvuni(t, tend-t, &ulen, flags);
t += ulen;
@@ -4151,7 +4151,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
t = uvuni_to_utf8(tmpbuf,nextmin);
sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
if (diff > 1) {
- U8 range_mark = UTF_TO_NATIVE(0xff);
+ U8 range_mark = I8_TO_NATIVE_UTF8(0xff);
t = uvuni_to_utf8(tmpbuf, val - 1);
sv_catpvn(transv, (char *)&range_mark, 1);
sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
@@ -4164,7 +4164,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
t = uvuni_to_utf8(tmpbuf,nextmin);
sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
{
- U8 range_mark = UTF_TO_NATIVE(0xff);
+ U8 range_mark = I8_TO_NATIVE_UTF8(0xff);
sv_catpvn(transv, (char *)&range_mark, 1);
}
t = uvuni_to_utf8(tmpbuf, 0x7fffffff);
@@ -4190,7 +4190,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
if (tfirst > tlast) {
tfirst = (I32)utf8n_to_uvuni(t, tend - t, &ulen, flags);
t += ulen;
- if (t < tend && NATIVE_TO_UTF(*t) == 0xff) { /* illegal utf8 val indicates range */
+ if (t < tend && NATIVE_UTF8_TO_I8(*t) == 0xff) { /* illegal utf8 val indicates range */
t++;
tlast = (I32)utf8n_to_uvuni(t, tend - t, &ulen, flags);
t += ulen;
@@ -4204,7 +4204,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
if (r < rend) {
rfirst = (I32)utf8n_to_uvuni(r, rend - r, &ulen, flags);
r += ulen;
- if (r < rend && NATIVE_TO_UTF(*r) == 0xff) { /* illegal utf8 val indicates range */
+ if (r < rend && NATIVE_UTF8_TO_I8(*r) == 0xff) { /* illegal utf8 val indicates range */
r++;
rlast = (I32)utf8n_to_uvuni(r, rend - r, &ulen, flags);
r += ulen;
diff --git a/toke.c b/toke.c
index 53f65c5930..49d1b22019 100644
--- a/toke.c
+++ b/toke.c
@@ -3105,7 +3105,7 @@ S_scan_const(pTHX_ char *start)
char *e = d++;
while (e-- > c)
*(e + 1) = *e;
- *c = (char)UTF_TO_NATIVE(0xff);
+ *c = (char)I8_TO_NATIVE_UTF8(0xff);
/* mark the range as done, and continue */
dorange = FALSE;
didrange = TRUE;
@@ -3217,7 +3217,7 @@ S_scan_const(pTHX_ char *start)
&& !native_range
#endif
) {
- *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
+ *d++ = (char)I8_TO_NATIVE_UTF8(0xff); /* use illegal utf8 byte--see pmtrans */
s++;
continue;
}
diff --git a/utf8.c b/utf8.c
index 8ee5818fea..d33ff4726c 100644
--- a/utf8.c
+++ b/utf8.c
@@ -182,7 +182,7 @@ Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
}
}
if (UNI_IS_INVARIANT(uv)) {
- *d++ = (U8)UTF_TO_NATIVE(uv);
+ *d++ = (U8) I8_TO_NATIVE_UTF8(uv);
return d;
}
#if defined(EBCDIC)
@@ -190,10 +190,10 @@ Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
STRLEN len = UNISKIP(uv);
U8 *p = d+len-1;
while (p > d) {
- *p-- = (U8)UTF_TO_NATIVE((uv & UTF_CONTINUATION_MASK) | UTF_CONTINUATION_MARK);
+ *p-- = (U8) I8_TO_NATIVE_UTF8((uv & UTF_CONTINUATION_MASK) | UTF_CONTINUATION_MARK);
uv >>= UTF_ACCUMULATION_SHIFT;
}
- *p = (U8)UTF_TO_NATIVE((uv & UTF_START_MASK(len)) | UTF_START_MARK(len));
+ *p = (U8) I8_TO_NATIVE_UTF8((uv & UTF_START_MASK(len)) | UTF_START_MARK(len));
return d+len;
}
#else /* Non loop style */
@@ -623,7 +623,7 @@ Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
/* An invariant is trivially well-formed */
if (UTF8_IS_INVARIANT(uv)) {
- return (UV) (NATIVE_TO_UTF(*s));
+ return (UV) (NATIVE_UTF8_TO_I8(*s));
}
/* A continuation character can't start a valid sequence */
@@ -643,7 +643,7 @@ Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
}
#ifdef EBCDIC
- uv = NATIVE_TO_UTF(uv);
+ uv = NATIVE_UTF8_TO_I8(uv);
#endif
/* Here is not a continuation byte, nor an invariant. The only thing left
@@ -1013,7 +1013,7 @@ Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
{
UV expectlen = UTF8SKIP(s);
const U8* send = s + expectlen;
- UV uv = NATIVE_TO_UTF(*s);
+ UV uv = NATIVE_UTF8_TO_I8(*s);
PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI;
@@ -3231,12 +3231,12 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8)
* In both UTF-8 and UTF-8-MOD that happens to be UTF_CONTINUATION_MARK
*/
needents = UTF_CONTINUATION_MARK;
- off = NATIVE_TO_UTF(ptr[klen]);
+ off = NATIVE_UTF8_TO_I8(ptr[klen]);
}
else {
/* If char is encoded then swatch is for the prefix */
needents = (1 << UTF_ACCUMULATION_SHIFT);
- off = NATIVE_TO_UTF(ptr[klen]) & UTF_CONTINUATION_MASK;
+ off = NATIVE_UTF8_TO_I8(ptr[klen]) & UTF_CONTINUATION_MASK;
}
/*
diff --git a/utf8.h b/utf8.h
index 2bd42fa6ed..bd5315fa32 100644
--- a/utf8.h
+++ b/utf8.h
@@ -319,9 +319,9 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
* works on both UTF-8 encoded strings and non-encoded, as it returns TRUE in
* each for the exact same set of bit patterns. (And it works on any byte in a
* UTF-8 encoded string) */
-#define UTF8_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_TO_UTF(c))
+#define UTF8_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_UTF8_TO_I8(c))
-#define NATIVE_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE8_TO_UNI(c))
+#define NATIVE_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_TO_LATIN1(c))
#define MAX_PORTABLE_UTF8_TWO_BYTE 0x3FF /* constrained by EBCDIC */
@@ -431,9 +431,9 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
* U+110001: \xF4\x90\x80\x81 \xF9\xA2\xA0\xA0\xA1
*/
#ifdef EBCDIC /* Both versions assume well-formed UTF8 */
-# define UTF8_IS_SUPER(s) (NATIVE_TO_I8(* (U8*) (s)) >= 0xF9 \
- && (NATIVE_TO_I8(* (U8*) (s)) > 0xF9 \
- || (NATIVE_TO_I8(* (U8*) ((s)) + 1 >= 0xA2))))
+# define UTF8_IS_SUPER(s) (NATIVE_UTF8_TO_I8(* (U8*) (s)) >= 0xF9 \
+ && (NATIVE_UTF8_TO_I8(* (U8*) (s)) > 0xF9 \
+ || (NATIVE_UTF8_TO_I8(* ((U8*) (s) + 1)) >= 0xA2))) /* 2nd byte, as I8 */
#else
# define UTF8_IS_SUPER(s) (*(U8*) (s) >= 0xF4 \
&& (*(U8*) (s) > 0xF4 || (*((U8*) (s) + 1) >= 0x90)))
diff --git a/utfebcdic.h b/utfebcdic.h
index 9f2bfa6d9f..ec342b5623 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -527,12 +527,17 @@ END_EXTERN_C
#define NATIVE_UTF8_TO_I8(ch) PL_e2utf[(U8)(ch)] /* table lookup: native UTF-8 byte -> I8 byte */
#define I8_TO_NATIVE_UTF8(ch) PL_utf2e[(U8)(ch)] /* table lookup: I8 byte -> native UTF-8 byte */
-/* Transform in wide UV char space */
-#define NATIVE_TO_UNI(ch) (((ch) > 255) ? (ch) : NATIVE_TO_ASCII(ch))
-#define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
+/* Transforms in wide UV chars */
+#define NATIVE_TO_UNI(ch) (((ch) > 255) ? (ch) : NATIVE_TO_LATIN1(ch))
+#define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : LATIN1_TO_NATIVE(ch))
+
/* Transform in invariant..byte space */
-#define NATIVE_TO_NEED(enc,ch) ((enc) ? UTF_TO_NATIVE(NATIVE_TO_ASCII(ch)) : (ch))
-#define ASCII_TO_NEED(enc,ch) ((enc) ? UTF_TO_NATIVE(ch) : ASCII_TO_NATIVE(ch))
+#define NATIVE_TO_NEED(enc,ch) ((enc) \
+ ? I8_TO_NATIVE_UTF8(NATIVE_TO_LATIN1(ch)) \
+ : (ch))
+#define ASCII_TO_NEED(enc,ch) ((enc) \
+ ? I8_TO_NATIVE_UTF8(ch) \
+ : LATIN1_TO_NATIVE(ch))
/*
The following table is adapted from tr16, it shows I8 encoding of Unicode code points.
@@ -565,11 +570,13 @@ END_EXTERN_C
* Comments as to the meaning of each are given at their corresponding utf8.h
* definitions */
-#define UTF8_IS_START(c) (NATIVE_TO_UTF(c) >= 0xC5 && NATIVE_TO_UTF(c) != 0xE0)
-#define UTF8_IS_CONTINUATION(c) ((NATIVE_TO_UTF(c) & 0xE0) == 0xA0)
-#define UTF8_IS_CONTINUED(c) (NATIVE_TO_UTF(c) >= 0xA0)
-#define UTF8_IS_DOWNGRADEABLE_START(c) (NATIVE_TO_UTF(c) >= 0xC5 && NATIVE_TO_UTF(c) <= 0xC7)
-#define UTF8_IS_ABOVE_LATIN1(c) (NATIVE_TO_I8(c) >= 0xC8)
+#define UTF8_IS_START(c) (NATIVE_UTF8_TO_I8(c) >= 0xC5 \
+ && NATIVE_UTF8_TO_I8(c) != 0xE0)
+#define UTF8_IS_CONTINUATION(c) ((NATIVE_UTF8_TO_I8(c) & 0xE0) == 0xA0)
+#define UTF8_IS_CONTINUED(c) (NATIVE_UTF8_TO_I8(c) >= 0xA0)
+#define UTF8_IS_DOWNGRADEABLE_START(c) (NATIVE_UTF8_TO_I8(c) >= 0xC5 \
+ && NATIVE_UTF8_TO_I8(c) <= 0xC7)
+#define UTF8_IS_ABOVE_LATIN1(c) (NATIVE_UTF8_TO_I8(c) >= 0xC8)
#define UTF_START_MARK(len) (((len) > 7) ? 0xFF : ((U8)(0xFE << (7-(len)))))
#define UTF_START_MASK(len) (((len) >= 6) ? 0x01 : (0x1F >> ((len)-2)))