summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2002-04-02 20:35:13 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2002-04-02 20:35:13 +0000
commit872c91ae155f6880f8bf2b15c143bda5279a5794 (patch)
tree663505dbece632b3e75b0158fb36720c2d54829f /utf8.c
parentb929be1d26aeff4c8c3d4bbd655731a23247ecc8 (diff)
downloadperl-872c91ae155f6880f8bf2b15c143bda5279a5794.tar.gz
What started as a small nit (the charnames test, nit found
by Hugo), ballooned a bit... the goal is Larry's wish that illegal Unicode (such as U+FFFF) by default doesn't warn, since what if somebody WANTS to create illegal Unicode? Now getting close to this in the regex runtime. (Also, fix more of my fixation that BOM would be U+FFFE.) p4raw-id: //depot/perl@15689
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c16
1 files changed, 10 insertions, 6 deletions
diff --git a/utf8.c b/utf8.c
index 85a22a1ffd..0100eb17a7 100644
--- a/utf8.c
+++ b/utf8.c
@@ -64,13 +64,13 @@ Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
((uv >= 0xFDD0 && uv <= 0xFDEF &&
!(flags & UNICODE_ALLOW_FDD0))
||
- ((uv & 0xFFFF) == 0xFFFE &&
- !(flags & UNICODE_ALLOW_FFFE))
+ (UNICODE_IS_BYTE_ORDER_MARK(uv) &&
+ !(flags & UNICODE_ALLOW_BOM))
||
((uv & 0xFFFF) == 0xFFFF &&
!(flags & UNICODE_ALLOW_FFFF))) &&
/* UNICODE_ALLOW_SUPER includes
- * FFFEs and FFFFs beyond 0x10FFFF. */
+ * FFFFs beyond 0x10FFFF. */
((uv <= PERL_UNICODE_MAX) ||
!(flags & UNICODE_ALLOW_SUPER))
)
@@ -500,7 +500,8 @@ returned and retlen is set, if possible, to -1.
UV
Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
{
- return Perl_utf8n_to_uvchr(aTHX_ s, UTF8_MAXLEN, retlen, 0);
+ return Perl_utf8n_to_uvchr(aTHX_ s, UTF8_MAXLEN, retlen,
+ ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
}
/*
@@ -523,7 +524,8 @@ UV
Perl_utf8_to_uvuni(pTHX_ U8 *s, STRLEN *retlen)
{
/* Call the low level routine asking for checks */
- return Perl_utf8n_to_uvuni(aTHX_ s, UTF8_MAXLEN, retlen, 0);
+ return Perl_utf8n_to_uvuni(aTHX_ s, UTF8_MAXLEN, retlen,
+ ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
}
/*
@@ -1626,7 +1628,9 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr, bool do_utf8)
/* We use utf8n_to_uvuni() as we want an index into
Unicode tables, not a native character number.
*/
- UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXLEN, NULL, 0);
+ UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXLEN, 0,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
SV *errsv_save;
ENTER;
SAVETMPS;