summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author: M. J. T. Guy <mjtg@cus.cam.ac.uk> 2000-07-25 13:52:45 +0100
committer: Jarkko Hietaniemi <jhi@iki.fi> 2000-07-25 13:59:28 +0000
commit: d3586b13f289ef1a971ee02271851834910db0d7 (patch)
tree: 245564d3a87f911b5bb3077960f5b71e5685f21f /utf8.c
parent: 34f814c44266cc41fe602b984782769d69f8d93e (diff)
download: perl-d3586b13f289ef1a971ee02271851834910db0d7.tar.gz
Get UTF16 BOMs working. Patch from

Subject: Re: [ID 20000719.001] Problem with bleadperl of 7/18/00
Date: Tue, 25 Jul 2000 12:52:45 +0100
Message-Id: <E13H3GP-0004MR-00@libra.cus.cam.ac.uk>

and notes from

Subject: Re: [ID 20000719.001] Problem with bleadperl of 7/18/00
From: "M.J.T. Guy" <mjtg@cus.cam.ac.uk>
Date: Tue, 25 Jul 2000 11:43:25 +0100
Message-Id: <E13H2BJ-0002nG-00@libra.cus.cam.ac.uk>

p4raw-id: //depot/perl@6435
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 9
1 files changed, 8 insertions, 1 deletions
diff --git a/utf8.c b/utf8.c
index 666ec3476a..95f457f9b2 100644
--- a/utf8.c
+++ b/utf8.c
@@ -320,13 +320,20 @@ Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN *len)
return dst;
}
-/* XXX NOTHING CALLS THE FOLLOWING TWO ROUTINES YET!!! */
/*
* Convert native or reversed UTF-16 to UTF-8.
*
* Destination must be pre-extended to 3/2 source. Do not use in-place.
* We optimize for native, for obvious reasons. */
+/* There are several problems with utf16_to_utf8().
+ * (1) U16 is not necessarily *exactly* two bytes.
+ * (2) Secondly, no check is made for odd length.
+ * (3) Thirdly, the "Malformed UTF-16 surrogate" should probably be
+ * a hard error (and it should be listed in perldiag).
+ * (4) The tests (in comp/t/require.t) are a joke: the UTF16 BOM
+ * really ought to be followed by valid UTF16 characters.
+ * --Mike Guy */
U8*
Perl_utf16_to_utf8(pTHX_ U16* p, U8* d, I32 bytelen)
{