summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2000-09-14 14:40:40 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2000-09-14 14:40:40 +0000
commit67e989fb549091286d76fd8d29f1ec03b9da175d (patch)
treeb435bb5d55ee1fd063a1afe459e143ab597037ba /ext
parentde6193504aa249326a30bbe962866c18d77ea85d (diff)
downloadperl-67e989fb549091286d76fd8d29f1ec03b9da175d.tar.gz
Batch of UTF-8 patches from Simon Cozens.
p4raw-id: //depot/perl@7075
Diffstat (limited to 'ext')
-rw-r--r--ext/Encode/Encode.xs103
1 files changed, 89 insertions, 14 deletions
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs
index cc0a86a117..5f4a77e6af 100644
--- a/ext/Encode/Encode.xs
+++ b/ext/Encode/Encode.xs
@@ -2,29 +2,104 @@
#include "perl.h"
#include "XSUB.h"
-MODULE = Encode PACKAGE = Encode
+#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \
+ Perl_croak("panic_unimplemented"); \
+ }
+UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
+UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
+
+void call_failure (SV *routine, U8* done, U8* dest, U8* orig);
+
+MODULE = Encode PACKAGE = Encode
PROTOTYPES: ENABLE
-SV *
+I32
_bytes_to_utf8(sv, ...)
- SV * sv
+ SV * sv
CODE:
- {
- SV * encoding = 2 ? ST(1) : Nullsv;
- RETVAL = &PL_sv_undef;
- }
+ {
+ SV * encoding = items == 2 ? ST(1) : Nullsv;
+
+ if (encoding)
+ RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
+ else {
+ STRLEN len;
+ U8* s = SvPV(sv, len);
+ U8* converted;
+
+ converted = bytes_to_utf8(s, &len); /* This allocs */
+ sv_setpvn(sv, converted, len);
+ SvUTF8_on(sv); /* XXX Should we? */
+ Safefree(converted); /* ... so free it */
+ RETVAL = len;
+ }
+ }
OUTPUT:
- RETVAL
+ RETVAL
-SV *
+I32
_utf8_to_bytes(sv, ...)
- SV * sv
+ SV * sv
CODE:
- {
- SV * to = items > 1 ? ST(1) : Nullsv;
- SV * check = items > 2 ? ST(2) : Nullsv;
- RETVAL = &PL_sv_undef;
+ {
+ SV * to = items > 1 ? ST(1) : Nullsv;
+ SV * check = items > 2 ? ST(2) : Nullsv;
+
+ if (to)
+ RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
+ else {
+ U8 *s;
+ STRLEN len;
+ s = SvPV(sv, len);
+
+ if (SvTRUE(check)) {
+ /* Must do things the slow way */
+ U8 *dest;
+ U8 *src = savepv(s); /* We need a copy to pass to check() */
+ U8 *send = s + len;
+
+ New(83, dest, len, U8); /* I think */
+
+ while (s < send) {
+ if (*s < 0x80)
+ *dest++ = *s++;
+ else {
+ I32 ulen;
+ I32 byte;
+ I32 uv = *s++;
+
+ /* Have to do it all ourselves because of error routine,
+ aargh. */
+ if (!(uv & 0x40))
+ goto failure;
+ if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
+ else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
+ else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
+ else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
+ else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
+ else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
+ else { ulen = 13; uv = 0; }
+
+ /* Note change to utf8.c variable naming, for variety */
+ while (ulen--) {
+ if ((*s & 0xc0) != 0x80)
+ goto failure;
+
+ else
+ uv = (uv << 6) | (*s++ & 0x3f);
+ }
+ if (uv > 256) {
+ failure:
+ call_failure(check, s, dest, src);
+ /* Now what happens? */
+ }
+ *dest++ = (U8)uv;
+ }
+ }
+ } else
+ RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
+ }
}
OUTPUT:
RETVAL