diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2000-09-14 14:40:40 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-09-14 14:40:40 +0000 |
commit | 67e989fb549091286d76fd8d29f1ec03b9da175d (patch) | |
tree | b435bb5d55ee1fd063a1afe459e143ab597037ba /ext | |
parent | de6193504aa249326a30bbe962866c18d77ea85d (diff) | |
download | perl-67e989fb549091286d76fd8d29f1ec03b9da175d.tar.gz |
Batch of UTF-8 patches from Simon Cozens.
p4raw-id: //depot/perl@7075
Diffstat (limited to 'ext')
-rw-r--r-- | ext/Encode/Encode.xs | 103 |
1 files changed, 89 insertions, 14 deletions
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index cc0a86a117..5f4a77e6af 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -2,29 +2,104 @@ #include "perl.h" #include "XSUB.h" -MODULE = Encode PACKAGE = Encode +#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \ + Perl_croak("panic_unimplemented"); \ + } +UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) +UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) + +void call_failure (SV *routine, U8* done, U8* dest, U8* orig); + +MODULE = Encode PACKAGE = Encode PROTOTYPES: ENABLE -SV * +I32 _bytes_to_utf8(sv, ...) - SV * sv + SV * sv CODE: - { - SV * encoding = 2 ? ST(1) : Nullsv; - RETVAL = &PL_sv_undef; - } + { + SV * encoding = items == 2 ? ST(1) : Nullsv; + + if (encoding) + RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); + else { + STRLEN len; + U8* s = SvPV(sv, len); + U8* converted; + + converted = bytes_to_utf8(s, &len); /* This allocs */ + sv_setpvn(sv, converted, len); + SvUTF8_on(sv); /* XXX Should we? */ + Safefree(converted); /* ... so free it */ + RETVAL = len; + } + } OUTPUT: - RETVAL + RETVAL -SV * +I32 _utf8_to_bytes(sv, ...) - SV * sv + SV * sv CODE: - { - SV * to = items > 1 ? ST(1) : Nullsv; - SV * check = items > 2 ? ST(2) : Nullsv; - RETVAL = &PL_sv_undef; + { + SV * to = items > 1 ? ST(1) : Nullsv; + SV * check = items > 2 ? ST(2) : Nullsv; + + if (to) + RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); + else { + U8 *s; + STRLEN len; + s = SvPV(sv, len); + + if (SvTRUE(check)) { + /* Must do things the slow way */ + U8 *dest; + U8 *src = savepv(s); /* We need a copy to pass to check() */ + U8 *send = s + len; + + New(83, dest, len, U8); /* I think */ + + while (s < send) { + if (*s < 0x80) + *dest++ = *s++; + else { + I32 ulen; + I32 byte; + I32 uv = *s++; + + /* Have to do it all ourselves because of error routine, + aargh. */ + if (!(uv & 0x40)) + goto failure; + if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } + else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } + else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } + else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } + else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } + else if (!(uv & 0x01)) { ulen = 7; uv = 0; } + else { ulen = 13; uv = 0; } + + /* Note change to utf8.c variable naming, for variety */ + while (ulen--) { + if ((*s & 0xc0) != 0x80) + goto failure; + + else + uv = (uv << 6) | (*s++ & 0x3f); + } + if (uv > 256) { + failure: + call_failure(check, s, dest, src); + /* Now what happens? */ + } + *dest++ = (U8)uv; + } + } + } else + RETVAL = (utf8_to_bytes(s, &len) ? len : 0); + } } OUTPUT: RETVAL |