diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2002-02-17 01:12:37 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2002-02-17 01:12:37 +0000 |
commit | 1feea2c750cf5f74093deff93d1536d7d44a8925 (patch) | |
tree | b2007ead2265e4aba5344f9c2d3d040dbf42f67d /utf8.c | |
parent | 640ed10d7c25ae7621893c11f70653a184961f71 (diff) | |
download | perl-1feea2c750cf5f74093deff93d1536d7d44a8925.tar.gz |
The #14715 and #14716 were okay: they just revealed
a bug in the EXACTF matching.
p4raw-id: //depot/perl@14724
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 44 |
1 files changed, 30 insertions, 14 deletions
@@ -1287,23 +1287,38 @@ to the hash is by Perl_to_utf8_case(). UV Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, char *normal, char *special) { - UV uv; + UV uv0, uv1, uv2; + U8 tmpbuf[UTF8_MAXLEN_FOLD+1]; + STRLEN len; if (!*swashp) *swashp = swash_init("utf8", normal, &PL_sv_undef, 4, 0); - uv = swash_fetch(*swashp, p, TRUE); - if (!uv) { + uv0 = utf8_to_uvchr(p, 0); + /* The NATIVE_TO_UNI() and UNI_TO_NATIVE() mappings + * are necessary in EBCDIC, they are redundant no-ops + * in ASCII-ish platforms, and hopefully optimized away. */ + uv1 = NATIVE_TO_UNI(uv0); + uvuni_to_utf8(tmpbuf, uv1); + uv2 = swash_fetch(*swashp, tmpbuf, TRUE); + if (uv2) { + /* It was "normal" (single character mapping). */ + UV uv3 = UNI_TO_NATIVE(uv2); + + len = uvchr_to_utf8(ustrp, uv3) - ustrp; + if (lenp) + *lenp = len; + + return uv3; + } + else { HV *hv; SV *keysv; HE *he; - uv = utf8_to_uvchr(p, 0); - if ((hv = get_hv(special, FALSE)) && - (keysv = sv_2mortal(Perl_newSVpvf(aTHX_ "%04"UVXf, uv))) && + (keysv = sv_2mortal(Perl_newSVpvf(aTHX_ "%04"UVXf, uv1))) && (he = hv_fetch_ent(hv, keysv, FALSE, 0))) { SV *val = HeVAL(he); - STRLEN len; char *s = SvPV(val, len); if (len > 1) { @@ -1316,8 +1331,6 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, char *norma * mapping, since any characters in the low 256 * are in Unicode code points, not EBCDIC. * --jhi */ - - U8 tmpbuf[UTF8_MAXLEN_FOLD+1]; U8 *d = tmpbuf; U8 *t, *tend; @@ -1351,14 +1364,17 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, char *norma } if (lenp) *lenp = len; + return utf8_to_uvchr(ustrp, 0); } - uv = NATIVE_TO_UNI(uv); + + /* So it was not "special": just copy it. */ + len = uvchr_to_utf8(ustrp, uv0) - ustrp; + if (lenp) + *lenp = len; + + return uv0; } - if (lenp) - *lenp = UNISKIP(uv); - uvuni_to_utf8(ustrp, uv); - return uv; } /* |