summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-03-27 20:24:31 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-03-27 20:24:31 +0000
commit7d85a32c7dc09903975590ebedb298bcbd436874 (patch)
treeeb8b461ed09d1aa3c50f0a74a94105d9fcfcc833 /utf8.c
parent3d5d53b8d98b0d07bb5d52680efed0e988a1fe89 (diff)
downloadperl-7d85a32c7dc09903975590ebedb298bcbd436874.tar.gz
Integrate perlio:
[ 9384] Various EBCDIC fixes: - major revelation that swash code is encoding aware, (or thought it was) - now it is ;-) - With that out of the way fix a slab of tr/// cases. - Fix Encode 'Unicode' to be true Unicode so tests pass. - As anticipated Base64.xs needed tweaks. - Until tr/// works right avoid old_encode64 in MIME tests. p4raw-link: @9384 on //depot/perlio: 5ad8ef521b3ffc4e6bbbb9941bc4940d442b56b2 p4raw-id: //depot/perl@9389
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c26
1 files changed, 23 insertions, 3 deletions
diff --git a/utf8.c b/utf8.c
index b95c7ad164..66d3fec81c 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1280,14 +1280,34 @@ UV
Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
{
HV* hv = (HV*)SvRV(sv);
+ /* Given a UTF-X encoded char 0xAA..0xYY,0xZZ
+ then the "swatch" is a vec() for al the chars which start
+ with 0xAA..0xYY
+ So the key in the hash is length of encoded char -1
+ */
U32 klen = UTF8SKIP(ptr) - 1;
- U32 off = ptr[klen] & 127; /* NB: 64 bit always 0 when len > 1 */
+ U32 off = ptr[klen];
STRLEN slen;
- STRLEN needents = (klen ? 64 : 128);
+ STRLEN needents;
U8 *tmps;
U32 bit;
SV *retval;
+ if (klen == 0)
+ {
+ /* If char in invariant then swatch is for all the invariant chars
+ * In both UTF-8 and UTF8-MOD that happens to be UTF_CONTINUATION_MARK
+ */
+ needents = UTF_CONTINUATION_MARK;
+ off = NATIVE_TO_UTF(ptr[klen]);
+ }
+ else
+ {
+ /* If char is encoded then swatch is for the prefix */
+ needents = (1 << UTF_ACCUMULATION_SHIFT);
+ off = NATIVE_TO_UTF(ptr[klen]) & UTF_CONTINUATION_MASK;
+ }
+
/*
* This single-entry cache saves about 1/3 of the utf8 overhead in test
* suite. (That is, only 7-8% overall over just a hash cache. Still,
@@ -1337,7 +1357,7 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
svp = hv_store(hv, (char*)ptr, klen, retval, 0);
- if (!svp || !(tmps = (U8*)SvPV(*svp, slen)) || slen < 8)
+ if (!svp || !(tmps = (U8*)SvPV(*svp, slen)) || (slen << 3) < needents)
Perl_croak(aTHX_ "SWASHGET didn't return result of proper length");
}