summaryrefslogtreecommitdiff
path: root/cpan/Unicode-Collate/Collate.xs
diff options
context:
space:
mode:
authorChris 'BinGOs' Williams <chris@bingosnet.co.uk>2011-01-23 16:48:32 +0000
committerChris 'BinGOs' Williams <chris@bingosnet.co.uk>2011-01-23 16:52:18 +0000
commitf58b9ef133ec1792309e75435d4b73428cef3ea2 (patch)
tree2475b305366f9bb2c65bc7dbd640060f352f7a87 /cpan/Unicode-Collate/Collate.xs
parent8d884f4a41f98f5a1bbcd60e3cd3e9fe2b2d9c58 (diff)
downloadperl-f58b9ef133ec1792309e75435d4b73428cef3ea2.tar.gz
Update Unicode-Collate to CPAN version 0.72
Second attempt to integrate the XS version of Unicode::Collate into core. [DELTA] 0.72 Sat Jan 22 17:28:32 2011 - xs: fix mixing char* and U8*. 0.71 Tue Jan 18 22:29:44 2011 - t/loc_test.t should not fail without Unicode::Normalize. 0.70 Sun Jan 16 20:31:07 2011 - Now U::C::Locale->new will use the compiled DUCET via XS if available. added some tests in t/loc_test.t. 0.69 Sat Jan 15 19:41:11 2011 - clarified about XSUB. revised INSTALL in README. - xs: flag passed to utf8n_to_uvuni(). - doc and comments: [perl #81876] Fix typos by Peter J. Acklam.
Diffstat (limited to 'cpan/Unicode-Collate/Collate.xs')
-rw-r--r--cpan/Unicode-Collate/Collate.xs691
1 files changed, 691 insertions, 0 deletions
diff --git a/cpan/Unicode-Collate/Collate.xs b/cpan/Unicode-Collate/Collate.xs
new file mode 100644
index 0000000000..d96912bf6d
--- /dev/null
+++ b/cpan/Unicode-Collate/Collate.xs
@@ -0,0 +1,691 @@
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+/* This file is prepared by mkheader */
+#include "ucatbl.h"
+
+/* Perl 5.6.1 ? */
+#ifndef utf8n_to_uvuni
+#define utf8n_to_uvuni utf8_to_uv
+#endif /* utf8n_to_uvuni */
+
+/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */
+#ifndef UTF8_ALLOW_BOM
+#define UTF8_ALLOW_BOM (0)
+#endif /* UTF8_ALLOW_BOM */
+
+#ifndef UTF8_ALLOW_SURROGATE
+#define UTF8_ALLOW_SURROGATE (0)
+#endif /* UTF8_ALLOW_SURROGATE */
+
+#ifndef UTF8_ALLOW_FE_FF
+#define UTF8_ALLOW_FE_FF (0)
+#endif /* UTF8_ALLOW_FE_FF */
+
+#ifndef UTF8_ALLOW_FFFF
+#define UTF8_ALLOW_FFFF (0)
+#endif /* UTF8_ALLOW_FFFF */
+
+#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF)
+
+/* if utf8n_to_uvuni() sets retlen to 0 (?) */
+#define ErrRetlenIsZero "panic (Unicode::Collate): zero-length character"
+
+/* At present, char > 0x10ffff are unaffected without complaint, right? */
+#define VALID_UTF_MAX (0x10ffff)
+#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
+
+static const UV max_div_16 = UV_MAX / 16;
+
+/* Supported Levels */
+#define MinLevel (1)
+#define MaxLevel (4)
+
+/* Shifted weight at 4th level */
+#define Shift4Wt (0xFFFF)
+
+#define VCE_Length (9)
+
+#define Hangul_SBase (0xAC00)
+#define Hangul_SIni (0xAC00)
+#define Hangul_SFin (0xD7A3)
+#define Hangul_NCount (588)
+#define Hangul_TCount (28)
+#define Hangul_LBase (0x1100)
+#define Hangul_LIni (0x1100)
+#define Hangul_LFin (0x1159)
+#define Hangul_LFill (0x115F)
+#define Hangul_LEnd (0x115F) /* Unicode 5.2 */
+#define Hangul_VBase (0x1161)
+#define Hangul_VIni (0x1160) /* from Vowel Filler */
+#define Hangul_VFin (0x11A2)
+#define Hangul_VEnd (0x11A7) /* Unicode 5.2 */
+#define Hangul_TBase (0x11A7) /* from "no-final" codepoint */
+#define Hangul_TIni (0x11A8)
+#define Hangul_TFin (0x11F9)
+#define Hangul_TEnd (0x11FF) /* Unicode 5.2 */
+#define HangulL2Ini (0xA960) /* Unicode 5.2 */
+#define HangulL2Fin (0xA97C) /* Unicode 5.2 */
+#define HangulV2Ini (0xD7B0) /* Unicode 5.2 */
+#define HangulV2Fin (0xD7C6) /* Unicode 5.2 */
+#define HangulT2Ini (0xD7CB) /* Unicode 5.2 */
+#define HangulT2Fin (0xD7FB) /* Unicode 5.2 */
+
+#define CJK_UidIni (0x4E00)
+#define CJK_UidFin (0x9FA5)
+#define CJK_UidF41 (0x9FBB)
+#define CJK_UidF51 (0x9FC3)
+#define CJK_UidF52 (0x9FCB)
+#define CJK_ExtAIni (0x3400) /* Unicode 3.0 */
+#define CJK_ExtAFin (0x4DB5) /* Unicode 3.0 */
+#define CJK_ExtBIni (0x20000) /* Unicode 3.1 */
+#define CJK_ExtBFin (0x2A6D6) /* Unicode 3.1 */
+#define CJK_ExtCIni (0x2A700) /* Unicode 5.2 */
+#define CJK_ExtCFin (0x2B734) /* Unicode 5.2 */
+#define CJK_ExtDIni (0x2B740) /* Unicode 6.0 */
+#define CJK_ExtDFin (0x2B81D) /* Unicode 6.0 */
+
+static STDCHAR UnifiedCompat[] = {
+ 1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1
+}; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */
+
+#define codeRange(bcode, ecode) ((bcode) <= code && code <= (ecode))
+
+MODULE = Unicode::Collate PACKAGE = Unicode::Collate
+
+PROTOTYPES: DISABLE
+
+void
+_fetch_rest ()
+ PREINIT:
+ char ** rest;
+ PPCODE:
+ for (rest = UCA_rest; *rest; ++rest) {
+ XPUSHs(sv_2mortal(newSVpv((char *) *rest, 0)));
+ }
+
+
+void
+_fetch_simple (uv)
+ UV uv
+ PREINIT:
+ U8 ***plane, **row;
+ U8* result = NULL;
+ PPCODE:
+ if (!OVER_UTF_MAX(uv)){
+ plane = (U8***)UCA_simple[uv >> 16];
+ if (plane) {
+ row = plane[(uv >> 8) & 0xff];
+ result = row ? row[uv & 0xff] : NULL;
+ }
+ }
+ if (result) {
+ int i;
+ int num = (int)*result;
+ ++result;
+ for (i = 0; i < num; ++i) {
+ XPUSHs(sv_2mortal(newSVpvn((char *) result, VCE_Length)));
+ result += VCE_Length;
+ }
+ } else {
+ XPUSHs(sv_2mortal(newSViv(0)));
+ }
+
+SV*
+_ignorable_simple (uv)
+ UV uv
+ ALIAS:
+ _exists_simple = 1
+ PREINIT:
+ U8 ***plane, **row;
+ int num = -1;
+ U8* result = NULL;
+ CODE:
+ if (!OVER_UTF_MAX(uv)){
+ plane = (U8***)UCA_simple[uv >> 16];
+ if (plane) {
+ row = plane[(uv >> 8) & 0xff];
+ result = row ? row[uv & 0xff] : NULL;
+ }
+ if (result)
+ num = (int)*result; /* assuming 0 <= num < 128 */
+ }
+
+ if (ix)
+ RETVAL = boolSV(num >0);
+ else
+ RETVAL = boolSV(num==0);
+ OUTPUT:
+ RETVAL
+
+
+void
+_getHexArray (src)
+ SV* src
+ PREINIT:
+ char *s, *e;
+ STRLEN byte;
+ UV value;
+ bool overflowed = FALSE;
+ const char *hexdigit;
+ PPCODE:
+ s = SvPV(src,byte);
+ for (e = s + byte; s < e;) {
+ hexdigit = strchr((char *) PL_hexdigit, *s++);
+ if (! hexdigit)
+ continue;
+ value = (hexdigit - PL_hexdigit) & 0xF;
+ while (*s) {
+ hexdigit = strchr((char *) PL_hexdigit, *s++);
+ if (! hexdigit)
+ break;
+ if (overflowed)
+ continue;
+ if (value > max_div_16) {
+ overflowed = TRUE;
+ continue;
+ }
+ value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF);
+ }
+ XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value)));
+ }
+
+
+SV*
+_isIllegal (sv)
+ SV* sv
+ PREINIT:
+ UV uv;
+ CODE:
+ if (!sv || !SvIOK(sv))
+ XSRETURN_YES;
+ uv = SvUVX(sv);
+ RETVAL = boolSV(
+ 0x10FFFF < uv /* out of range */
+ );
+OUTPUT:
+ RETVAL
+
+
+SV*
+_isNonchar (sv)
+ SV* sv
+ PREINIT:
+ UV uv;
+ CODE:
+ /* should be called only if ! _isIllegal(sv). */
+ uv = SvUVX(sv);
+ RETVAL = boolSV(
+ ((uv & 0xFFFE) == 0xFFFE) /* ??FFF[EF] (cf. utf8.c) */
+ || (0xD800 <= uv && uv <= 0xDFFF) /* unpaired surrogates */
+ || (0xFDD0 <= uv && uv <= 0xFDEF) /* other non-characters */
+ );
+OUTPUT:
+ RETVAL
+
+
+void
+_decompHangul (code)
+ UV code
+ PREINIT:
+ UV sindex, lindex, vindex, tindex;
+ PPCODE:
+ /* code *must* be in Hangul syllable.
+ * Check it before you enter here. */
+ sindex = code - Hangul_SBase;
+ lindex = sindex / Hangul_NCount;
+ vindex = (sindex % Hangul_NCount) / Hangul_TCount;
+ tindex = sindex % Hangul_TCount;
+
+ XPUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase)));
+ XPUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase)));
+ if (tindex)
+ XPUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase)));
+
+
+SV*
+getHST (code, uca_vers = 0)
+ UV code;
+ IV uca_vers;
+ PREINIT:
+ char * hangtype;
+ STRLEN typelen;
+ CODE:
+ if (codeRange(Hangul_SIni, Hangul_SFin)) {
+ if ((code - Hangul_SBase) % Hangul_TCount) {
+ hangtype = "LVT"; typelen = 3;
+ } else {
+ hangtype = "LV"; typelen = 2;
+ }
+ } else if (uca_vers < 20) {
+ if (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill) {
+ hangtype = "L"; typelen = 1;
+ } else if (codeRange(Hangul_VIni, Hangul_VFin)) {
+ hangtype = "V"; typelen = 1;
+ } else if (codeRange(Hangul_TIni, Hangul_TFin)) {
+ hangtype = "T"; typelen = 1;
+ } else {
+ hangtype = ""; typelen = 0;
+ }
+ } else {
+ if (codeRange(Hangul_LIni, Hangul_LEnd) ||
+ codeRange(HangulL2Ini, HangulL2Fin)) {
+ hangtype = "L"; typelen = 1;
+ } else if (codeRange(Hangul_VIni, Hangul_VEnd) ||
+ codeRange(HangulV2Ini, HangulV2Fin)) {
+ hangtype = "V"; typelen = 1;
+ } else if (codeRange(Hangul_TIni, Hangul_TEnd) ||
+ codeRange(HangulT2Ini, HangulT2Fin)) {
+ hangtype = "T"; typelen = 1;
+ } else {
+ hangtype = ""; typelen = 0;
+ }
+ }
+
+ RETVAL = newSVpvn(hangtype, typelen);
+OUTPUT:
+ RETVAL
+
+
+void
+_derivCE_9 (code)
+ UV code
+ ALIAS:
+ _derivCE_14 = 1
+ _derivCE_18 = 2
+ _derivCE_20 = 3
+ _derivCE_22 = 4
+ PREINIT:
+ UV base, aaaa, bbbb;
+ U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
+ U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
+ bool basic_unified = 0;
+ PPCODE:
+ if (CJK_UidIni <= code) {
+ if (codeRange(0xFA0E, 0xFA29))
+ basic_unified = (bool)UnifiedCompat[code - 0xFA0E];
+ else
+ basic_unified = (ix >= 3 ? (code <= CJK_UidF52) :
+ ix == 2 ? (code <= CJK_UidF51) :
+ ix == 1 ? (code <= CJK_UidF41) :
+ (code <= CJK_UidFin));
+ }
+ base = (basic_unified)
+ ? 0xFB40 : /* CJK */
+ ((codeRange(CJK_ExtAIni, CJK_ExtAFin))
+ ||
+ (codeRange(CJK_ExtBIni, CJK_ExtBFin))
+ ||
+ (ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
+ ||
+ (ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin)))
+ ? 0xFB80 /* CJK ext. */
+ : 0xFBC0; /* others */
+ aaaa = base + (code >> 15);
+ bbbb = (code & 0x7FFF) | 0x8000;
+ a[1] = (U8)(aaaa >> 8);
+ a[2] = (U8)(aaaa & 0xFF);
+ b[1] = (U8)(bbbb >> 8);
+ b[2] = (U8)(bbbb & 0xFF);
+ a[7] = b[7] = (U8)(code >> 8);
+ a[8] = b[8] = (U8)(code & 0xFF);
+ XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
+ XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
+
+
+void
+_derivCE_8 (code)
+ UV code
+ PREINIT:
+ UV aaaa, bbbb;
+ U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x02\x00\x01\xFF\xFF";
+ U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
+ PPCODE:
+ aaaa = 0xFF80 + (code >> 15);
+ bbbb = (code & 0x7FFF) | 0x8000;
+ a[1] = (U8)(aaaa >> 8);
+ a[2] = (U8)(aaaa & 0xFF);
+ b[1] = (U8)(bbbb >> 8);
+ b[2] = (U8)(bbbb & 0xFF);
+ a[7] = b[7] = (U8)(code >> 8);
+ a[8] = b[8] = (U8)(code & 0xFF);
+ XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
+ XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
+
+
+void
+_uideoCE_8 (code)
+ UV code
+ PREINIT:
+ U8 uice[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
+ PPCODE:
+ uice[1] = uice[7] = (U8)(code >> 8);
+ uice[2] = uice[8] = (U8)(code & 0xFF);
+ XPUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length)));
+
+
+SV*
+_isUIdeo (code, uca_vers)
+ UV code;
+ IV uca_vers;
+ bool basic_unified = 0;
+ CODE:
+ /* uca_vers = 0 for _uideoCE_8() */
+ if (CJK_UidIni <= code) {
+ if (codeRange(0xFA0E, 0xFA29))
+ basic_unified = (bool)UnifiedCompat[code - 0xFA0E];
+ else
+ basic_unified = (uca_vers >= 20 ? (code <= CJK_UidF52) :
+ uca_vers >= 18 ? (code <= CJK_UidF51) :
+ uca_vers >= 14 ? (code <= CJK_UidF41) :
+ (code <= CJK_UidFin));
+ }
+ RETVAL = boolSV(
+ (basic_unified)
+ ||
+ (codeRange(CJK_ExtAIni, CJK_ExtAFin))
+ ||
+ (uca_vers >= 8 && codeRange(CJK_ExtBIni, CJK_ExtBFin))
+ ||
+ (uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
+ ||
+ (uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin))
+ );
+OUTPUT:
+ RETVAL
+
+
+SV*
+mk_SortKey (self, buf)
+ SV* self;
+ SV* buf;
+ PREINIT:
+ SV *dst, **svp;
+ STRLEN dlen, vlen;
+ U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel];
+ AV *bufAV;
+ HV *selfHV;
+ UV back_flag;
+ I32 i, buf_len;
+ IV lv, level, uca_vers;
+ bool upper_lower, kata_hira, v2i, last_is_var;
+ CODE:
+ if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
+ selfHV = (HV*)SvRV(self);
+ else
+ croak("$self is not a HASHREF.");
+
+ svp = hv_fetch(selfHV, "level", 5, FALSE);
+ level = svp ? SvIV(*svp) : MaxLevel;
+
+ if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV)
+ bufAV = (AV*)SvRV(buf);
+ else
+ croak("XSUB, not an ARRAYREF.");
+
+ buf_len = av_len(bufAV);
+
+ if (buf_len < 0) { /* empty: -1 */
+ dlen = 2 * (MaxLevel - 1);
+ dst = newSV(dlen);
+ (void)SvPOK_only(dst);
+ d = (U8*)SvPVX(dst);
+ while (dlen--)
+ *d++ = '\0';
+ }
+ else {
+ for (lv = 0; lv < level; lv++) {
+ New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8);
+ s[lv] = eachlevel[lv];
+ }
+
+ svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE);
+ upper_lower = svp ? SvTRUE(*svp) : FALSE;
+ svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE);
+ kata_hira = svp ? SvTRUE(*svp) : FALSE;
+ svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
+ uca_vers = SvIV(*svp);
+ svp = hv_fetch(selfHV, "variable", 8, FALSE);
+ v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */
+ ? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13))
+ : FALSE;
+
+ last_is_var = FALSE;
+ for (i = 0; i <= buf_len; i++) {
+ svp = av_fetch(bufAV, i, FALSE);
+
+ if (svp && SvPOK(*svp))
+ v = (U8*)SvPV(*svp, vlen);
+ else
+ croak("not a vwt.");
+
+ if (vlen < VCE_Length) /* ignore short VCE (unexpected) */
+ continue;
+
+ /* "Ignorable (L1, L2) after Variable" since track. v. 9 */
+ if (v2i) {
+ if (*v)
+ last_is_var = TRUE;
+ else if (v[1] || v[2]) /* non zero primary weight */
+ last_is_var = FALSE;
+ else if (last_is_var) /* zero primary weight; skipped */
+ continue;
+ }
+
+ if (v[5] == 0) { /* tert wt < 256 */
+ if (upper_lower) {
+ if (0x8 <= v[6] && v[6] <= 0xC) /* lower */
+ v[6] -= 6;
+ else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */
+ v[6] += 6;
+ else if (v[6] == 0x1C) /* square upper */
+ v[6]++;
+ else if (v[6] == 0x1D) /* square lower */
+ v[6]--;
+ }
+ if (kata_hira) {
+ if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */
+ v[6] -= 2;
+ else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */
+ v[6] += 5;
+ }
+ }
+
+ for (lv = 0; lv < level; lv++) {
+ if (v[2 * lv + 1] || v[2 * lv + 2]) {
+ *s[lv]++ = v[2 * lv + 1];
+ *s[lv]++ = v[2 * lv + 2];
+ }
+ }
+ }
+
+ dlen = 2 * (MaxLevel - 1);
+ for (lv = 0; lv < level; lv++)
+ dlen += s[lv] - eachlevel[lv];
+
+ dst = newSV(dlen);
+ (void)SvPOK_only(dst);
+ d = (U8*)SvPVX(dst);
+
+ svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE);
+ back_flag = svp ? SvUV(*svp) : (UV)0;
+
+ for (lv = 0; lv < level; lv++) {
+ if (back_flag & (1 << (lv + 1))) {
+ p = s[lv];
+ e = eachlevel[lv];
+ for ( ; e < p; p -= 2) {
+ *d++ = p[-2];
+ *d++ = p[-1];
+ }
+ }
+ else {
+ p = eachlevel[lv];
+ e = s[lv];
+ while (p < e)
+ *d++ = *p++;
+ }
+ if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
+ *d++ = '\0';
+ *d++ = '\0';
+ }
+ }
+
+ for (lv = level; lv < MaxLevel; lv++) {
+ if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
+ *d++ = '\0';
+ *d++ = '\0';
+ }
+ }
+
+ for (lv = 0; lv < level; lv++) {
+ Safefree(eachlevel[lv]);
+ }
+ }
+ *d = '\0';
+ SvCUR_set(dst, d - (U8*)SvPVX(dst));
+ RETVAL = dst;
+OUTPUT:
+ RETVAL
+
+
+SV*
+_varCE (vbl, vce)
+ SV* vbl
+ SV* vce
+ PREINIT:
+ SV *dst;
+ U8 *a, *v, *d;
+ STRLEN alen, vlen;
+ CODE:
+ a = (U8*)SvPV(vbl, alen);
+ v = (U8*)SvPV(vce, vlen);
+
+ dst = newSV(vlen);
+ d = (U8*)SvPVX(dst);
+ (void)SvPOK_only(dst);
+ Copy(v, d, vlen, U8);
+ SvCUR_set(dst, vlen);
+ d[vlen] = '\0';
+
+ /* variable: checked only the first char and the length,
+ trusting checkCollator() and %VariableOK in Perl ... */
+
+ if (vlen < VCE_Length /* ignore short VCE (unexpected) */
+ ||
+ *a == 'n') /* 'non-ignorable' */
+ 1;
+ else if (*v) {
+ if (*a == 's') { /* shifted or shift-trimmed */
+ d[7] = d[1]; /* wt level 1 to 4 */
+ d[8] = d[2];
+ }
+ d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0';
+ }
+ else if (*a == 'b') /* blanked */
+ 1;
+ else if (*a == 's') { /* shifted or shift-trimmed */
+ if (alen == 7 && (d[1] + d[2] + d[3] + d[4] + d[5] + d[6])) {
+ d[7] = (U8)(Shift4Wt >> 8);
+ d[8] = (U8)(Shift4Wt & 0xFF);
+ }
+ else {
+ d[7] = d[8] = 0;
+ }
+ }
+ else
+ croak("unknown variable value '%s'", a);
+ RETVAL = dst;
+OUTPUT:
+ RETVAL
+
+
+
+SV*
+visualizeSortKey (self, key)
+ SV * self
+ SV * key
+ PREINIT:
+ HV *selfHV;
+ SV **svp, *dst;
+ U8 *s, *e, *d;
+ STRLEN klen, dlen;
+ UV uv;
+ IV uca_vers;
+ static char *upperhex = "0123456789ABCDEF";
+ CODE:
+ if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
+ selfHV = (HV*)SvRV(self);
+ else
+ croak("$self is not a HASHREF.");
+
+ svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
+ if (!svp)
+ croak("Panic: no $self->{UCA_Version} in visualizeSortKey");
+ uca_vers = SvIV(*svp);
+
+ s = (U8*)SvPV(key, klen);
+
+ /* slightly *longer* than the need, but I'm afraid of miscounting;
+ exactly: (klen / 2) * 5 + MaxLevel * 2 - 1 (excluding '\0')
+ = (klen / 2) * 5 - 1 # FFFF (16bit) and ' ' between 16bit units
+ + (MaxLevel - 1) * 2 # ' ' and '|' for level boundaries
+ + 2 # '[' and ']'
+ */
+ dlen = (klen / 2) * 5 + MaxLevel * 2 + 2;
+ dst = newSV(dlen);
+ (void)SvPOK_only(dst);
+ d = (U8*)SvPVX(dst);
+
+ *d++ = '[';
+ for (e = s + klen; s < e; s += 2) {
+ uv = (U16)(*s << 8 | s[1]);
+ if (uv) {
+ if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|')))
+ *d++ = ' ';
+ *d++ = upperhex[ (s[0] >> 4) & 0xF ];
+ *d++ = upperhex[ s[0] & 0xF ];
+ *d++ = upperhex[ (s[1] >> 4) & 0xF ];
+ *d++ = upperhex[ s[1] & 0xF ];
+ }
+ else {
+ if ((9 <= uca_vers) && (d[-1] != '['))
+ *d++ = ' ';
+ *d++ = '|';
+ }
+ }
+ *d++ = ']';
+ *d = '\0';
+ SvCUR_set(dst, d - (U8*)SvPVX(dst));
+ RETVAL = dst;
+OUTPUT:
+ RETVAL
+
+
+
+void
+unpack_U (src)
+ SV* src
+ PREINIT:
+ STRLEN srclen, retlen;
+ U8 *s, *p, *e;
+ UV uv;
+ PPCODE:
+ s = (U8*)SvPV(src,srclen);
+ if (!SvUTF8(src)) {
+ SV* tmpsv = sv_mortalcopy(src);
+ if (!SvPOK(tmpsv))
+ (void)sv_pvn_force(tmpsv,&srclen);
+ sv_utf8_upgrade(tmpsv);
+ s = (U8*)SvPV(tmpsv,srclen);
+ }
+ e = s + srclen;
+
+ for (p = s; p < e; p += retlen) {
+ uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
+ if (!retlen)
+ croak(ErrRetlenIsZero);
+ XPUSHs(sv_2mortal(newSVuv(uv)));
+ }
+