summaryrefslogtreecommitdiff
path: root/universal.c
diff options
context:
space:
mode:
authorSADAHIRO Tomoyuki <BQW10602@nifty.com>2005-11-24 02:57:34 +0900
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2005-11-23 15:34:54 +0000
commit4a818d86735b88cd762faade9872a9c2e89ab057 (patch)
treeb48aa406fa47b65737b3da2fcc50fe068f4fe679 /universal.c
parentb9ff9ac175df263d69b7bed8aefc4f20969baf73 (diff)
downloadperl-4a818d86735b88cd762faade9872a9c2e89ab057.tar.gz
XS-assisted SWASHGET (esp. for t/uni/class.t speedup)
Message-Id: <20051123175603.FFD5.BQW10602@nifty.com> And : Message-Id: <20051123202935.4D9D.BQW10602@nifty.com> with some nits to use U8 instead of char more consistently p4raw-id: //depot/perl@26199
Diffstat (limited to 'universal.c')
-rw-r--r--universal.c413
1 files changed, 413 insertions, 0 deletions
diff --git a/universal.c b/universal.c
index 10dddb5efe..4d44aa7577 100644
--- a/universal.c
+++ b/universal.c
@@ -199,6 +199,7 @@ XS(XS_Regexp_DESTROY);
XS(XS_Internals_hash_seed);
XS(XS_Internals_rehash_seed);
XS(XS_Internals_HvREHASH);
+XS(XS_utf8_SWASHGET_heavy);
void
Perl_boot_core_UNIVERSAL(pTHX)
@@ -247,6 +248,7 @@ Perl_boot_core_UNIVERSAL(pTHX)
newXSproto("Internals::hash_seed",XS_Internals_hash_seed, file, "");
newXSproto("Internals::rehash_seed",XS_Internals_rehash_seed, file, "");
newXSproto("Internals::HvREHASH", XS_Internals_HvREHASH, file, "\\%");
+ newXS("utf8::SWASHGET_heavy", XS_utf8_SWASHGET_heavy, file);
}
@@ -949,6 +951,417 @@ XS(XS_Internals_HvREHASH) /* Subject to change */
Perl_croak(aTHX_ "Internals::HvREHASH $hashref");
}
+XS(XS_utf8_SWASHGET_heavy)
+{
+ dXSARGS;
+ if (items != 4) {
+ Perl_croak(aTHX_
+ "Usage: utf8::SWASHGET_heavy($self, $start, $len, DEBUG)");
+ }
+ {
+ SV* self = ST(0);
+ const I32 i_start = (I32)SvIV(ST(1));
+ const I32 i_len = (I32)SvIV(ST(2));
+ const I32 debug = (I32)SvIV(ST(3));
+ U32 start = (U32)i_start;
+ U32 len = (U32)i_len;
+
+ HV *hv;
+ SV **listsvp, **typesvp, **bitssvp, **nonesvp, **extssvp, *swatch;
+ U8 *l, *lend, *x, *xend, *s, *nextline;
+ STRLEN lcur, xcur, scur;
+ U8* typestr;
+ int typeto;
+ U32 bits, none, end, octets;
+
+ if (SvROK(self) && SvTYPE(SvRV(self))==SVt_PVHV)
+ hv = (HV*)SvRV(self);
+ else
+ Perl_croak(aTHX_ "hv is not a hash reference");
+
+ if (i_start < 0)
+ Perl_croak(aTHX_ "SWASHGET negative start");
+ if (i_len < 0)
+ Perl_croak(aTHX_ "SWASHGET negative len");
+
+ listsvp = hv_fetch(hv, "LIST", 4, FALSE);
+ typesvp = hv_fetch(hv, "TYPE", 4, FALSE);
+ bitssvp = hv_fetch(hv, "BITS", 4, FALSE);
+ nonesvp = hv_fetch(hv, "NONE", 4, FALSE);
+ extssvp = hv_fetch(hv, "EXTRAS", 6, FALSE);
+ typestr = SvPV_nolen(*typesvp);
+ typeto = typestr[0] == 'T' && typestr[1] == 'o';
+ bits = (U32)SvUV(*bitssvp);
+ none = (U32)SvUV(*nonesvp);
+ end = start + len;
+ octets = bits >> 3; /* if bits == 1, then octets == 0 */
+
+ if (bits != 1 && bits != 8 && bits != 16 && bits != 32) {
+ Perl_croak(aTHX_ "SWASHGET unknown bits %"UVuf, (UV)bits);
+ }
+ if (debug) {
+ char* selfstr = SvPV_nolen(self);
+ PerlIO_printf(Perl_error_log, "SWASHGET ");
+ PerlIO_printf(Perl_error_log, "%s %"UVuf" %"UVuf" ",
+ selfstr, (UV)start, (UV)len);
+ PerlIO_printf(Perl_error_log, "[%s/%"UVuf"/%"UVuf"]\n",
+ typestr, (UV)bits, (UV)none);
+ }
+
+ /* initialize $swatch */
+ swatch = newSVpvn("",0);
+ scur = octets ? (len * octets) : (len + 7) / 8;
+ SvGROW(swatch, scur + 1);
+ s = (U8*)SvPVX(swatch);
+ if (octets && none) {
+ const U8* e = s + scur;
+ while (s < e) {
+ if (bits == 8)
+ *s++ = (U8)(none & 0xff);
+ else if (bits == 16) {
+ *s++ = (U8)((none >> 8) & 0xff);
+ *s++ = (U8)( none & 0xff);
+ }
+ else if (bits == 32) {
+ *s++ = (U8)((none >> 24) & 0xff);
+ *s++ = (U8)((none >> 16) & 0xff);
+ *s++ = (U8)((none >> 8) & 0xff);
+ *s++ = (U8)( none & 0xff);
+ }
+ }
+ *s = '\0';
+ }
+ else {
+ (void)memzero((U8*)s, scur + 1);
+ }
+ SvCUR_set(swatch, scur);
+ s = (U8*)SvPVX(swatch);
+
+ /* read $self->{LIST} */
+ l = (U8*)SvPV(*listsvp, lcur);
+ lend = l + lcur;
+ while (l < lend) {
+ U32 min, max, val, key;
+ STRLEN numlen;
+ I32 flags = PERL_SCAN_SILENT_ILLDIGIT | PERL_SCAN_DISALLOW_PREFIX;
+
+ nextline = (U8*)memchr(l, '\n', lend - l);
+
+ numlen = lend - l;
+ min = (U32)grok_hex(l, &numlen, &flags, NULL);
+ if (numlen)
+ l += numlen;
+ else if (nextline) {
+ l = nextline + 1; /* 1 is length of "\n" */
+ continue;
+ }
+ else {
+ l = lend; /* to the end of LIST, at which no \n */
+ break;
+ }
+
+ if (isBLANK(*l)) {
+ ++l;
+ flags = PERL_SCAN_SILENT_ILLDIGIT | PERL_SCAN_DISALLOW_PREFIX;
+ numlen = lend - l;
+ max = (U32)grok_hex(l, &numlen, &flags, NULL);
+ if (numlen)
+ l += numlen;
+ else
+ max = min;
+
+ if (octets) {
+ if (isBLANK(*l)) {
+ ++l;
+ flags = PERL_SCAN_SILENT_ILLDIGIT |
+ PERL_SCAN_DISALLOW_PREFIX;
+ numlen = lend - l;
+ val = (U32)grok_hex(l, &numlen, &flags, NULL);
+ if (numlen)
+ l += numlen;
+ else
+ val = 0;
+ }
+ else {
+ val = 0;
+ if (typeto) {
+ Perl_croak(aTHX_ "%s: illegal mapping '%s'",
+ typestr, l);
+ }
+ }
+ }
+ }
+ else {
+ max = min;
+ if (octets) {
+ val = 0;
+ if (typeto) {
+ Perl_croak(aTHX_ "%s: illegal mapping '%s'",
+ typestr, l);
+ }
+ }
+ }
+
+ if (nextline)
+ l = nextline + 1;
+ else
+ l = lend;
+
+ if (max < start)
+ continue;
+
+ if (octets) {
+ if (debug) {
+ PerlIO_printf(Perl_error_log,
+ "%"UVuf" %"UVuf" %"UVuf"\n",
+ (UV)min, (UV)max, (UV)val);
+ }
+ if (min < start) {
+ if (!none || val < none) {
+ val += start - min;
+ }
+ min = start;
+ }
+ for (key = min; key <= max; key++) {
+ U32 offset;
+ if (key >= end)
+ goto go_out_list;
+ if (debug) {
+ PerlIO_printf(Perl_error_log,
+ "%"UVuf" => %"UVuf"\n",
+ (UV)key, (UV)val);
+ }
+
+ /* offset must be non-negative (start <= min <= key < end) */
+ offset = (key - start) * octets;
+ if (bits == 8)
+ s[offset] = (U8)(val & 0xff);
+ else if (bits == 16) {
+ s[offset ] = (U8)((val >> 8) & 0xff);
+ s[offset + 1] = (U8)( val & 0xff);
+ }
+ else if (bits == 32) {
+ s[offset ] = (U8)((val >> 24) & 0xff);
+ s[offset + 1] = (U8)((val >> 16) & 0xff);
+ s[offset + 2] = (U8)((val >> 8) & 0xff);
+ s[offset + 3] = (U8)( val & 0xff);
+ }
+
+ if (!none || val < none)
+ ++val;
+ }
+ }
+ else {
+ if (min < start)
+ min = start;
+ for (key = min; key <= max; key++) {
+ U32 offset = key - start;
+ if (key >= end)
+ goto go_out_list;
+ if (debug) {
+ PerlIO_printf(Perl_error_log,
+ "%"UVuf" => 1\n", (UV)key);
+ }
+ s[offset >> 3] |= 1 << (offset & 7);
+ }
+ }
+ }
+ go_out_list:
+
+ /* read $self->{EXTRAS} */
+ x = (U8*)SvPV(*extssvp, xcur);
+ xend = x + xcur;
+ while (x < xend) {
+ STRLEN namelen;
+ U8 *namestr;
+ SV** othersvp;
+ U32 otherbits;
+
+ U8 opc = *x++;
+ if (opc == '\n')
+ continue;
+
+ nextline = (U8*)memchr(x, '\n', xend - x);
+
+ if (opc != '-' && opc != '+' && opc != '!' && opc != '&') {
+ if (nextline) {
+ x = nextline + 1;
+ continue;
+ }
+ else {
+ x = xend;
+ break;
+ }
+ }
+
+ namestr = x;
+
+ if (nextline) {
+ namelen = nextline - namestr;
+ x = nextline + 1;
+ }
+ else {
+ namelen = xend - namestr;
+ x = xend;
+ }
+
+ if (debug) {
+ U8* tmpstr;
+ Newx(tmpstr, namelen + 1, U8);
+ Move(namestr, tmpstr, namelen, U8);
+ tmpstr[namelen] = '\0';
+ PerlIO_printf(Perl_error_log,
+ "INDIRECT %c %s\n", opc, tmpstr);
+ Safefree(tmpstr);
+ }
+
+ {
+ HV* otherhv;
+ SV **otherbitssvp;
+
+ othersvp = hv_fetch(hv, namestr, namelen, FALSE);
+ if (*othersvp && SvROK(*othersvp) &&
+ SvTYPE(SvRV(*othersvp))==SVt_PVHV)
+ otherhv = (HV*)SvRV(*othersvp);
+ else
+ Perl_croak(aTHX_ "otherhv is not a hash reference");
+
+ otherbitssvp = hv_fetch(otherhv, "BITS", 4, FALSE);
+ otherbits = (U32)SvUV(*otherbitssvp);
+ if (bits < otherbits)
+ Perl_croak(aTHX_ "SWASHGET size mismatch");
+ }
+
+ {
+ dSP;
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+ EXTEND(SP,3);
+ PUSHs(*othersvp);
+ PUSHs(sv_2mortal(newSViv(start)));
+ PUSHs(sv_2mortal(newSViv(len)));
+ PUTBACK;
+ if (call_method("SWASHGET", G_SCALAR)) {
+ U8 *s, *o;
+ STRLEN slen, olen;
+ SV* tmpsv = *PL_stack_sp--;
+ o = (U8*)SvPV(tmpsv, olen);
+
+ if (!olen)
+ Perl_croak(aTHX_ "SWASHGET didn't return valid swatch");
+ s = SvPV(swatch, slen);
+ if (bits == 1 && otherbits == 1) {
+ if (slen != olen)
+ Perl_croak(aTHX_ "SWASHGET length mismatch");
+
+ switch (opc) {
+ case '+':
+ while (slen--)
+ *s++ |= *o++;
+ break;
+ case '!':
+ while (slen--)
+ *s++ |= ~*o++;
+ break;
+ case '-':
+ while (slen--)
+ *s++ &= ~*o++;
+ break;
+ case '&':
+ while (slen--)
+ *s++ &= *o++;
+ break;
+ default:
+ break;
+ }
+ }
+ else {
+ U32 otheroctets = otherbits / 8;
+ U32 offset = 0;
+ U8* send = s + slen;
+
+ while (s < send) {
+ U32 val = 0;
+
+ if (otherbits == 1) {
+ val = (o[offset >> 3] >> (offset & 7)) & 1;
+ ++offset;
+ }
+ else {
+ U32 vlen = otheroctets;
+ val = *o++;
+ while (--vlen) {
+ val <<= 8;
+ val |= *o++;
+ }
+ }
+
+ if (opc == '+' && val)
+ val = 1;
+ else if (opc == '!' && !val)
+ val = 1;
+ else if (opc == '-' && val)
+ val = 0;
+ else if (opc == '&' && !val)
+ val = 0;
+ else {
+ s += octets;
+ continue;
+ }
+
+ if (bits == 8)
+ *s++ = (U8)( val & 0xff);
+ else if (bits == 16) {
+ *s++ = (U8)((val >> 8) & 0xff);
+ *s++ = (U8)( val & 0xff);
+ }
+ else if (bits == 32) {
+ *s++ = (U8)((val >> 24) & 0xff);
+ *s++ = (U8)((val >> 16) & 0xff);
+ *s++ = (U8)((val >> 8) & 0xff);
+ *s++ = (U8)( val & 0xff);
+ }
+ }
+ }
+ }
+ FREETMPS;
+ LEAVE;
+ }
+ }
+
+ if (debug) {
+ U8* s = (U8*)SvPVX(swatch);
+ PerlIO_printf(Perl_error_log, "CELLS ");
+ if (bits == 1) {
+ U32 key;
+ for (key = 0; key < len; key++) {
+ int val = (s[key >> 3] >> (key & 7)) & 1;
+ PerlIO_printf(Perl_error_log, val ? "1 " : "0 ");
+ }
+ }
+ else {
+ U8* send = s + len * octets;
+ while (s < send) {
+ U32 vlen = octets;
+ U32 val = *s++;
+ while (--vlen) {
+ val <<= 8;
+ val |= *s++;
+ }
+ PerlIO_printf(Perl_error_log, "%"UVuf" ", (UV)val);
+ }
+ }
+ PerlIO_printf(Perl_error_log, "\n");
+ }
+
+ ST(0) = swatch;
+ sv_2mortal(ST(0));
+ }
+ XSRETURN(1);
+}
+
+
/*
* Local variables:
* c-indentation-style: bsd