diff options
author | SADAHIRO Tomoyuki <BQW10602@nifty.com> | 2005-12-05 01:28:35 +0900 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2005-12-05 15:15:53 +0000 |
commit | 711a919c41ea4a93a45e4b6240ea6550abe85615 (patch) | |
tree | 5d03c5186caa4dcc34d4f9fe86280116b40235ec /utf8.c | |
parent | ec5fee461a5f1addabf15a3f9a2832a74bed5afe (diff) | |
download | perl-711a919c41ea4a93a45e4b6240ea6550abe85615.tar.gz |
Clarification and cleanup of the XS SWASHGET code
Subject: Re: XS-assisted SWASHGET (esp. for t/uni/class.t speedup)
Message-Id: <20051204162508.D726.BQW10602@nifty.com>
p4raw-id: //depot/perl@26255
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 31 |
1 files changed, 19 insertions, 12 deletions
@@ -1243,7 +1243,7 @@ S_is_utf8_common(pTHX_ const U8 *const p, SV **swash, if (!is_utf8_char(p)) return FALSE; if (!*swash) - *swash = swash_init("utf8", swashname, &PL_sv_undef, 0, 0); + *swash = swash_init("utf8", swashname, &PL_sv_undef, 1, 0); return swash_fetch(*swash, p, TRUE) != 0; } @@ -1545,8 +1545,12 @@ Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) &PL_utf8_tofold, "ToFold", "utf8::ToSpecFold"); } -/* a "swash" is a swatch hash */ - +/* Note: + * A "swash" is a swatch hash. + * A "swatch" is a bit vector generated by utf8.c:S_swash_get(). + * C<pkg> is a pointer to a package name for SWASHNEW, should be "utf8". + * For other parameters, see utf8::SWASHNEW in lib/utf8_heavy.pl. + */ SV* Perl_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 minbits, I32 none) { @@ -1744,7 +1748,7 @@ STATIC SV* S_swash_get(pTHX_ SV* swash, UV start, UV span) { SV *swatch; - U8 *l, *lend, *x, *xend, *s, *nl; + U8 *l, *lend, *x, *xend, *s; STRLEN lcur, xcur, scur; HV* const hv = (HV*)SvRV(swash); @@ -1801,7 +1805,7 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) STRLEN numlen; I32 flags = PERL_SCAN_SILENT_ILLDIGIT | PERL_SCAN_DISALLOW_PREFIX; - nl = (U8*)memchr(l, '\n', lend - l); + U8* nl = (U8*)memchr(l, '\n', lend - l); numlen = lend - l; min = grok_hex((char *)l, &numlen, &flags, NULL); @@ -1846,6 +1850,8 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) } } } + else + val = 0; /* bits == 1, then val should be ignored */ } else { max = min; @@ -1855,6 +1861,8 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) Perl_croak(aTHX_ "%s: illegal mapping '%s'", typestr, l); } } + else + val = 0; /* bits == 1, then val should be ignored */ } if (nl) @@ -1895,7 +1903,7 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) ++val; } } - else { + else { /* bits == 1, then val should be ignored */ if (min < start) min = start; for (key = min; key <= max; key++) { @@ -1918,7 +1926,7 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) HV* otherhv; STRLEN otherbits; SV **otherbitssvp, *other; - U8 *s, *o; + U8 *s, *o, *nl; STRLEN slen, olen; U8 opc = *x++; @@ -1993,8 +2001,7 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) break; } } - else { /* bits >= 8 */ - /* XXX: but weirdly otherval is treated as boolean */ + else { STRLEN otheroctets = otherbits >> 3; STRLEN offset = 0; U8* send = s + slen; @@ -2015,8 +2022,8 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) } } - if (opc == '+' && otherval) - otherval = 1; + if (opc == '+' && otherval) + ; /* replace with otherval */ else if (opc == '!' && !otherval) otherval = 1; else if (opc == '-' && otherval) @@ -2024,7 +2031,7 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) else if (opc == '&' && !otherval) otherval = 0; else { - s += octets; /* not modify orig swatch */ + s += octets; /* no replacement */ continue; } |