summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorSADAHIRO Tomoyuki <BQW10602@nifty.com>2005-12-05 01:28:35 +0900
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2005-12-05 15:15:53 +0000
commit711a919c41ea4a93a45e4b6240ea6550abe85615 (patch)
tree5d03c5186caa4dcc34d4f9fe86280116b40235ec /utf8.c
parentec5fee461a5f1addabf15a3f9a2832a74bed5afe (diff)
downloadperl-711a919c41ea4a93a45e4b6240ea6550abe85615.tar.gz
Clarification and cleanup of the XS SWASHGET code
Subject: Re: XS-assisted SWASHGET (esp. for t/uni/class.t speedup) Message-Id: <20051204162508.D726.BQW10602@nifty.com> p4raw-id: //depot/perl@26255
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c31
1 files changed, 19 insertions, 12 deletions
diff --git a/utf8.c b/utf8.c
index 813a64fcdd..8c9992f554 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1243,7 +1243,7 @@ S_is_utf8_common(pTHX_ const U8 *const p, SV **swash,
if (!is_utf8_char(p))
return FALSE;
if (!*swash)
- *swash = swash_init("utf8", swashname, &PL_sv_undef, 0, 0);
+ *swash = swash_init("utf8", swashname, &PL_sv_undef, 1, 0);
return swash_fetch(*swash, p, TRUE) != 0;
}
@@ -1545,8 +1545,12 @@ Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp)
&PL_utf8_tofold, "ToFold", "utf8::ToSpecFold");
}
-/* a "swash" is a swatch hash */
-
+/* Note:
+ * A "swash" is a swatch hash.
+ * A "swatch" is a bit vector generated by utf8.c:S_swash_get().
+ * C<pkg> is a pointer to a package name for SWASHNEW, should be "utf8".
+ * For other parameters, see utf8::SWASHNEW in lib/utf8_heavy.pl.
+ */
SV*
Perl_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 minbits, I32 none)
{
@@ -1744,7 +1748,7 @@ STATIC SV*
S_swash_get(pTHX_ SV* swash, UV start, UV span)
{
SV *swatch;
- U8 *l, *lend, *x, *xend, *s, *nl;
+ U8 *l, *lend, *x, *xend, *s;
STRLEN lcur, xcur, scur;
HV* const hv = (HV*)SvRV(swash);
@@ -1801,7 +1805,7 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span)
STRLEN numlen;
I32 flags = PERL_SCAN_SILENT_ILLDIGIT | PERL_SCAN_DISALLOW_PREFIX;
- nl = (U8*)memchr(l, '\n', lend - l);
+ U8* nl = (U8*)memchr(l, '\n', lend - l);
numlen = lend - l;
min = grok_hex((char *)l, &numlen, &flags, NULL);
@@ -1846,6 +1850,8 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span)
}
}
}
+ else
+ val = 0; /* bits == 1, then val should be ignored */
}
else {
max = min;
@@ -1855,6 +1861,8 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span)
Perl_croak(aTHX_ "%s: illegal mapping '%s'", typestr, l);
}
}
+ else
+ val = 0; /* bits == 1, then val should be ignored */
}
if (nl)
@@ -1895,7 +1903,7 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span)
++val;
}
}
- else {
+ else { /* bits == 1, then val should be ignored */
if (min < start)
min = start;
for (key = min; key <= max; key++) {
@@ -1918,7 +1926,7 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span)
HV* otherhv;
STRLEN otherbits;
SV **otherbitssvp, *other;
- U8 *s, *o;
+ U8 *s, *o, *nl;
STRLEN slen, olen;
U8 opc = *x++;
@@ -1993,8 +2001,7 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span)
break;
}
}
- else { /* bits >= 8 */
- /* XXX: but weirdly otherval is treated as boolean */
+ else {
STRLEN otheroctets = otherbits >> 3;
STRLEN offset = 0;
U8* send = s + slen;
@@ -2015,8 +2022,8 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span)
}
}
- if (opc == '+' && otherval)
- otherval = 1;
+ if (opc == '+' && otherval)
+ ; /* replace with otherval */
else if (opc == '!' && !otherval)
otherval = 1;
else if (opc == '-' && otherval)
@@ -2024,7 +2031,7 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span)
else if (opc == '&' && !otherval)
otherval = 0;
else {
- s += octets; /* not modify orig swatch */
+ s += octets; /* no replacement */
continue;
}