diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-07-01 09:11:48 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-07-03 14:05:48 -0600 |
commit | d73c39c516d283552da40a4edb8a2139e3e48da4 (patch) | |
tree | b6dd72b13d077c7b09706a8e0a2c92106ec5307d /utf8.c | |
parent | c69a9c6805950038741cc55e80637bd2c2473e31 (diff) | |
download | perl-d73c39c516d283552da40a4edb8a2139e3e48da4.tar.gz |
utf8.c: swash_to_invlist() handle EXTRAS
This function has not been able to handle what are called EXTRAS in
its input. These are things like:
!utf8::InHiragana
-utf8::InKatakana
+utf8::IsCn
besides the normal list of ranges.
This commit allows this function to handle all the same constructs as
the regular swash input function, from which most of the new code was
copied.
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 79 |
1 files changed, 78 insertions, 1 deletions
@@ -2471,7 +2471,8 @@ S_swash_get(pTHX_ SV* swash, UV start, UV span) } /* while */ go_out_list: - /* read $swash->{EXTRAS} */ + /* read $swash->{EXTRAS} + * This code also copied to swash_to_invlist() below */ x = (U8*)SvPV(*extssvp, xcur); xend = x + xcur; while (x < xend) { @@ -2889,10 +2890,13 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) SV** const listsvp = hv_fetchs(hv, "LIST", FALSE); SV** const typesvp = hv_fetchs(hv, "TYPE", FALSE); SV** const bitssvp = hv_fetchs(hv, "BITS", FALSE); + SV** const extssvp = hv_fetchs(hv, "EXTRAS", FALSE); const U8* const typestr = (U8*)SvPV_nolen(*typesvp); const STRLEN bits = SvUV(*bitssvp); const STRLEN octets = bits >> 3; /* if bits == 1, then octets == 0 */ + U8 *x, *xend; + STRLEN xcur; SV* invlist; @@ -2944,6 +2948,79 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) _append_range_to_invlist(invlist, start, end); } + /* This code is copied from swash_get() + * read $swash->{EXTRAS} */ + x = (U8*)SvPV(*extssvp, xcur); + xend = x + xcur; + while (x < xend) { + STRLEN namelen; + U8 *namestr; + SV** othersvp; + HV* otherhv; + STRLEN otherbits; + SV **otherbitssvp, *other; + U8 *nl; + + const U8 opc = *x++; + if (opc == '\n') + continue; + + nl = (U8*)memchr(x, '\n', xend - x); + + if (opc != '-' && opc != '+' && opc != '!' && opc != '&') { + if (nl) { + x = nl + 1; /* 1 is length of "\n" */ + continue; + } + else { + x = xend; /* to EXTRAS' end at which \n is not found */ + break; + } + } + + namestr = x; + if (nl) { + namelen = nl - namestr; + x = nl + 1; + } + else { + namelen = xend - namestr; + x = xend; + } + + othersvp = hv_fetch(hv, (char *)namestr, namelen, FALSE); + otherhv = MUTABLE_HV(SvRV(*othersvp)); + otherbitssvp = hv_fetchs(otherhv, "BITS", FALSE); + otherbits = (STRLEN)SvUV(*otherbitssvp); + + if (bits != otherbits || bits != 1) { + Perl_croak(aTHX_ "panic: _swash_to_invlist only operates on boolean properties"); + } + + /* The "other" swatch must be destroyed after. */ + other = _swash_to_invlist((SV *)*othersvp); + + /* End of code copied from swash_get() */ + switch (opc) { + case '+': + _invlist_union(invlist, other, &invlist); + break; + case '!': + _invlist_invert(other); + _invlist_union(invlist, other, &invlist); + break; + case '-': + _invlist_subtract(invlist, other, &invlist); + break; + case '&': + _invlist_intersection(invlist, other, &invlist); + break; + default: + break; + } + sv_free(other); /* through with it! */ + } + return invlist; } |