summary | refs | log | tree | commit | diff
path: root/pp.c
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2014-01-27 10:42:28 -0700
committer Karl Williamson <public@khwilliamson.com> 2014-01-27 11:07:18 -0700
commit 445bf929f6118f5f2b0e19171f576c3a6d7ada50 (patch)
tree cab3ea7a043854d09ad59b32342755b1e8b5bffb /pp.c
parent 1b4a62a4c81dba4bbb281d24fde1c3b7308fdbfe (diff)
download perl-445bf929f6118f5f2b0e19171f576c3a6d7ada50.tar.gz
Taint more operands with case changes
The documentation says that Perl taints the results of certain operations when they are subject to locale rules, such as lc() and ucfirst(). Prior to this commit there were exceptions: the result was not tainted when the operand to these functions contained no characters whose case change actually varied depending on the locale, for example the empty string or above-Latin1 code points. Changing the code to conform to the documentation, so that the result is always tainted when locale rules are in effect, simplifies the core code and yields more consistent results.
Diffstat (limited to 'pp.c')
-rw-r--r-- pp.c 64
1 files changed, 23 insertions, 41 deletions
diff --git a/pp.c b/pp.c
index 22133b3834..c4117685c2 100644
--- a/pp.c
+++ b/pp.c
@@ -3479,7 +3479,6 @@ PP(pp_ucfirst)
STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
* lowercased) character stored in tmpbuf. May be either
* UTF-8 or not, but in either case is the number of bytes */
- bool tainted = FALSE;
s = (const U8*)SvPV_const(source, slen);
@@ -3503,12 +3502,10 @@ PP(pp_ucfirst)
doing_utf8 = TRUE;
ulen = UTF8SKIP(s);
if (op_type == OP_UCFIRST) {
- _to_utf8_title_flags(s, tmpbuf, &tculen,
- IN_LOCALE_RUNTIME, &tainted);
+ _to_utf8_title_flags(s, tmpbuf, &tculen, IN_LOCALE_RUNTIME);
}
else {
- _to_utf8_lower_flags(s, tmpbuf, &tculen,
- IN_LOCALE_RUNTIME, &tainted);
+ _to_utf8_lower_flags(s, tmpbuf, &tculen, IN_LOCALE_RUNTIME);
}
/* we can't do in-place if the length changes. */
@@ -3642,17 +3639,9 @@ PP(pp_ucfirst)
SvCUR_set(dest, need - 1);
}
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
}
else { /* Neither source nor dest are in or need to be UTF-8 */
if (slen) {
- if (IN_LOCALE_RUNTIME) {
- TAINT;
- SvTAINTED_on(dest);
- }
if (inplace) { /* in-place, only need to change the 1st char */
*d = *tmpbuf;
}
@@ -3680,6 +3669,10 @@ PP(pp_ucfirst)
SvCUR_set(dest, need - 1);
}
}
+ if (IN_LOCALE_RUNTIME) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
if (dest != source && SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);
@@ -3740,7 +3733,6 @@ PP(pp_uc)
if (DO_UTF8(source)) {
const U8 *const send = s + len;
U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- bool tainted = FALSE;
/* All occurrences of these are to be moved to follow any other marks.
* This is context-dependent. We may not be passed enough context to
@@ -3770,8 +3762,7 @@ PP(pp_uc)
* and copy it to the output buffer */
u = UTF8SKIP(s);
- uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
- IN_LOCALE_RUNTIME, &tainted);
+ uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, IN_LOCALE_RUNTIME);
#define GREEK_CAPITAL_LETTER_IOTA 0x0399
#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
if (uv == GREEK_CAPITAL_LETTER_IOTA
@@ -3806,10 +3797,6 @@ PP(pp_uc)
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
}
else { /* Not UTF-8 */
if (len) {
@@ -3819,8 +3806,6 @@ PP(pp_uc)
* latin1 as having case; otherwise the latin1 casing. Do the
* whole thing in a tight loop, for speed, */
if (IN_LOCALE_RUNTIME) {
- TAINT;
- SvTAINTED_on(dest);
for (; s < send; d++, s++)
*d = toUPPER_LC(*s);
}
@@ -3918,6 +3903,10 @@ PP(pp_uc)
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
}
} /* End of isn't utf8 */
+ if (IN_LOCALE_RUNTIME) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
if (dest != source && SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);
@@ -3970,14 +3959,12 @@ PP(pp_lc)
if (DO_UTF8(source)) {
const U8 *const send = s + len;
U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- bool tainted = FALSE;
while (s < send) {
const STRLEN u = UTF8SKIP(s);
STRLEN ulen;
- _to_utf8_lower_flags(s, tmpbuf, &ulen,
- IN_LOCALE_RUNTIME, &tainted);
+ _to_utf8_lower_flags(s, tmpbuf, &ulen, IN_LOCALE_RUNTIME);
/* Here is where we would do context-sensitive actions. See the
* commit message for 86510fb15 for why there isn't any */
@@ -4007,10 +3994,6 @@ PP(pp_lc)
SvUTF8_on(dest);
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
} else { /* Not utf8 */
if (len) {
const U8 *const send = s + len;
@@ -4018,12 +4001,10 @@ PP(pp_lc)
/* Use locale casing if in locale; regular style if not treating
* latin1 as having case; otherwise the latin1 casing. Do the
* whole thing in a tight loop, for speed, */
- if (IN_LOCALE_RUNTIME) {
- TAINT;
- SvTAINTED_on(dest);
+ if (IN_LOCALE_RUNTIME) {
for (; s < send; d++, s++)
*d = toLOWER_LC(*s);
- }
+ }
else if (! IN_UNI_8_BIT) {
for (; s < send; d++, s++) {
*d = toLOWER(*s);
@@ -4040,6 +4021,10 @@ PP(pp_lc)
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
}
}
+ if (IN_LOCALE_RUNTIME) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
if (dest != source && SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);
@@ -4164,12 +4149,11 @@ PP(pp_fc)
send = s + len;
if (DO_UTF8(source)) { /* UTF-8 flagged string. */
- bool tainted = FALSE;
while (s < send) {
const STRLEN u = UTF8SKIP(s);
STRLEN ulen;
- _to_utf8_fold_flags(s, tmpbuf, &ulen, flags, &tainted);
+ _to_utf8_fold_flags(s, tmpbuf, &ulen, flags);
if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
const UV o = d - (U8*)SvPVX_const(dest);
@@ -4182,15 +4166,9 @@ PP(pp_fc)
s += u;
}
SvUTF8_on(dest);
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
} /* Unflagged string */
else if (len) {
if ( IN_LOCALE_RUNTIME ) { /* Under locale */
- TAINT;
- SvTAINTED_on(dest);
for (; s < send; d++, s++)
*d = toFOLD_LC(*s);
}
@@ -4265,6 +4243,10 @@ PP(pp_fc)
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+ if (IN_LOCALE_RUNTIME) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
if (SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);