diff options
author | Karl Williamson <khw@cpan.org> | 2015-01-19 23:51:55 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2015-01-20 10:38:03 -0700 |
commit | c877af1b1d4b8cf208483b79695143d40560a8ee (patch) | |
tree | 863e23d875969c0f4711ae33d5ad04a42bcd64c0 /regcomp.c | |
parent | 21adcf33cfe83d19ce1fc78c9e222a52e661e4f4 (diff) | |
download | perl-c877af1b1d4b8cf208483b79695143d40560a8ee.tar.gz |
regcomp.c: Add warnings under re 'strict'
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 53 |
1 files changed, 53 insertions, 0 deletions
@@ -14664,6 +14664,59 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, if (unicode_range && non_portable_endpoint && prevvalue < 256) { vWARN(RExC_parse, "Both or neither range ends should be Unicode"); } + else if (prevvalue != value) { + + /* Under strict, ranges that stop and/or end in an ASCII + * printable should have each end point be a portable value + * for it (preferably like 'A', but we don't warn if it is + * a (portable) Unicode name or code point), and the range + * must be be all digits or all letters of the same case. + * Otherwise, the range is non-portable and unclear as to + * what it contains */ + if ((isPRINT_A(prevvalue) || isPRINT_A(value)) + && (non_portable_endpoint + || ! ((isDIGIT_A(prevvalue) && isDIGIT_A(value)) + || (isLOWER_A(prevvalue) && isLOWER_A(value)) + || (isUPPER_A(prevvalue) && isUPPER_A(value))))) + { + vWARN(RExC_parse, "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\""); + } + else if (prevvalue >= 0x660) { /* ARABIC_INDIC_DIGIT_ZERO */ + + /* But the nature of Unicode and languages mean we + * can't do the same checks for above-ASCII ranges, + * except in the case of digit ones. These should + * contain only digits from the same group of 10. The + * ASCII case is handled just above. 0x660 is the + * first digit character beyond ASCII. Hence here, the + * range could be a range of digits. Find out. */ + IV index_start = _invlist_search(PL_XPosix_ptrs[_CC_DIGIT], + prevvalue); + IV index_final = _invlist_search(PL_XPosix_ptrs[_CC_DIGIT], + value); + + /* If the range start and final points are in the same + * inversion list element, it means that either both + * are not digits, or both are digits in a consecutive + * sequence of digits. (So far, Unicode has kept all + * such sequences as distinct groups of 10, but assert + * to make sure). If the end points are not in the + * same element, neither should be a digit. */ + if (index_start == index_final) { + assert(! ELEMENT_RANGE_MATCHES_INVLIST(index_start) + || invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start+1] + - invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start] + == 10); + } + else if ((index_start >= 0 + && ELEMENT_RANGE_MATCHES_INVLIST(index_start)) + || (index_final >= 0 + && ELEMENT_RANGE_MATCHES_INVLIST(index_final))) + { + vWARN(RExC_parse, "Ranges of digits should be from the same group of 10"); + } + } + } } } |