summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2017-01-18 14:19:37 -0700
committerKarl Williamson <khw@cpan.org>2017-01-19 14:42:07 -0700
commit02cd137d3207e4ac5ebf41ec4cc32f381d012586 (patch)
tree94b0073a747ff0dfc63da72f03cd3c0dcfd95401
parent323e4ec46db0dc8d22d9eae846bc5f0fe9d642ec (diff)
downloadperl-02cd137d3207e4ac5ebf41ec4cc32f381d012586.tar.gz
toke.c: Add, clarify some comments, white-space
-rw-r--r--toke.c136
1 files changed, 67 insertions, 69 deletions
diff --git a/toke.c b/toke.c
index c60d32dfd8..9700d35520 100644
--- a/toke.c
+++ b/toke.c
@@ -2837,7 +2837,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
} (end if backslash)
handle regular character
} (end while character to read)
-
+
*/
STATIC char *
@@ -2912,8 +2912,8 @@ S_scan_const(pTHX_ char *start)
* range, so for most cases we just drop down and handle the value
* as any other. There are two exceptions.
*
- * 1. A minus sign indicates that we are actually going to have
- * a range. In this case, skip the '-', set a flag, then drop
+ * 1. A hyphen indicates that we are actually going to have a
+ * range. In this case, skip the '-', set a flag, then drop
* down to handle what should be the end range value.
* 2. After we've handled that value, the next time through, that
* flag is set and we fix up the range.
@@ -2931,11 +2931,10 @@ S_scan_const(pTHX_ char *start)
if (! dorange) {
- /* Here, we don't think we're in a range. If we've processed
- * at least one character, then see if this next one is a '-',
- * indicating the previous one was the start of a range. But
- * don't bother if we're too close to the end for the minus to
- * mean that. */
+ /* Here, we don't think we're in a range. If the new character
+ * is not a hyphen; or if it is a hyphen, but it's too close to
+ * either edge to indicate a range, then it's a regular
+ * character. */
if (*s != '-' || s >= send - 1 || s == start) {
/* A regular character. Process like any other, but first
@@ -2946,25 +2945,26 @@ S_scan_const(pTHX_ char *start)
non_portable_endpoint = 0;
backslash_N = 0;
#endif
- /* The tests here and the following 'else' for being above
- * Latin1 suffice to find all such occurences in the
- * constant, except those added by a backslash escape
- * sequence, like \x{100}. And all those set
- * 'has_above_latin1' as appropriate */
+ /* The tests here for being above Latin1 and similar ones
+ * in the following 'else' suffice to find all such
+ * occurences in the constant, except those added by a
+ * backslash escape sequence, like \x{100}. And all those
+ * set 'has_above_latin1' as appropriate */
if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
has_above_latin1 = TRUE;
}
/* Drops down to generic code to process current byte */
}
- else {
+ else { /* Is a '-' in the context where it means a range */
if (didrange) { /* Something like y/A-C-Z// */
- Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
+ Perl_croak(aTHX_ "Ambiguous range in transliteration"
+ " operator");
}
dorange = TRUE;
- s++; /* Skip past the minus */
+ s++; /* Skip past the hyphen */
/* d now points to where the end-range character will be
* placed. Save it so won't have to go finding it later,
@@ -2978,6 +2978,8 @@ S_scan_const(pTHX_ char *start)
if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
has_above_latin1 = TRUE;
}
+
+ /* Drops down to generic code to process current byte */
}
} /* End of not a range */
else {
@@ -2989,7 +2991,7 @@ S_scan_const(pTHX_ char *start)
* 'd' points to just beyond the range end in the 'sv' string,
* where we would next place something
* 'offset_to_max' is the offset in 'sv' at which the character
- * before 'd' begins.
+ * (the range's maximum end point) before 'd' begins.
*/
const char * max_ptr = SvPVX_const(sv) + offset_to_max;
const char * min_ptr;
@@ -3002,7 +3004,7 @@ S_scan_const(pTHX_ char *start)
IV real_range_max = 0;
#endif
- /* Get the range-ends code point values. */
+ /* Get the code point values of the range ends. */
if (has_utf8) {
/* We know the utf8 is valid, because we just constructed
* it ourselves in previous loop iterations */
@@ -3023,16 +3025,16 @@ S_scan_const(pTHX_ char *start)
* [A-Z] or [a-z], and both ends are literal characters,
* like 'A', and not like \x{C1} */
convert_unicode =
- cBOOL(backslash_N) /* \N{} forces Unicode, hence
- portable range */
- || ( ! non_portable_endpoint
- && (( isLOWER_A(range_min) && isLOWER_A(range_max))
- || (isUPPER_A(range_min) && isUPPER_A(range_max))));
+ cBOOL(backslash_N) /* \N{} forces Unicode,
+ hence portable range */
+ || ( ! non_portable_endpoint
+ && (( isLOWER_A(range_min) && isLOWER_A(range_max))
+ || (isUPPER_A(range_min) && isUPPER_A(range_max))));
if (convert_unicode) {
/* Special handling is needed for these portable ranges.
- * They are defined to all be in Unicode terms, which
- * include all Unicode code points between the end points.
+ * They are defined to be in Unicode terms, which includes
+ * all the Unicode code points between the end points.
* Convert to Unicode to get the Unicode range. Later we
* will convert each code point in the range back to
* native. */
@@ -3061,19 +3063,19 @@ S_scan_const(pTHX_ char *start)
}
#ifdef EBCDIC
else if (convert_unicode) {
- /* diag_listed_as: Invalid range "%s" in transliteration operator */
+ /* diag_listed_as: Invalid range "%s" in transliteration operator */
Perl_croak(aTHX_
- "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" UVXf "}\""
- " in transliteration operator",
- range_min, range_max);
+ "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
+ UVXf "}\" in transliteration operator",
+ range_min, range_max);
}
#endif
else {
- /* diag_listed_as: Invalid range "%s" in transliteration operator */
+ /* diag_listed_as: Invalid range "%s" in transliteration operator */
Perl_croak(aTHX_
- "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
- " in transliteration operator",
- range_min, range_max);
+ "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
+ " in transliteration operator",
+ range_min, range_max);
}
}
@@ -3081,12 +3083,12 @@ S_scan_const(pTHX_ char *start)
/* If everything in the transliteration is below 256, we
* can avoid special handling later. A translation table
- * of each of those bytes is created. And so we expand out
- * all ranges to their constituent code points. But if
- * we've encountered something above 255, the expanding
- * won't help, so skip doing that. But if it's EBCDIC, we
- * may have to look at each character below 256 if we have
- * to convert to/from Unicode values */
+ * for each of those bytes is created by op.c. So we
+ * expand out all ranges to their constituent code points.
+ * But if we've encountered something above 255, the
+ * expanding won't help, so skip doing that. But if it's
+ * EBCDIC, we may have to look at each character below 256
+ * if we have to convert to/from Unicode values */
if ( has_above_latin1
#ifdef EBCDIC
&& (range_min > 255 || ! convert_unicode)
@@ -3095,7 +3097,7 @@ S_scan_const(pTHX_ char *start)
/* Move the high character one byte to the right; then
* insert between it and the range begin, an illegal
* byte which serves to indicate this is a range (using
- * a '-' could be ambiguous). */
+ * a '-' would be ambiguous). */
char *e = d++;
while (e-- > max_ptr) {
*(e + 1) = *e;
@@ -3160,8 +3162,9 @@ S_scan_const(pTHX_ char *start)
* equivalent */
if (has_utf8) {
for (i = range_min; i <= range_max; i++) {
- append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
- (U8 **) &d);
+ append_utf8_from_native_byte(
+ LATIN1_TO_NATIVE((U8) i),
+ (U8 **) &d);
}
}
else {
@@ -3194,7 +3197,8 @@ S_scan_const(pTHX_ char *start)
}
#ifdef EBCDIC
- /* If the original range extended above 255, add in that portion. */
+ /* If the original range extended above 255, add in that
+ * portion. */
if (real_range_max) {
*d++ = (char) UTF8_TWO_BYTE_HI(0x100);
*d++ = (char) UTF8_TWO_BYTE_LO(0x100);
@@ -3232,11 +3236,9 @@ S_scan_const(pTHX_ char *start)
if (!esc)
in_charclass = FALSE;
}
-
- /* skip for regexp comments /(?#comment)/, except for the last
- * char, which will be done separately.
- * Stop on (?{..}) and friends */
-
+ /* skip for regexp comments /(?#comment)/, except for the last
+ * char, which will be done separately. Stop on (?{..}) and
+ * friends */
else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
if (s[2] == '#') {
while (s+1 < send && *s != ')')
@@ -3249,8 +3251,7 @@ S_scan_const(pTHX_ char *start)
break;
}
}
-
- /* likewise skip #-initiated comments in //x patterns */
+ /* likewise skip #-initiated comments in //x patterns */
else if (*s == '#'
&& PL_lex_inpat
&& !in_charclass
@@ -3259,14 +3260,13 @@ S_scan_const(pTHX_ char *start)
while (s < send && *s != '\n')
*d++ = *s++;
}
-
- /* no further processing of single-quoted regex */
+ /* no further processing of single-quoted regex */
else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
goto default_action;
- /* check for embedded arrays
- (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
- */
+ /* check for embedded arrays
+ * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
+ */
else if (*s == '@' && s[1]) {
if (UTF
? isIDFIRST_utf8_safe(s+1, send)
@@ -3279,10 +3279,8 @@ S_scan_const(pTHX_ char *start)
if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
break; /* in regexp, neither @+ nor @- are interpolated */
}
-
- /* check for embedded scalars. only stop if we're sure it's a
- variable.
- */
+ /* check for embedded scalars. only stop if we're sure it's a
+ * variable. */
else if (*s == '$') {
if (!PL_lex_inpat) /* not a regexp, so $ must be var */
break;
@@ -3415,7 +3413,7 @@ S_scan_const(pTHX_ char *start)
NUM_ESCAPE_INSERT:
/* Insert oct or hex escaped character. */
-
+
/* Here uv is the ordinal of the next character being added */
if (UVCHR_IS_INVARIANT(uv)) {
*d++ = (char) uv;
@@ -7031,7 +7029,7 @@ Perl_yylex(pTHX)
if (tmp == KEY_dump) {
Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
"dump() better written as CORE::dump(). "
- "dump() will no longer be available "
+ "dump() will no longer be available "
"in Perl 5.30");
}
gv = NULL;
@@ -7717,7 +7715,7 @@ Perl_yylex(pTHX)
case KEY_exists:
UNI(OP_EXISTS);
-
+
case KEY_exit:
UNI(OP_EXIT);
@@ -7953,7 +7951,7 @@ Perl_yylex(pTHX)
case KEY_last:
LOOPX(OP_LAST);
-
+
case KEY_lc:
UNI(OP_LC);
@@ -8133,7 +8131,7 @@ Perl_yylex(pTHX)
case KEY_pos:
UNIDOR(OP_POS);
-
+
case KEY_pack:
LOP(OP_PACK,XTERM);
@@ -8321,7 +8319,7 @@ Perl_yylex(pTHX)
case KEY_chomp:
UNI(OP_CHOMP);
-
+
case KEY_scalar:
UNI(OP_SCALAR);
@@ -8961,7 +8959,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
|| ! SvOK(*cvp))
{
char *msg;
-
+
/* Here haven't found what we're looking for. If it is charnames,
* perhaps it needs to be loaded. Try doing that before giving up */
if (*key == 'c') {
@@ -10291,7 +10289,7 @@ S_scan_inputsymbol(pTHX_ char *start)
($*@) sub prototypes sub foo ($)
(stuff) sub attr parameters sub foo : attr(stuff)
<> readline or globs <FOO>, <>, <$fh>, or <*.c>
-
+
In most of these cases (all but <>, patterns and transliterate)
yylex() calls scan_str(). m// makes yylex() call scan_pat() which
calls scan_str(). s/// makes yylex() call scan_subst() which calls
@@ -10454,7 +10452,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
*to = *s;
}
}
-
+
/* if the terminator isn't the same as the start character (e.g.,
matched brackets), we have to allow more in the quoting, and
be prepared for nested brackets.
@@ -10512,7 +10510,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
to[-1] = '\n';
#endif
-
+
/* if we're out of file, or a read fails, bail and reset the current
line marker so we can report where the unterminated string began
*/