diff options
author | Karl Williamson <khw@cpan.org> | 2017-01-18 14:19:37 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2017-01-19 14:42:07 -0700 |
commit | 02cd137d3207e4ac5ebf41ec4cc32f381d012586 (patch) | |
tree | 94b0073a747ff0dfc63da72f03cd3c0dcfd95401 | |
parent | 323e4ec46db0dc8d22d9eae846bc5f0fe9d642ec (diff) | |
download | perl-02cd137d3207e4ac5ebf41ec4cc32f381d012586.tar.gz |
toke.c: Add, clarify some comments, white-space
-rw-r--r-- | toke.c | 136 |
1 files changed, 67 insertions, 69 deletions
@@ -2837,7 +2837,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) } (end if backslash) handle regular character } (end while character to read) - + */ STATIC char * @@ -2912,8 +2912,8 @@ S_scan_const(pTHX_ char *start) * range, so for most cases we just drop down and handle the value * as any other. There are two exceptions. * - * 1. A minus sign indicates that we are actually going to have - * a range. In this case, skip the '-', set a flag, then drop + * 1. A hyphen indicates that we are actually going to have a + * range. In this case, skip the '-', set a flag, then drop * down to handle what should be the end range value. * 2. After we've handled that value, the next time through, that * flag is set and we fix up the range. @@ -2931,11 +2931,10 @@ S_scan_const(pTHX_ char *start) if (! dorange) { - /* Here, we don't think we're in a range. If we've processed - * at least one character, then see if this next one is a '-', - * indicating the previous one was the start of a range. But - * don't bother if we're too close to the end for the minus to - * mean that. */ + /* Here, we don't think we're in a range. If the new character + * is not a hyphen; or if it is a hyphen, but it's too close to + * either edge to indicate a range, then it's a regular + * character. */ if (*s != '-' || s >= send - 1 || s == start) { /* A regular character. Process like any other, but first @@ -2946,25 +2945,26 @@ S_scan_const(pTHX_ char *start) non_portable_endpoint = 0; backslash_N = 0; #endif - /* The tests here and the following 'else' for being above - * Latin1 suffice to find all such occurences in the - * constant, except those added by a backslash escape - * sequence, like \x{100}. And all those set - * 'has_above_latin1' as appropriate */ + /* The tests here for being above Latin1 and similar ones + * in the following 'else' suffice to find all such + * occurences in the constant, except those added by a + * backslash escape sequence, like \x{100}. And all those + * set 'has_above_latin1' as appropriate */ if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { has_above_latin1 = TRUE; } /* Drops down to generic code to process current byte */ } - else { + else { /* Is a '-' in the context where it means a range */ if (didrange) { /* Something like y/A-C-Z// */ - Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); + Perl_croak(aTHX_ "Ambiguous range in transliteration" + " operator"); } dorange = TRUE; - s++; /* Skip past the minus */ + s++; /* Skip past the hyphen */ /* d now points to where the end-range character will be * placed. Save it so won't have to go finding it later, @@ -2978,6 +2978,8 @@ S_scan_const(pTHX_ char *start) if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { has_above_latin1 = TRUE; } + + /* Drops down to generic code to process current byte */ } } /* End of not a range */ else { @@ -2989,7 +2991,7 @@ S_scan_const(pTHX_ char *start) * 'd' points to just beyond the range end in the 'sv' string, * where we would next place something * 'offset_to_max' is the offset in 'sv' at which the character - * before 'd' begins. + * (the range's maximum end point) before 'd' begins. */ const char * max_ptr = SvPVX_const(sv) + offset_to_max; const char * min_ptr; @@ -3002,7 +3004,7 @@ S_scan_const(pTHX_ char *start) IV real_range_max = 0; #endif - /* Get the range-ends code point values. */ + /* Get the code point values of the range ends. */ if (has_utf8) { /* We know the utf8 is valid, because we just constructed * it ourselves in previous loop iterations */ @@ -3023,16 +3025,16 @@ S_scan_const(pTHX_ char *start) * [A-Z] or [a-z], and both ends are literal characters, * like 'A', and not like \x{C1} */ convert_unicode = - cBOOL(backslash_N) /* \N{} forces Unicode, hence - portable range */ - || ( ! non_portable_endpoint - && (( isLOWER_A(range_min) && isLOWER_A(range_max)) - || (isUPPER_A(range_min) && isUPPER_A(range_max)))); + cBOOL(backslash_N) /* \N{} forces Unicode, + hence portable range */ + || ( ! non_portable_endpoint + && (( isLOWER_A(range_min) && isLOWER_A(range_max)) + || (isUPPER_A(range_min) && isUPPER_A(range_max)))); if (convert_unicode) { /* Special handling is needed for these portable ranges. - * They are defined to all be in Unicode terms, which - * include all Unicode code points between the end points. + * They are defined to be in Unicode terms, which includes + * all the Unicode code points between the end points. * Convert to Unicode to get the Unicode range. Later we * will convert each code point in the range back to * native. */ @@ -3061,19 +3063,19 @@ S_scan_const(pTHX_ char *start) } #ifdef EBCDIC else if (convert_unicode) { - /* diag_listed_as: Invalid range "%s" in transliteration operator */ + /* diag_listed_as: Invalid range "%s" in transliteration operator */ Perl_croak(aTHX_ - "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" UVXf "}\"" - " in transliteration operator", - range_min, range_max); + "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" + UVXf "}\" in transliteration operator", + range_min, range_max); } #endif else { - /* diag_listed_as: Invalid range "%s" in transliteration operator */ + /* diag_listed_as: Invalid range "%s" in transliteration operator */ Perl_croak(aTHX_ - "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\"" - " in transliteration operator", - range_min, range_max); + "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\"" + " in transliteration operator", + range_min, range_max); } } @@ -3081,12 +3083,12 @@ S_scan_const(pTHX_ char *start) /* If everything in the transliteration is below 256, we * can avoid special handling later. A translation table - * of each of those bytes is created. And so we expand out - * all ranges to their constituent code points. But if - * we've encountered something above 255, the expanding - * won't help, so skip doing that. But if it's EBCDIC, we - * may have to look at each character below 256 if we have - * to convert to/from Unicode values */ + * for each of those bytes is created by op.c. So we + * expand out all ranges to their constituent code points. + * But if we've encountered something above 255, the + * expanding won't help, so skip doing that. But if it's + * EBCDIC, we may have to look at each character below 256 + * if we have to convert to/from Unicode values */ if ( has_above_latin1 #ifdef EBCDIC && (range_min > 255 || ! convert_unicode) @@ -3095,7 +3097,7 @@ S_scan_const(pTHX_ char *start) /* Move the high character one byte to the right; then * insert between it and the range begin, an illegal * byte which serves to indicate this is a range (using - * a '-' could be ambiguous). */ + * a '-' would be ambiguous). */ char *e = d++; while (e-- > max_ptr) { *(e + 1) = *e; @@ -3160,8 +3162,9 @@ S_scan_const(pTHX_ char *start) * equivalent */ if (has_utf8) { for (i = range_min; i <= range_max; i++) { - append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i), - (U8 **) &d); + append_utf8_from_native_byte( + LATIN1_TO_NATIVE((U8) i), + (U8 **) &d); } } else { @@ -3194,7 +3197,8 @@ S_scan_const(pTHX_ char *start) } #ifdef EBCDIC - /* If the original range extended above 255, add in that portion. */ + /* If the original range extended above 255, add in that + * portion. */ if (real_range_max) { *d++ = (char) UTF8_TWO_BYTE_HI(0x100); *d++ = (char) UTF8_TWO_BYTE_LO(0x100); @@ -3232,11 +3236,9 @@ S_scan_const(pTHX_ char *start) if (!esc) in_charclass = FALSE; } - - /* skip for regexp comments /(?#comment)/, except for the last - * char, which will be done separately. - * Stop on (?{..}) and friends */ - + /* skip for regexp comments /(?#comment)/, except for the last + * char, which will be done separately. Stop on (?{..}) and + * friends */ else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { if (s[2] == '#') { while (s+1 < send && *s != ')') @@ -3249,8 +3251,7 @@ S_scan_const(pTHX_ char *start) break; } } - - /* likewise skip #-initiated comments in //x patterns */ + /* likewise skip #-initiated comments in //x patterns */ else if (*s == '#' && PL_lex_inpat && !in_charclass @@ -3259,14 +3260,13 @@ S_scan_const(pTHX_ char *start) while (s < send && *s != '\n') *d++ = *s++; } - - /* no further processing of single-quoted regex */ + /* no further processing of single-quoted regex */ else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') goto default_action; - /* check for embedded arrays - (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) - */ + /* check for embedded arrays + * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) + */ else if (*s == '@' && s[1]) { if (UTF ? isIDFIRST_utf8_safe(s+1, send) @@ -3279,10 +3279,8 @@ S_scan_const(pTHX_ char *start) if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) break; /* in regexp, neither @+ nor @- are interpolated */ } - - /* check for embedded scalars. only stop if we're sure it's a - variable. - */ + /* check for embedded scalars. only stop if we're sure it's a + * variable. */ else if (*s == '$') { if (!PL_lex_inpat) /* not a regexp, so $ must be var */ break; @@ -3415,7 +3413,7 @@ S_scan_const(pTHX_ char *start) NUM_ESCAPE_INSERT: /* Insert oct or hex escaped character. */ - + /* Here uv is the ordinal of the next character being added */ if (UVCHR_IS_INVARIANT(uv)) { *d++ = (char) uv; @@ -7031,7 +7029,7 @@ Perl_yylex(pTHX) if (tmp == KEY_dump) { Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED), "dump() better written as CORE::dump(). " - "dump() will no longer be available " + "dump() will no longer be available " "in Perl 5.30"); } gv = NULL; @@ -7717,7 +7715,7 @@ Perl_yylex(pTHX) case KEY_exists: UNI(OP_EXISTS); - + case KEY_exit: UNI(OP_EXIT); @@ -7953,7 +7951,7 @@ Perl_yylex(pTHX) case KEY_last: LOOPX(OP_LAST); - + case KEY_lc: UNI(OP_LC); @@ -8133,7 +8131,7 @@ Perl_yylex(pTHX) case KEY_pos: UNIDOR(OP_POS); - + case KEY_pack: LOP(OP_PACK,XTERM); @@ -8321,7 +8319,7 @@ Perl_yylex(pTHX) case KEY_chomp: UNI(OP_CHOMP); - + case KEY_scalar: UNI(OP_SCALAR); @@ -8961,7 +8959,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, || ! SvOK(*cvp)) { char *msg; - + /* Here haven't found what we're looking for. If it is charnames, * perhaps it needs to be loaded. Try doing that before giving up */ if (*key == 'c') { @@ -10291,7 +10289,7 @@ S_scan_inputsymbol(pTHX_ char *start) ($*@) sub prototypes sub foo ($) (stuff) sub attr parameters sub foo : attr(stuff) <> readline or globs <FOO>, <>, <$fh>, or <*.c> - + In most of these cases (all but <>, patterns and transliterate) yylex() calls scan_str(). m// makes yylex() call scan_pat() which calls scan_str(). s/// makes yylex() call scan_subst() which calls @@ -10454,7 +10452,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re *to = *s; } } - + /* if the terminator isn't the same as the start character (e.g., matched brackets), we have to allow more in the quoting, and be prepared for nested brackets. @@ -10512,7 +10510,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') to[-1] = '\n'; #endif - + /* if we're out of file, or a read fails, bail and reset the current line marker so we can report where the unterminated string began */ |