diff options
author | nagachika <nagachika@ruby-lang.org> | 2023-03-25 10:33:20 +0900 |
---|---|---|
committer | nagachika <nagachika@ruby-lang.org> | 2023-03-25 10:33:20 +0900 |
commit | 8c99882dad0b6fedbd656889829a3780366bd8b6 (patch) | |
tree | ba97c1f87dbd18df1fa04ce65a382f9ed07b72d7 | |
parent | 477ab3f6c7f14c79e13f694463aa06a59206dda1 (diff) | |
download | ruby-8c99882dad0b6fedbd656889829a3780366bd8b6.tar.gz |
merge revision(s) cd5cafa4a380e2459862b6e99ff0c381362ef1be: [Backport #18827]
Respect the encoding of the source [Bug #18827]
Do not override the input string encoding at the time of preparation,
because the source encoding has not yet been determined from the input.
---
parse.y | 26 ++++++++++++++++----------
test/ruby/test_ast.rb | 13 +++++++++++++
test/ruby/test_syntax.rb | 9 +++++++++
3 files changed, 38 insertions(+), 10 deletions(-)
-rw-r--r-- | parse.y | 26 | ||||
-rw-r--r-- | test/ruby/test_ast.rb | 13 | ||||
-rw-r--r-- | test/ruby/test_syntax.rb | 9 | ||||
-rw-r--r-- | version.h | 2 |
4 files changed, 39 insertions, 11 deletions
@@ -6454,12 +6454,6 @@ lex_getline(struct parser_params *p) if (NIL_P(line)) return line; must_be_ascii_compatible(line); if (RB_OBJ_FROZEN(line)) line = rb_str_dup(line); // needed for RubyVM::AST.of because script_lines in iseq is deep-frozen -#ifndef RIPPER - if (p->debug_lines) { - rb_enc_associate(line, p->enc); - rb_ary_push(p->debug_lines, line); - } -#endif p->line_count++; return line; } @@ -6606,7 +6600,7 @@ add_delayed_token(struct parser_params *p, const char *tok, const char *end) #endif static int -nextline(struct parser_params *p) +nextline(struct parser_params *p, int set_encoding) { VALUE v = p->lex.nextline; p->lex.nextline = 0; @@ -6624,6 +6618,12 @@ nextline(struct parser_params *p) lex_goto_eol(p); return -1; } +#ifndef RIPPER + if (p->debug_lines) { + if (set_encoding) rb_enc_associate(v, p->enc); + rb_ary_push(p->debug_lines, v); + } +#endif p->cr_seen = FALSE; } else if (NIL_P(v)) { @@ -6655,12 +6655,12 @@ parser_cr(struct parser_params *p, int c) } static inline int -nextc(struct parser_params *p) +nextc0(struct parser_params *p, int set_encoding) { int c; if (UNLIKELY((p->lex.pcur == p->lex.pend) || p->eofp || RTEST(p->lex.nextline))) { - if (nextline(p)) return -1; + if (nextline(p, set_encoding)) return -1; } c = (unsigned char)*p->lex.pcur++; if (UNLIKELY(c == '\r')) { @@ -6669,6 +6669,7 @@ nextc(struct parser_params *p) return c; } +#define nextc(p) nextc0(p, TRUE) static void pushback(struct parser_params *p, int c) @@ -8455,7 +8456,7 @@ set_file_encoding(struct parser_params *p, const char *str, const char *send) static void parser_prepare(struct parser_params *p) { - int c = nextc(p); + int c = nextc0(p, FALSE); p->token_info_enabled = !compile_for_eval && RTEST(ruby_verbose); switch (c) { case '#': @@ -8467,6 +8468,11 @@ parser_prepare(struct parser_params *p) (unsigned char)p->lex.pcur[1] == 0xbf) { p->enc = rb_utf8_encoding(); p->lex.pcur += 2; +#ifndef RIPPER + if (p->debug_lines) { + rb_enc_associate(p->lex.lastline, p->enc); + 
} +#endif p->lex.pbeg = p->lex.pcur; return; } diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb index a4edfd3cbe..cd7299f200 100644 --- a/test/ruby/test_ast.rb +++ b/test/ruby/test_ast.rb @@ -538,6 +538,19 @@ dummy assert_equal("def test_keep_script_lines_for_of\n", node_method.source.lines.first) end + def test_encoding_with_keep_script_lines + enc = Encoding::EUC_JP + code = "__ENCODING__".encode(enc) + + assert_equal(enc, eval(code)) + + node = RubyVM::AbstractSyntaxTree.parse(code, keep_script_lines: false) + assert_equal(enc, node.children[2].children[0]) + + node = RubyVM::AbstractSyntaxTree.parse(code, keep_script_lines: true) + assert_equal(enc, node.children[2].children[0]) + end + def test_e_option assert_in_out_err(["-e", "def foo; end; pp RubyVM::AbstractSyntaxTree.of(method(:foo)).type"], "", [":SCOPE"], []) diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb index 5dc9bbdaeb..53036cab3b 100644 --- a/test/ruby/test_syntax.rb +++ b/test/ruby/test_syntax.rb @@ -66,6 +66,15 @@ class TestSyntax < Test::Unit::TestCase f&.close! end + def test_script_lines_encoding + require 'tmpdir' + Dir.mktmpdir do |dir| + File.write(File.join(dir, "script_lines.rb"), "SCRIPT_LINES__ = {}\n") + assert_in_out_err(%w"-r./script_lines -w -Ke", "puts __ENCODING__.name", + %w"EUC-JP", /-K is specified/, chdir: dir) + end + end + def test_anonymous_block_forwarding assert_syntax_error("def b; c(&); end", /no anonymous block parameter/) assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") @@ -11,7 +11,7 @@ # define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR #define RUBY_VERSION_TEENY 4 #define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR -#define RUBY_PATCHLEVEL 211 +#define RUBY_PATCHLEVEL 212 #define RUBY_RELEASE_YEAR 2023 #define RUBY_RELEASE_MONTH 3 |