summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: nagachika <nagachika@ruby-lang.org> 2023-03-25 10:33:20 +0900
committer: nagachika <nagachika@ruby-lang.org> 2023-03-25 10:33:20 +0900
commit: 8c99882dad0b6fedbd656889829a3780366bd8b6 (patch)
tree: ba97c1f87dbd18df1fa04ce65a382f9ed07b72d7
parent: 477ab3f6c7f14c79e13f694463aa06a59206dda1 (diff)
download: ruby-8c99882dad0b6fedbd656889829a3780366bd8b6.tar.gz
merge revision(s) cd5cafa4a380e2459862b6e99ff0c381362ef1be: [Backport #18827]
Respect the encoding of the source [Bug #18827]

Do not override the input string encoding at the time of preparation; the source encoding is not determined from the input yet.
---
 parse.y                  | 26 ++++++++++++++++----------
 test/ruby/test_ast.rb    | 13 +++++++++++++
 test/ruby/test_syntax.rb |  9 +++++++++
 3 files changed, 38 insertions(+), 10 deletions(-)
-rw-r--r--  parse.y                   26
-rw-r--r--  test/ruby/test_ast.rb     13
-rw-r--r--  test/ruby/test_syntax.rb   9
-rw-r--r--  version.h                  2
4 files changed, 39 insertions, 11 deletions
diff --git a/parse.y b/parse.y
index 27890ff4e8..11b158954d 100644
--- a/parse.y
+++ b/parse.y
@@ -6454,12 +6454,6 @@ lex_getline(struct parser_params *p)
if (NIL_P(line)) return line;
must_be_ascii_compatible(line);
if (RB_OBJ_FROZEN(line)) line = rb_str_dup(line); // needed for RubyVM::AST.of because script_lines in iseq is deep-frozen
-#ifndef RIPPER
- if (p->debug_lines) {
- rb_enc_associate(line, p->enc);
- rb_ary_push(p->debug_lines, line);
- }
-#endif
p->line_count++;
return line;
}
@@ -6606,7 +6600,7 @@ add_delayed_token(struct parser_params *p, const char *tok, const char *end)
#endif
static int
-nextline(struct parser_params *p)
+nextline(struct parser_params *p, int set_encoding)
{
VALUE v = p->lex.nextline;
p->lex.nextline = 0;
@@ -6624,6 +6618,12 @@ nextline(struct parser_params *p)
lex_goto_eol(p);
return -1;
}
+#ifndef RIPPER
+ if (p->debug_lines) {
+ if (set_encoding) rb_enc_associate(v, p->enc);
+ rb_ary_push(p->debug_lines, v);
+ }
+#endif
p->cr_seen = FALSE;
}
else if (NIL_P(v)) {
@@ -6655,12 +6655,12 @@ parser_cr(struct parser_params *p, int c)
}
static inline int
-nextc(struct parser_params *p)
+nextc0(struct parser_params *p, int set_encoding)
{
int c;
if (UNLIKELY((p->lex.pcur == p->lex.pend) || p->eofp || RTEST(p->lex.nextline))) {
- if (nextline(p)) return -1;
+ if (nextline(p, set_encoding)) return -1;
}
c = (unsigned char)*p->lex.pcur++;
if (UNLIKELY(c == '\r')) {
@@ -6669,6 +6669,7 @@ nextc(struct parser_params *p)
return c;
}
+#define nextc(p) nextc0(p, TRUE)
static void
pushback(struct parser_params *p, int c)
@@ -8455,7 +8456,7 @@ set_file_encoding(struct parser_params *p, const char *str, const char *send)
static void
parser_prepare(struct parser_params *p)
{
- int c = nextc(p);
+ int c = nextc0(p, FALSE);
p->token_info_enabled = !compile_for_eval && RTEST(ruby_verbose);
switch (c) {
case '#':
@@ -8467,6 +8468,11 @@ parser_prepare(struct parser_params *p)
(unsigned char)p->lex.pcur[1] == 0xbf) {
p->enc = rb_utf8_encoding();
p->lex.pcur += 2;
+#ifndef RIPPER
+ if (p->debug_lines) {
+ rb_enc_associate(p->lex.lastline, p->enc);
+ }
+#endif
p->lex.pbeg = p->lex.pcur;
return;
}
diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb
index a4edfd3cbe..cd7299f200 100644
--- a/test/ruby/test_ast.rb
+++ b/test/ruby/test_ast.rb
@@ -538,6 +538,19 @@ dummy
assert_equal("def test_keep_script_lines_for_of\n", node_method.source.lines.first)
end
+ def test_encoding_with_keep_script_lines
+ enc = Encoding::EUC_JP
+ code = "__ENCODING__".encode(enc)
+
+ assert_equal(enc, eval(code))
+
+ node = RubyVM::AbstractSyntaxTree.parse(code, keep_script_lines: false)
+ assert_equal(enc, node.children[2].children[0])
+
+ node = RubyVM::AbstractSyntaxTree.parse(code, keep_script_lines: true)
+ assert_equal(enc, node.children[2].children[0])
+ end
+
def test_e_option
assert_in_out_err(["-e", "def foo; end; pp RubyVM::AbstractSyntaxTree.of(method(:foo)).type"],
"", [":SCOPE"], [])
diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb
index 5dc9bbdaeb..53036cab3b 100644
--- a/test/ruby/test_syntax.rb
+++ b/test/ruby/test_syntax.rb
@@ -66,6 +66,15 @@ class TestSyntax < Test::Unit::TestCase
f&.close!
end
+ def test_script_lines_encoding
+ require 'tmpdir'
+ Dir.mktmpdir do |dir|
+ File.write(File.join(dir, "script_lines.rb"), "SCRIPT_LINES__ = {}\n")
+ assert_in_out_err(%w"-r./script_lines -w -Ke", "puts __ENCODING__.name",
+ %w"EUC-JP", /-K is specified/, chdir: dir)
+ end
+ end
+
def test_anonymous_block_forwarding
assert_syntax_error("def b; c(&); end", /no anonymous block parameter/)
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
diff --git a/version.h b/version.h
index 0f28e33d86..5688813a20 100644
--- a/version.h
+++ b/version.h
@@ -11,7 +11,7 @@
# define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
#define RUBY_VERSION_TEENY 4
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
-#define RUBY_PATCHLEVEL 211
+#define RUBY_PATCHLEVEL 212
#define RUBY_RELEASE_YEAR 2023
#define RUBY_RELEASE_MONTH 3