summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jeremy Evans <code@jeremyevans.net> 2022-12-20 12:44:11 -0800
committer: Jeremy Evans <code@jeremyevans.net> 2022-12-22 11:50:26 -0800
commit: 7e8fa06022a9e412e3f8e6c8b6f0ba1909f648d5 (patch)
tree: d677df86e4df121b3b005fa7d91c1a084e7734fc
parent: 9dcee2d80ee995e11b0fd437d4a94930ccb6db67 (diff)
download: ruby-7e8fa06022a9e412e3f8e6c8b6f0ba1909f648d5.tar.gz
Always issue deprecation warning when calling Regexp.new with 3rd positional argument
Previously, only certain values of the 3rd argument triggered a deprecation warning. First step for fix for bug #18797. Support for the 3rd argument will be removed after the release of Ruby 3.2. Fix minor fallout discovered by the tests. Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
-rw-r--r-- lib/racc/statetransitiontable.rb 2
-rw-r--r-- re.c 24
-rw-r--r-- spec/ruby/core/regexp/shared/new.rb 82
-rw-r--r-- test/psych/test_yaml.rb 2
-rw-r--r-- test/ruby/test_regexp.rb 40
5 files changed, 87 insertions, 63 deletions
diff --git a/lib/racc/statetransitiontable.rb b/lib/racc/statetransitiontable.rb
index cae411c98b..d75fa1657a 100644
--- a/lib/racc/statetransitiontable.rb
+++ b/lib/racc/statetransitiontable.rb
@@ -216,7 +216,7 @@ module Racc
end
i = ii
end
- Regexp.compile(map, nil, 'n')
+ Regexp.compile(map, Regexp::NOENCODING)
end
def set_table(entries, dummy, tbl, chk, ptr)
diff --git a/re.c b/re.c
index 837ca87835..7a74318558 100644
--- a/re.c
+++ b/re.c
@@ -3759,10 +3759,11 @@ struct reg_init_args {
static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
+void rb_warn_deprecated_to_remove(const char *removal, const char *fmt, const char *suggest, ...);
/*
* call-seq:
- * Regexp.new(string, options = 0, n_flag = nil, timeout: nil) -> regexp
+ * Regexp.new(string, options = 0, timeout: nil) -> regexp
* Regexp.new(regexp, timeout: nil) -> regexp
*
* With argument +string+ given, returns a new regexp with the given string
@@ -3780,24 +3781,18 @@ static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
* Regexp.new('foo', 'im') # => /foo/im
*
* - The logical OR of one or more of the constants
- * Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE:
+ * Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
+ * Regexp::NOENCODING:
*
* Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
* Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
* Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
+ * Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
* flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
* Regexp.new('foo', flags) # => /foo/mix
*
* - +nil+ or +false+, which is ignored.
*
- * If optional argument +n_flag+ if it is a string starts with
- * <code>'n'</code> or <code>'N'</code>, the encoding of +string+ is
- * ignored and the new regexp encoding is fixed to +ASCII-8BIT+ or
- * +US-ASCII+, by its content.
- *
- * Regexp.new('foo', nil, 'n') # => /foo/n
- * Regexp.new("\u3042", nil, 'n') # => /\xE3\x81\x82/n
- *
* If optional keyword argument +timeout+ is given,
* its float value overrides the timeout interval for the class,
* Regexp.timeout.
@@ -3841,7 +3836,7 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
VALUE str, src, opts = Qundef, n_flag = Qundef, kwargs;
VALUE re = Qnil;
- rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);
+ argc = rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);
args->timeout = Qnil;
if (!NIL_P(kwargs)) {
@@ -3852,6 +3847,10 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
}
+ if (argc == 3) {
+ rb_warn_deprecated_to_remove("3.3", "3rd argument to Regexp.new", "2nd argument");
+ }
+
if (RB_TYPE_P(src, T_REGEXP)) {
re = src;
@@ -3876,9 +3875,6 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
enc = rb_ascii8bit_encoding();
flags |= ARG_ENCODING_NONE;
}
- else {
- rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "encoding option is ignored - %s", kcode);
- }
}
str = StringValue(src);
}
diff --git a/spec/ruby/core/regexp/shared/new.rb b/spec/ruby/core/regexp/shared/new.rb
index 10c2d3d390..058a51b1aa 100644
--- a/spec/ruby/core/regexp/shared/new.rb
+++ b/spec/ruby/core/regexp/shared/new.rb
@@ -197,48 +197,50 @@ describe :regexp_new_string, shared: true do
end
end
- it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
- -> {
- Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII
- }.should complain(/encoding option is ignored/)
- end
+ ruby_version_is ""..."3.2" do
+ it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
+ -> {
+ Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII
+ }.should complain(/encoding option is ignored/)
+ end
- it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do
- -> {
- Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII
- }.should complain(/encoding option is ignored/)
- end
+ it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do
+ -> {
+ Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII
+ }.should complain(/encoding option is ignored/)
+ end
- it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do
- -> {
- Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII
- }.should complain(/encoding option is ignored/)
- end
+ it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do
+ -> {
+ Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII
+ }.should complain(/encoding option is ignored/)
+ end
- it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do
- Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII
- Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII
- end
+ it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do
+ Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII
+ Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII
+ end
- it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do
- a = "(?:[\x8E\xA1-\xFE])"
- str = "\A(?:#{a}|x*)\z"
+ it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do
+ a = "(?:[\x8E\xA1-\xFE])"
+ str = "\A(?:#{a}|x*)\z"
- Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY
- Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY
- Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
- Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
+ Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY
+ Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY
+ Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
+ Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
+ end
end
describe "with escaped characters" do
@@ -598,8 +600,10 @@ describe :regexp_new_regexp, shared: true do
Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII
end
- it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
- Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
+ ruby_version_is ''...'3.2' do
+ it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
+ Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
+ end
end
end
end
diff --git a/test/psych/test_yaml.rb b/test/psych/test_yaml.rb
index e12b9769fe..32dd43b98e 100644
--- a/test/psych/test_yaml.rb
+++ b/test/psych/test_yaml.rb
@@ -34,7 +34,7 @@ class Psych_Unit_Tests < Psych::TestCase
# [ruby-core:34969]
def test_regexp_with_n
- assert_cycle(Regexp.new('',0,'n'))
+ assert_cycle(Regexp.new('',Regexp::NOENCODING))
end
#
# Tests modified from 00basic.t in Psych.pm
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index fb2176d889..40d3559c0b 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -42,14 +42,14 @@ class TestRegexp < Test::Unit::TestCase
def test_yoshidam_net_20041111_1
s = "[\xC2\xA0-\xC3\xBE]"
- r = assert_deprecated_warning(/ignored/) {Regexp.new(s, nil, "u")}
+ r = assert_deprecated_warning(/3\.3/) {Regexp.new(s, nil, "u")}
assert_match(r, "\xC3\xBE")
end
def test_yoshidam_net_20041111_2
assert_raise(RegexpError) do
s = "[\xFF-\xFF]".force_encoding("utf-8")
- assert_warning(/ignored/) {Regexp.new(s, nil, "u")}
+ assert_warning(/3\.3/) {Regexp.new(s, nil, "u")}
end
end
@@ -646,13 +646,37 @@ class TestRegexp < Test::Unit::TestCase
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/)})
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/, timeout: nil)})
- assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
- assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
- assert_equal(//n, Regexp.new("", nil, "n"))
+ arg_encoding_none = //n.options # ARG_ENCODING_NONE is implementation defined value
- arg_encoding_none = 32 # ARG_ENCODING_NONE is implementation defined value
- assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
- assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
+ assert_deprecated_warning('') do
+ assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", Regexp::NOENCODING).encoding)
+ assert_equal("bar", "foobarbaz"[Regexp.new("b..", Regexp::NOENCODING)])
+ assert_equal(//, Regexp.new(""))
+ assert_equal(//, Regexp.new("", timeout: 1))
+ assert_equal(//n, Regexp.new("", Regexp::NOENCODING))
+ assert_equal(//n, Regexp.new("", Regexp::NOENCODING, timeout: 1))
+
+ assert_equal(arg_encoding_none, Regexp.new("", Regexp::NOENCODING).options)
+ end
+
+ assert_deprecated_warning(/3\.3/) do
+ assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
+ end
+ assert_deprecated_warning(/3\.3/) do
+ assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n", timeout: 1).encoding)
+ end
+ assert_deprecated_warning(/3\.3/) do
+ assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
+ end
+ assert_deprecated_warning(/3\.3/) do
+ assert_equal(//n, Regexp.new("", nil, "n"))
+ end
+ assert_deprecated_warning(/3\.3/) do
+ assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
+ end
+ assert_deprecated_warning(/3\.3/) do
+ assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
+ end
assert_raise(RegexpError) { Regexp.new(")(") }
assert_raise(RegexpError) { Regexp.new('[\\40000000000') }