diff options
author | Jeremy Evans <code@jeremyevans.net> | 2022-12-20 12:44:11 -0800 |
---|---|---|
committer | Jeremy Evans <code@jeremyevans.net> | 2022-12-22 11:50:26 -0800 |
commit | 7e8fa06022a9e412e3f8e6c8b6f0ba1909f648d5 (patch) | |
tree | d677df86e4df121b3b005fa7d91c1a084e7734fc | |
parent | 9dcee2d80ee995e11b0fd437d4a94930ccb6db67 (diff) | |
download | ruby-7e8fa06022a9e412e3f8e6c8b6f0ba1909f648d5.tar.gz |
Always issue deprecation warning when calling Regexp.new with 3rd positional argument
Previously, only certain values of the 3rd argument triggered a
deprecation warning.
First step toward fixing bug #18797. Support for the 3rd argument
will be removed after the release of Ruby 3.2.
Fix minor fallout discovered by the tests.
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
-rw-r--r-- | lib/racc/statetransitiontable.rb | 2 | ||||
-rw-r--r-- | re.c | 24 | ||||
-rw-r--r-- | spec/ruby/core/regexp/shared/new.rb | 82 | ||||
-rw-r--r-- | test/psych/test_yaml.rb | 2 | ||||
-rw-r--r-- | test/ruby/test_regexp.rb | 40 |
5 files changed, 87 insertions, 63 deletions
diff --git a/lib/racc/statetransitiontable.rb b/lib/racc/statetransitiontable.rb index cae411c98b..d75fa1657a 100644 --- a/lib/racc/statetransitiontable.rb +++ b/lib/racc/statetransitiontable.rb @@ -216,7 +216,7 @@ module Racc end i = ii end - Regexp.compile(map, nil, 'n') + Regexp.compile(map, Regexp::NOENCODING) end def set_table(entries, dummy, tbl, chk, ptr) @@ -3759,10 +3759,11 @@ struct reg_init_args { static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args); static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags); +void rb_warn_deprecated_to_remove(const char *removal, const char *fmt, const char *suggest, ...); /* * call-seq: - * Regexp.new(string, options = 0, n_flag = nil, timeout: nil) -> regexp + * Regexp.new(string, options = 0, timeout: nil) -> regexp * Regexp.new(regexp, timeout: nil) -> regexp * * With argument +string+ given, returns a new regexp with the given string @@ -3780,24 +3781,18 @@ static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags); * Regexp.new('foo', 'im') # => /foo/im * * - The logical OR of one or more of the constants - * Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE: + * Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and + * Regexp::NOENCODING: * * Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i * Regexp.new('foo', Regexp::EXTENDED) # => /foo/x * Regexp.new('foo', Regexp::MULTILINE) # => /foo/m + * Regexp.new('foo', Regexp::NOENCODING) # => /foo/n * flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE * Regexp.new('foo', flags) # => /foo/mix * * - +nil+ or +false+, which is ignored. * - * If optional argument +n_flag+ if it is a string starts with - * <code>'n'</code> or <code>'N'</code>, the encoding of +string+ is - * ignored and the new regexp encoding is fixed to +ASCII-8BIT+ or - * +US-ASCII+, by its content. 
- * - * Regexp.new('foo', nil, 'n') # => /foo/n - * Regexp.new("\u3042", nil, 'n') # => /\xE3\x81\x82/n - * * If optional keyword argument +timeout+ is given, * its float value overrides the timeout interval for the class, * Regexp.timeout. @@ -3841,7 +3836,7 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args) VALUE str, src, opts = Qundef, n_flag = Qundef, kwargs; VALUE re = Qnil; - rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs); + argc = rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs); args->timeout = Qnil; if (!NIL_P(kwargs)) { @@ -3852,6 +3847,10 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args) rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout); } + if (argc == 3) { + rb_warn_deprecated_to_remove("3.3", "3rd argument to Regexp.new", "2nd argument"); + } + if (RB_TYPE_P(src, T_REGEXP)) { re = src; @@ -3876,9 +3875,6 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args) enc = rb_ascii8bit_encoding(); flags |= ARG_ENCODING_NONE; } - else { - rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "encoding option is ignored - %s", kcode); - } } str = StringValue(src); } diff --git a/spec/ruby/core/regexp/shared/new.rb b/spec/ruby/core/regexp/shared/new.rb index 10c2d3d390..058a51b1aa 100644 --- a/spec/ruby/core/regexp/shared/new.rb +++ b/spec/ruby/core/regexp/shared/new.rb @@ -197,48 +197,50 @@ describe :regexp_new_string, shared: true do end end - it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do - -> { - Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII - }.should complain(/encoding option is ignored/) - end + ruby_version_is ""..."3.2" do + it "ignores the third argument if it is 'e' or 'euc' 
(case-insensitive)" do + -> { + Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII + }.should complain(/encoding option is ignored/) + end - it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do - -> { - Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII - }.should complain(/encoding option is ignored/) - end + it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do + -> { + Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII + }.should complain(/encoding option is ignored/) + end - it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do - -> { - Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII - }.should complain(/encoding option is ignored/) - end + it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do + -> { + Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == 
Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII + }.should complain(/encoding option is ignored/) + end - it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do - Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII - Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII - end + it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do + Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII + Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII + end - it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do - a = "(?:[\x8E\xA1-\xFE])" - str = "\A(?:#{a}|x*)\z" + it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do + a = "(?:[\x8E\xA1-\xFE])" + str = "\A(?:#{a}|x*)\z" - Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY - Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY - Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY - Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY + Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY + Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY + Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY + Regexp.send(@method, str, nil, 
'NONE').encoding.should == Encoding::BINARY + end end describe "with escaped characters" do @@ -598,8 +600,10 @@ describe :regexp_new_regexp, shared: true do Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII end - it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do - Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY + ruby_version_is ''...'3.2' do + it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do + Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY + end end end end diff --git a/test/psych/test_yaml.rb b/test/psych/test_yaml.rb index e12b9769fe..32dd43b98e 100644 --- a/test/psych/test_yaml.rb +++ b/test/psych/test_yaml.rb @@ -34,7 +34,7 @@ class Psych_Unit_Tests < Psych::TestCase # [ruby-core:34969] def test_regexp_with_n - assert_cycle(Regexp.new('',0,'n')) + assert_cycle(Regexp.new('',Regexp::NOENCODING)) end # # Tests modified from 00basic.t in Psych.pm diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index fb2176d889..40d3559c0b 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -42,14 +42,14 @@ class TestRegexp < Test::Unit::TestCase def test_yoshidam_net_20041111_1 s = "[\xC2\xA0-\xC3\xBE]" - r = assert_deprecated_warning(/ignored/) {Regexp.new(s, nil, "u")} + r = assert_deprecated_warning(/3\.3/) {Regexp.new(s, nil, "u")} assert_match(r, "\xC3\xBE") end def test_yoshidam_net_20041111_2 assert_raise(RegexpError) do s = "[\xFF-\xFF]".force_encoding("utf-8") - assert_warning(/ignored/) {Regexp.new(s, nil, "u")} + assert_warning(/3\.3/) {Regexp.new(s, nil, "u")} end end @@ -646,13 +646,37 @@ class TestRegexp < Test::Unit::TestCase assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/)}) assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/, 
timeout: nil)}) - assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding) - assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")]) - assert_equal(//n, Regexp.new("", nil, "n")) + arg_encoding_none = //n.options # ARG_ENCODING_NONE is implementation defined value - arg_encoding_none = 32 # ARG_ENCODING_NONE is implementation defined value - assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options) - assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options) + assert_deprecated_warning('') do + assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", Regexp::NOENCODING).encoding) + assert_equal("bar", "foobarbaz"[Regexp.new("b..", Regexp::NOENCODING)]) + assert_equal(//, Regexp.new("")) + assert_equal(//, Regexp.new("", timeout: 1)) + assert_equal(//n, Regexp.new("", Regexp::NOENCODING)) + assert_equal(//n, Regexp.new("", Regexp::NOENCODING, timeout: 1)) + + assert_equal(arg_encoding_none, Regexp.new("", Regexp::NOENCODING).options) + end + + assert_deprecated_warning(/3\.3/) do + assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding) + end + assert_deprecated_warning(/3\.3/) do + assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n", timeout: 1).encoding) + end + assert_deprecated_warning(/3\.3/) do + assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")]) + end + assert_deprecated_warning(/3\.3/) do + assert_equal(//n, Regexp.new("", nil, "n")) + end + assert_deprecated_warning(/3\.3/) do + assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options) + end + assert_deprecated_warning(/3\.3/) do + assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options) + end assert_raise(RegexpError) { Regexp.new(")(") } assert_raise(RegexpError) { Regexp.new('[\\40000000000') } |