From d5f0d338c7b5d3d64929b51d29061d369550e8c4 Mon Sep 17 00:00:00 2001 From: Marc-Andre Lafortune Date: Tue, 8 Dec 2020 21:20:37 -0500 Subject: Optimize `Enumerable#grep{_v}` [Bug #17030] --- NEWS.md | 4 +++ enum.c | 52 ++++++++++++++++++++++++-------- spec/ruby/core/enumerable/grep_spec.rb | 24 ++++++++++----- spec/ruby/core/enumerable/grep_v_spec.rb | 24 ++++++++++----- test/ruby/test_enum.rb | 21 +++++++++++++ 5 files changed, 99 insertions(+), 26 deletions(-) diff --git a/NEWS.md b/NEWS.md index f5da2fadef..817d591a27 100644 --- a/NEWS.md +++ b/NEWS.md @@ -452,6 +452,9 @@ Excluding feature bug fixes. * Integer#zero? overrides Numeric#zero? for optimization. [[Misc #16961]] +* Enumerable#grep and grep_v when passed a Regexp and no block no longer modify + Regexp.last_match [[Bug #17030]] + * Requiring 'open-uri' no longer redefines `Kernel#open`. Call `URI.open` directly or `use URI#open` instead. [[Misc #15893]] @@ -682,3 +685,4 @@ end [Feature #17351]: https://bugs.ruby-lang.org/issues/17351 [Feature #17371]: https://bugs.ruby-lang.org/issues/17371 [GH-2991]: https://github.com/ruby/ruby/pull/2991 +[Bug #17030]: https://bugs.ruby-lang.org/issues/17030 diff --git a/enum.c b/enum.c index d21becd8fa..6f47921dea 100644 --- a/enum.c +++ b/enum.c @@ -19,6 +19,7 @@ #include "internal/object.h" #include "internal/proc.h" #include "internal/rational.h" +#include "internal/re.h" #include "ruby/util.h" #include "ruby_assert.h" #include "symbol.h" @@ -81,6 +82,22 @@ grep_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) return Qnil; } +static VALUE +grep_regexp_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) +{ + struct MEMO *memo = MEMO_CAST(args); + VALUE converted_element, match; + ENUM_WANT_SVALUE(); + + /* In case element can't be converted to a Symbol or String: not a match (don't raise) */ + converted_element = SYMBOL_P(i) ? i : rb_check_string_type(i); + match = NIL_P(converted_element) ? Qfalse : rb_reg_match_p(memo->v1, i, 0); + if (match == memo->u3.value) { + rb_ary_push(memo->v2, i); + } + return Qnil; +} + static VALUE grep_iter_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) { @@ -93,6 +110,27 @@ grep_iter_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) return Qnil; } +static VALUE +enum_grep0(VALUE obj, VALUE pat, VALUE test) +{ + VALUE ary = rb_ary_new(); + struct MEMO *memo = MEMO_NEW(pat, ary, test); + rb_block_call_func_t fn; + if (rb_block_given_p()) { + fn = grep_iter_i; + } + else if (RB_TYPE_P(pat, T_REGEXP) && + LIKELY(rb_method_basic_definition_p(CLASS_OF(pat), idEqq))) { + fn = grep_regexp_i; + } + else { + fn = grep_i; + } + rb_block_call(obj, id_each, 0, 0, fn, (VALUE)memo); + + return ary; +} + /* * call-seq: * enum.grep(pattern) -> array @@ -114,12 +152,7 @@ grep_iter_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) static VALUE enum_grep(VALUE obj, VALUE pat) { - VALUE ary = rb_ary_new(); - struct MEMO *memo = MEMO_NEW(pat, ary, Qtrue); - - rb_block_call(obj, id_each, 0, 0, rb_block_given_p() ? grep_iter_i : grep_i, (VALUE)memo); - - return ary; + return enum_grep0(obj, pat, Qtrue); } /* @@ -140,12 +173,7 @@ enum_grep(VALUE obj, VALUE pat) static VALUE enum_grep_v(VALUE obj, VALUE pat) { - VALUE ary = rb_ary_new(); - struct MEMO *memo = MEMO_NEW(pat, ary, Qfalse); - - rb_block_call(obj, id_each, 0, 0, rb_block_given_p() ? grep_iter_i : grep_i, (VALUE)memo); - - return ary; + return enum_grep0(obj, pat, Qfalse); } #define COUNT_BIGNUM IMEMO_FL_USER0 diff --git a/spec/ruby/core/enumerable/grep_spec.rb b/spec/ruby/core/enumerable/grep_spec.rb index c9c0f34e27..4e66bb85ea 100644 --- a/spec/ruby/core/enumerable/grep_spec.rb +++ b/spec/ruby/core/enumerable/grep_spec.rb @@ -40,15 +40,25 @@ describe "Enumerable#grep" do $~.should == nil end - it "sets $~ to the last match when given no block" do - "z" =~ /z/ # Reset $~ - ["abc", "def"].grep(/b/).should == ["abc"] + ruby_version_is ""..."3.0.0" do + it "sets $~ to the last match when given no block" do + "z" =~ /z/ # Reset $~ + ["abc", "def"].grep(/b/).should == ["abc"] - # Set by the failed match of "def" - $~.should == nil + # Set by the failed match of "def" + $~.should == nil - ["abc", "def"].grep(/e/) - $&.should == "e" + ["abc", "def"].grep(/e/) + $&.should == "e" + end + end + + ruby_version_is "3.0.0" do + it "does not set $~ when given no block" do + "z" =~ /z/ # Reset $~ + ["abc", "def"].grep(/b/).should == ["abc"] + $&.should == "z" + end end describe "with a block" do diff --git a/spec/ruby/core/enumerable/grep_v_spec.rb b/spec/ruby/core/enumerable/grep_v_spec.rb index 6dec487065..8cae3ac618 100644 --- a/spec/ruby/core/enumerable/grep_v_spec.rb +++ b/spec/ruby/core/enumerable/grep_v_spec.rb @@ -20,15 +20,25 @@ describe "Enumerable#grep_v" do $&.should == "e" end - it "sets $~ to the last match when given no block" do - "z" =~ /z/ # Reset $~ - ["abc", "def"].grep_v(/e/).should == ["abc"] + ruby_version_is ""..."3.0.0" do + it "sets $~ to the last match when given no block" do + "z" =~ /z/ # Reset $~ + ["abc", "def"].grep_v(/e/).should == ["abc"] - # Set by the match of "def" - $&.should == "e" + # Set by the match of "def" + $&.should == "e" - ["abc", "def"].grep_v(/b/) - $&.should == nil + ["abc", "def"].grep_v(/b/) + $&.should == nil + end + end + + ruby_version_is "3.0.0" do + it "does not set $~ when given no block" do + "z" =~ /z/ # Reset $~ + ["abc", "def"].grep_v(/e/).should == ["abc"] + $&.should == "z" + end end describe "without block" do diff --git a/test/ruby/test_enum.rb b/test/ruby/test_enum.rb index 8d30b343a8..aa93b95c2a 100644 --- a/test/ruby/test_enum.rb +++ b/test/ruby/test_enum.rb @@ -63,6 +63,27 @@ class TestEnumerable < Test::Unit::TestCase assert_equal([[2, 1], [2, 4]], a) end + def test_grep_optimization + bug17030 = '[ruby-core:99156]' + 'set last match' =~ /set last (.*)/ + assert_equal([:a, 'b', :c], [:a, 'b', 'z', :c, 42, nil].grep(/[a-d]/)) + assert_equal(['z', 42, nil], [:a, 'b', 'z', :c, 42, nil].grep_v(/[a-d]/)) + assert_equal('match', $1) + + regexp = Regexp.new('x') + assert_equal([], @obj.grep(regexp)) # sanity check + def regexp.===(other) + true + end + assert_equal([1, 2, 3, 1, 2], @obj.grep(regexp)) + + o = Object.new + def o.to_str + 'hello' + end + assert_same(o, [o].grep(/ll/).first) + end + def test_count assert_equal(5, @obj.count) assert_equal(2, @obj.count(1)) -- cgit v1.2.1