summaryrefslogtreecommitdiff
path: root/lib/gitlab/untrusted_regexp.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/untrusted_regexp.rb')
-rw-r--r--lib/gitlab/untrusted_regexp.rb49
1 files changed, 49 insertions, 0 deletions
diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb
new file mode 100644
index 00000000000..7ce2e9d636e
--- /dev/null
+++ b/lib/gitlab/untrusted_regexp.rb
@@ -0,0 +1,49 @@
+module Gitlab
+ # An untrusted regular expression is any regexp containing patterns sourced
+ # from user input.
+ #
+ # Ruby's built-in regular expression library allows patterns which complete in
+ # exponential time, permitting denial-of-service attacks.
+ #
+ # Not all regular expression features are available in untrusted regexes, and
+ # there is a strict limit on total execution time. See the RE2 documentation
+ # at https://github.com/google/re2/wiki/Syntax for more details.
+ class UntrustedRegexp
+ delegate :===, to: :regexp
+
+ def initialize(pattern)
+ @regexp = RE2::Regexp.new(pattern, log_errors: false)
+
+ raise RegexpError.new(regexp.error) unless regexp.ok?
+ end
+
+ def replace_all(text, rewrite)
+ RE2.GlobalReplace(text, regexp, rewrite)
+ end
+
+ def scan(text)
+ matches = scan_regexp.scan(text).to_a
+ matches.map!(&:first) if regexp.number_of_capturing_groups.zero?
+ matches
+ end
+
+ def replace(text, rewrite)
+ RE2.Replace(text, regexp, rewrite)
+ end
+
+ private
+
+ attr_reader :regexp
+
+ # RE2 scan operates differently to Ruby scan when there are no capture
+ # groups, so work around it
+ def scan_regexp
+ @scan_regexp ||=
+ if regexp.number_of_capturing_groups.zero?
+ RE2::Regexp.new('(' + regexp.source + ')')
+ else
+ regexp
+ end
+ end
+ end
+end