diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/banzai/filter/redactor_filter.rb | 6 | ||||
-rw-r--r-- | lib/banzai/filter/reference_filter.rb | 6 | ||||
-rw-r--r-- | lib/banzai/filter/reference_gatherer_filter.rb | 8 | ||||
-rw-r--r-- | lib/banzai/filter/user_reference_filter.rb | 14 | ||||
-rw-r--r-- | lib/gitlab/metrics.rb | 64 | ||||
-rw-r--r-- | lib/gitlab/metrics/delta.rb | 32 | ||||
-rw-r--r-- | lib/gitlab/metrics/instrumentation.rb | 146 | ||||
-rw-r--r-- | lib/gitlab/metrics/metric.rb | 34 | ||||
-rw-r--r-- | lib/gitlab/metrics/obfuscated_sql.rb | 47 | ||||
-rw-r--r-- | lib/gitlab/metrics/rack_middleware.rb | 49 | ||||
-rw-r--r-- | lib/gitlab/metrics/sampler.rb | 98 | ||||
-rw-r--r-- | lib/gitlab/metrics/sidekiq_middleware.rb | 30 | ||||
-rw-r--r-- | lib/gitlab/metrics/subscribers/action_view.rb | 53 | ||||
-rw-r--r-- | lib/gitlab/metrics/subscribers/active_record.rb | 48 | ||||
-rw-r--r-- | lib/gitlab/metrics/system.rb | 35 | ||||
-rw-r--r-- | lib/gitlab/metrics/transaction.rb | 66 | ||||
-rw-r--r-- | lib/gitlab/reference_extractor.rb | 19 |
17 files changed, 742 insertions, 13 deletions
diff --git a/lib/banzai/filter/redactor_filter.rb b/lib/banzai/filter/redactor_filter.rb index 89e7a79789a..f01a32b5ae5 100644 --- a/lib/banzai/filter/redactor_filter.rb +++ b/lib/banzai/filter/redactor_filter.rb @@ -11,7 +11,7 @@ module Banzai class RedactorFilter < HTML::Pipeline::Filter def call doc.css('a.gfm').each do |node| - unless user_can_reference?(node) + unless user_can_see_reference?(node) # The reference should be replaced by the original text, # which is not always the same as the rendered text. text = node.attr('data-original') || node.text @@ -24,12 +24,12 @@ module Banzai private - def user_can_reference?(node) + def user_can_see_reference?(node) if node.has_attribute?('data-reference-filter') reference_type = node.attr('data-reference-filter') reference_filter = Banzai::Filter.const_get(reference_type) - reference_filter.user_can_reference?(current_user, node, context) + reference_filter.user_can_see_reference?(current_user, node, context) else true end diff --git a/lib/banzai/filter/reference_filter.rb b/lib/banzai/filter/reference_filter.rb index a22a7a7afd3..8ca05ace88c 100644 --- a/lib/banzai/filter/reference_filter.rb +++ b/lib/banzai/filter/reference_filter.rb @@ -12,7 +12,7 @@ module Banzai # :project (required) - Current project, ignored if reference is cross-project. # :only_path - Generate path-only links. class ReferenceFilter < HTML::Pipeline::Filter - def self.user_can_reference?(user, node, context) + def self.user_can_see_reference?(user, node, context) if node.has_attribute?('data-project') project_id = node.attr('data-project').to_i return true if project_id == context[:project].try(:id) @@ -24,6 +24,10 @@ module Banzai end end + def self.user_can_reference?(user, node, context) + true + end + def self.referenced_by(node) raise NotImplementedError, "#{self} does not implement #{__method__}" end diff --git a/lib/banzai/filter/reference_gatherer_filter.rb b/lib/banzai/filter/reference_gatherer_filter.rb index 855f238ac1e..12412ff7ea9 100644 --- a/lib/banzai/filter/reference_gatherer_filter.rb +++ b/lib/banzai/filter/reference_gatherer_filter.rb @@ -35,7 +35,9 @@ module Banzai return if context[:reference_filter] && reference_filter != context[:reference_filter] - return unless reference_filter.user_can_reference?(current_user, node, context) + return if author && !reference_filter.user_can_reference?(author, node, context) + + return unless reference_filter.user_can_see_reference?(current_user, node, context) references = reference_filter.referenced_by(node) return unless references @@ -57,6 +59,10 @@ module Banzai def current_user context[:current_user] end + + def author + context[:author] + end end end end diff --git a/lib/banzai/filter/user_reference_filter.rb b/lib/banzai/filter/user_reference_filter.rb index 7f302d51dd7..964ab60f614 100644 --- a/lib/banzai/filter/user_reference_filter.rb +++ b/lib/banzai/filter/user_reference_filter.rb @@ -39,7 +39,7 @@ module Banzai end end - def self.user_can_reference?(user, node, context) + def self.user_can_see_reference?(user, node, context) if node.has_attribute?('data-group') group = Group.find(node.attr('data-group')) rescue nil Ability.abilities.allowed?(user, :read_group, group) @@ -48,6 +48,18 @@ module Banzai end end + def self.user_can_reference?(user, node, context) + # Only team members can reference `@all` + if node.has_attribute?('data-project') + project = Project.find(node.attr('data-project')) rescue nil + return false unless project + + user && project.team.member?(user) + else + super + end + end + def call replace_text_nodes_matching(User.reference_pattern) do |content| user_link_filter(content) diff --git a/lib/gitlab/metrics.rb b/lib/gitlab/metrics.rb new file mode 100644 index 00000000000..d6f60732455 --- /dev/null +++ b/lib/gitlab/metrics.rb @@ -0,0 +1,64 @@ +module Gitlab + module Metrics + RAILS_ROOT = Rails.root.to_s + METRICS_ROOT = Rails.root.join('lib', 'gitlab', 'metrics').to_s + PATH_REGEX = /^#{RAILS_ROOT}\/?/ + + def self.pool_size + Settings.metrics['pool_size'] || 16 + end + + def self.timeout + Settings.metrics['timeout'] || 10 + end + + def self.enabled? + !!Settings.metrics['enabled'] + end + + def self.mri? + RUBY_ENGINE == 'ruby' + end + + def self.method_call_threshold + Settings.metrics['method_call_threshold'] || 10 + end + + def self.pool + @pool + end + + def self.hostname + @hostname + end + + # Returns a relative path and line number based on the last application call + # frame. + def self.last_relative_application_frame + frame = caller_locations.find do |l| + l.path.start_with?(RAILS_ROOT) && !l.path.start_with?(METRICS_ROOT) + end + + if frame + return frame.path.sub(PATH_REGEX, ''), frame.lineno + else + return nil, nil + end + end + + @hostname = Socket.gethostname + + # When enabled this should be set before being used as the usual pattern + # "@foo ||= bar" is _not_ thread-safe. + if enabled? + @pool = ConnectionPool.new(size: pool_size, timeout: timeout) do + host = Settings.metrics['host'] + db = Settings.metrics['database'] + user = Settings.metrics['username'] + pw = Settings.metrics['password'] + + InfluxDB::Client.new(db, host: host, username: user, password: pw) + end + end + end +end diff --git a/lib/gitlab/metrics/delta.rb b/lib/gitlab/metrics/delta.rb new file mode 100644 index 00000000000..bcf28eed84d --- /dev/null +++ b/lib/gitlab/metrics/delta.rb @@ -0,0 +1,32 @@ +module Gitlab + module Metrics + # Class for calculating the difference between two numeric values. + # + # Every call to `compared_with` updates the internal value. This makes it + # possible to use a single Delta instance to calculate the delta over time + # of an ever increasing number. + # + # Example usage: + # + # delta = Delta.new(0) + # + # delta.compared_with(10) # => 10 + # delta.compared_with(15) # => 5 + # delta.compared_with(20) # => 5 + class Delta + def initialize(value = 0) + @value = value + end + + # new_value - The value to compare with as a Numeric. + # + # Returns a new Numeric (depending on the type of `new_value`). + def compared_with(new_value) + delta = new_value - @value + @value = new_value + + delta + end + end + end +end diff --git a/lib/gitlab/metrics/instrumentation.rb b/lib/gitlab/metrics/instrumentation.rb new file mode 100644 index 00000000000..06fc2f25948 --- /dev/null +++ b/lib/gitlab/metrics/instrumentation.rb @@ -0,0 +1,146 @@ +module Gitlab + module Metrics + # Module for instrumenting methods. + # + # This module allows instrumenting of methods without having to actually + # alter the target code (e.g. by including modules). + # + # Example usage: + # + # Gitlab::Metrics::Instrumentation.instrument_method(User, :by_login) + module Instrumentation + SERIES = 'method_calls' + + def self.configure + yield self + end + + # Instruments a class method. + # + # mod - The module to instrument as a Module/Class. + # name - The name of the method to instrument. + def self.instrument_method(mod, name) + instrument(:class, mod, name) + end + + # Instruments an instance method. + # + # mod - The module to instrument as a Module/Class. + # name - The name of the method to instrument. + def self.instrument_instance_method(mod, name) + instrument(:instance, mod, name) + end + + # Recursively instruments all subclasses of the given root module. + # + # This can be used to for example instrument all ActiveRecord models (as + # these all inherit from ActiveRecord::Base). + # + # This method can optionally take a block to pass to `instrument_methods` + # and `instrument_instance_methods`. + # + # root - The root module for which to instrument subclasses. The root + # module itself is not instrumented. + def self.instrument_class_hierarchy(root, &block) + visit = root.subclasses + + until visit.empty? + klass = visit.pop + + instrument_methods(klass, &block) + instrument_instance_methods(klass, &block) + + klass.subclasses.each { |c| visit << c } + end + end + + # Instruments all public methods of a module. + # + # This method optionally takes a block that can be used to determine if a + # method should be instrumented or not. The block is passed the receiving + # module and an UnboundMethod. If the block returns a non truthy value the + # method is not instrumented. + # + # mod - The module to instrument. + def self.instrument_methods(mod) + mod.public_methods(false).each do |name| + method = mod.method(name) + + if method.owner == mod.singleton_class + if !block_given? || block_given? && yield(mod, method) + instrument_method(mod, name) + end + end + end + end + + # Instruments all public instance methods of a module. + # + # See `instrument_methods` for more information. + # + # mod - The module to instrument. + def self.instrument_instance_methods(mod) + mod.public_instance_methods(false).each do |name| + method = mod.instance_method(name) + + if method.owner == mod + if !block_given? || block_given? && yield(mod, method) + instrument_instance_method(mod, name) + end + end + end + end + + # Instruments a method. + # + # type - The type (:class or :instance) of method to instrument. + # mod - The module containing the method. + # name - The name of the method to instrument. + def self.instrument(type, mod, name) + return unless Metrics.enabled? + + name = name.to_sym + alias_name = :"_original_#{name}" + target = type == :instance ? mod : mod.singleton_class + + if type == :instance + target = mod + label = "#{mod.name}##{name}" + else + target = mod.singleton_class + label = "#{mod.name}.#{name}" + end + + target.class_eval <<-EOF, __FILE__, __LINE__ + 1 + alias_method #{alias_name.inspect}, #{name.inspect} + + def #{name}(*args, &block) + trans = Gitlab::Metrics::Instrumentation.transaction + + if trans + start = Time.now + retval = __send__(#{alias_name.inspect}, *args, &block) + duration = (Time.now - start) * 1000.0 + + if duration >= Gitlab::Metrics.method_call_threshold + trans.add_metric(Gitlab::Metrics::Instrumentation::SERIES, + { duration: duration }, + method: #{label.inspect}) + end + + retval + else + __send__(#{alias_name.inspect}, *args, &block) + end + end + EOF + end + + # Small layer of indirection to make it easier to stub out the current + # transaction. + def self.transaction + Transaction.current + end + end + end +end diff --git a/lib/gitlab/metrics/metric.rb b/lib/gitlab/metrics/metric.rb new file mode 100644 index 00000000000..f592f4e571f --- /dev/null +++ b/lib/gitlab/metrics/metric.rb @@ -0,0 +1,34 @@ +module Gitlab + module Metrics + # Class for storing details of a single metric (label, value, etc). + class Metric + attr_reader :series, :values, :tags, :created_at + + # series - The name of the series (as a String) to store the metric in. + # values - A Hash containing the values to store. + # tags - A Hash containing extra tags to add to the metrics. + def initialize(series, values, tags = {}) + @values = values + @series = series + @tags = tags + @created_at = Time.now.utc + end + + # Returns a Hash in a format that can be directly written to InfluxDB. + def to_hash + { + series: @series, + tags: @tags.merge( + hostname: Metrics.hostname, + ruby_engine: RUBY_ENGINE, + ruby_version: RUBY_VERSION, + gitlab_version: Gitlab::VERSION, + process_type: Sidekiq.server? ? 'sidekiq' : 'rails' + ), + values: @values, + timestamp: @created_at.to_i + } + end + end + end +end diff --git a/lib/gitlab/metrics/obfuscated_sql.rb b/lib/gitlab/metrics/obfuscated_sql.rb new file mode 100644 index 00000000000..481aca56efb --- /dev/null +++ b/lib/gitlab/metrics/obfuscated_sql.rb @@ -0,0 +1,47 @@ +module Gitlab + module Metrics + # Class for producing SQL queries with sensitive data stripped out. + class ObfuscatedSQL + REPLACEMENT = / + \d+(\.\d+)? # integers, floats + | '.+?' # single quoted strings + | \/.+?(?<!\\)\/ # regexps (including escaped slashes) + /x + + MYSQL_REPLACEMENTS = / + ".+?" # double quoted strings + /x + + # Regex to replace consecutive placeholders with a single one indicating + # the length. This can be useful when a "IN" statement uses thousands of + # IDs (storing this would just be a waste of space). + CONSECUTIVE = /(\?(\s*,\s*)?){2,}/ + + # sql - The raw SQL query as a String. + def initialize(sql) + @sql = sql + end + + # Returns a new, obfuscated SQL query. + def to_s + regex = REPLACEMENT + + if Gitlab::Database.mysql? + regex = Regexp.union(regex, MYSQL_REPLACEMENTS) + end + + sql = @sql.gsub(regex, '?').gsub(CONSECUTIVE) do |match| + "#{match.count(',') + 1} values" + end + + # InfluxDB escapes double quotes upon output, so lets get rid of them + # whenever we can. + if Gitlab::Database.postgresql? + sql = sql.delete('"') + end + + sql + end + end + end +end diff --git a/lib/gitlab/metrics/rack_middleware.rb b/lib/gitlab/metrics/rack_middleware.rb new file mode 100644 index 00000000000..5c0587c4c51 --- /dev/null +++ b/lib/gitlab/metrics/rack_middleware.rb @@ -0,0 +1,49 @@ +module Gitlab + module Metrics + # Rack middleware for tracking Rails requests. + class RackMiddleware + CONTROLLER_KEY = 'action_controller.instance' + + def initialize(app) + @app = app + end + + # env - A Hash containing Rack environment details. + def call(env) + trans = transaction_from_env(env) + retval = nil + + begin + retval = trans.run { @app.call(env) } + + # Even in the event of an error we want to submit any metrics we + # might've gathered up to this point. + ensure + if env[CONTROLLER_KEY] + tag_controller(trans, env) + end + + trans.finish + end + + retval + end + + def transaction_from_env(env) + trans = Transaction.new + + trans.add_tag(:request_method, env['REQUEST_METHOD']) + trans.add_tag(:request_uri, env['REQUEST_URI']) + + trans + end + + def tag_controller(trans, env) + controller = env[CONTROLLER_KEY] + label = "#{controller.class.name}##{controller.action_name}" + + trans.add_tag(:action, label) + end + end + end +end diff --git a/lib/gitlab/metrics/sampler.rb b/lib/gitlab/metrics/sampler.rb new file mode 100644 index 00000000000..828ee1f8c62 --- /dev/null +++ b/lib/gitlab/metrics/sampler.rb @@ -0,0 +1,98 @@ +module Gitlab + module Metrics + # Class that sends certain metrics to InfluxDB at a specific interval. + # + # This class is used to gather statistics that can't be directly associated + # with a transaction such as system memory usage, garbage collection + # statistics, etc. + class Sampler + # interval - The sampling interval in seconds. + def initialize(interval = 15) + @interval = interval + @metrics = [] + + @last_minor_gc = Delta.new(GC.stat[:minor_gc_count]) + @last_major_gc = Delta.new(GC.stat[:major_gc_count]) + + if Gitlab::Metrics.mri? + require 'allocations' + + Allocations.start + end + end + + def start + Thread.new do + Thread.current.abort_on_exception = true + + loop do + sleep(@interval) + + sample + end + end + end + + def sample + sample_memory_usage + sample_file_descriptors + sample_objects + sample_gc + + flush + ensure + GC::Profiler.clear + @metrics.clear + end + + def flush + MetricsWorker.perform_async(@metrics.map(&:to_hash)) + end + + def sample_memory_usage + @metrics << Metric.new('memory_usage', value: System.memory_usage) + end + + def sample_file_descriptors + @metrics << Metric. + new('file_descriptors', value: System.file_descriptor_count) + end + + if Metrics.mri? + def sample_objects + sample = Allocations.to_hash + counts = sample.each_with_object({}) do |(klass, count), hash| + hash[klass.name] = count + end + + # Symbols aren't allocated so we'll need to add those manually. + counts['Symbol'] = Symbol.all_symbols.length + + counts.each do |name, count| + @metrics << Metric.new('object_counts', { count: count }, type: name) + end + end + else + def sample_objects + end + end + + def sample_gc + time = GC::Profiler.total_time * 1000.0 + stats = GC.stat.merge(total_time: time) + + # We want the difference of GC runs compared to the last sample, not the + # total amount since the process started. + stats[:minor_gc_count] = + @last_minor_gc.compared_with(stats[:minor_gc_count]) + + stats[:major_gc_count] = + @last_major_gc.compared_with(stats[:major_gc_count]) + + stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count] + + @metrics << Metric.new('gc_statistics', stats) + end + end + end +end diff --git a/lib/gitlab/metrics/sidekiq_middleware.rb b/lib/gitlab/metrics/sidekiq_middleware.rb new file mode 100644 index 00000000000..ec10707d1fb --- /dev/null +++ b/lib/gitlab/metrics/sidekiq_middleware.rb @@ -0,0 +1,30 @@ +module Gitlab + module Metrics + # Sidekiq middleware for tracking jobs. + # + # This middleware is intended to be used as a server-side middleware. + class SidekiqMiddleware + def call(worker, message, queue) + # We don't want to track the MetricsWorker itself as otherwise we'll end + # up in an infinite loop. + if worker.class == MetricsWorker + yield + return + end + + trans = Transaction.new + + begin + trans.run { yield } + ensure + tag_worker(trans, worker) + trans.finish + end + end + + def tag_worker(trans, worker) + trans.add_tag(:action, "#{worker.class.name}#perform") + end + end + end +end diff --git a/lib/gitlab/metrics/subscribers/action_view.rb b/lib/gitlab/metrics/subscribers/action_view.rb new file mode 100644 index 00000000000..7e0dcf99d92 --- /dev/null +++ b/lib/gitlab/metrics/subscribers/action_view.rb @@ -0,0 +1,53 @@ +module Gitlab + module Metrics + module Subscribers + # Class for tracking the rendering timings of views. + class ActionView < ActiveSupport::Subscriber + attach_to :action_view + + SERIES = 'views' + + def render_template(event) + track(event) if current_transaction + end + + alias_method :render_view, :render_template + + private + + def track(event) + values = values_for(event) + tags = tags_for(event) + + current_transaction.add_metric(SERIES, values, tags) + end + + def relative_path(path) + path.gsub(/^#{Rails.root.to_s}\/?/, '') + end + + def values_for(event) + { duration: event.duration } + end + + def tags_for(event) + path = relative_path(event.payload[:identifier]) + tags = { view: path } + + file, line = Metrics.last_relative_application_frame + + if file and line + tags[:file] = file + tags[:line] = line + end + + tags + end + + def current_transaction + Transaction.current + end + end + end + end +end diff --git a/lib/gitlab/metrics/subscribers/active_record.rb b/lib/gitlab/metrics/subscribers/active_record.rb new file mode 100644 index 00000000000..d947c128ce2 --- /dev/null +++ b/lib/gitlab/metrics/subscribers/active_record.rb @@ -0,0 +1,48 @@ +module Gitlab + module Metrics + module Subscribers + # Class for tracking raw SQL queries. + # + # Queries are obfuscated before being logged to ensure no private data is + # exposed via InfluxDB/Grafana. + class ActiveRecord < ActiveSupport::Subscriber + attach_to :active_record + + SERIES = 'sql_queries' + + def sql(event) + return unless current_transaction + + values = values_for(event) + tags = tags_for(event) + + current_transaction.add_metric(SERIES, values, tags) + end + + private + + def values_for(event) + { duration: event.duration } + end + + def tags_for(event) + sql = ObfuscatedSQL.new(event.payload[:sql]).to_s + tags = { sql: sql } + + file, line = Metrics.last_relative_application_frame + + if file and line + tags[:file] = file + tags[:line] = line + end + + tags + end + + def current_transaction + Transaction.current + end + end + end + end +end diff --git a/lib/gitlab/metrics/system.rb b/lib/gitlab/metrics/system.rb new file mode 100644 index 00000000000..83371265278 --- /dev/null +++ b/lib/gitlab/metrics/system.rb @@ -0,0 +1,35 @@ +module Gitlab + module Metrics + # Module for gathering system/process statistics such as the memory usage. + # + # This module relies on the /proc filesystem being available. If /proc is + # not available the methods of this module will be stubbed. + module System + if File.exist?('/proc') + # Returns the current process' memory usage in bytes. + def self.memory_usage + mem = 0 + match = File.read('/proc/self/status').match(/VmRSS:\s+(\d+)/) + + if match and match[1] + mem = match[1].to_f * 1024 + end + + mem + end + + def self.file_descriptor_count + Dir.glob('/proc/self/fd/*').length + end + else + def self.memory_usage + 0.0 + end + + def self.file_descriptor_count + 0 + end + end + end + end +end diff --git a/lib/gitlab/metrics/transaction.rb b/lib/gitlab/metrics/transaction.rb new file mode 100644 index 00000000000..568f9d6ae0c --- /dev/null +++ b/lib/gitlab/metrics/transaction.rb @@ -0,0 +1,66 @@ +module Gitlab + module Metrics + # Class for storing metrics information of a single transaction. + class Transaction + THREAD_KEY = :_gitlab_metrics_transaction + + SERIES = 'transactions' + + attr_reader :uuid, :tags + + def self.current + Thread.current[THREAD_KEY] + end + + # name - The name of this transaction as a String. + def initialize + @metrics = [] + @uuid = SecureRandom.uuid + + @started_at = nil + @finished_at = nil + + @tags = {} + end + + def duration + @finished_at ? (@finished_at - @started_at) * 1000.0 : 0.0 + end + + def run + Thread.current[THREAD_KEY] = self + + @started_at = Time.now + + yield + ensure + @finished_at = Time.now + + Thread.current[THREAD_KEY] = nil + end + + def add_metric(series, values, tags = {}) + tags = tags.merge(transaction_id: @uuid) + + @metrics << Metric.new(series, values, tags) + end + + def add_tag(key, value) + @tags[key] = value + end + + def finish + track_self + submit + end + + def track_self + add_metric(SERIES, { duration: duration }, @tags) + end + + def submit + MetricsWorker.perform_async(@metrics.map(&:to_hash)) + end + end + end +end diff --git a/lib/gitlab/reference_extractor.rb b/lib/gitlab/reference_extractor.rb index 0a70d21b1ce..be795649e59 100644 --- a/lib/gitlab/reference_extractor.rb +++ b/lib/gitlab/reference_extractor.rb @@ -3,11 +3,12 @@ require 'banzai' module Gitlab # Extract possible GFM references from an arbitrary String for further processing. class ReferenceExtractor < Banzai::ReferenceExtractor - attr_accessor :project, :current_user + attr_accessor :project, :current_user, :author - def initialize(project, current_user = nil) + def initialize(project, current_user = nil, author = nil) @project = project @current_user = current_user + @author = author @references = {} @@ -20,18 +21,22 @@ module Gitlab %i(user label merge_request snippet commit commit_range).each do |type| define_method("#{type}s") do - @references[type] ||= references(type, project: project, current_user: current_user) + @references[type] ||= references(type, reference_context) end end def issues - options = { project: project, current_user: current_user } - if project && project.jira_tracker? - @references[:external_issue] ||= references(:external_issue, options) + @references[:external_issue] ||= references(:external_issue, reference_context) else - @references[:issue] ||= references(:issue, options) + @references[:issue] ||= references(:issue, reference_context) end end + + private + + def reference_context + { project: project, current_user: current_user, author: author } + end end end |