# frozen_string_literal: true # module Gitlab module Diff class HighlightCache include Gitlab::Metrics::Methods include Gitlab::Utils::StrongMemoize EXPIRATION = 1.week VERSION = 1 delegate :diffable, to: :@diff_collection delegate :diff_options, to: :@diff_collection define_histogram :gitlab_redis_diff_caching_memory_usage_bytes do docstring 'Redis diff caching memory usage by key' buckets [100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000] end define_counter :gitlab_redis_diff_caching_hit do docstring 'Redis diff caching hits' end define_counter :gitlab_redis_diff_caching_miss do docstring 'Redis diff caching misses' end def initialize(diff_collection) @diff_collection = diff_collection end # - Reads from cache # - Assigns DiffFile#highlighted_diff_lines for cached files # def decorate(diff_file) if content = read_file(diff_file) diff_file.highlighted_diff_lines = content.map do |line| Gitlab::Diff::Line.safe_init_from_hash(line) end end end # For every file that isn't already contained in the redis hash, store the # result of #highlighted_diff_lines, then submit the uncached content # to #write_to_redis_hash to submit a single write. This avoids excessive # IO generated by N+1's (1 writing for each highlighted line or file). # def write_if_empty return if cacheable_files.empty? new_cache_content = {} cacheable_files.each do |diff_file| new_cache_content[diff_file.file_path] = diff_file.highlighted_diff_lines.map(&:to_hash) end write_to_redis_hash(new_cache_content) end def clear Gitlab::Redis::Cache.with do |redis| redis.del(key) end end def key strong_memoize(:redis_key) do ['highlighted-diff-files', diffable.cache_key, VERSION, diff_options].join(":") end end private def cacheable_files strong_memoize(:cacheable_files) do diff_files.select { |file| cacheable?(file) && read_file(file).nil? } end end # Given a hash of: # { "file/to/cache" => # [ { line_code: "a5cc2925ca8258af241be7e5b0381edf30266302_19_19", # rich_text: " config/initializers/secret_token.rb\n", # text: " config/initializers/secret_token.rb", # type: nil, # index: 3, # old_pos: 19, # new_pos: 19 } # ] } # # ...it will write/update a Gitlab::Redis hash (HSET) # def write_to_redis_hash(hash) Gitlab::Redis::Cache.with do |redis| redis.pipelined do hash.each do |diff_file_id, highlighted_diff_lines_hash| redis.hset( key, diff_file_id, compose_data(highlighted_diff_lines_hash.to_json) ) end # HSETs have to have their expiration date manually updated # redis.expire(key, EXPIRATION) end record_memory_usage(fetch_memory_usage(redis, key)) end # Subsequent read_file calls would need the latest cache. # clear_memoization(:cached_content) clear_memoization(:cacheable_files) end def record_memory_usage(memory_usage) if memory_usage self.class.gitlab_redis_diff_caching_memory_usage_bytes.observe({}, memory_usage) end end def fetch_memory_usage(redis, key) # Redis versions prior to 4.0.0 do not support memory usage reporting # for a specific key. As of 11-March-2020 we support Redis 3.x, so # need to account for this. We can remove this check once we # officially cease supporting versions <4.0.0. # return if Gem::Version.new(redis.info["redis_version"]) < Gem::Version.new("4") redis.memory("USAGE", key) end def file_paths strong_memoize(:file_paths) do diff_files.collect(&:file_path) end end def read_file(diff_file) cached_content[diff_file.file_path] end def cached_content strong_memoize(:cached_content) { read_cache } end def read_cache return {} unless file_paths.any? results = [] Gitlab::Redis::Cache.with do |redis| results = redis.hmget(key, file_paths) end results.map! do |result| JSON.parse(extract_data(result), symbolize_names: true) unless result.nil? end file_paths.zip(results).to_h end def compose_data(json_data) if ::Feature.enabled?(:gzip_diff_cache, default_enabled: true) # #compress returns ASCII-8BIT, so we need to force the encoding to # UTF-8 before caching it in redis, else we risk encoding mismatch # errors. # ActiveSupport::Gzip.compress(json_data).force_encoding("UTF-8") else json_data end rescue Zlib::GzipFile::Error json_data end def extract_data(data) # Since when we deploy this code, we'll be dealing with an already # populated cache full of data that isn't gzipped, we want to also # check to see if the data is gzipped before we attempt to #decompress # it, thus we check the first 2 bytes for "\x1F\x8B" to confirm it is # a gzipped string. While a non-gzipped string will raise a # Zlib::GzipFile::Error, which we're rescuing, we don't want to count # on rescue for control flow. This check can be removed in the release # after this change is released. # if ::Feature.enabled?(:gzip_diff_cache, default_enabled: true) && data[0..1] == "\x1F\x8B" ActiveSupport::Gzip.decompress(data) else data end rescue Zlib::GzipFile::Error data end def cacheable?(diff_file) diffable.present? && diff_file.text? && diff_file.diffable? end def diff_files # We access raw_diff_files here, as diff_files will attempt to apply the # highlighting code found in this class, leading to a circular # reference. # @diff_collection.raw_diff_files end end end end