summaryrefslogtreecommitdiff
path: root/lib/gitlab/metrics/obfuscated_sql.rb
blob: 481aca56efba43ebe9ba2ee21c5b73bedd0dbd18 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
module Gitlab
  module Metrics
    # Class for producing SQL queries with sensitive data stripped out.
    #
    # Literal values (numbers, quoted strings, regexps) are replaced with a "?"
    # placeholder, and runs of comma separated placeholders are collapsed into
    # a single "N values" marker.
    class ObfuscatedSQL
      # Literals that are replaced with a placeholder on every database.
      #
      # Quoted strings are matched with a negated character class instead of
      # ".+?" so that empty strings ('') and strings containing newlines are
      # obfuscated as well. With ".+?" an empty string caused the match to run
      # on until the opening quote of the next string, leaving the contents of
      # that next string in the output.
      REPLACEMENT = /
        \d+(\.\d+)?      # integers, floats
        | '[^']*'        # single quoted strings, empty or multi-line too
        | \/.+?(?<!\\)\/ # regexps (including escaped slashes)
      /x

      # Additional literals that are only replaced when using MySQL, where
      # double quotes can delimit strings.
      MYSQL_REPLACEMENTS = /
        "[^"]*" # double quoted strings, empty or multi-line too
      /x

      # Combined pattern used on MySQL, built once instead of on every call.
      MYSQL_REGEX = Regexp.union(REPLACEMENT, MYSQL_REPLACEMENTS)

      # Regex to replace consecutive placeholders with a single one indicating
      # the length. This can be useful when a "IN" statement uses thousands of
      # IDs (storing this would just be a waste of space).
      CONSECUTIVE = /(\?(\s*,\s*)?){2,}/

      # sql - The raw SQL query as a String.
      def initialize(sql)
        @sql = sql
      end

      # Returns a new, obfuscated SQL query.
      #
      # The original String passed to the constructor is not modified.
      def to_s
        regex = Gitlab::Database.mysql? ? MYSQL_REGEX : REPLACEMENT

        # The number of values in a run equals the number of separating commas
        # plus one.
        sql = @sql.gsub(regex, '?').gsub(CONSECUTIVE) do |match|
          "#{match.count(',') + 1} values"
        end

        # InfluxDB escapes double quotes upon output, so lets get rid of them
        # whenever we can.
        sql = sql.delete('"') if Gitlab::Database.postgresql?

        sql
      end
    end
  end
end