summaryrefslogtreecommitdiff
path: root/lib/csv_builder.rb
blob: a54c355396dcb10049c48ea23acc2ef9d59d606d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# frozen_string_literal: true

# Generates CSV when given a collection and a mapping.
#
# Example:
#
#     columns = {
#       'Title' => 'title',
#       'Comment' => 'comment',
#       'Author' => -> (post) { post.author.full_name },
#       'Created At (UTC)' => -> (post) { post.created_at&.strftime('%Y-%m-%d %H:%M:%S') }
#     }
#
#     CsvBuilder.new(@posts, columns).render
#
class CsvBuilder
  # NOTE(review): neither of these two constants is referenced inside this
  # class; presumably they are kept for external callers - confirm before
  # removing.
  DEFAULT_ORDER_BY = 'id'
  DEFAULT_BATCH_SIZE = 1000

  # String values starting with one of these characters get a leading
  # apostrophe in #excel_sanitize (presumably so spreadsheet applications do
  # not evaluate them as formulas).
  PREFIX_REGEX = /\A[=\+\-@;]/.freeze

  # Number of data rows (the header row is not counted) written by the most
  # recent call to #render.
  attr_reader :rows_written

  #
  # * +collection+ - The data collection to be used. It must respond to
  #   +find_each+ and +count+; +each_batch+ is used instead of +find_each+
  #   when it is available and there are associations to preload.
  # * +header_to_value_hash+ - A hash of 'Column Heading' => 'value_method'.
  # * +associations_to_preload+ - An array of associations to preload with
  #   each batch of records.
  #
  # The value method will be called once for each object in the collection, to
  # determine the value for that row. It can either be the name of a method on
  # the object, or a lambda to call passing in the object.
  def initialize(collection, header_to_value_hash, associations_to_preload = [])
    @header_to_value_hash = header_to_value_hash
    @collection = collection
    @truncated = false
    @rows_written = 0
    @associations_to_preload = associations_to_preload
  end

  # Renders the CSV into a temporary file.
  #
  # * +truncate_after_bytes+ - Optional size limit. Writing stops after the
  #   first row that makes the file larger than this many bytes (that row is
  #   kept), and #truncated? becomes true.
  #
  # Without a block the CSV is returned as a string. With a block the open
  # tempfile is yielded and the block's result is returned; the tempfile only
  # exists for the duration of the block.
  def render(truncate_after_bytes = nil)
    Tempfile.open(['csv']) do |tempfile|
      csv = CSV.new(tempfile)

      size_limit_exceeded = -> { truncate_after_bytes && tempfile.size > truncate_after_bytes }
      write_csv(csv, until_condition: size_limit_exceeded)

      if block_given?
        # Push buffered writes to disk so a block that reads the file by path
        # (rather than through this handle) sees everything, including a
        # header-only file where no size check triggered a flush.
        tempfile.flush
        yield tempfile
      else
        tempfile.rewind
        tempfile.read
      end
    end
  end

  # Whether the most recent #render stopped early because of the size limit.
  def truncated?
    @truncated
  end

  # Number of rows a complete export would contain. Falls back to counting the
  # collection when the written row count is not the full picture (output was
  # truncated, or nothing has been written yet).
  def rows_expected
    if truncated? || rows_written == 0
      @collection.count
    else
      rows_written
    end
  end

  # Summary of the most recent render as a hash with the keys +:truncated+,
  # +:rows_written+ and +:rows_expected+.
  def status
    {
      truncated: truncated?,
      rows_written: rows_written,
      rows_expected: rows_expected
    }
  end

  protected

  # Yields every object of the collection to +block+. When there are
  # associations to preload and the collection supports +each_batch+, the
  # associations are preloaded for each batch; otherwise +find_each+ is used.
  def each(&block)
    if @associations_to_preload.present? && @collection.respond_to?(:each_batch)
      @collection.each_batch(order_hint: :created_at) do |relation|
        relation.preload(@associations_to_preload).order(:id).each(&block) # rubocop:disable CodeReuse/ActiveRecord
      end
    else
      @collection.find_each(&block) # rubocop: disable CodeReuse/ActiveRecord
    end
  end

  private

  # Column headings, in the order given by the mapping.
  def headers
    @headers ||= @header_to_value_hash.keys
  end

  # Value methods (method names or callables), in the same order as #headers.
  def attributes
    @attributes ||= @header_to_value_hash.values
  end

  # Builds the array of sanitized cell values for one object.
  def row(object)
    attributes.map do |attribute|
      if attribute.respond_to?(:call)
        excel_sanitize(attribute.call(object))
      else
        excel_sanitize(object.public_send(attribute)) # rubocop:disable GitlabSecurity/PublicSend
      end
    end
  end

  # Writes the header row followed by one row per object. After each row
  # +until_condition+ is called; a truthy result marks the output as truncated
  # and stops writing.
  def write_csv(csv, until_condition:)
    # Start every render from a clean slate so that calling #render more than
    # once on the same builder neither double-counts rows nor keeps reporting
    # a truncation from an earlier run.
    @rows_written = 0
    @truncated = false

    csv << headers

    each do |object|
      csv << row(object)

      @rows_written += 1

      if until_condition.call
        @truncated = true
        break
      end
    end
  end

  # Prefixes string values that start with a PREFIX_REGEX character with an
  # apostrophe. +nil+ yields +nil+; every other value is returned unchanged.
  def excel_sanitize(line)
    return if line.nil?
    return line unless line.is_a?(String) && line.match?(PREFIX_REGEX)

    ["'", line].join
  end
end