1 files changed, 145 insertions, 0 deletions
diff --git a/lib/gitlab/blob_helper.rb b/lib/gitlab/blob_helper.rb
new file mode 100644
index 00000000000..9b3b383b0c8
--- /dev/null
+++ b/lib/gitlab/blob_helper.rb
@@ -0,0 +1,145 @@
+# This has been extracted from https://github.com/github/linguist/blob/master/lib/linguist/blob_helper.rb
+module Gitlab
+  module BlobHelper
+    def extname
+      File.extname(name.to_s)
+    end
+
+    def known_extension?
+      LanguageData.extensions.include?(extname)
+    end
+
+    def viewable?
+      !large? && text?
+    end
+
+    MEGABYTE = 1024 * 1024
+
+    def large?
+      size.to_i > MEGABYTE
+    end
+
+    def binary?
+      # Large blobs aren't even loaded into memory
+      if data.nil?
+        true
+
+      # Treat blank files as text
+      elsif data == ""
+        false
+
+      # Charlock doesn't know what to think
+      elsif encoding.nil?
+        true
+
+      # If Charlock says its binary
+      else
+        detect_encoding[:type] == :binary
+      end
+    end
+
+    def text?
+      !binary?
+    end
+
+    def image?
+      ['.png', '.jpg', '.jpeg', '.gif'].include?(extname.downcase)
+    end
+
+    # Internal: Lookup mime type for extension.
+    #
+    # Returns a MIME::Type
+    # rubocop:disable Gitlab/ModuleWithInstanceVariables
+    def _mime_type
+      if defined? @_mime_type
+        @_mime_type
+      else
+        guesses = ::MIME::Types.type_for(extname.to_s)
+
+        # Prefer text mime types over binary
+        @_mime_type = guesses.detect { |type| type.ascii? } || guesses.first
+      end
+    end
+    # rubocop:enable Gitlab/ModuleWithInstanceVariables
+
+    # Public: Get the actual blob mime type
+    #
+    # Examples
+    #
+    #   # => 'text/plain'
+    #   # => 'text/html'
+    #
+    # Returns a mime type String.
+    def mime_type
+      _mime_type ? _mime_type.to_s : 'text/plain'
+    end
+
+    def binary_mime_type?
+      _mime_type ? _mime_type.binary? : false
+    end
+
+    def lines
+      @lines ||=
+        if viewable? && data
+          # `data` is usually encoded as ASCII-8BIT even when the content has
+          # been detected as a different encoding. However, we are not allowed
+          # to change the encoding of `data` because we've made the implicit
+          # guarantee that each entry in `lines` is encoded the same way as
+          # `data`.
+          #
+          # Instead, we re-encode each possible newline sequence as the
+          # detected encoding, then force them back to the encoding of `data`
+          # (usually a binary encoding like ASCII-8BIT). This means that the
+          # byte sequence will match how newlines are likely encoded in the
+          # file, but we don't have to change the encoding of `data` as far as
+          # Ruby is concerned. This allows us to correctly parse out each line
+          # without changing the encoding of `data`, and
+          # also--importantly--without having to duplicate many (potentially
+          # large) strings.
+          begin
+            data.split(encoded_newlines_re, -1)
+          rescue Encoding::ConverterNotFoundError
+            # The data is not splittable in the detected encoding.  Assume it's
+            # one big line.
+            [data]
+          end
+        else
+          []
+        end
+    end
+
+    def content_type
+      # rubocop:disable Style/MultilineTernaryOperator
+      # rubocop:disable Style/NestedTernaryOperator
+      @content_type ||= binary_mime_type? || binary? ? mime_type :
+                          (encoding ? "text/plain; charset=#{encoding.downcase}" : "text/plain")
+      # rubocop:enable Style/NestedTernaryOperator
+      # rubocop:enable Style/MultilineTernaryOperator
+    end
+
+    def encoded_newlines_re
+      @encoded_newlines_re ||=
+        Regexp.union(["\r\n", "\r", "\n"].map { |nl| nl.encode(ruby_encoding, "ASCII-8BIT").force_encoding(data.encoding) })
+    end
+
+    def ruby_encoding
+      if hash = detect_encoding
+        hash[:ruby_encoding]
+      end
+    end
+
+    def encoding
+      if hash = detect_encoding
+        hash[:encoding]
+      end
+    end
+
+    def detect_encoding
+      @detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data) if data # rubocop:disable Gitlab/ModuleWithInstanceVariables
+    end
+
+    def empty?
+      data.nil? || data == ""
+    end
+  end
+end