summaryrefslogtreecommitdiff
path: root/lib/gitlab/github_import/markdown_text.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/github_import/markdown_text.rb')
-rw-r--r--lib/gitlab/github_import/markdown_text.rb50
1 files changed, 21 insertions, 29 deletions
diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb
index bf2856bc77f..2424b3e8c1f 100644
--- a/lib/gitlab/github_import/markdown_text.rb
+++ b/lib/gitlab/github_import/markdown_text.rb
@@ -8,23 +8,12 @@ module Gitlab
class MarkdownText
include Gitlab::EncodingHelper
- ISSUE_REF_MATCHER = '%{github_url}/%{import_source}/issues'
- PULL_REF_MATCHER = '%{github_url}/%{import_source}/pull'
-
- MEDIA_TYPES = %w[gif jpeg jpg mov mp4 png svg webm].freeze
- DOC_TYPES = %w[
- csv docx fodg fodp fods fodt gz log md odf odg odp ods
- odt pdf pptx tgz txt xls xlsx zip
- ].freeze
- ALL_TYPES = (MEDIA_TYPES + DOC_TYPES).freeze
-
# On github.com there is a base URL for docs and a CDN URL for media.
# On GitHub EE, as far as we know, there are no CDN URLs and media is hosted on the base URL.
- # To no escape the escaping symbol we use single quotes instead of double with interpolation.
- # rubocop:disable Style/StringConcatenation
- CDN_URL_MATCHER = '(!\[.+\]\(%{github_media_cdn}/\d+/(\w|-)+\.(' + MEDIA_TYPES.join('|') + ')\))'
- BASE_URL_MATCHER = '(\[.+\]\(%{github_url}/.+/.+/files/\d+/.+\.(' + ALL_TYPES.join('|') + ')\))'
- # rubocop:enable Style/StringConcatenation
+ GITHUB_MEDIA_CDN = 'https://user-images.githubusercontent.com'
+
+ ISSUE_REF_MATCHER = '%{github_url}/%{import_source}/issues'
+ PULL_REF_MATCHER = '%{github_url}/%{import_source}/pull'
class << self
def format(*args)
@@ -42,20 +31,6 @@ module Gitlab
.gsub(pull_ref_matcher, url_helpers.project_merge_requests_url(project))
end
- def fetch_attachment_urls(text)
- cdn_url_matcher = CDN_URL_MATCHER % { github_media_cdn: Regexp.escape(github_media_cdn) }
- doc_url_matcher = BASE_URL_MATCHER % { github_url: Regexp.escape(github_url) }
-
- text.scan(Regexp.new(cdn_url_matcher)).map(&:first) +
- text.scan(Regexp.new(doc_url_matcher)).map(&:first)
- end
-
- private
-
- def github_media_cdn
- 'https://user-images.githubusercontent.com'
- end
-
# Returns the GitHub domain without a trailing slash
def github_url
oauth_config = Gitlab::Auth::OAuth::Provider.config_for('github') || {}
@@ -63,6 +38,23 @@ module Gitlab
url = url.chop if url.end_with?('/')
url
end
+
+ def fetch_attachments(text) # Collects the attachments found while walking the Markdown in `text`.
+ attachments = []
+ doc = CommonMarker.render_doc(text) # parse `text` with CommonMarker so its nodes can be walked
+
+ doc.walk do |node| # run extraction on every node the walk yields
+ attachment = extract_attachment(node)
+ attachments << attachment if attachment # keep only truthy results; nil/false means nothing was extracted
+ end
+ attachments # Array of extract_attachment results, in walk order (empty when none were found)
+ end
+
+ private
+
+ def extract_attachment(node) # Thin wrapper: delegates to Markdown::Attachment.from_markdown for one node.
+ ::Gitlab::GithubImport::Markdown::Attachment.from_markdown(node) # fetch_attachments drops a falsy return value
+ end
end
# text - The Markdown text as a String.